From 085937b30558f76138219332214b498f5d16b3a5 Mon Sep 17 00:00:00 2001
From: Xavier Roche
Date: Tue, 16 Jun 2026 09:18:47 +0200
Subject: [PATCH] Fix get_httptype contenttype overflow; bound the mime/normalize APIs

get_httptype() took the caller buffer as a bare char* and raw-strcpy'd the
MIME string into it, so crawling a URL ending in .docx/.pptx/.xlsx (whose
MIME types in the hts_mime table reach 73 chars) overflowed the 64-byte
htsblk.contenttype that the htsback and htslib callers pass, corrupting the
adjacent struct fields. Remotely triggerable.

* Widen htsblk contenttype/charset/contentencoding to HTS_MIMETYPE_SIZE
  (128, a new named constant sized to hold the longest registered MIME
  type). This changes the installed htsblk layout, so bump the library
  soname (VERSION_INFO 2:49:0 -> 3:0:0).
* Add bounded get_httptype_sized(), guess_httptype_sized() and
  adr_normalized_sized() that take the destination size and use
  strlcpybuff/snprintf. The old get_httptype(), guess_httptype() and
  adr_normalized() stay as wrappers, now marked HTS_DEPRECATED (portable:
  GCC/Clang attribute, MSVC __declspec, nothing elsewhere). Internal callers
  pass the real buffer size; the deprecated wrappers bound to the implicit
  contract their old callers relied on (HTS_MIMETYPE_SIZE for the mime
  buffer, HTS_URLMAXSIZE*2 for the URL buffer) rather than staying
  unbounded, so they abort on overflow instead of silently corrupting
  memory.
* get_httptype_sized(), guess_httptype_sized() and give_mimext() now report
  whether a type/extension was written; callers check the result and bail
  rather than use a possibly-empty buffer (e.g. the is_hypertext_mime
  helpers). A user "--assume cgi=" rule (empty value) matches but writes
  nothing, so get_httptype_sized() returns the buffer's emptiness, matching
  the old callers' strnotempty(s) test rather than reporting a bogus
  recognized type.
* -#7 basic_selftests: a .pptx MIME (73 chars) is stored whole into a real htsblk.contenttype (a [64] field makes the bounded copy abort); give_mimext and get_httptype_sized return values; the octet-stream fallback; the empty --assume rule; plus fil_normalized "//"-in-query preservation and cut_path trailing-slash / single-char branches. Signed-off-by: Xavier Roche --- configure | 4 +- configure.ac | 4 +- src/htsback.c | 5 ++- src/htscore.c | 4 +- src/htscoremain.c | 91 ++++++++++++++++++++++++++++++++++++------- src/htsglobal.h | 23 ++++++++++- src/htsjava.c | 5 +-- src/htslib.c | 84 ++++++++++++++++++++++++++------------- src/htslib.h | 11 ++++-- src/htsname.c | 28 +++++++------ src/htsopt.h | 6 +-- src/htsparse.c | 9 +++-- src/httrack-library.h | 9 +++++ 13 files changed, 207 insertions(+), 76 deletions(-) diff --git a/configure b/configure index e6d3291..ded4b8c 100755 --- a/configure +++ b/configure @@ -3685,7 +3685,9 @@ fi -VERSION_INFO="2:49:0" +# 3:0:0: htsblk layout changed (contenttype/charset/contentencoding widened to +# 128), an incompatible ABI break, so bump current and reset revision/age. +VERSION_INFO="3:0:0" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5 printf %s "checking whether to enable maintainer-specific portions of Makefiles... " >&6; } diff --git a/configure.ac b/configure.ac index 17c94c4..ca969d8 100644 --- a/configure.ac +++ b/configure.ac @@ -29,7 +29,9 @@ AC_CONFIG_SRCDIR(src/httrack.c) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_HEADERS(config.h) AM_INIT_AUTOMAKE([subdir-objects]) -VERSION_INFO="2:49:0" +# 3:0:0: htsblk layout changed (contenttype/charset/contentencoding widened to +# 128), an incompatible ABI break, so bump current and reset revision/age. 
+VERSION_INFO="3:0:0" AM_MAINTAINER_MODE AC_USE_SYSTEM_EXTENSIONS diff --git a/src/htsback.c b/src/htsback.c index e98494b..3394f07 100644 --- a/src/htsback.c +++ b/src/htsback.c @@ -3584,8 +3584,9 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache, back[i].r.is_file = 1; back[i].r.totalsize = back[i].r.size = fsize_utf8(back[i].url_sav); - get_httptype(opt, back[i].r.contenttype, - back[i].url_sav, 1); + get_httptype_sized(opt, back[i].r.contenttype, + sizeof(back[i].r.contenttype), + back[i].url_sav, 1); hts_log_print(opt, LOG_DEBUG, "Not-modified status without cache guessed: %s%s", back[i].url_adr, back[i].url_fil); diff --git a/src/htscore.c b/src/htscore.c index 51105b3..8e4c64f 100644 --- a/src/htscore.c +++ b/src/htscore.c @@ -1734,7 +1734,7 @@ int httpmirror(char *url1, httrackp * opt) { { char buff[256]; - guess_httptype(opt, buff, urlfil()); + guess_httptype_sized(opt, buff, sizeof(buff), urlfil()); if (strcmp(buff, "image/gif") == 0) create_gif_warning = 1; } @@ -3150,7 +3150,7 @@ static void postprocess_file(httrackp * opt, const char *save, const char *adr, /* CID */ make_content_id(adr, fil, cid, sizeof(cid)); - guess_httptype(opt, mimebuff, save); + guess_httptype_sized(opt, mimebuff, sizeof(mimebuff), save); fprintf(opt->state.mimefp, "--%s\r\n", StringBuff(opt->state.mimemid)); /*if (first) diff --git a/src/htscoremain.c b/src/htscoremain.c index 3d97f22..80ad7e4 100644 --- a/src/htscoremain.c +++ b/src/htscoremain.c @@ -295,16 +295,19 @@ static void basic_selftests(void) { assertf(strcmp(fil_normalized("/p?b=2&a=1&c=3", norm), "/p?a=1&b=2&c=3") == 0); assertf(strcmp(fil_normalized("/a//b", norm), "/a/b") == 0); + // "//" is collapsed only before the query; inside the query it is kept + assertf(strcmp(fil_normalized("/a//b?x=c//d", norm), "/a/b?x=c//d") == 0); } // give_mimext(): mime type -> file extension, bounded into the caller buffer. + // Returns 1 when an extension was written, 0 otherwise. 
{ char ext[16]; - give_mimext(ext, sizeof(ext), "image/gif"); + assertf(give_mimext(ext, sizeof(ext), "image/gif") == 1); assertf(strcmp(ext, "gif") == 0); - give_mimext(ext, sizeof(ext), "text/html"); + assertf(give_mimext(ext, sizeof(ext), "text/html") == 1); assertf(strcmp(ext, "html") == 0); - give_mimext(ext, sizeof(ext), "no/such-mime-type"); + assertf(give_mimext(ext, sizeof(ext), "no/such-mime-type") == 0); assertf(ext[0] == '\0'); } // convtolower(): lower-cases into the caller buffer (bounded by its size). @@ -317,13 +320,78 @@ static void basic_selftests(void) { // cut_path(): splits a path into directory (with trailing '/') and basename, // each bounded by its buffer size. { - char full[] = "/dir/sub/file.html"; char path[256]; char pname[256]; - cut_path(full, path, sizeof(path), pname, sizeof(pname)); - assertf(strcmp(path, "/dir/sub/") == 0); - assertf(strcmp(pname, "file.html") == 0); + { + char full[] = "/dir/sub/file.html"; + + cut_path(full, path, sizeof(path), pname, sizeof(pname)); + assertf(strcmp(path, "/dir/sub/") == 0); + assertf(strcmp(pname, "file.html") == 0); + } + { // a trailing slash is trimmed before the split + char full[] = "/dir/sub/"; + + cut_path(full, path, sizeof(path), pname, sizeof(pname)); + assertf(strcmp(path, "/dir/") == 0); + assertf(strcmp(pname, "sub") == 0); + } + { // a path of length <= 1 yields empty results + char full[] = "/"; + + cut_path(full, path, sizeof(path), pname, sizeof(pname)); + assertf(path[0] == '\0' && pname[0] == '\0'); + } + } + // get_httptype_sized(): a long MIME type (Office OOXML reaches 73 chars) is + // written whole into a contenttype-sized buffer; returns 1 on a match, 0 when + // flag==0 and nothing matched. Regression for the old contenttype[64] + // overflow. 
+ { + httrackp *opt = hts_create_opt(); + htsblk r; // write into the real struct field, not a stand-in + + assertf(opt != NULL); + // a long MIME (Office OOXML reaches 73 chars) must fit htsblk.contenttype + // whole: a [64] field would make this bounded copy abort. + assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype), + "deck.pptx", 0) == 1); + assertf(strcmp(r.contenttype, + "application/vnd.openxmlformats-officedocument." + "presentationml.presentation") == 0); + assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype), + "x.gif", 0) == 1); + assertf(strcmp(r.contenttype, "image/gif") == 0); + // no extension and flag==0: nothing written, returns 0 + assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype), + "noextfile", 0) == 0); + assertf(r.contenttype[0] == '\0'); + // no extension and flag==1: octet-stream fallback, returns 1 + assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype), + "noextfile", 1) == 1); + assertf(strcmp(r.contenttype, "application/octet-stream") == 0); + // a user --assume rule with an empty value matches but writes nothing: + // get_userhttptype returns 1 with the buffer empty, so get_httptype_sized + // must still report 0 (callers test the return like the old + // strnotempty(s)). + StringCopy(opt->mimedefs, "\ncgi=\n"); + assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype), + "/x.cgi", 0) == 0); + assertf(r.contenttype[0] == '\0'); + StringCopy(opt->mimedefs, "\ncgi=text/html\n"); + assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype), + "/x.cgi", 0) == 1); + assertf(strcmp(r.contenttype, "text/html") == 0); + hts_free_opt(opt); + } + // adr_normalized_sized(): bounded host normalization (passthrough when + // already normal). 
+ { + char n[HTS_URLMAXSIZE]; + + assertf(strcmp(adr_normalized_sized("example.com", n, sizeof(n)), + "example.com") == 0); } } @@ -2638,15 +2706,12 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) { // initialiser mimedefs //get_userhttptype(opt,1,opt->mimedefs,NULL); // check - mime[0] = '\0'; - get_httptype(opt, mime, argv[na + 1], 0); - if (mime[0] != '\0') { + if (get_httptype_sized(opt, mime, sizeof(mime), argv[na + 1], + 0)) { char ext[256]; printf("%s is '%s'\n", argv[na + 1], mime); - ext[0] = '\0'; - give_mimext(ext, sizeof(ext), mime); - if (ext[0]) { + if (give_mimext(ext, sizeof(ext), mime)) { printf("and its local type is '.%s'\n", ext); } } else { diff --git a/src/htsglobal.h b/src/htsglobal.h index 24683c5..46546fc 100644 --- a/src/htsglobal.h +++ b/src/htsglobal.h @@ -197,10 +197,13 @@ Please visit our Website: http://www.httrack.com #endif -/* Taille max d'une URL */ +/* Max URL length */ #define HTS_URLMAXSIZE 1024 -/* Taille max ligne de commande (>=HTS_URLMAXSIZE*2) */ +/* Max command-line length (>=HTS_URLMAXSIZE*2) */ #define HTS_CDLMAXSIZE 1024 +/* MIME-type buffer contract (htsblk.contenttype/charset/contentencoding); holds + the longest registered MIME type, the Office OOXML ones reaching 73 chars */ +#define HTS_MIMETYPE_SIZE 128 /* Copyright (C) 1998 Xavier Roche and other contributors */ #define HTTRACK_AFF_AUTHORS "[XR&CO'2014]" @@ -250,6 +253,22 @@ Please visit our Website: http://www.httrack.com #endif #endif +/** + * Mark a function deprecated, with a message pointing at the replacement. + * Placed before the declaration so both the GCC/Clang attribute and the MSVC + * __declspec sit in a position both accept. Degrades to nothing elsewhere. 
+ */ +#if defined(__GNUC__) && \ + (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) +#define HTS_DEPRECATED(msg) __attribute__((deprecated(msg))) +#elif defined(__GNUC__) +#define HTS_DEPRECATED(msg) __attribute__((deprecated)) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +#define HTS_DEPRECATED(msg) __declspec(deprecated(msg)) +#else +#define HTS_DEPRECATED(msg) +#endif + #ifndef HTS_LONGLONG #ifdef HTS_NO_64_BIT #define HTS_LONGLONG 0 diff --git a/src/htsjava.c b/src/htsjava.c index 7c1831b..8e7f42b 100644 --- a/src/htsjava.c +++ b/src/htsjava.c @@ -472,9 +472,8 @@ static int tris(httrackp * opt, char *buffer) { { char type[256]; - type[0] = '\0'; - get_httptype(opt, type, buffer, 0); - if (strnotempty(type)) // type reconnu! + if (get_httptype_sized(opt, type, sizeof(type), buffer, + 0)) // recognized type return 1; // ajout RX 05/2001 else if (is_dyntype(get_ext(catbuff, sizeof(catbuff), buffer))) // asp,cgi... diff --git a/src/htslib.c b/src/htslib.c index 2d6c01e..ae454f5 100644 --- a/src/htslib.c +++ b/src/htslib.c @@ -754,7 +754,8 @@ T_SOC http_xfopen(httrackp * opt, int mode, int treat, int waitconnect, if (soc != INVALID_SOCKET) { retour->statuscode = HTTP_OK; // OK strcpybuff(retour->msg, "OK"); - guess_httptype(opt, retour->contenttype, fil); + guess_httptype_sized(opt, retour->contenttype, + sizeof(retour->contenttype), fil); } else if (strnotempty(retour->msg) == 0) strcpybuff(retour->msg, "Unable to open local file"); return soc; // renvoyer @@ -3466,12 +3467,19 @@ HTSEXT_API char *fil_normalized(const char *source, char *dest) { } #define endwith(a) ( (len >= (sizeof(a)-1)) ? 
( strncmp(dest, a+len-(sizeof(a)-1), sizeof(a)-1) == 0 ) : 0 ); -HTSEXT_API char *adr_normalized(const char *source, char *dest) { +HTSEXT_API char *adr_normalized_sized(const char *source, char *dest, + size_t destsize) { /* not yet too aggressive (no com<->net<->org checkings) */ - strcpybuff(dest, jump_normalized_const(source)); + strlcpybuff(dest, jump_normalized_const(source), destsize); return dest; } +// deprecated variant; kept for ABI compatibility. Bounds to the implicit +// contract the old callers relied on (an HTS_URLMAXSIZE*2 URL buffer). +HTSEXT_API char *adr_normalized(const char *source, char *dest) { + return adr_normalized_sized(source, dest, HTS_URLMAXSIZE * 2); +} + #undef endwith // find port (:80) or NULL if not found @@ -3921,22 +3929,34 @@ void hts_replace(char *s, char from, char to) { } } -// deviner type d'un fichier local.. -// ex: fil="toto.gif" -> s="image/gif" -void guess_httptype(httrackp * opt, char *s, const char *fil) { - get_httptype(opt, s, fil, 1); +// guess a local file's mime type (e.g. fil="toto.gif" -> s="image/gif") +// returns 1 if a type was written to s, 0 otherwise +int guess_httptype_sized(httrackp *opt, char *s, size_t ssize, + const char *fil) { + return get_httptype_sized(opt, s, ssize, fil, 1); } -// idem -// flag: 1 si toujours renvoyer un type -HTSEXT_API void get_httptype(httrackp * opt, char *s, const char *fil, int flag) { - // userdef overrides get_httptype +// deprecated variant; kept for ABI compatibility. Bounds to the implicit +// contract the old callers relied on (a contenttype-sized buffer). +void guess_httptype(httrackp * opt, char *s, const char *fil) { + (void) get_httptype_sized(opt, s, HTS_MIMETYPE_SIZE, fil, 1); +} + +// write the mime type for fil into s (capacity ssize) +// flag: 1 to always return a type (the "application/..." 
/ octet-stream +// fallback) returns 1 if a type was written to s, 0 otherwise +HTSEXT_API int get_httptype_sized(httrackp *opt, char *s, size_t ssize, + const char *fil, int flag) { + // userdef overrides get_httptype (a rule with an empty value, e.g. "--assume + // cgi=", matches but writes nothing: report it as "no type" like the old + // code, whose callers tested strnotempty(s)) if (get_userhttptype(opt, s, fil)) { - return; + return s[0] != '\0'; } // regular tests if (ishtml(opt, fil) == 1) { - strcpybuff(s, "text/html"); + strlcpybuff(s, "text/html", ssize); + return 1; } else { /* Check html -> text/html */ const char *a = fil + strlen(fil) - 1; @@ -3949,21 +3969,33 @@ HTSEXT_API void get_httptype(httrackp * opt, char *s, const char *fil, int flag) a++; while(strnotempty(hts_mime[j][1])) { if (strfield2(hts_mime[j][1], a)) { - if (hts_mime[j][0][0] != '*') { // Une correspondance existe - strcpybuff(s, hts_mime[j][0]); - return; + if (hts_mime[j][0][0] != '*') { // a match exists + strlcpybuff(s, hts_mime[j][0], ssize); + return 1; } } j++; } - if (flag) - sprintf(s, "application/%s", a); + if (flag) { + snprintf(s, ssize, "application/%s", a); + return 1; + } } else { - if (flag) - strcpybuff(s, "application/octet-stream"); + if (flag) { + strlcpybuff(s, "application/octet-stream", ssize); + return 1; + } } } + return 0; +} + +// deprecated variant; kept for ABI compatibility. Bounds to the implicit +// contract the old callers relied on (a contenttype-sized buffer). +HTSEXT_API void get_httptype(httrackp *opt, char *s, const char *fil, + int flag) { + (void) get_httptype_sized(opt, s, HTS_MIMETYPE_SIZE, fil, flag); } // get type of fil (php) @@ -4073,9 +4105,9 @@ int get_userhttptype(httrackp * opt, char *s, const char *fil) { return 0; } -// renvoyer extesion d'un type mime.. -// ex: "image/gif" -> gif -void give_mimext(char *s, size_t ssize, const char *st) { +// give the file extension for a mime type (e.g. 
"image/gif" -> "gif") +// returns 1 if an extension was found (and written to s), 0 otherwise +int give_mimext(char *s, size_t ssize, const char *st) { int ok = 0; int j = 0; @@ -4110,6 +4142,7 @@ void give_mimext(char *s, size_t ssize, const char *st) { } } } + return ok; } // extension connue?.. @@ -4207,9 +4240,8 @@ int may_bogus_multiple(httrackp * opt, const char *mime, const char *filename) { if (strfield2(hts_mime_bogus_multiple[j], mime)) { /* found mime type in suspicious list */ char ext[64]; - ext[0] = '\0'; - give_mimext(ext, sizeof(ext), mime); - if (ext[0] != 0) { /* we have an extension for that */ + if (give_mimext(ext, sizeof(ext), + mime)) { /* we have an extension for that */ const size_t ext_size = strlen(ext); const char *file = strrchr(filename, '/'); /* fetch terminal filename */ diff --git a/src/htslib.h b/src/htslib.h index 5736b75..927497b 100644 --- a/src/htslib.h +++ b/src/htslib.h @@ -252,7 +252,7 @@ int ishtml_ext(const char *a); int ishttperror(int err); int get_userhttptype(httrackp * opt, char *s, const char *fil); -void give_mimext(char *s, size_t ssize, const char *st); +int give_mimext(char *s, size_t ssize, const char *st); int may_bogus_multiple(httrackp * opt, const char *mime, const char *filename); int may_unknown2(httrackp * opt, const char *mime, const char *filename); @@ -500,7 +500,8 @@ HTS_STATIC int is_hypertext_mime(httrackp * opt, const char *mime, char guessed[256]; guessed[0] = '\0'; - guess_httptype(opt, guessed, file); + if (!guess_httptype_sized(opt, guessed, sizeof(guessed), file)) + return 0; return is_hypertext_mime__(guessed); } return 0; @@ -515,7 +516,8 @@ HTS_STATIC int may_be_hypertext_mime(httrackp * opt, const char *mime, char guessed[256]; guessed[0] = '\0'; - guess_httptype(opt, guessed, file); + if (!guess_httptype_sized(opt, guessed, sizeof(guessed), file)) + return 0; return may_be_hypertext_mime__(guessed); } return 0; @@ -530,7 +532,8 @@ HTS_STATIC int compare_mime(httrackp * opt, const char 
*mime, const char *file, char guessed[256]; guessed[0] = '\0'; - guess_httptype(opt, guessed, file); + if (!guess_httptype_sized(opt, guessed, sizeof(guessed), file)) + return 0; return strfield2(guessed, reference); } return 0; diff --git a/src/htsname.c b/src/htsname.c index 22beba7..339b058 100644 --- a/src/htsname.c +++ b/src/htsname.c @@ -200,7 +200,7 @@ int url_savename(lien_adrfilsave *const afs, // foo.com/bar//foobar -> foo.com/bar/foobar if (opt->urlhack) { // copy of adr (without protocol), used for lookups (see urlhack) - normadr = adr_normalized(adr, normadr_); + normadr = adr_normalized_sized(adr, normadr_, sizeof(normadr_)); normfil = fil_normalized(fil_complete, normfil_); } else { if (link_has_authority(adr_complete)) { // https or other protocols : in "http/" subfolder @@ -344,8 +344,7 @@ int url_savename(lien_adrfilsave *const afs, mime[0] = ext[0] = '\0'; get_userhttptype(opt, mime, fil); if (strnotempty(mime)) { - give_mimext(ext, sizeof(ext), mime); - if (strnotempty(ext)) { + if (give_mimext(ext, sizeof(ext), mime)) { ext_chg = 1; } } @@ -378,8 +377,8 @@ int url_savename(lien_adrfilsave *const afs, ext_chg = 2; /* change filename */ strcpybuff(ext, r.cdispo); } else if (!may_unknown2(opt, r.contenttype, fil)) { // on peut patcher à priori? - give_mimext(s, sizeof(s), r.contenttype); // get extension - if (strnotempty(s) > 0) { // on a reconnu l'extension + if (give_mimext(s, sizeof(s), + r.contenttype)) { // recognized extension ext_chg = 1; strcpybuff(ext, s); } @@ -403,8 +402,7 @@ int url_savename(lien_adrfilsave *const afs, mime[0] = ext[0] = '\0'; get_userhttptype(opt, mime, fil); if (strnotempty(mime)) { - give_mimext(ext, sizeof(ext), mime); - if (strnotempty(ext)) { + if (give_mimext(ext, sizeof(ext), mime)) { ext_chg = 1; } } @@ -420,10 +418,9 @@ int url_savename(lien_adrfilsave *const afs, strcpybuff(ext, headers->r.cdispo); } else if (!may_unknown2(opt, headers->r.contenttype, headers->url_fil)) { // on peut patcher à priori? 
(pas interdit ou pas de type) char s[16]; - s[0] = '\0'; - give_mimext(s, sizeof(s), - headers->r.contenttype); // get extension - if (strnotempty(s) > 0) { // on a reconnu l'extension + if (give_mimext( + s, sizeof(s), + headers->r.contenttype)) { // recognized extension ext_chg = 1; strcpybuff(ext, s); } @@ -438,7 +435,8 @@ int url_savename(lien_adrfilsave *const afs, char mime_from_file[128]; mime_from_file[0] = 0; - get_httptype(opt, mime_from_file, fil, 1); + get_httptype_sized(opt, mime_from_file, sizeof(mime_from_file), + fil, 1); if (!strnotempty(mime_from_file) || strcasecmp(mime_type, mime_from_file) != 0) { /* different mime for this type */ /* type change not forbidden (or no extension at all) */ if (!may_unknown2(opt, mime_type, fil)) { @@ -647,9 +645,9 @@ int url_savename(lien_adrfilsave *const afs, ext_chg = 2; /* change filename */ strcpybuff(ext, back[b].r.cdispo); } else if (!may_unknown2(opt, back[b].r.contenttype, back[b].url_fil)) { // on peut patcher à priori? (pas interdit ou pas de type) - give_mimext(s, sizeof(s), - back[b].r.contenttype); // get extension - if (strnotempty(s) > 0) { // on a reconnu l'extension + if (give_mimext( + s, sizeof(s), + back[b].r.contenttype)) { // recognized extension ext_chg = 1; strcpybuff(ext, s); } diff --git a/src/htsopt.h b/src/htsopt.h index ad0a340..ed4438e 100644 --- a/src/htsopt.h +++ b/src/htsopt.h @@ -499,9 +499,9 @@ struct htsblk { FILE *out; // écriture directe sur disque (si is_write=1) LLint size; // taille fichier char msg[80]; // message éventuel si échec ("\0"=non précisé) - char contenttype[64]; // content-type ("text/html" par exemple) - char charset[64]; // charset ("iso-8859-1" par exemple) - char contentencoding[64]; // content-encoding ("gzip" par exemple) + char contenttype[HTS_MIMETYPE_SIZE]; // content-type (e.g. "text/html") + char charset[HTS_MIMETYPE_SIZE]; // charset (e.g. "iso-8859-1") + char contentencoding[HTS_MIMETYPE_SIZE]; // content-encoding (e.g. 
"gzip") char *location; // on copie dedans éventuellement la véritable 'location' LLint totalsize; // taille totale à télécharger (-1=inconnue) short int is_file; // ce n'est pas une socket mais un descripteur de fichier si 1 diff --git a/src/htsparse.c b/src/htsparse.c index 41f6adb..fbb879a 100644 --- a/src/htsparse.c +++ b/src/htsparse.c @@ -1649,8 +1649,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // Prendre si extension reconnue if (!url_ok) { - get_httptype(opt, type, tempo, 0); - if (strnotempty(type)) // type reconnu! + if (get_httptype_sized(opt, type, + sizeof(type), tempo, + 0)) // recognized type url_ok = 1; else if (is_dyntype(get_ext(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), tempo))) // reconnu php,cgi,asp.. url_ok = 1; @@ -3507,9 +3508,9 @@ int hts_mirror_check_moved(htsmoduleStruct * str, char BIGSTK pn_adr[HTS_URLMAXSIZE * 2], pn_fil[HTS_URLMAXSIZE * 2]; n_adr[0] = n_fil[0] = '\0'; - (void) adr_normalized(moved->adr, n_adr); + (void) adr_normalized_sized(moved->adr, n_adr, sizeof(n_adr)); (void) fil_normalized(moved->fil, n_fil); - (void) adr_normalized(urladr(), pn_adr); + (void) adr_normalized_sized(urladr(), pn_adr, sizeof(pn_adr)); (void) fil_normalized(urlfil(), pn_fil); if (strcasecmp(n_adr, pn_adr) == 0 && strcasecmp(n_fil, pn_fil) == 0) { diff --git a/src/httrack-library.h b/src/httrack-library.h index b8f2050..e1b7ef1 100644 --- a/src/httrack-library.h +++ b/src/httrack-library.h @@ -207,6 +207,9 @@ HTSEXT_API const char *jump_normalized_const(const char *); HTSEXT_API char *jump_toport(char *); HTSEXT_API const char *jump_toport_const(const char *); HTSEXT_API char *fil_normalized(const char *source, char *dest); +HTSEXT_API char *adr_normalized_sized(const char *source, char *dest, + size_t destsize); +HTS_DEPRECATED("use adr_normalized_sized(source, dest, destsize)") HTSEXT_API char *adr_normalized(const char *source, char *dest); HTSEXT_API const char *hts_rootdir(char *file); @@ -244,6 +247,9 @@ 
HTSEXT_API char *unescape_http_unharm(char *const catbuff, const size_t size, co HTSEXT_API char *antislash_unescaped(char *catbuff, const char *s); HTSEXT_API void escape_remove_control(char *s); +HTSEXT_API int get_httptype_sized(httrackp *opt, char *s, size_t ssize, + const char *fil, int flag); +HTS_DEPRECATED("use get_httptype_sized(opt, s, ssize, fil, flag)") HTSEXT_API void get_httptype(httrackp * opt, char *s, const char *fil, int flag); HTSEXT_API int is_knowntype(httrackp * opt, const char *fil); @@ -251,6 +257,9 @@ HTSEXT_API int is_userknowntype(httrackp * opt, const char *fil); HTSEXT_API int is_dyntype(const char *fil); HTSEXT_API const char *get_ext(char *catbuff, size_t size, const char *fil); HTSEXT_API int may_unknown(httrackp * opt, const char *st); +HTSEXT_API int guess_httptype_sized(httrackp *opt, char *s, size_t ssize, + const char *fil); +HTS_DEPRECATED("use guess_httptype_sized(opt, s, ssize, fil)") HTSEXT_API void guess_httptype(httrackp * opt, char *s, const char *fil); /* Ugly string tools */