Merge pull request #365 from xroche/cleanup/htslib-bounds

Bound htslib.c pointer-destination buffer writes (batch 9)
2026-06-16 07:13:45 +03:00 · 2026-06-16 03:54:38 +02:00 · 2026-06-16 03:48:52 +02:00 · 2026-06-15 17:04:09 +02:00 · 2026-06-15 14:46:04 +02:00 · 2026-06-15 10:28:29 +02:00
12 changed files with 184 additions and 83 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -171,8 +171,16 @@ jobs:
        # Leaks at exit are out of scope (the CLI frees little on the way out);
        # we want memory-safety errors, so turn leak detection off and make every
        # other finding abort the run.
+        #
+        # Poison fresh allocations with 0xCA and freed blocks with 0xCB (decimal
+        # 202/203) so memory never reads back as accidental zeros: a missing-NUL
+        # fread buffer then runs strlen off into the redzone instead of stopping
+        # at a lucky zero. Distinct bytes tell the two apart in a dump (0xCA =
+        # uninitialized, 0xCB = use-after-free). ASan caps its malloc fill at 4096
+        # bytes by default, so max_malloc_fill_size lifts it to cover large cache
+        # buffers; free_fill flags use-after-free reads.
        env:
-          ASAN_OPTIONS: detect_leaks=0:abort_on_error=1:halt_on_error=1:strict_string_checks=1
+          ASAN_OPTIONS: detect_leaks=0:abort_on_error=1:halt_on_error=1:strict_string_checks=1:malloc_fill_byte=202:max_malloc_fill_size=2147483647:free_fill_byte=203:max_free_fill_size=2147483647
          UBSAN_OPTIONS: print_stacktrace=1:halt_on_error=1
        run: make check

--- a/src/coucal
+++ b/src/coucal
--- a/src/htscache.c
+++ b/src/htscache.c
@@ -939,7 +939,7 @@ static htsblk cache_readex_new(httrackp * opt, cache_back * cache,
                  FILE *const fp = FOPEN(fconv(catbuff, sizeof(catbuff), previous_save), "rb");

                  if (fp != NULL) {
-                    r.adr = (char *) malloct((int) r.size + 4);
+                    r.adr = (char *) malloct((int) r.size + 1);
                    if (r.adr != NULL) {
                      if (r.size > 0
                          && fread(r.adr, 1, (int) r.size, fp) != r.size) {
@@ -966,7 +966,7 @@ static htsblk cache_readex_new(httrackp * opt, cache_back * cache,
              // Data in cache.
              else {
                // lire fichier (d'un coup)
-                r.adr = (char *) malloct((int) r.size + 4);
+                r.adr = (char *) malloct((int) r.size + 1);
                if (r.adr != NULL) {
                  if (unzReadCurrentFile((unzFile) cache->zipInput, r.adr, (int) r.size) != r.size) {   // erreur
                    freet(r.adr);
@@ -1246,7 +1246,7 @@ static htsblk cache_readex_old(httrackp * opt, cache_back * cache,
                    FILE *fp = FOPEN(fconv(catbuff, sizeof(catbuff), return_save), "rb");

                    if (fp != NULL) {
-                      r.adr = (char *) malloct((size_t) r.size + 4);
+                      r.adr = (char *) malloct((size_t) r.size + 1);
                      if (r.adr != NULL) {
                        if (r.size > 0
                            && fread(r.adr, 1, (size_t) r.size, fp) != r.size) {
@@ -1268,7 +1268,7 @@ static htsblk cache_readex_old(httrackp * opt, cache_back * cache,
                }
              } else {
                // lire fichier (d'un coup)
-                r.adr = (char *) malloct((size_t) r.size + 4);
+                r.adr = (char *) malloct((size_t) r.size + 1);
                if (r.adr != NULL) {
                  if (fread(r.adr, 1, (size_t) r.size, cache->olddat) != r.size) {      // erreur
                    freet(r.adr);
@@ -1371,7 +1371,7 @@ int cache_readdata(cache_back * cache, const char *str1, const char *str2,

        cache_rint(cache->olddat, &len);
        if (len > 0) {
-          char *mem_buff = (char *) malloct(len + 4);   /* Plus byte 0 */
+          char *mem_buff = (char *) malloct(len + 1); /* trailing \0 */

          if (mem_buff) {
            if (fread(mem_buff, 1, len, cache->olddat) == len) {        // lire tout (y compris statuscode etc)*/
--- a/src/htscore.c
+++ b/src/htscore.c
@@ -633,13 +633,12 @@ int httpmirror(char *url1, httrackp * opt) {
    // c'est plus propre et plus logique que d'entrer à la main les liens dans la pile
    // on bénéficie ainsi des vérifications et des tests du robot pour les liens "primaires"
    primary = (char *) malloct(primary_len);
-    if (primary) {
-      primary[0] = '\0';
-    } else {
+    if (!primary) {
      printf("PANIC! : Not enough memory [%d]\n", __LINE__);
      XH_extuninit;
      return 0;
    }
+    htsbuff primarybuff = htsbuff_ptr(primary, primary_len);

    while(*a) {
      int i;
@@ -687,11 +686,11 @@ int httpmirror(char *url1, httrackp * opt) {
              strcatbuff(tempo, "*");   // ajouter un *
            }
          }
-          if (type)
-            strcpybuff(filters[filptr], "+");
-          else
-            strcpybuff(filters[filptr], "-");
-          strcatbuff(filters[filptr], tempo);
+          {
+            htsbuff fb = htsbuff_ptr(filters[filptr], HTS_URLMAXSIZE * 2);
+            htsbuff_cpy(&fb, type ? "+" : "-");
+            htsbuff_cat(&fb, tempo);
+          }
          filptr++;

          /* sanity check */
@@ -726,12 +725,10 @@ int httpmirror(char *url1, httrackp * opt) {
        }
        url[i++] = '\0';

-        //strcatbuff(primary,"<PRIMARY=\"");
        if (strstr(url, ":/") == NULL)
-          strcatbuff(primary, "http://");
-        strcatbuff(primary, url);
-        //strcatbuff(primary,"\">");
-        strcatbuff(primary, "\n");
+          htsbuff_cat(&primarybuff, "http://");
+        htsbuff_cat(&primarybuff, url);
+        htsbuff_cat(&primarybuff, "\n");
      }
    }                           // while

@@ -762,7 +759,6 @@ int httpmirror(char *url1, httrackp * opt) {
        int filelist_ptr = 0;
        int n = 0;
        char BIGSTK line[HTS_URLMAXSIZE * 2];
-        char *primary_ptr = primary + strlen(primary);

        while(filelist_ptr < filelist_sz) {
          int count =
@@ -771,13 +767,10 @@ int httpmirror(char *url1, httrackp * opt) {
          if (count && line[0]) {
            n++;
            if (strstr(line, ":/") == NULL) {
-              strcpybuff(primary_ptr, "http://");
-              primary_ptr += strlen(primary_ptr);
+              htsbuff_cat(&primarybuff, "http://");
            }
-            strcpybuff(primary_ptr, line);
-            primary_ptr += strlen(primary_ptr);
-            strcpybuff(primary_ptr, "\n");
-            primary_ptr += 1;
+            htsbuff_cat(&primarybuff, line);
+            htsbuff_cat(&primarybuff, "\n");
          }
        }
        // fclose(fp);
@@ -2453,9 +2446,10 @@ void host_ban(httrackp * opt, int ptr,
  // interdire host
  assertf((*_FILTERS_PTR) < opt->maxfilter);
  if (*_FILTERS_PTR < opt->maxfilter) {
-    strcpybuff(_FILTERS[*_FILTERS_PTR], "-");
-    strcatbuff(_FILTERS[*_FILTERS_PTR], host);
-    strcatbuff(_FILTERS[*_FILTERS_PTR], "/*");  // host/ * interdit
+    htsbuff fb = htsbuff_ptr(_FILTERS[*_FILTERS_PTR], HTS_URLMAXSIZE * 2);
+    htsbuff_cpy(&fb, "-");
+    htsbuff_cat(&fb, host);
+    htsbuff_cat(&fb, "/*"); // forbid host/*
    (*_FILTERS_PTR)++;
  }
  // oups
@@ -3518,7 +3512,7 @@ char *next_token(char *p, int flag) {
  p--;
  do {
    p++;
-    if (flag && (*p == '\\')) { // sauter \x ou \"
+    if (flag && (*p == '\\')) { // skip \x or \"
      if (quote) {
        char c = '\0';

@@ -3527,20 +3521,14 @@ char *next_token(char *p, int flag) {
        else if (*(p + 1) == '"')
          c = '"';
        if (c) {
-          char BIGSTK tempo[8192];
-
-          tempo[0] = c;
-          tempo[1] = '\0';
-          strcatbuff(tempo, p + 2);
-          strcpybuff(p, tempo);
+          /* unescape the 2 chars to one, shifting left in place */
+          *p = c;
+          memmove(p + 1, p + 2, strlen(p + 2) + 1);
        }
      }
-    } else if (*p == 34) {      // guillemets (de fin)
-      char BIGSTK tempo[8192];
-
-      tempo[0] = '\0';
-      strcatbuff(tempo, p + 1);
-      strcpybuff(p, tempo);     /* wipe "" */
+    } else if (*p == 34) { // closing quote
+      /* drop the quote, shifting the rest left in place */
+      memmove(p, p + 1, strlen(p + 1) + 1);
      p--;
      /* */
      quote = !quote;
@@ -3880,7 +3868,7 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
                                afs.af.adr, afs.save, savename(), tempo);
                  if (str->localLink
                      && str->localLinkSize > (int) strlen(tempo) + 1) {
-                    strcpybuff(str->localLink, tempo);
+                    strlcpybuff(str->localLink, tempo, str->localLinkSize);
                  }
                }
              }
@@ -3892,11 +3880,11 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
                          lien);
            if (str->localLink
                && str->localLinkSize > (int) (strlen(afs.af.adr) + strlen(afs.af.fil) + 8)) {
-              str->localLink[0] = '\0';
+              htsbuff lb = htsbuff_ptr(str->localLink, str->localLinkSize);
              if (!link_has_authority(afs.af.adr))
-                strcpybuff(str->localLink, "http://");
-              strcatbuff(str->localLink, afs.af.adr);
-              strcatbuff(str->localLink, afs.af.fil);
+                htsbuff_cat(&lb, "http://");
+              htsbuff_cat(&lb, afs.af.adr);
+              htsbuff_cat(&lb, afs.af.fil);
            }
            r = -1;
          }
--- a/src/htscoremain.c
+++ b/src/htscoremain.c
@@ -236,6 +236,95 @@ static void basic_selftests(void) {
    }
    freet(slots);
  }
+  // next_token(): in-place token scanner. Strips surrounding quotes, unescapes
+  // \" and \\ when flag is set, and returns the token terminator (the space, or
+  // NULL at end of string). The unquote/unescape rewrites the string in place
+  // by shifting left, so the result is always shorter -- regression for that
+  // compaction.
+  {
+    char tok[64];
+
+    // plain token: unchanged, returns a pointer AT the separating space (exact
+    // position, not just any space -- a strchr-style impl would land elsewhere
+    // once quotes shift the content)
+    strcpybuff(tok, "abc def");
+    {
+      char *const end = next_token(tok, 0);
+      assertf(end == tok + 3 && *end == ' ' && strcmp(tok, "abc def") == 0);
+    }
+    // surrounding quotes stripped, returns the (post-shift) trailing space
+    strcpybuff(tok, "\"ab\" cd");
+    {
+      char *const end = next_token(tok, 1);
+      assertf(end == tok + 2 && *end == ' ' && strcmp(tok, "ab cd") == 0);
+    }
+    // a space inside quotes does not end the token; end of string returns NULL
+    strcpybuff(tok, "\"a b\"c");
+    {
+      char *const end = next_token(tok, 1);
+      assertf(end == NULL && strcmp(tok, "a bc") == 0);
+    }
+    // \" and \\ are unescaped to literal " and \ in place
+    strcpybuff(tok, "\"a\\\"b\\\\c\"");
+    {
+      char *const end = next_token(tok, 1);
+      assertf(end == NULL && strcmp(tok, "a\"b\\c") == 0);
+    }
+    // unterminated quote: the opening quote is dropped, the rest survives, and
+    // the scan runs to the NUL (returns NULL)
+    strcpybuff(tok, "\"ab");
+    {
+      char *const end = next_token(tok, 1);
+      assertf(end == NULL && strcmp(tok, "ab") == 0);
+    }
+    // trailing lone backslash in a quote: *(p+1) is the NUL, not an escape, so
+    // the backslash is kept intact (and there is no over-read past the NUL)
+    strcpybuff(tok, "\"a\\");
+    {
+      char *const end = next_token(tok, 1);
+      assertf(end == NULL && strcmp(tok, "a\\") == 0);
+    }
+  }
+  // fil_normalized(): canonicalizes a URL path. Query arguments are sorted
+  // alphabetically (by the text after each '?'/'&') and the query is rebuilt
+  // through a bounded builder; outside the query, "//" collapses to "/".
+  // Regression for that builder.
+  {
+    char norm[256];
+
+    assertf(strcmp(fil_normalized("/p?b=2&a=1&c=3", norm), "/p?a=1&b=2&c=3") ==
+            0);
+    assertf(strcmp(fil_normalized("/a//b", norm), "/a/b") == 0);
+  }
+  // give_mimext(): mime type -> file extension, bounded into the caller buffer.
+  {
+    char ext[16];
+
+    give_mimext(ext, sizeof(ext), "image/gif");
+    assertf(strcmp(ext, "gif") == 0);
+    give_mimext(ext, sizeof(ext), "text/html");
+    assertf(strcmp(ext, "html") == 0);
+    give_mimext(ext, sizeof(ext), "no/such-mime-type");
+    assertf(ext[0] == '\0');
+  }
+  // convtolower(): lower-cases into the caller buffer (bounded by its size).
+  {
+    char low[64];
+
+    assertf(strcmp(convtolower(low, sizeof(low), "ABC/Def.HTML"),
+                   "abc/def.html") == 0);
+  }
+  // cut_path(): splits a path into directory (with trailing '/') and basename,
+  // each bounded by its buffer size.
+  {
+    char full[] = "/dir/sub/file.html";
+    char path[256];
+    char pname[256];
+
+    cut_path(full, path, sizeof(path), pname, sizeof(pname));
+    assertf(strcmp(path, "/dir/sub/") == 0);
+    assertf(strcmp(pname, "file.html") == 0);
+  }
 }

 /* Self-tests for the htssafe.h bounded string ops (driven by httrack -#8).
@@ -2556,7 +2645,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {

                    printf("%s is '%s'\n", argv[na + 1], mime);
                    ext[0] = '\0';
-                    give_mimext(ext, mime);
+                    give_mimext(ext, sizeof(ext), mime);
                    if (ext[0]) {
                      printf("and its local type is '.%s'\n", ext);
                    }
--- a/src/htshash.c
+++ b/src/htshash.c
@@ -76,7 +76,7 @@ static coucal_key key_duphandler(void *arg, coucal_key_const name) {
 /* Key sav hashes are using case-insensitive version */
 static coucal_hashkeys key_sav_hashes(void *arg, coucal_key_const key) {
  hash_struct *const hash = (hash_struct*) arg;
-  convtolower(hash->catbuff, (const char*) key);
+  convtolower(hash->catbuff, sizeof(hash->catbuff), (const char *) key);
  return coucal_hash_string(hash->catbuff);
 }

--- a/src/htsindex.c
+++ b/src/htsindex.c
@@ -334,7 +334,7 @@ void index_finish(const char *indexpath, int mode) {
    if (fp_tmpproject) {
      tab = (char **) malloct(sizeof(char *) * (hts_primindex_size + 2));
      if (tab) {
-        blk = malloct(size + 4);
+        blk = malloct(size + 1);
        if (blk) {
          fseek(fp_tmpproject, 0, SEEK_SET);
          if ((INTsys) fread(blk, 1, size, fp_tmpproject) == size) {
--- a/src/htslib.c
+++ b/src/htslib.c
@@ -1530,8 +1530,9 @@ void treathead(t_cookie * cookie, const char *adr, const char *fil, htsblk * ret
      if (retour->location) {
        while(is_realspace(*(rcvd + p)))
          p++;                  // sauter espaces
-        if ((int) strlen(rcvd + p) < HTS_URLMAXSIZE)    // pas trop long?
-          strcpybuff(retour->location, rcvd + p);
+        if ((int) strlen(rcvd + p) < HTS_URLMAXSIZE) // not too long?
+          /* location aliases location_buffer[HTS_URLMAXSIZE * 2] */
+          strlcpybuff(retour->location, rcvd + p, HTS_URLMAXSIZE * 2);
        else                    // erreur.. ignorer
          retour->location[0] = '\0';
      }
@@ -3444,16 +3445,17 @@ HTSEXT_API char *fil_normalized(const char *source, char *dest) {
    /* Replace query by sorted query */
    copyBuff = malloct(qLen + 1);
    assertf(copyBuff != NULL);
-    copyBuff[0] = '\0';
-    for(i = 0; i < ampargs; i++) {
-      if (i == 0)
-        strcatbuff(copyBuff, "?");
-      else
-        strcatbuff(copyBuff, "&");
-      strcatbuff(copyBuff, amps[i] + 1);
+    {
+      htsbuff cb = htsbuff_ptr(copyBuff, qLen + 1);
+
+      for (i = 0; i < ampargs; i++) {
+        htsbuff_cat(&cb, i == 0 ? "?" : "&");
+        htsbuff_cat(&cb, amps[i] + 1);
+      }
+      assertf(cb.len == qLen);
    }
-    assertf(strlen(copyBuff) == qLen);
-    strcpybuff(query, copyBuff);
+    /* query points into dest where the original qLen-byte query was */
+    strlcpybuff(query, copyBuff, qLen + 1);

    /* Cleanup */
    freet(amps);
@@ -3894,9 +3896,9 @@ HTSEXT_API size_t escape_for_html_print_full(const char *const s, char *const de

 #undef ADD_CHAR

-// conversion minuscules, avec buffer
-char *convtolower(char *catbuff, const char *a) {
-  strcpybuff(catbuff, a);
+// lower-case conversion into caller buffer (capacity catbuffsize)
+char *convtolower(char *catbuff, size_t catbuffsize, const char *a) {
+  strlcpybuff(catbuff, a, catbuffsize);
  hts_lowcase(catbuff);         // lower case
  return catbuff;
 }
@@ -4073,15 +4075,15 @@ int get_userhttptype(httrackp * opt, char *s, const char *fil) {

 // renvoyer extesion d'un type mime..
 // ex: "image/gif" -> gif
-void give_mimext(char *s, const char *st) {
+void give_mimext(char *s, size_t ssize, const char *st) {
  int ok = 0;
  int j = 0;

  s[0] = '\0';
  while((!ok) && (strnotempty(hts_mime[j][1]))) {
    if (strfield2(hts_mime[j][0], st)) {
-      if (hts_mime[j][1][0] != '*') {   // Une correspondance existe
-        strcpybuff(s, hts_mime[j][1]);
+      if (hts_mime[j][1][0] != '*') { // a match exists
+        strlcpybuff(s, hts_mime[j][1], ssize);
        ok = 1;
      }
    }
@@ -4102,7 +4104,7 @@ void give_mimext(char *s, const char *st) {
    if (a) {
      if ((int) strlen(a) >= 1) {
        if ((int) strlen(a) <= 4) {
-          strcpybuff(s, a);
+          strlcpybuff(s, a, ssize);
          ok = 1;
        }
      }
@@ -4206,7 +4208,7 @@ int may_bogus_multiple(httrackp * opt, const char *mime, const char *filename) {
      char ext[64];

      ext[0] = '\0';
-      give_mimext(ext, mime);
+      give_mimext(ext, sizeof(ext), mime);
      if (ext[0] != 0) {        /* we have an extension for that */
        const size_t ext_size = strlen(ext);
        const char *file = strrchr(filename, '/');      /* fetch terminal filename */
@@ -4930,7 +4932,8 @@ void hts_freeall(void) {

 // cut path and project name
 // patch also initial path
-void cut_path(char *fullpath, char *path, char *pname) {
+void cut_path(char *fullpath, char *path, size_t path_size, char *pname,
+              size_t pname_size) {
  path[0] = pname[0] = '\0';
  if (strnotempty(fullpath)) {
    if ((fullpath[strlen(fullpath) - 1] == '/')
@@ -4946,8 +4949,8 @@ void cut_path(char *fullpath, char *path, char *pname) {
        a--;
      if (*a == '/')
        a++;
-      strcpybuff(pname, a);
-      strncatbuff(path, fullpath, (int) (a - fullpath));
+      strlcpybuff(pname, a, pname_size);
+      strlncatbuff(path, fullpath, path_size, (size_t) (a - fullpath));
    }
  }
 }
--- a/src/htslib.h
+++ b/src/htslib.h
@@ -252,7 +252,7 @@ int ishtml_ext(const char *a);
 int ishttperror(int err);

 int get_userhttptype(httrackp * opt, char *s, const char *fil);
-void give_mimext(char *s, const char *st);
+void give_mimext(char *s, size_t ssize, const char *st);

 int may_bogus_multiple(httrackp * opt, const char *mime, const char *filename);
 int may_unknown2(httrackp * opt, const char *mime, const char *filename);
@@ -264,7 +264,7 @@ void code64(unsigned char *a, int size_a, unsigned char *b, int crlf);

 #define copychar(catbuff,a) concat(catbuff,(a),NULL)

-char *convtolower(char *catbuff, const char *a);
+char *convtolower(char *catbuff, size_t catbuffsize, const char *a);
 void hts_lowcase(char *s);
 void hts_replace(char *s, char from, char to);
 int multipleStringMatch(const char *s, const char *match);
@@ -276,7 +276,8 @@ void fprintfio(FILE * fp, const char *buff, const char *prefix);
 int sig_ignore_flag(int setflag);  // flag ignore
 #endif

-void cut_path(char *fullpath, char *path, char *pname);
+void cut_path(char *fullpath, char *path, size_t path_size, char *pname,
+              size_t pname_size);
 int fexist(const char *s);
 int fexist_utf8(const char *s);

--- a/src/htsname.c
+++ b/src/htsname.c
@@ -344,7 +344,7 @@ int url_savename(lien_adrfilsave *const afs,
        mime[0] = ext[0] = '\0';
        get_userhttptype(opt, mime, fil);
        if (strnotempty(mime)) {
-          give_mimext(ext, mime);
+          give_mimext(ext, sizeof(ext), mime);
          if (strnotempty(ext)) {
            ext_chg = 1;
          }
@@ -378,7 +378,7 @@ int url_savename(lien_adrfilsave *const afs,
                ext_chg = 2;      /* change filename */
                strcpybuff(ext, r.cdispo);
              } else if (!may_unknown2(opt, r.contenttype, fil)) {        // on peut patcher à priori?
-                give_mimext(s, r.contenttype);    // obtenir extension
+                give_mimext(s, sizeof(s), r.contenttype); // get extension
                if (strnotempty(s) > 0) { // on a reconnu l'extension
                  ext_chg = 1;
                  strcpybuff(ext, s);
@@ -403,7 +403,7 @@ int url_savename(lien_adrfilsave *const afs,
            mime[0] = ext[0] = '\0';
            get_userhttptype(opt, mime, fil);
            if (strnotempty(mime)) {
-              give_mimext(ext, mime);
+              give_mimext(ext, sizeof(ext), mime);
              if (strnotempty(ext)) {
                ext_chg = 1;
              }
@@ -421,7 +421,8 @@ int url_savename(lien_adrfilsave *const afs,
              } else if (!may_unknown2(opt, headers->r.contenttype, headers->url_fil)) {    // on peut patcher à priori? (pas interdit ou pas de type)
                char s[16];
                s[0] = '\0';
-                give_mimext(s, headers->r.contenttype);    // obtenir extension
+                give_mimext(s, sizeof(s),
+                            headers->r.contenttype); // get extension
                if (strnotempty(s) > 0) { // on a reconnu l'extension
                  ext_chg = 1;
                  strcpybuff(ext, s);
@@ -431,7 +432,7 @@ int url_savename(lien_adrfilsave *const afs,
            else if (mime_type != NULL) {
              ext[0] = '\0';
              if (*mime_type) {
-                give_mimext(ext, mime_type);
+                give_mimext(ext, sizeof(ext), mime_type);
              }
              if (strnotempty(ext)) {
                char mime_from_file[128];
@@ -646,7 +647,8 @@ int url_savename(lien_adrfilsave *const afs,
                      ext_chg = 2;      /* change filename */
                      strcpybuff(ext, back[b].r.cdispo);
                    } else if (!may_unknown2(opt, back[b].r.contenttype, back[b].url_fil)) {    // on peut patcher à priori? (pas interdit ou pas de type)
-                      give_mimext(s, back[b].r.contenttype);    // obtenir extension
+                      give_mimext(s, sizeof(s),
+                                  back[b].r.contenttype); // get extension
                      if (strnotempty(s) > 0) { // on a reconnu l'extension
                        ext_chg = 1;
                        strcpybuff(ext, s);
--- a/src/htssafe.h
+++ b/src/htssafe.h
@@ -237,6 +237,15 @@ static char *strncatbuff_ptr_(char *dest, const char *src, size_t n) {
  HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), (size_t) -1, \
  "overflow while appending '" #B "' to '"#A"'", __FILE__, __LINE__)

+/**
+ * Append at most "N" characters of "B" to "A", "A" having a maximum capacity
+ * of "S".
+ */
+#define strlncatbuff(A, B, S, N)                                               \
+  strncat_safe_(A, S, B, HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B),  \
+                N, "overflow while appending '" #B "' to '" #A "'", __FILE__,  \
+                __LINE__)
+
 /**
 * Copy characters of "B" to "A", "A" having a maximum capacity of "S".
 */
--- a/src/proxy/store.c
+++ b/src/proxy/store.c
@@ -1162,7 +1162,7 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char *url,
                    FILE *fp = fopen(file_convert(catbuff, sizeof(catbuff), previous_save), "rb");

                    if (fp != NULL) {
-                      r->adr = (char *) malloc(r->size + 4);
+                      r->adr = (char *) malloc(r->size + 1);
                      if (r->adr != NULL) {
                        if (r->size > 0
                            && fread(r->adr, 1, r->size, fp) != r->size) {
@@ -1172,6 +1172,7 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char *url,
                          sprintf(r->msg, "Read error in cache disk data: %s",
                                  strerror(last_errno));
                        }
+                        r->adr[r->size] = '\0';
                      } else {
                        r->statuscode = STATUSCODE_INVALID;
                        strcpy(r->msg,
Author	SHA1	Message	Date
Xavier Roche	36a9f5a827	Merge pull request #365 from xroche/cleanup/htslib-bounds Bound htslib.c pointer-destination buffer writes (batch 9)	2026-06-16 03:54:38 +02:00
Xavier Roche	20880c1a4d	Bound htslib.c pointer-destination buffer writes (batch 9) Continues the htssafe.h pointer-destination migration (X1), where the strcpybuff/strcatbuff macros silently fall back to a raw strcpy/strcat when the destination is a bare char* rather than a sized array. In htslib.c: * fil_normalized() rebuilds the sorted query through an htsbuff bounded builder over the malloc'd copyBuff, then copies it back with strlcpybuff (capacity is the known qLen + 1). * treathead() bounds the Location: copy with strlcpybuff against the location_buffer[HTS_URLMAXSIZE * 2] contract. give_mimext(), convtolower() and cut_path() are internal (hidden, not HTSEXT_API), so they take an explicit destination size and the callers pass it: give_mimext in htsname.c/htscoremain.c/htslib.c, convtolower in htshash.c. cut_path has no callers. Add strlncatbuff(dst, src, size, n) to htssafe.h: a bounded n-limited append with explicit capacity, the missing parallel to strlcatbuff. Cover fil_normalized query-sort, give_mimext, convtolower and cut_path with the -#7 basic_selftests. get_httptype() and adr_normalized() are left for a follow-up: both are exported (HTSEXT_API), and get_httptype() exposes a real latent overflow (a .docx/.pptx/.xlsx URL writes a 65-73 char mime type into 64-byte contenttype callers) whose fix is a public-ABI decision. htslib.c pointer-destination warnings: 14 -> 4. Signed-off-by: Xavier Roche <roche@httrack.com>	2026-06-16 03:48:52 +02:00
Xavier Roche	a6fc0e9dab	Merge pull request #361 from xroche/chore/bump-coucal-shift-ub Bump src/coucal to fadf29b (MurmurHash3 signed-shift UB fix)	2026-06-15 17:04:09 +02:00
Xavier Roche	f227135d16	Bump src/coucal to fadf29b (MurmurHash3 signed-shift UB fix) Picks up coucal PR #6: the MurmurHash3 tail mixing shifted a byte promoted to int left by 24, overflowing signed int once the byte had its high bit set (UBSan). A sanitized live crawl hashing arbitrary URL keys aborted on it. Verified: the ASan+UBSan www.edf.fr crawl that previously aborted at murmurhash3.h:123 now completes clean (100 pages, no findings). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Signed-off-by: Xavier Roche <roche@httrack.com>	2026-06-15 14:46:04 +02:00
Xavier Roche	223564eaca	Merge pull request #360 from xroche/cleanup/htscore-bounds Bound htscore.c pointer-destination buffer writes (batch 8)	2026-06-15 10:28:29 +02:00
Xavier Roche	7db49a64b6	Bound htscore.c pointer-destination buffer writes (batch 8) Convert htscore.c's 18 pointer-destination strcpybuff/strcatbuff sites (which silently degrade to unchecked strcpy/strcat per the htssafe.h diagnostic) to bounded forms: - httpmirror(): one htsbuff over the malloc'd primary buffer drives the whole link accumulation, replacing the manual "primary_ptr += strlen" cursor in the filelist loop; the +/- filter slots build through htsbuff over their known HTS_URLMAXSIZE * 2 capacity. - host_ban(): the "-host/*" filter slot builds through htsbuff. - htsAddLink(): str->localLink builds through htsbuff / strlcpybuff bounded by str->localLinkSize. - next_token(): the in-place unquote/unescape copied the (always shorter) result back through an 8KB temp buffer, which both relied on an unchecked pointer copy and aborted on tokens over 8KB. Replace with memmove left-shift compaction: no capacity guess, no size cap. Add a next_token() regression test to basic_selftests (httrack -#7) covering plain tokens, quote stripping, and \" / \\ unescaping; teeth verified. htscore.c pointer-destination sites 18 -> 0. Signed-off-by: Xavier Roche <roche@httrack.com>	2026-06-15 10:16:06 +02:00
Xavier Roche	f1c04c10eb	Merge pull request #359 from xroche/fix/malloc-size-plus4 Allocate exactly one extra byte for cache-buffer NUL terminators	2026-06-15 09:33:26 +02:00
Xavier Roche	17fc54869d	Allocate exactly one extra byte for cache-buffer NUL terminators These fread buffers were over-allocated as size+4, a superstitious margin that never bought anything: every site writes a single trailing NUL at [size], so size+1 is exactly right. Trim them all to size+1. The proxytrack disk-fallback read in PT_ReadCache__New_u never wrote that NUL at all, unlike its sibling read paths in the same file; add the missing r->adr[r->size] = '\0' so the spare byte is actually used and the buffer is a valid C string. Signed-off-by: Xavier Roche <roche@httrack.com>	2026-06-15 09:30:34 +02:00
Xavier Roche	d2e43549d8	Merge pull request #358 from xroche/ci/asan-poison-fill ci: poison the ASan allocator to surface missing-NUL bugs	2026-06-15 09:19:04 +02:00
Xavier Roche	a9b16d96ea	ci: poison the ASan allocator to surface missing-NUL bugs Fill malloc'd and freed memory with 0xCA in the sanitize job so a buffer fread into without NUL termination, then used as a C string, runs off into the redzone instead of stopping at an accidental zero byte. ASan caps its malloc fill at the first 4096 bytes by default, which lets large cache buffers escape; max_malloc_fill_size lifts the cap. No rebuild, no source change -- purely the test environment. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Signed-off-by: Xavier Roche <roche@httrack.com>	2026-06-15 09:16:48 +02:00
Xavier Roche	4ed828ff78	Merge pull request #357 from xroche/audit/fread-nul-termination Fix more un-NUL-terminated fread buffers used as C strings	2026-06-15 09:07:37 +02:00