Compare commits

...

7 Commits

Author SHA1 Message Date
Xavier Roche
223564eaca Merge pull request #360 from xroche/cleanup/htscore-bounds
Bound htscore.c pointer-destination buffer writes (batch 8)
2026-06-15 10:28:29 +02:00
Xavier Roche
7db49a64b6 Bound htscore.c pointer-destination buffer writes (batch 8)
Convert htscore.c's 18 pointer-destination strcpybuff/strcatbuff sites (which
silently degrade to unchecked strcpy/strcat per the htssafe.h diagnostic) to
bounded forms:

- httpmirror(): one htsbuff over the malloc'd primary buffer drives the whole
  link accumulation, replacing the manual "primary_ptr += strlen" cursor in the
  filelist loop; the +/- filter slots build through htsbuff over their known
  HTS_URLMAXSIZE*2 capacity.
- host_ban(): the "-host/*" filter slot builds through htsbuff.
- htsAddLink(): str->localLink builds through htsbuff / strlcpybuff bounded by
  str->localLinkSize.
- next_token(): the in-place unquote/unescape copied the (always shorter) result
  back through an 8KB temp buffer, which both relied on an unchecked pointer copy
  and aborted on tokens over 8KB. Replace with memmove left-shift compaction: no
  capacity guess, no size cap.

Add a next_token() regression test to basic_selftests (httrack -#7) covering
plain tokens, quote stripping, and \" / \\ unescaping; teeth verified.

htscore.c pointer-destination sites 18 -> 0.

Signed-off-by: Xavier Roche <roche@httrack.com>
2026-06-15 10:16:06 +02:00
Xavier Roche
f1c04c10eb Merge pull request #359 from xroche/fix/malloc-size-plus4
Allocate exactly one extra byte for cache-buffer NUL terminators
2026-06-15 09:33:26 +02:00
Xavier Roche
17fc54869d Allocate exactly one extra byte for cache-buffer NUL terminators
These fread buffers were over-allocated as size+4, a superstitious margin
that never bought anything: every site writes a single trailing NUL at
[size], so size+1 is exactly right. Trim them all to size+1.

The proxytrack disk-fallback read in PT_ReadCache__New_u never wrote that
NUL at all, unlike its sibling read paths in the same file; add the missing
r->adr[r->size] = '\0' so the spare byte is actually used and the buffer is
a valid C string.

Signed-off-by: Xavier Roche <roche@httrack.com>
2026-06-15 09:30:34 +02:00
Xavier Roche
d2e43549d8 Merge pull request #358 from xroche/ci/asan-poison-fill
ci: poison the ASan allocator to surface missing-NUL bugs
2026-06-15 09:19:04 +02:00
Xavier Roche
a9b16d96ea ci: poison the ASan allocator to surface missing-NUL bugs
Fill malloc'd memory with 0xCA and freed memory with 0xCB in the sanitize job,
so a buffer that is filled by fread without NUL termination and then used as a
C string runs off into the redzone instead of stopping at an accidental zero
byte. ASan caps its malloc fill at the first 4096 bytes by default, which lets
large cache buffers escape; max_malloc_fill_size lifts the cap, and
max_free_fill_size is raised the same way for the free fill. No rebuild, no
source change -- purely the test environment.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Xavier Roche <roche@httrack.com>
2026-06-15 09:16:48 +02:00
Xavier Roche
4ed828ff78 Merge pull request #357 from xroche/audit/fread-nul-termination
Fix more un-NUL-terminated fread buffers used as C strings
2026-06-15 09:07:37 +02:00
6 changed files with 95 additions and 49 deletions

View File

@@ -171,8 +171,16 @@ jobs:
# Leaks at exit are out of scope (the CLI frees little on the way out);
# we want memory-safety errors, so turn leak detection off and make every
# other finding abort the run.
#
# Poison fresh allocations with 0xCA and freed blocks with 0xCB (decimal
# 202/203) so memory never reads back as accidental zeros: a missing-NUL
# fread buffer then runs strlen off into the redzone instead of stopping
# at a lucky zero. Distinct bytes tell the two apart in a dump (0xCA =
# uninitialized, 0xCB = use-after-free). ASan caps its malloc fill at 4096
# bytes by default, so max_malloc_fill_size lifts it to cover large cache
# buffers; free_fill_byte and max_free_fill_size poison freed blocks the same
# way, so use-after-free reads show up as 0xCB.
env:
ASAN_OPTIONS: detect_leaks=0:abort_on_error=1:halt_on_error=1:strict_string_checks=1
ASAN_OPTIONS: detect_leaks=0:abort_on_error=1:halt_on_error=1:strict_string_checks=1:malloc_fill_byte=202:max_malloc_fill_size=2147483647:free_fill_byte=203:max_free_fill_size=2147483647
UBSAN_OPTIONS: print_stacktrace=1:halt_on_error=1
run: make check

View File

@@ -939,7 +939,7 @@ static htsblk cache_readex_new(httrackp * opt, cache_back * cache,
FILE *const fp = FOPEN(fconv(catbuff, sizeof(catbuff), previous_save), "rb");
if (fp != NULL) {
r.adr = (char *) malloct((int) r.size + 4);
r.adr = (char *) malloct((int) r.size + 1);
if (r.adr != NULL) {
if (r.size > 0
&& fread(r.adr, 1, (int) r.size, fp) != r.size) {
@@ -966,7 +966,7 @@ static htsblk cache_readex_new(httrackp * opt, cache_back * cache,
// Data in cache.
else {
// lire fichier (d'un coup)
r.adr = (char *) malloct((int) r.size + 4);
r.adr = (char *) malloct((int) r.size + 1);
if (r.adr != NULL) {
if (unzReadCurrentFile((unzFile) cache->zipInput, r.adr, (int) r.size) != r.size) { // erreur
freet(r.adr);
@@ -1246,7 +1246,7 @@ static htsblk cache_readex_old(httrackp * opt, cache_back * cache,
FILE *fp = FOPEN(fconv(catbuff, sizeof(catbuff), return_save), "rb");
if (fp != NULL) {
r.adr = (char *) malloct((size_t) r.size + 4);
r.adr = (char *) malloct((size_t) r.size + 1);
if (r.adr != NULL) {
if (r.size > 0
&& fread(r.adr, 1, (size_t) r.size, fp) != r.size) {
@@ -1268,7 +1268,7 @@ static htsblk cache_readex_old(httrackp * opt, cache_back * cache,
}
} else {
// lire fichier (d'un coup)
r.adr = (char *) malloct((size_t) r.size + 4);
r.adr = (char *) malloct((size_t) r.size + 1);
if (r.adr != NULL) {
if (fread(r.adr, 1, (size_t) r.size, cache->olddat) != r.size) { // erreur
freet(r.adr);
@@ -1371,7 +1371,7 @@ int cache_readdata(cache_back * cache, const char *str1, const char *str2,
cache_rint(cache->olddat, &len);
if (len > 0) {
char *mem_buff = (char *) malloct(len + 4); /* Plus byte 0 */
char *mem_buff = (char *) malloct(len + 1); /* trailing \0 */
if (mem_buff) {
if (fread(mem_buff, 1, len, cache->olddat) == len) { // lire tout (y compris statuscode etc)*/

View File

@@ -633,13 +633,12 @@ int httpmirror(char *url1, httrackp * opt) {
// c'est plus propre et plus logique que d'entrer à la main les liens dans la pile
// on bénéficie ainsi des vérifications et des tests du robot pour les liens "primaires"
primary = (char *) malloct(primary_len);
if (primary) {
primary[0] = '\0';
} else {
if (!primary) {
printf("PANIC! : Not enough memory [%d]\n", __LINE__);
XH_extuninit;
return 0;
}
htsbuff primarybuff = htsbuff_ptr(primary, primary_len);
while(*a) {
int i;
@@ -687,11 +686,11 @@ int httpmirror(char *url1, httrackp * opt) {
strcatbuff(tempo, "*"); // ajouter un *
}
}
if (type)
strcpybuff(filters[filptr], "+");
else
strcpybuff(filters[filptr], "-");
strcatbuff(filters[filptr], tempo);
{
htsbuff fb = htsbuff_ptr(filters[filptr], HTS_URLMAXSIZE * 2);
htsbuff_cpy(&fb, type ? "+" : "-");
htsbuff_cat(&fb, tempo);
}
filptr++;
/* sanity check */
@@ -726,12 +725,10 @@ int httpmirror(char *url1, httrackp * opt) {
}
url[i++] = '\0';
//strcatbuff(primary,"<PRIMARY=\"");
if (strstr(url, ":/") == NULL)
strcatbuff(primary, "http://");
strcatbuff(primary, url);
//strcatbuff(primary,"\">");
strcatbuff(primary, "\n");
htsbuff_cat(&primarybuff, "http://");
htsbuff_cat(&primarybuff, url);
htsbuff_cat(&primarybuff, "\n");
}
} // while
@@ -762,7 +759,6 @@ int httpmirror(char *url1, httrackp * opt) {
int filelist_ptr = 0;
int n = 0;
char BIGSTK line[HTS_URLMAXSIZE * 2];
char *primary_ptr = primary + strlen(primary);
while(filelist_ptr < filelist_sz) {
int count =
@@ -771,13 +767,10 @@ int httpmirror(char *url1, httrackp * opt) {
if (count && line[0]) {
n++;
if (strstr(line, ":/") == NULL) {
strcpybuff(primary_ptr, "http://");
primary_ptr += strlen(primary_ptr);
htsbuff_cat(&primarybuff, "http://");
}
strcpybuff(primary_ptr, line);
primary_ptr += strlen(primary_ptr);
strcpybuff(primary_ptr, "\n");
primary_ptr += 1;
htsbuff_cat(&primarybuff, line);
htsbuff_cat(&primarybuff, "\n");
}
}
// fclose(fp);
@@ -2453,9 +2446,10 @@ void host_ban(httrackp * opt, int ptr,
// interdire host
assertf((*_FILTERS_PTR) < opt->maxfilter);
if (*_FILTERS_PTR < opt->maxfilter) {
strcpybuff(_FILTERS[*_FILTERS_PTR], "-");
strcatbuff(_FILTERS[*_FILTERS_PTR], host);
strcatbuff(_FILTERS[*_FILTERS_PTR], "/*"); // host/ * interdit
htsbuff fb = htsbuff_ptr(_FILTERS[*_FILTERS_PTR], HTS_URLMAXSIZE * 2);
htsbuff_cpy(&fb, "-");
htsbuff_cat(&fb, host);
htsbuff_cat(&fb, "/*"); // forbid host/*
(*_FILTERS_PTR)++;
}
// oups
@@ -3518,7 +3512,7 @@ char *next_token(char *p, int flag) {
p--;
do {
p++;
if (flag && (*p == '\\')) { // sauter \x ou \"
if (flag && (*p == '\\')) { // skip \x or \"
if (quote) {
char c = '\0';
@@ -3527,20 +3521,14 @@ char *next_token(char *p, int flag) {
else if (*(p + 1) == '"')
c = '"';
if (c) {
char BIGSTK tempo[8192];
tempo[0] = c;
tempo[1] = '\0';
strcatbuff(tempo, p + 2);
strcpybuff(p, tempo);
/* unescape the 2 chars to one, shifting left in place */
*p = c;
memmove(p + 1, p + 2, strlen(p + 2) + 1);
}
}
} else if (*p == 34) { // guillemets (de fin)
char BIGSTK tempo[8192];
tempo[0] = '\0';
strcatbuff(tempo, p + 1);
strcpybuff(p, tempo); /* wipe "" */
} else if (*p == 34) { // closing quote
/* drop the quote, shifting the rest left in place */
memmove(p, p + 1, strlen(p + 1) + 1);
p--;
/* */
quote = !quote;
@@ -3880,7 +3868,7 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
afs.af.adr, afs.save, savename(), tempo);
if (str->localLink
&& str->localLinkSize > (int) strlen(tempo) + 1) {
strcpybuff(str->localLink, tempo);
strlcpybuff(str->localLink, tempo, str->localLinkSize);
}
}
}
@@ -3892,11 +3880,11 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
lien);
if (str->localLink
&& str->localLinkSize > (int) (strlen(afs.af.adr) + strlen(afs.af.fil) + 8)) {
str->localLink[0] = '\0';
htsbuff lb = htsbuff_ptr(str->localLink, str->localLinkSize);
if (!link_has_authority(afs.af.adr))
strcpybuff(str->localLink, "http://");
strcatbuff(str->localLink, afs.af.adr);
strcatbuff(str->localLink, afs.af.fil);
htsbuff_cat(&lb, "http://");
htsbuff_cat(&lb, afs.af.adr);
htsbuff_cat(&lb, afs.af.fil);
}
r = -1;
}

View File

@@ -236,6 +236,55 @@ static void basic_selftests(void) {
}
freet(slots);
}
// next_token(): in-place token scanner. Strips surrounding quotes, unescapes
// \" and \\ when flag is set, and returns the token terminator (the space, or
// NULL at end of string). The unquote/unescape rewrites the string in place
// by shifting left, so the rewritten string is always shorter -- this block is
// the regression test for that compaction.
{
char tok[64];
// plain token: unchanged, returns a pointer AT the separating space (exact
// position, not just any space -- a strchr-style impl would land elsewhere
// once quotes shift the content)
strcpybuff(tok, "abc def");
{
char *const end = next_token(tok, 0);
assertf(end == tok + 3 && *end == ' ' && strcmp(tok, "abc def") == 0);
}
// surrounding quotes stripped, returns the (post-shift) trailing space
strcpybuff(tok, "\"ab\" cd");
{
char *const end = next_token(tok, 1);
assertf(end == tok + 2 && *end == ' ' && strcmp(tok, "ab cd") == 0);
}
// a space inside quotes does not end the token; end of string returns NULL
strcpybuff(tok, "\"a b\"c");
{
char *const end = next_token(tok, 1);
assertf(end == NULL && strcmp(tok, "a bc") == 0);
}
// \" and \\ are unescaped to literal " and \ in place
strcpybuff(tok, "\"a\\\"b\\\\c\"");
{
char *const end = next_token(tok, 1);
assertf(end == NULL && strcmp(tok, "a\"b\\c") == 0);
}
// unterminated quote: the opening quote is dropped, the rest survives, and
// the scan runs to the NUL (returns NULL)
strcpybuff(tok, "\"ab");
{
char *const end = next_token(tok, 1);
assertf(end == NULL && strcmp(tok, "ab") == 0);
}
// trailing lone backslash in a quote: *(p+1) is the NUL, not an escape, so
// the backslash is kept intact (and there is no over-read past the NUL)
strcpybuff(tok, "\"a\\");
{
char *const end = next_token(tok, 1);
assertf(end == NULL && strcmp(tok, "a\\") == 0);
}
}
}
/* Self-tests for the htssafe.h bounded string ops (driven by httrack -#8).

View File

@@ -334,7 +334,7 @@ void index_finish(const char *indexpath, int mode) {
if (fp_tmpproject) {
tab = (char **) malloct(sizeof(char *) * (hts_primindex_size + 2));
if (tab) {
blk = malloct(size + 4);
blk = malloct(size + 1);
if (blk) {
fseek(fp_tmpproject, 0, SEEK_SET);
if ((INTsys) fread(blk, 1, size, fp_tmpproject) == size) {

View File

@@ -1162,7 +1162,7 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char *url,
FILE *fp = fopen(file_convert(catbuff, sizeof(catbuff), previous_save), "rb");
if (fp != NULL) {
r->adr = (char *) malloc(r->size + 4);
r->adr = (char *) malloc(r->size + 1);
if (r->adr != NULL) {
if (r->size > 0
&& fread(r->adr, 1, r->size, fp) != r->size) {
@@ -1172,6 +1172,7 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char *url,
sprintf(r->msg, "Read error in cache disk data: %s",
strerror(last_errno));
}
r->adr[r->size] = '\0';
} else {
r->statuscode = STATUSCODE_INVALID;
strcpy(r->msg,