Compare commits

..

2 Commits

Author SHA1 Message Date
Xavier Roche
7db49a64b6 Bound htscore.c pointer-destination buffer writes (batch 8)
Convert htscore.c's 18 pointer-destination strcpybuff/strcatbuff sites (which
silently degrade to unchecked strcpy/strcat per the htssafe.h diagnostic) to
bounded forms:

- httpmirror(): one htsbuff over the malloc'd primary buffer drives the whole
  link accumulation, replacing the manual "primary_ptr += strlen" cursor in the
  filelist loop; the +/- filter slots build through htsbuff over their known
  HTS_URLMAXSIZE*2 capacity.
- host_ban(): the "-host/*" filter slot builds through htsbuff.
- htsAddLink(): str->localLink builds through htsbuff / strlcpybuff bounded by
  str->localLinkSize.
- next_token(): the in-place unquote/unescape copied the (always shorter) result
  back through an 8KB temp buffer, which both relied on an unchecked pointer copy
  and aborted on tokens over 8KB. Replace with memmove left-shift compaction: no
  capacity guess, no size cap.

Add a next_token() regression test to basic_selftests (httrack -#7) covering
plain tokens, quote stripping, and \" / \\ unescaping; teeth verified.

htscore.c pointer-destination sites 18 -> 0.

Signed-off-by: Xavier Roche <roche@httrack.com>
2026-06-15 10:16:06 +02:00
Xavier Roche
f1c04c10eb Merge pull request #359 from xroche/fix/malloc-size-plus4
Allocate exactly one extra byte for cache-buffer NUL terminators
2026-06-15 09:33:26 +02:00
2 changed files with 78 additions and 41 deletions

View File

@@ -633,13 +633,12 @@ int httpmirror(char *url1, httrackp * opt) {
// c'est plus propre et plus logique que d'entrer à la main les liens dans la pile
// on bénéficie ainsi des vérifications et des tests du robot pour les liens "primaires"
primary = (char *) malloct(primary_len);
if (primary) {
primary[0] = '\0';
} else {
if (!primary) {
printf("PANIC! : Not enough memory [%d]\n", __LINE__);
XH_extuninit;
return 0;
}
htsbuff primarybuff = htsbuff_ptr(primary, primary_len);
while(*a) {
int i;
@@ -687,11 +686,11 @@ int httpmirror(char *url1, httrackp * opt) {
strcatbuff(tempo, "*"); // ajouter un *
}
}
if (type)
strcpybuff(filters[filptr], "+");
else
strcpybuff(filters[filptr], "-");
strcatbuff(filters[filptr], tempo);
{
htsbuff fb = htsbuff_ptr(filters[filptr], HTS_URLMAXSIZE * 2);
htsbuff_cpy(&fb, type ? "+" : "-");
htsbuff_cat(&fb, tempo);
}
filptr++;
/* sanity check */
@@ -726,12 +725,10 @@ int httpmirror(char *url1, httrackp * opt) {
}
url[i++] = '\0';
//strcatbuff(primary,"<PRIMARY=\"");
if (strstr(url, ":/") == NULL)
strcatbuff(primary, "http://");
strcatbuff(primary, url);
//strcatbuff(primary,"\">");
strcatbuff(primary, "\n");
htsbuff_cat(&primarybuff, "http://");
htsbuff_cat(&primarybuff, url);
htsbuff_cat(&primarybuff, "\n");
}
} // while
@@ -762,7 +759,6 @@ int httpmirror(char *url1, httrackp * opt) {
int filelist_ptr = 0;
int n = 0;
char BIGSTK line[HTS_URLMAXSIZE * 2];
char *primary_ptr = primary + strlen(primary);
while(filelist_ptr < filelist_sz) {
int count =
@@ -771,13 +767,10 @@ int httpmirror(char *url1, httrackp * opt) {
if (count && line[0]) {
n++;
if (strstr(line, ":/") == NULL) {
strcpybuff(primary_ptr, "http://");
primary_ptr += strlen(primary_ptr);
htsbuff_cat(&primarybuff, "http://");
}
strcpybuff(primary_ptr, line);
primary_ptr += strlen(primary_ptr);
strcpybuff(primary_ptr, "\n");
primary_ptr += 1;
htsbuff_cat(&primarybuff, line);
htsbuff_cat(&primarybuff, "\n");
}
}
// fclose(fp);
@@ -2453,9 +2446,10 @@ void host_ban(httrackp * opt, int ptr,
// interdire host
assertf((*_FILTERS_PTR) < opt->maxfilter);
if (*_FILTERS_PTR < opt->maxfilter) {
strcpybuff(_FILTERS[*_FILTERS_PTR], "-");
strcatbuff(_FILTERS[*_FILTERS_PTR], host);
strcatbuff(_FILTERS[*_FILTERS_PTR], "/*"); // host/ * interdit
htsbuff fb = htsbuff_ptr(_FILTERS[*_FILTERS_PTR], HTS_URLMAXSIZE * 2);
htsbuff_cpy(&fb, "-");
htsbuff_cat(&fb, host);
htsbuff_cat(&fb, "/*"); // forbid host/*
(*_FILTERS_PTR)++;
}
// oups
@@ -3518,7 +3512,7 @@ char *next_token(char *p, int flag) {
p--;
do {
p++;
if (flag && (*p == '\\')) { // sauter \x ou \"
if (flag && (*p == '\\')) { // skip \x or \"
if (quote) {
char c = '\0';
@@ -3527,20 +3521,14 @@ char *next_token(char *p, int flag) {
else if (*(p + 1) == '"')
c = '"';
if (c) {
char BIGSTK tempo[8192];
tempo[0] = c;
tempo[1] = '\0';
strcatbuff(tempo, p + 2);
strcpybuff(p, tempo);
/* unescape the 2 chars to one, shifting left in place */
*p = c;
memmove(p + 1, p + 2, strlen(p + 2) + 1);
}
}
} else if (*p == 34) { // guillemets (de fin)
char BIGSTK tempo[8192];
tempo[0] = '\0';
strcatbuff(tempo, p + 1);
strcpybuff(p, tempo); /* wipe "" */
} else if (*p == 34) { // closing quote
/* drop the quote, shifting the rest left in place */
memmove(p, p + 1, strlen(p + 1) + 1);
p--;
/* */
quote = !quote;
@@ -3880,7 +3868,7 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
afs.af.adr, afs.save, savename(), tempo);
if (str->localLink
&& str->localLinkSize > (int) strlen(tempo) + 1) {
strcpybuff(str->localLink, tempo);
strlcpybuff(str->localLink, tempo, str->localLinkSize);
}
}
}
@@ -3892,11 +3880,11 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
lien);
if (str->localLink
&& str->localLinkSize > (int) (strlen(afs.af.adr) + strlen(afs.af.fil) + 8)) {
str->localLink[0] = '\0';
htsbuff lb = htsbuff_ptr(str->localLink, str->localLinkSize);
if (!link_has_authority(afs.af.adr))
strcpybuff(str->localLink, "http://");
strcatbuff(str->localLink, afs.af.adr);
strcatbuff(str->localLink, afs.af.fil);
htsbuff_cat(&lb, "http://");
htsbuff_cat(&lb, afs.af.adr);
htsbuff_cat(&lb, afs.af.fil);
}
r = -1;
}

View File

@@ -236,6 +236,55 @@ static void basic_selftests(void) {
}
freet(slots);
}
// next_token(): in-place token scanner. Strips surrounding quotes, unescapes
// \" and \\ when flag is set, and returns a pointer to the token terminator
// (the separating space), or NULL when the token runs to the end of the
// string. Unquoting/unescaping rewrites the string in place by shifting the
// tail left, so the result is never longer than the input -- this block is
// the regression test for that compaction.
{
char tok[64];
// plain token: unchanged, returns a pointer AT the separating space (exact
// position, not just any space -- a strchr-style impl would land elsewhere
// once quotes shift the content)
strcpybuff(tok, "abc def");
{
char *const end = next_token(tok, 0);
assertf(end == tok + 3 && *end == ' ' && strcmp(tok, "abc def") == 0);
}
// surrounding quotes stripped, returns the (post-shift) trailing space
strcpybuff(tok, "\"ab\" cd");
{
char *const end = next_token(tok, 1);
assertf(end == tok + 2 && *end == ' ' && strcmp(tok, "ab cd") == 0);
}
// a space inside quotes does not end the token; end of string returns NULL
strcpybuff(tok, "\"a b\"c");
{
char *const end = next_token(tok, 1);
assertf(end == NULL && strcmp(tok, "a bc") == 0);
}
// \" and \\ are unescaped to literal " and \ in place
strcpybuff(tok, "\"a\\\"b\\\\c\"");
{
char *const end = next_token(tok, 1);
assertf(end == NULL && strcmp(tok, "a\"b\\c") == 0);
}
// unterminated quote: the opening quote is dropped, the rest survives, and
// the scan runs to the NUL (returns NULL)
strcpybuff(tok, "\"ab");
{
char *const end = next_token(tok, 1);
assertf(end == NULL && strcmp(tok, "ab") == 0);
}
// trailing lone backslash in a quote: *(p+1) is the NUL, not an escape, so
// the backslash is kept intact (and there is no over-read past the NUL)
strcpybuff(tok, "\"a\\");
{
char *const end = next_token(tok, 1);
assertf(end == NULL && strcmp(tok, "a\\") == 0);
}
}
}
/* Self-tests for the htssafe.h bounded string ops (driven by httrack -#8).