Compare commits

...

1 Commits

Author SHA1 Message Date
Xavier Roche
b0d131f084 Harden three parser buffer copies against overflow
Width-less sscanf("%s %s %s") parsed a request line into fixed method/url/
protocol buffers in the catch-URL proxy (htscatchurl.c) and the postfile
sender (htslib.c), so a long token could overflow the smallest one
(method[32] / [256]). Add field widths matching each buffer.

The makeindex title decoder copied the UTF-8-expanded title into s[] with a
raw strcpy; a charset that expands past the buffer would overflow it.
Truncate with snprintf and free through freet.

The entity/URL unescapers guarded the copy with `j + 1 > max`, which reserves
nothing for the trailing NUL: an input filling dest to exactly `max` chars
wrote dest[max], one byte past a max-sized buffer. Every production caller
passes max = buffer capacity (strlen+1 or sizeof), so this was a real 1-byte
OOB, latent only because output is usually shorter than the buffer. Use
`>=`. The st_entities self-test passed max = strlen (relying on strdup's
extra byte); correct it to strlen+1 to match the callers.

Self-test unescape-bounds exercises the exact-fill boundary on both
unescapers; it aborts under ASan on the pre-fix guard.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
Signed-off-by: Xavier Roche <roche@httrack.com>
2026-07-02 20:46:30 +02:00
7 changed files with 46 additions and 14 deletions

View File

@@ -175,7 +175,9 @@ HTSEXT_API hts_boolean catch_url(T_SOC soc, char *url, char *method,
//
socinput(soc, line, 1000);
if (strnotempty(line)) {
if (sscanf(line, "%s %s %s", method, url, protocol) == 3) {
/* widths bound the caller buffers: method[32], url[HTS_URLMAXSIZE*2],
protocol[256] */
if (sscanf(line, "%31s %2047s %255s", method, url, protocol) == 3) {
lien_adrfil af;
// méthode en majuscule

View File

@@ -190,9 +190,9 @@ int hts_unescapeEntitiesWithCharset(const char *src, char *dest, const size_t ma
}
}
}
/* copy */
if (j + 1 > max) {
/* reserve one byte for the trailing NUL written after the loop */
if (j + 1 >= max) {
/* overflow */
return -1;
}
@@ -314,8 +314,8 @@ int hts_unescapeUrlSpecial(const char *src, char *dest, const size_t max,
}
}
/* Check for overflow */
if (j + 1 > max) {
/* reserve one byte for the trailing NUL written after the loop */
if (j + 1 >= max) {
return -1;
}

View File

@@ -1149,7 +1149,8 @@ int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
char BIGSTK protocol[256], url[HTS_URLMAXSIZE * 2], method[256];
linput(fp, line, 1000);
if (sscanf(line, "%s %s %s", method, url, protocol) == 3) {
/* widths bound method[256], url[HTS_URLMAXSIZE*2], protocol[256] */
if (sscanf(line, "%255s %2047s %255s", method, url, protocol) == 3) {
size_t ret;
// selon que l'on a ou pas un proxy
if (retour->req.proxy.active) {

View File

@@ -604,13 +604,14 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
// Decode title with encoding
if (str->page_charset_ != NULL
&& *str->page_charset_ != '\0') {
char *const sUtf =
hts_convertStringToUTF8(s, strlen(s), str->page_charset_);
if (str->page_charset_ != NULL &&
*str->page_charset_ != '\0') {
char *sUtf = hts_convertStringToUTF8(
s, strlen(s), str->page_charset_);
if (sUtf != NULL) {
strcpy(s, sUtf);
free(sUtf);
/* UTF-8 can expand past s[]; truncate to fit */
snprintf(s, sizeof(s), "%s", sUtf);
freet(sUtf);
}
}

View File

@@ -708,7 +708,8 @@ static int st_entities(httrackp *opt, int argc, char **argv) {
}
s = strdupt(argv[0]);
enc = argc >= 2 ? argv[1] : "UTF-8";
if (s != NULL && hts_unescapeEntitiesWithCharset(s, s, strlen(s), enc) == 0) {
if (s != NULL &&
hts_unescapeEntitiesWithCharset(s, s, strlen(s) + 1, enc) == 0) {
printf("%s\n", s);
freet(s);
} else {
@@ -717,6 +718,23 @@ static int st_entities(httrackp *opt, int argc, char **argv) {
return 0;
}
/* The unescapers must reserve one byte for the trailing NUL: a 'max'-byte
dest holding 'max' output chars pre-fix wrote dest[max] (1-byte OOB, caught
by ASan). Both unescapeEntities and unescapeUrl share the guard. */
static int st_unescape_bounds(httrackp *opt, int argc, char **argv) {
char dest[4];
(void) opt;
(void) argc;
(void) argv;
assertf(hts_unescapeEntities("abcd", dest, sizeof(dest)) == -1);
assertf(hts_unescapeUrl("abcd", dest, sizeof(dest)) == -1);
assertf(hts_unescapeEntities("abc", dest, sizeof(dest)) == 0);
assertf(strcmp(dest, "abc") == 0);
printf("unescape-bounds self-test OK\n");
return 0;
}
static int st_hashtable(httrackp *opt, int argc, char **argv) {
char *snum;
unsigned long count = 0;
@@ -1799,6 +1817,8 @@ static const struct selftest_entry {
{"idna-decode", "<host>", "decode an IDNA/punycode hostname",
st_idna_decode},
{"entities", "<string> [encoding]", "unescape HTML entities", st_entities},
{"unescape-bounds", "", "unescapers reserve the NUL byte (no 1-byte OOB)",
st_unescape_bounds},
{"hashtable", "<count|file>", "coucal hashtable stress test", st_hashtable},
{"strsafe", "[overflow|overflow-buff [str]]", "bounded string-op self-test",
st_strsafe},

View File

@@ -0,0 +1,7 @@
#!/bin/bash
#
set -euo pipefail
# Entity/URL unescapers reserve one byte for the trailing NUL (no 1-byte OOB).
httrack -O /dev/null -#test=unescape-bounds run | grep -q "unescape-bounds self-test OK"

View File

@@ -54,6 +54,7 @@ TESTS = \
01_engine-stripquery.test \
01_engine-strsafe.test \
01_engine-urlhack.test \
01_engine-unescape-bounds.test \
01_engine-useragent.test \
01_zlib-acceptencoding.test \
01_zlib-cache.test \