Compare commits

..

1 Commits

Author SHA1 Message Date
Xavier Roche
02180549f6 Don't read an uninitialized buffer on an empty Content-Type
treathead() parses the Content-Type value with sscanf("%s") into a local
`tempo` buffer, then calls strlen(tempo) and stores the result. A response
whose Content-Type header has an empty or whitespace-only value yields no
token: sscanf leaves `tempo` uninitialized, so strlen reads uninitialized
stack and can over-read past the buffer. A hostile server triggers this with
a bare `Content-Type:` line.

Guard on sscanf's return: adopt the value, and mark the type as server-given,
only when a token was actually read. An empty value now falls back to the
default type with contenttype_given left false, i.e. it is treated like a
missing header and the URL extension is kept -- which is also the correct
naming behavior.

Found while reviewing #409, which added contenttype_given right beside this
parse; the bug itself predates it. tests/17_local-empty-ct.test exercises the
empty-Content-Type path, and the ASan/UBSan CI job is what catches the
uninitialized read.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Xavier Roche <roche@httrack.com>
2026-06-20 20:48:55 +02:00
4 changed files with 30 additions and 9 deletions

View File

@@ -1591,13 +1591,17 @@ void treathead(t_cookie * cookie, const char *adr, const char *fil, htsblk * ret
}
}
}
sscanf(rcvd + p, "%s", tempo);
if (strlen(tempo) < sizeof(retour->contenttype) - 2) // pas trop long!!
strcpybuff(retour->contenttype, tempo);
else
strcpybuff(retour->contenttype, "application/octet-stream-unknown"); // erreur
retour->contenttype_given =
HTS_TRUE; /* the server declared a Content-Type */
// An empty/whitespace Content-Type value yields no token; keep the
// default type and the "not given" flag instead of reading uninit tempo.
if (sscanf(rcvd + p, "%s", tempo) == 1) {
if (strlen(tempo) < sizeof(retour->contenttype) - 2) // pas trop long!!
strcpybuff(retour->contenttype, tempo);
else
strcpybuff(retour->contenttype,
"application/octet-stream-unknown"); // erreur
retour->contenttype_given =
HTS_TRUE; /* server declared a usable type */
}
}
} else if ((p = strfield(rcvd, "Content-Range:")) != 0) {
// Content-Range: bytes 0-70870/70871

View File

@@ -0,0 +1,12 @@
#!/bin/bash
#
# Regression test for a response whose "Content-Type:" header has an empty
# value. Such a header carries no usable type, so the mirrored file must keep
# the extension from its URL rather than being named from a buffer that was
# never filled in. CI also runs this crawl under ASan/UBSan, which is what
# flags the uninitialized read that this test guards against.

# Fall back to the parent directory when the caller did not set top_srcdir
# (or set it to the empty string).
top_srcdir="${top_srcdir:-..}"

# Arguments for the shared crawl driver: no errors tolerated, emptyct.png must
# be reported as found and emptyct.html must not be, then the httrack command
# line that crawls the types index page.
crawl_args=(
  --errors 0
  --found 'types/emptyct.png'
  --not-found 'types/emptyct.html'
  httrack 'BASEURL/types/index.html'
)

# Last command in the script, so its exit status is the test result.
bash "$top_srcdir/tests/local-crawl.sh" "${crawl_args[@]}"

View File

@@ -53,6 +53,7 @@ TESTS = \
13_local-cookies.test \
14_local-https.test \
15_local-types.test \
16_local-assume.test
16_local-assume.test \
17_local-empty-ct.test
CLEANFILES = check-network_sh.cache

View File

@@ -135,13 +135,15 @@ class Handler(SimpleHTTPRequestHandler):
FAKE_PNG = b"\x89PNG\r\n\x1a\n" + b"\x00" * 64
FAKE_PDF = b"%PDF-1.4\n" + b"\x00" * 64
# path -> (body, content_type); content_type None means no header at all.
# path -> (body, content_type); None sends no header, "" sends an empty
# Content-Type value (no usable type, must be treated like None).
TYPE_MATRIX = {
"/types/control.php": (b"<html><body>control</body></html>", "text/html"),
"/types/photo.png": (FAKE_PNG, "image/png"),
"/types/doc.pdf": (FAKE_PDF, "application/pdf"),
"/types/notype.png": (FAKE_PNG, None),
"/types/notype.pdf": (FAKE_PDF, None),
"/types/emptyct.png": (FAKE_PNG, ""),
"/types/lie.png": (FAKE_PNG, "text/html"),
"/types/report.pdf": (b"<html><body>real page</body></html>", "text/html"),
"/types/page.htm": (b"<html><body>htm page</body></html>", "text/html"),
@@ -158,6 +160,7 @@ class Handler(SimpleHTTPRequestHandler):
'\t<a href="doc.pdf">doc</a>\n'
'\t<img src="notype.png" />\n'
'\t<a href="notype.pdf">notypepdf</a>\n'
'\t<img src="emptyct.png" />\n'
'\t<img src="lie.png" />\n'
'\t<a href="report.pdf">report</a>\n'
'\t<a href="page.htm">htm</a>\n'
@@ -184,6 +187,7 @@ class Handler(SimpleHTTPRequestHandler):
"/types/doc.pdf": route_types,
"/types/notype.png": route_types,
"/types/notype.pdf": route_types,
"/types/emptyct.png": route_types,
"/types/lie.png": route_types,
"/types/report.pdf": route_types,
"/types/page.htm": route_types,