Compare commits

...

2 Commits

Author SHA1 Message Date
Xavier Roche
a16a7bdc10 htslib: fix global-buffer-overflow in get_httptype_sized on empty filename
get_httptype_sized() set a = fil + strlen(fil) - 1, then dereferenced *a
in the extension scan before the a > fil bound was checked, so an empty
fil ("") read one byte before the string. istoobig() passes a literal ""
to is_hypertext_mime() whenever it classifies by mime alone (the quota
check in back_checksize), so any octet-stream-ish download hit it. Bound
the loop and the dot test before dereferencing.

Latent (an OOB read of one .rodata byte); surfaced under ASan by the new
22_local-broken-size.test, whose oversize.bin is application/octet-stream.
Adds a direct empty-fil case to the -#7 basic_selftests block as a fast,
deterministic leaf-level regression (it aborts under ASan on the old code).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Xavier Roche <roche@httrack.com>
2026-06-25 10:34:31 +02:00
Xavier Roche
af476dad00 tests: lock --tolerant (-%B) behavior on broken Content-Length (#32/#41)
A response whose Content-Length disagrees with the bytes actually sent
warns "bogus state (broken size)" and is skipped from the cache, so it is
re-fetched and re-warned on every run. --tolerant (-%B) already accepts
such responses; either way the file reaches disk. Pin that contract with a
local-server /size route (declares a length two bytes short of the body)
and a test asserting the warning fires by default and is silenced under
-%B, with the file present in both passes.

Adds --log-found/--log-not-found ERE assertions on hts-log.txt to
local-crawl.sh for the warning checks.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Xavier Roche <roche@httrack.com>
2026-06-25 09:33:42 +02:00
6 changed files with 66 additions and 4 deletions

View File

@@ -353,6 +353,14 @@ static void basic_selftests(void) {
assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype),
"noextfile", 1) == 1);
assertf(strcmp(r.contenttype, "application/octet-stream") == 0);
// empty fil: no extension to scan; must not over-read before the string.
// flag==0 -> 0 (nothing written), flag==1 -> octet-stream.
assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype), "",
0) == 0);
assertf(r.contenttype[0] == '\0');
assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype), "",
1) == 1);
assertf(strcmp(r.contenttype, "application/octet-stream") == 0);
// a user --assume rule with an empty value matches but writes nothing:
// get_userhttptype returns 1 with the buffer empty, so get_httptype_sized
// must still report 0 (callers test the return like the old

View File

@@ -4177,9 +4177,10 @@ HTSEXT_API hts_boolean get_httptype_sized(httrackp *opt, char *s, size_t ssize,
/* Check html -> text/html */
const char *a = fil + strlen(fil) - 1;
while((*a != '.') && (*a != '/') && (a > fil))
/* a < fil when fil is empty: bound before dereferencing */
while ((a > fil) && (*a != '.') && (*a != '/'))
a--;
if (*a == '.' && strlen(a) < 32) {
if (a >= fil && *a == '.' && strlen(a) < 32) {
int j = 0;
a++;

17
tests/22_local-broken-size.test Executable file
View File

@@ -0,0 +1,17 @@
#!/bin/bash
# Issues #32/#41: a Content-Length that disagrees with the body warns "bogus
# state (broken size)" and skips the cache; -%B (tolerant) accepts it.
#
# Two crawls of the same /size route are run below; each must pass on its own.

# Stop at the first failing crawl. Without this the script's exit status is
# only that of the last command, so a failed default pass would be masked by
# a passing -%B pass.
set -e

# Allow running outside the build harness: default to the parent directory.
: "${top_srcdir:=..}"

# Default: warn, but the file is still written.
bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
    --found 'size/oversize.bin' \
    --log-found 'bogus state \(broken size' \
    httrack 'BASEURL/size/index.html'

# -%B (tolerant): no warning, file written.
bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
    --found 'size/oversize.bin' \
    --log-not-found 'bogus state' \
    httrack 'BASEURL/size/index.html' '-%B'

View File

@@ -61,6 +61,7 @@ TESTS = \
18_local-update.test \
19_local-connect-fallback.test \
20_local-resume-loop.test \
21_local-intl-update.test
21_local-intl-update.test \
22_local-broken-size.test
CLEANFILES = check-network_sh.cache

View File

@@ -14,7 +14,9 @@
# Usage:
# bash local-crawl.sh [--tls] [--root DIR] \
# --errors N --files N --found PATH ... --directory PATH ... \
# --log-found REGEX ... --log-not-found REGEX ... \
# httrack BASEURL/some/path [httrack-args...]
# --log-found/--log-not-found grep (ERE) the crawl's hts-log.txt.
set -u
@@ -107,7 +109,7 @@ while test "$pos" -lt "$nargs"; do
audit+=("${args[$pos]}" "${args[$((pos + 1))]}")
pos=$((pos + 1))
;;
--found | --not-found | --directory)
--found | --not-found | --directory | --log-found | --log-not-found)
audit+=("${args[$pos]}" "${args[$((pos + 1))]}")
pos=$((pos + 1))
;;
@@ -257,6 +259,22 @@ while test "$i" -lt "${#audit[@]}"; do
exit 1
fi
;;
--log-found)
i=$((i + 1))
info "checking log matches ${audit[$i]}"
if grep -aqE "${audit[$i]}" "${out}/hts-log.txt"; then result "OK"; else
result "not in log"
exit 1
fi
;;
--log-not-found)
i=$((i + 1))
info "checking log lacks ${audit[$i]}"
if grep -aqE "${audit[$i]}" "${out}/hts-log.txt"; then
result "present in log"
exit 1
else result "OK"; fi
;;
esac
i=$((i + 1))
done

View File

@@ -225,6 +225,21 @@ class Handler(SimpleHTTPRequestHandler):
self.send_header("Content-Length", "0")
self.end_headers()
# broken Content-Length (#32/#41): declared size != bytes sent. httrack
# warns "bogus state (broken size)" and skips the cache unless -%B.
def route_size_index(self):
    """Send the HTML page whose only content is a link to oversize.bin."""
    page = '\t<a href="oversize.bin">over</a>\n'
    self.send_html(page)
def route_size_oversize(self):
    """Reply 200 with a Content-Length two bytes short of the body sent.

    The body is 100 bytes of b"A" while the header announces 98. A HEAD
    request receives the headers only.
    """
    payload = b"A" * 100
    declared = len(payload) - 2  # lie: too short
    self.send_response(200)
    # Header order is kept exactly as before.
    for header, value in (
        ("Content-Type", "application/octet-stream"),
        ("Content-Length", str(declared)),
        ("Connection", "close"),
    ):
        self.send_header(header, value)
    self.end_headers()
    if self.command == "HEAD":
        return
    self.wfile.write(payload)
ROUTES = {
"/cookies/entrance.php": route_entrance,
"/cookies/second.php": route_second,
@@ -248,6 +263,8 @@ class Handler(SimpleHTTPRequestHandler):
"/intl/" + INTL_NAME: route_intl_page,
"/resume/index.html": route_resume_index,
"/resume/blob.txt": route_resume,
"/size/index.html": route_size_index,
"/size/oversize.bin": route_size_oversize,
}
# --- dispatch ----------------------------------------------------------