Compare commits

...

3 Commits

Author SHA1 Message Date
Xavier Roche
055e17b057 Merge pull request #328 from xroche/cli/header-ua-length-152
Raise the user-agent and custom-header length limits
2026-06-14 01:43:31 +02:00
Xavier Roche
d7bb97d697 Merge pull request #329 from xroche/parser/lock-background-image-237
Lock CSS background-image url() rewriting in the parser test
2026-06-14 01:37:51 +02:00
Xavier Roche
d741188980 Raise the user-agent and custom-header length limits
The -F user-agent value was rejected past 126 bytes and the -%X header line
past 256. Both are stored in dynamically grown String buffers, so the caps were
arbitrary. Drop them; every argument is still bounded by the general
per-argument check in htscoremain.c (HTS_CDLMAXSIZE), which lifts the usable
limit to just under 1 KB.

optalias_check copied a long-form option value (--user-agent, --headers, ...)
into a fixed 1000-byte scratch buffer, smaller than that general cap, so a value
of 1000..1023 bytes aborted the process through the guarded-copy overflow check.
Size command and param to HTS_CDLMAXSIZE so the long form matches the cap; an
over-cap value is now refused with the normal "argument too long" message
instead of crashing. Grow the request-head buffer to 16384 for the larger
aggregate header set.

closes #152
2026-06-14 01:32:07 +02:00
6 changed files with 97 additions and 14 deletions

View File

@@ -271,8 +271,11 @@ int optalias_check(int argc, const char *const *argv, int n_arg,
*return_argc = 1;
if (argv[n_arg][0] == '-')
if (argv[n_arg][1] == '-') {
char command[1000];
char param[1000];
/* sized to HTS_CDLMAXSIZE: a long-form option value (--user-agent,
--headers, ...) is copied into param, and the value is bounded by the
general per-argument check in htscoremain.c (HTS_CDLMAXSIZE) */
char command[HTS_CDLMAXSIZE];
char param[HTS_CDLMAXSIZE];
char addcommand[256];
/* */

View File

@@ -1787,10 +1787,6 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
HTS_PANIC_PRINTF("Empty string given");
htsmain_free();
return -1;
} else if (strlen(argv[na]) >= 256) {
HTS_PANIC_PRINTF("Header line string too long");
htsmain_free();
return -1;
}
StringCat(opt->headers, argv[na]);
StringCat(opt->headers, "\r\n"); /* separator */
@@ -2691,11 +2687,6 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
return -1;
} else {
na++;
if (strlen(argv[na]) >= 126) {
HTS_PANIC_PRINTF("User-agent length too long");
htsmain_free();
return -1;
}
StringCopy(opt->user_agent, argv[na]);
if (StringNotEmpty(opt->user_agent))
opt->user_agent_send = 1;

View File

@@ -878,7 +878,7 @@ int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
const char *xsend, const char *adr, const char *fil,
const char *referer_adr, const char *referer_fil,
htsblk * retour) {
char BIGSTK buffer_head_request[8192];
char BIGSTK buffer_head_request[16384];
buff_struct bstr = { buffer_head_request, sizeof(buffer_head_request), 0 };
//int use_11=0; // HTTP 1.1 utilisé

71
tests/01_engine-cmdline.test Executable file
View File

@@ -0,0 +1,71 @@
#!/bin/bash
#
# Offline command-line option tests (no network). The -F user-agent and -%X
# raw-header values used to be rejected past 126 / 256 bytes (#152); they are
# now bounded only by the general per-argument cap (HTS_CDLMAXSIZE). A value up
# to that cap is accepted on both the short (-F, -%X) and long (--user-agent,
# --headers) forms, and an over-cap value is refused cleanly rather than
# overrunning a fixed scratch buffer.
set -u
tmp=$(mktemp -d "${TMPDIR:-/tmp}/httrack_cmdline.XXXXXX") || exit 1
trap 'rm -rf "$tmp"' EXIT HUP INT QUIT PIPE TERM
echo '<html><body>hello</body></html>' >"$tmp/index.html"
# a string of N repeated 'A' characters
nchars() {
printf 'A%.0s' $(seq 1 "$1")
}
# crawl the local fixture with the given extra args; leaves the exit status in RC
run() {
local out="$1"
shift
rm -rf "$out"
mkdir -p "$out"
httrack "file://$tmp/index.html" -O "$out" --quiet -n "$@" >"$out/.log" 2>&1
RC=$?
}
# assert the value was accepted: clean exit and the fixture was mirrored
accepted() {
{ test "$RC" -eq 0 && test -n "$(find "$1" -type f -path '*/index.html' -print -quit)"; } ||
! echo "FAIL: $2 (exit $RC)" || exit 1
}
# assert the value was refused cleanly: a normal error exit, never a crash
# (a SIGABRT from an overflowed scratch buffer would surface as exit 134)
refused() {
{ test "$RC" -ne 0 && test "$RC" -ne 134; } ||
! echo "FAIL: $1 (exit $RC)" || exit 1
}
# a value past the old 126/256 caps but within the cap is accepted, on both the
# short and long form of each option
long=$(nchars 900)
run "$tmp/ua-s" -F "$long"
accepted "$tmp/ua-s" "#152: long -F user-agent rejected or crashed"
run "$tmp/ua-l" --user-agent "$long"
accepted "$tmp/ua-l" "#152: long --user-agent rejected or crashed"
run "$tmp/hd-s" "-%X" "X-A: $long"
accepted "$tmp/hd-s" "#152: long -%X header rejected or crashed"
run "$tmp/hd-l" --headers "X-B: $long"
accepted "$tmp/hd-l" "#152: long --headers rejected or crashed"
# a value just under the cap (>1000) must not overflow the long-form alias
# scratch buffer (the param[] copy in optalias_check)
run "$tmp/ua-n" --user-agent "$(nchars 1010)"
accepted "$tmp/ua-n" "#152: near-cap --user-agent overflowed the param[] buffer"
# a value over the cap is refused cleanly (graceful error, not a SIGABRT), on
# both forms
over=$(nchars 1100)
run "$tmp/ov-s" -F "$over"
refused "#152: over-cap -F not refused cleanly"
run "$tmp/ov-l" --user-agent "$over"
refused "#152: over-cap --user-agent not refused cleanly"
exit 0

View File

@@ -9,6 +9,24 @@ TESTS_ENVIRONMENT += HTTPS_SUPPORT=$(HTTPS_SUPPORT)
TESTS_ENVIRONMENT += top_srcdir=$(top_srcdir)
TEST_EXTENSIONS = .test
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-entities.test 01_engine-filter.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-mime.test 01_engine-parse.test 01_engine-simplify.test 02_manpage-regen.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
TESTS = \
00_runnable.test \
01_engine-charset.test \
01_engine-cmdline.test \
01_engine-entities.test \
01_engine-filter.test \
01_engine-hashtable.test \
01_engine-idna.test \
01_engine-mime.test \
01_engine-parse.test \
01_engine-simplify.test \
02_manpage-regen.test \
10_crawl-simple.test \
11_crawl-cookies.test \
11_crawl-idna.test \
11_crawl-international.test \
11_crawl-longurl.test \
11_crawl-parsing.test \
12_crawl_https.test
CLEANFILES = check-network_sh.cache

View File

@@ -472,7 +472,7 @@ TESTS_ENVIRONMENT = PATH=$(top_builddir)/src$(PATH_SEPARATOR)$$PATH \
ONLINE_UNIT_TESTS=$(ONLINE_UNIT_TESTS) \
HTTPS_SUPPORT=$(HTTPS_SUPPORT) top_srcdir=$(top_srcdir)
TEST_EXTENSIONS = .test
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-entities.test 01_engine-filter.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-mime.test 01_engine-parse.test 01_engine-simplify.test 02_manpage-regen.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-cmdline.test 01_engine-entities.test 01_engine-filter.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-mime.test 01_engine-parse.test 01_engine-simplify.test 02_manpage-regen.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
CLEANFILES = check-network_sh.cache
all: all-am