mirror of
https://github.com/xroche/httrack.git
synced 2026-06-15 06:43:34 +03:00
Compare commits
1 Commits
parser/loc
...
cli/header
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d741188980 |
@@ -271,8 +271,11 @@ int optalias_check(int argc, const char *const *argv, int n_arg,
|
||||
*return_argc = 1;
|
||||
if (argv[n_arg][0] == '-')
|
||||
if (argv[n_arg][1] == '-') {
|
||||
char command[1000];
|
||||
char param[1000];
|
||||
/* sized to HTS_CDLMAXSIZE: a long-form option value (--user-agent,
|
||||
--headers, ...) is copied into param, and the value is bounded by the
|
||||
general per-argument check in htscoremain.c (HTS_CDLMAXSIZE) */
|
||||
char command[HTS_CDLMAXSIZE];
|
||||
char param[HTS_CDLMAXSIZE];
|
||||
char addcommand[256];
|
||||
|
||||
/* */
|
||||
|
||||
@@ -1787,10 +1787,6 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
HTS_PANIC_PRINTF("Empty string given");
|
||||
htsmain_free();
|
||||
return -1;
|
||||
} else if (strlen(argv[na]) >= 256) {
|
||||
HTS_PANIC_PRINTF("Header line string too long");
|
||||
htsmain_free();
|
||||
return -1;
|
||||
}
|
||||
StringCat(opt->headers, argv[na]);
|
||||
StringCat(opt->headers, "\r\n"); /* separator */
|
||||
@@ -2691,11 +2687,6 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
return -1;
|
||||
} else {
|
||||
na++;
|
||||
if (strlen(argv[na]) >= 126) {
|
||||
HTS_PANIC_PRINTF("User-agent length too long");
|
||||
htsmain_free();
|
||||
return -1;
|
||||
}
|
||||
StringCopy(opt->user_agent, argv[na]);
|
||||
if (StringNotEmpty(opt->user_agent))
|
||||
opt->user_agent_send = 1;
|
||||
|
||||
@@ -878,7 +878,7 @@ int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
|
||||
const char *xsend, const char *adr, const char *fil,
|
||||
const char *referer_adr, const char *referer_fil,
|
||||
htsblk * retour) {
|
||||
char BIGSTK buffer_head_request[8192];
|
||||
char BIGSTK buffer_head_request[16384];
|
||||
buff_struct bstr = { buffer_head_request, sizeof(buffer_head_request), 0 };
|
||||
|
||||
//int use_11=0; // HTTP 1.1 utilisé
|
||||
|
||||
71
tests/01_engine-cmdline.test
Executable file
71
tests/01_engine-cmdline.test
Executable file
@@ -0,0 +1,71 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
# Offline command-line option tests (no network). The -F user-agent and -%X
|
||||
# raw-header values used to be rejected past 126 / 256 bytes (#152); they are
|
||||
# now bounded only by the general per-argument cap (HTS_CDLMAXSIZE). A value up
|
||||
# to that cap is accepted on both the short (-F, -%X) and long (--user-agent,
|
||||
# --headers) forms, and an over-cap value is refused cleanly rather than
|
||||
# overrunning a fixed scratch buffer.
|
||||
|
||||
set -u
|
||||
|
||||
tmp=$(mktemp -d "${TMPDIR:-/tmp}/httrack_cmdline.XXXXXX") || exit 1
|
||||
trap 'rm -rf "$tmp"' EXIT HUP INT QUIT PIPE TERM
|
||||
|
||||
echo '<html><body>hello</body></html>' >"$tmp/index.html"
|
||||
|
||||
# a string of N repeated 'A' characters
|
||||
nchars() {
|
||||
printf 'A%.0s' $(seq 1 "$1")
|
||||
}
|
||||
|
||||
# crawl the local fixture with the given extra args; leaves the exit status in RC
|
||||
run() {
|
||||
local out="$1"
|
||||
shift
|
||||
rm -rf "$out"
|
||||
mkdir -p "$out"
|
||||
httrack "file://$tmp/index.html" -O "$out" --quiet -n "$@" >"$out/.log" 2>&1
|
||||
RC=$?
|
||||
}
|
||||
|
||||
# assert the value was accepted: clean exit and the fixture was mirrored
|
||||
accepted() {
|
||||
{ test "$RC" -eq 0 && test -n "$(find "$1" -type f -path '*/index.html' -print -quit)"; } ||
|
||||
! echo "FAIL: $2 (exit $RC)" || exit 1
|
||||
}
|
||||
|
||||
# assert the value was refused cleanly: a normal error exit, never a crash
|
||||
# (a SIGABRT from an overflowed scratch buffer would surface as exit 134)
|
||||
refused() {
|
||||
{ test "$RC" -ne 0 && test "$RC" -ne 134; } ||
|
||||
! echo "FAIL: $1 (exit $RC)" || exit 1
|
||||
}
|
||||
|
||||
# a value past the old 126/256 caps but within the cap is accepted, on both the
|
||||
# short and long form of each option
|
||||
long=$(nchars 900)
|
||||
run "$tmp/ua-s" -F "$long"
|
||||
accepted "$tmp/ua-s" "#152: long -F user-agent rejected or crashed"
|
||||
run "$tmp/ua-l" --user-agent "$long"
|
||||
accepted "$tmp/ua-l" "#152: long --user-agent rejected or crashed"
|
||||
run "$tmp/hd-s" "-%X" "X-A: $long"
|
||||
accepted "$tmp/hd-s" "#152: long -%X header rejected or crashed"
|
||||
run "$tmp/hd-l" --headers "X-B: $long"
|
||||
accepted "$tmp/hd-l" "#152: long --headers rejected or crashed"
|
||||
|
||||
# a value just under the cap (>1000) must not overflow the long-form alias
|
||||
# scratch buffer (the param[] copy in optalias_check)
|
||||
run "$tmp/ua-n" --user-agent "$(nchars 1010)"
|
||||
accepted "$tmp/ua-n" "#152: near-cap --user-agent overflowed the param[] buffer"
|
||||
|
||||
# a value over the cap is refused cleanly (graceful error, not a SIGABRT), on
|
||||
# both forms
|
||||
over=$(nchars 1100)
|
||||
run "$tmp/ov-s" -F "$over"
|
||||
refused "#152: over-cap -F not refused cleanly"
|
||||
run "$tmp/ov-l" --user-agent "$over"
|
||||
refused "#152: over-cap --user-agent not refused cleanly"
|
||||
|
||||
exit 0
|
||||
@@ -99,25 +99,17 @@ grep -Eq 'srcset="j\.gif 2x"' "$saved" ||
|
||||
! grep -Eq 'srcset="[^"]*file://' "$saved" ||
|
||||
! echo "FAIL: a file:// URL survived inside a rewritten srcset attribute" || exit 1
|
||||
|
||||
# xlink:href (#298) and CSS background-image (#237): detected and rewritten to
|
||||
# local. background-image is covered in both an external <style> block and an
|
||||
# inline style attribute, with the URL unquoted, double-quoted and single-quoted
|
||||
# (the quote style is preserved on rewrite). No-detect attributes (title, alt,
|
||||
# ...) are left untouched. Asserted by rewrite (deterministic), not download.
|
||||
# data-* (#201/#203) is omitted: its detection is currently nondeterministic and
|
||||
# can't be locked yet.
|
||||
# xlink:href (#298) and inline background-image (#237): detected and rewritten
|
||||
# to local; no-detect attributes (title, alt, ...) left untouched. Asserted by
|
||||
# rewrite (deterministic), not download. data-* (#201/#203) is omitted: its
|
||||
# detection is currently nondeterministic and can't be locked yet.
|
||||
site2="$tmp/attrs"
|
||||
mkdir -p "$site2"
|
||||
for f in xl ibg ibgs cex cexd cexs tt; do gif "$site2/$f.gif"; done
|
||||
for f in xl ibg tt; do gif "$site2/$f.gif"; done
|
||||
cat >"$site2/index.html" <<EOF
|
||||
<html><head><style>
|
||||
.a { background-image: url(file://$site2/cex.gif); }
|
||||
.b { background-image: url("file://$site2/cexd.gif"); }
|
||||
.c { background-image: url('file://$site2/cexs.gif'); }
|
||||
</style></head><body>
|
||||
<html><body>
|
||||
<a xlink:href="file://$site2/xl.gif">xlink:href (#298)</a>
|
||||
<div style="background-image:url(file://$site2/ibg.gif)"></div>
|
||||
<div style="background-image:url('file://$site2/ibgs.gif')"></div>
|
||||
<span title="file://$site2/tt.gif">excluded attribute</span>
|
||||
</body></html>
|
||||
EOF
|
||||
@@ -129,24 +121,8 @@ test -n "$saved2" || ! echo "FAIL: saved attrs page not found" || exit 1
|
||||
# detected attributes: the absolute URL is rewritten to a local link
|
||||
grep -Eq 'xlink:href="xl\.gif"' "$saved2" ||
|
||||
! echo "FAIL #298: xlink:href not detected/rewritten" || exit 1
|
||||
|
||||
# #237 external <style> block, each quoting form, quote style preserved
|
||||
grep -Eq 'url\(cex\.gif\)' "$saved2" ||
|
||||
! echo "FAIL #237: unquoted background-image in <style> not rewritten" || exit 1
|
||||
grep -Eq 'url\("cexd\.gif"\)' "$saved2" ||
|
||||
! echo "FAIL #237: double-quoted background-image in <style> not rewritten" || exit 1
|
||||
grep -Eq "url\('cexs\.gif'\)" "$saved2" ||
|
||||
! echo "FAIL #237: single-quoted background-image in <style> not rewritten" || exit 1
|
||||
|
||||
# #237 inline style attribute, unquoted and single-quoted url()
|
||||
grep -Eq 'style="background-image:url\(ibg\.gif\)"' "$saved2" ||
|
||||
! echo "FAIL #237: inline unquoted background-image not rewritten" || exit 1
|
||||
grep -Eq "style=\"background-image:url\('ibgs\.gif'\)\"" "$saved2" ||
|
||||
! echo "FAIL #237: inline single-quoted background-image not rewritten" || exit 1
|
||||
|
||||
# no file:// URL survived inside any rewritten background-image
|
||||
! grep -Eq 'background-image:[^;"]*file://' "$saved2" ||
|
||||
! echo "FAIL #237: a file:// URL survived inside a rewritten background-image" || exit 1
|
||||
! echo "FAIL #237: inline background-image url() not detected/rewritten" || exit 1
|
||||
|
||||
# excluded attribute: title is on the no-detect list, so its value is left as-is
|
||||
grep -q 'title="file://' "$saved2" ||
|
||||
|
||||
@@ -9,6 +9,24 @@ TESTS_ENVIRONMENT += HTTPS_SUPPORT=$(HTTPS_SUPPORT)
|
||||
TESTS_ENVIRONMENT += top_srcdir=$(top_srcdir)
|
||||
|
||||
TEST_EXTENSIONS = .test
|
||||
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-entities.test 01_engine-filter.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-mime.test 01_engine-parse.test 01_engine-simplify.test 02_manpage-regen.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
|
||||
TESTS = \
|
||||
00_runnable.test \
|
||||
01_engine-charset.test \
|
||||
01_engine-cmdline.test \
|
||||
01_engine-entities.test \
|
||||
01_engine-filter.test \
|
||||
01_engine-hashtable.test \
|
||||
01_engine-idna.test \
|
||||
01_engine-mime.test \
|
||||
01_engine-parse.test \
|
||||
01_engine-simplify.test \
|
||||
02_manpage-regen.test \
|
||||
10_crawl-simple.test \
|
||||
11_crawl-cookies.test \
|
||||
11_crawl-idna.test \
|
||||
11_crawl-international.test \
|
||||
11_crawl-longurl.test \
|
||||
11_crawl-parsing.test \
|
||||
12_crawl_https.test
|
||||
|
||||
CLEANFILES = check-network_sh.cache
|
||||
|
||||
@@ -472,7 +472,7 @@ TESTS_ENVIRONMENT = PATH=$(top_builddir)/src$(PATH_SEPARATOR)$$PATH \
|
||||
ONLINE_UNIT_TESTS=$(ONLINE_UNIT_TESTS) \
|
||||
HTTPS_SUPPORT=$(HTTPS_SUPPORT) top_srcdir=$(top_srcdir)
|
||||
TEST_EXTENSIONS = .test
|
||||
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-entities.test 01_engine-filter.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-mime.test 01_engine-parse.test 01_engine-simplify.test 02_manpage-regen.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
|
||||
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-cmdline.test 01_engine-entities.test 01_engine-filter.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-mime.test 01_engine-parse.test 01_engine-simplify.test 02_manpage-regen.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
|
||||
CLEANFILES = check-network_sh.cache
|
||||
all: all-am
|
||||
|
||||
|
||||
Reference in New Issue
Block a user