mirror of
https://github.com/xroche/httrack.git
synced 2026-06-25 11:37:28 +03:00
The test scripts mostly ran with no error flags, so a failing command in
the middle would be ignored and the script would limp on to a misleading
result. Turn on strict mode everywhere, guarding the spots that legitimately
expect a non-zero exit:
- the htssafe overflow probes (-#8) deliberately abort, and the strsafe/
cmdline crawls capture an exit code to assert on, so those are run with
`|| true` / `|| rc=$?` rather than letting set -e kill the script first;
- the parser fixture crawl ignores httrack's own exit (it checks the mirrored
files), so it keeps `|| true`;
- 02_update-cache replaced `find ... | grep -q .` with a `-print -quit`
command substitution: under pipefail grep -q can close the pipe early and
leave find killed by SIGPIPE, which would spuriously fail the check for a file that does exist;
- 12_crawl_https guards $HTTPS_SUPPORT with `${...:-}` for set -u.
02_manpage-regen and 01_engine-cache stay on `set -eu` (no pipefail): both are
run via $(BASH), which can be a plain POSIX /bin/sh where `set -o pipefail`
does not exist.
shellcheck clean; make check: 15 PASS, 7 SKIP (offline).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Xavier Roche <roche@httrack.com>
63 lines
1.9 KiB
Bash
Executable File
63 lines
1.9 KiB
Bash
Executable File
#!/bin/bash
#
# Update path: re-mirroring a site reads the cache (cache_readex) to decide what
# is up to date -- a path the one-shot crawl tests never exercise. Offline
# (file://), so it always runs.
#
# 1. mirror, then re-mirror unchanged -> the cache-read pass must complete clean
#    (guards against a crash/abort/error in cache_readex).
# 2. change a source file, re-mirror -> the update must pick up the new content
#    (guards the update decision that reads the cached metadata).

# Strict mode: abort on any unguarded failure, unset variable, or failed
# pipeline stage.
set -euo pipefail

# Scratch directories: $site holds the source pages, $out receives the mirror.
# The EXIT trap is installed right after the first mktemp and widened after the
# second, so a failing second mktemp cannot leak the first directory.
site=$(mktemp -d)
trap 'rm -rf "$site"' EXIT
out=$(mktemp -d)
trap 'rm -rf "$site" "$out"' EXIT

# Source fixture: an index page linking to a.html and sub/b.html.
# The delimiter is quoted because the HTML is literal (no expansion wanted).
cat >"$site/index.html" <<'EOF'
<a href="a.html">a</a> <a href="sub/b.html">b</a>
EOF
echo 'OLDCONTENT' >"$site/a.html"
mkdir -p "$site/sub"
echo '<p>bbb</p>' >"$site/sub/b.html"

# Entry point handed to httrack: the fixture's index page as a file:// URL.
url="file://$site/index.html"
|
|
|
|
# count Error: lines in the log (grep -c exits 1 on zero matches: guard it)
|
|
errors() {
    # Case-insensitive count of log lines that look like
    # "<digits/colons><whitespace>Error:". With no match grep still prints 0
    # but exits 1, so that status is discarded to keep strict mode happy.
    grep -c -i -E '^[0-9:]*[[:space:]]Error:' "$out/hts-log.txt" || :
}
|
|
|
|
# 1. first crawl into the empty output directory: this run has to create the
#    cache archive that the later update passes rely on
httrack "$url" -O "$out" -q -%v0 -r3 >/dev/null 2>&1
if [ ! -e "$out/hts-cache/new.zip" ]; then
    echo "no cache was written" >&2
    exit 1
fi
|
|
|
|
# 2. second crawl with the source untouched: the update reads the cache and
#    has to finish without logging a single Error: line
httrack "$url" -O "$out" -q -%v0 -r3 >/dev/null 2>&1
if [ "$(errors)" != 0 ]; then
    echo "update (unchanged) reported errors" >&2
    exit 1
fi
# each linked page must still be present somewhere under the mirror; find stops
# at the first hit, so any output at all means the page exists
for page in a.html sub/b.html; do
    if [ -z "$(find "$out" -path "*/$page" -print -quit)" ]; then
        echo "missing $page after update" >&2
        exit 1
    fi
done
|
|
|
|
# 3. change a source file: the update must pick up the new content
# NOTE(review): the pause presumably guarantees the rewritten a.html gets a
# different mtime than the first version (old and new bodies have the same
# length) -- confirm against httrack's update check.
sleep 1
echo 'NEWCONTENT' >"$site/a.html"
httrack "$url" -O "$out" -q -%v0 -r3 >/dev/null 2>&1
test "$(errors)" = 0 || {
    echo "update (changed) reported errors" >&2
    exit 1
}
# Resolve the mirrored copy to exactly one path (same -print -quit idiom as the
# presence check in step 2). Feeding find's raw output to grep would hand it an
# empty or multi-line "filename" when zero or several paths match.
mirrored=$(find "$out" -path '*/a.html' -print -quit)
if [ -z "$mirrored" ] || ! grep -q NEWCONTENT -- "$mirrored"; then
    echo "update did not pick up the changed source" >&2
    exit 1
fi
|