Compare commits

...

2 Commits

Author SHA1 Message Date
Xavier Roche
c177923fa1 htscoremain/htsalias: share the cmdl_* argv-block macros from htsalias.h
cmdl_room/cmdl_add/cmdl_ins were copy-pasted between htscoremain.c (the CLI
parser) and htsalias.c (config-file alias expansion), tagged "COPY OF cmdl_ins
in htscoremain.c". The copies had already drifted: htscoremain advanced the
pack offset by strlen+2, htsalias by strlen+1. Both are correct (a token plus
its NUL is L+1 bytes; +2 just leaves a one-byte gap), so the argv content was
identical either way, but two definitions of the same thing is one too many.

Move all three into htsalias.h (internal, gated by HTS_INTERNAL_BYTECODE,
already included by both translation units) and unify on the tight +1. This
only shrinks the inter-token gap in htscoremain's x_argvblk; every argv[] entry
is still an independently NUL-terminated string read through its own pointer,
so behavior is unchanged and the +32768 slack is untouched.

Adds 01_engine-doitlog.test for the doit.log reprise path, which drives
htscoremain's cmdl_ins (re-running httrack with no url re-inserts each recorded
argument) and had no coverage: 02_update-cache always passes a url, and
01_engine-rcfile exercises only the htsalias.c side. The test mirrors a file://
fixture, re-runs with no url, and asserts the reprise re-mirrors cleanly and
re-crawls the inserted url after a source change. Teeth-checked: dropping the
+1 makes the inserted tokens run together and the test fails on the resulting
crawl error.

make check: 16 PASS, 7 SKIP (offline). shellcheck clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Xavier Roche <roche@httrack.com>
2026-06-17 06:55:52 +02:00
Xavier Roche
7091f85104 Merge pull request #379 from xroche/tests/strict-mode
tests: run the test scripts under strict mode (set -euo pipefail)
2026-06-17 06:22:09 +02:00
5 changed files with 121 additions and 44 deletions

View File

@@ -41,25 +41,6 @@ Please visit our Website: http://www.httrack.com
#define _NOT_NULL(a) ( (a!=NULL) ? (a) : "" )
// COPY OF cmdl_ins in htscoremain.c
/* Bytes left in x_argvblk from offset ptr. The offset can in principle outrun
the block (alias/doit.log expansion), so the copy aborts cleanly instead of
the subtraction wrapping to a huge unbounded size. */
#define cmdl_room(bufsize, ptr) \
((ptr) < (size_t) (bufsize) ? (size_t) (bufsize) - (ptr) : 0)
// Insert a command in the argc/argv (buff has total capacity bufsize)
#define cmdl_ins(token, argc, argv, buff, bufsize, ptr) \
{ \
int i; \
for (i = argc; i > 0; i--) \
argv[i] = argv[i - 1]; \
} \
argv[0] = (buff + ptr); \
strlcpybuff(argv[0], token, cmdl_room(bufsize, ptr)); \
ptr += (int) (strlen(argv[0]) + 1); \
argc++
// END OF COPY OF cmdl_ins in htscoremain.c
/*
Aliases for command-line and config file definitions
These definitions can be used:

View File

@@ -52,6 +52,34 @@ const char *opttype_value(int p);
const char *opthelp_value(int p);
const char *hts_gethome(void);
void expand_home(String * str);
/* Command-line argv-block builders, shared by htscoremain.c (the CLI parser)
and htsalias.c (config-file alias expansion). Tokens are packed back-to-back
into x_argvblk (total capacity bufsize); each argv[] entry points into the
block. cmdl_room bounds every copy: the running offset ptr can outrun the
block (alias / doit.log expansion outpacing the +32768 slack), so it yields
0 rather than a wrapped size_t and the bounded copy aborts cleanly. */
#define cmdl_room(bufsize, ptr) \
((ptr) < (size_t) (bufsize) ? (size_t) (bufsize) - (ptr) : 0)
/* Append a token as a new argv[argc]. */
#define cmdl_add(token, argc, argv, buff, bufsize, ptr) \
argv[argc] = (buff + ptr); \
strlcpybuff(argv[argc], token, cmdl_room(bufsize, ptr)); \
ptr += (int) (strlen(argv[argc]) + 1); \
argc++
/* Insert a token at argv[0], shifting the existing argc entries up by one. */
#define cmdl_ins(token, argc, argv, buff, bufsize, ptr) \
{ \
int i; \
for (i = argc; i > 0; i--) \
argv[i] = argv[i - 1]; \
} \
argv[0] = (buff + ptr); \
strlcpybuff(argv[0], token, cmdl_room(bufsize, ptr)); \
ptr += (int) (strlen(argv[0]) + 1); \
argc++
#endif
#endif

View File

@@ -69,31 +69,6 @@ Please visit our Website: http://www.httrack.com
/* Resolver */
extern int IPV6_resolver;
/* Remaining room in the argv block; 0 once it is exhausted (alias expansion or
doit.log insertion can outrun the +32768 slack), so the copy aborts cleanly
instead of the subtraction wrapping to a huge unbounded size. */
#define cmdl_room(bufsize, ptr) \
((ptr) < (size_t) (bufsize) ? (size_t) (bufsize) - (ptr) : 0)
// Add a command in the argc/argv (buff has total capacity bufsize)
#define cmdl_add(token, argc, argv, buff, bufsize, ptr) \
argv[argc] = (buff + ptr); \
strlcpybuff(argv[argc], token, cmdl_room(bufsize, ptr)); \
ptr += (int) (strlen(argv[argc]) + 2); \
argc++
// Insert a command in the argc/argv (buff has total capacity bufsize)
#define cmdl_ins(token, argc, argv, buff, bufsize, ptr) \
{ \
int i; \
for (i = argc; i > 0; i--) \
argv[i] = argv[i - 1]; \
} \
argv[0] = (buff + ptr); \
strlcpybuff(argv[0], token, cmdl_room(bufsize, ptr)); \
ptr += (int) (strlen(argv[0]) + 2); \
argc++
#define htsmain_free() do { \
if (url != NULL) { \
free(url); \

View File

@@ -0,0 +1,92 @@
#!/bin/bash
#
# doit.log reprise (no network). Re-running httrack in a mirror directory with
# NO url makes the engine read hts-cache/doit.log and re-insert every recorded
# argument into the command line through htscoremain.c's cmdl_ins macro (the
# x_argvblk builder). That path is distinct from the rc-file one in
# 01_engine-rcfile.test (htsalias.c) and from the url-on-command-line update in
# 02_update-cache.test, and nothing else exercises it. Two properties:
# 1. A multi-token reprise re-mirrors cleanly: every token (the url and each
# option) survives the back-to-back packing, so the no-url run reproduces
# the file set with no errors. A packing/bound bug corrupts a later token
# and surfaces as an error or a missing file.
# 2. The reprise actually re-crawls through the inserted url: changing a source
# file and re-running with no url picks up the new content.
set -euo pipefail
# Resolve httrack to an absolute path before we cd: PATH may hold a
# build-relative entry that would not resolve from the temp directory.
bin=$(command -v httrack) || {
echo "FAIL: httrack not found on PATH"
exit 1
}
case "$bin" in
/*) ;;
*) bin="$(cd "$(dirname "$bin")" && pwd)/$(basename "$bin")" ;;
esac
tmp=$(mktemp -d "${TMPDIR:-/tmp}/httrack_doitlog.XXXXXX") || exit 1
trap 'rm -rf "$tmp"' EXIT HUP INT QUIT PIPE TERM
site="$tmp/site"
out="$tmp/out"
mkdir -p "$site/sub"
printf '<a href="a.html">a</a> <a href="sub/b.html">b</a>' >"$site/index.html"
echo 'OLDCONTENT' >"$site/a.html"
echo '<p>bbb</p>' >"$site/sub/b.html"
url="file://$site/index.html"
# count Error: lines in the log (grep -c exits 1 on zero matches: guard it)
errors() { grep -ciE '^[0-9:]*[[:space:]]Error:' "$out/hts-log.txt" || true; }
# initial mirror with the url and a handful of options, so doit.log records a
# multi-token command line for cmdl_ins to re-insert one token at a time.
rc=0
"$bin" "$url" -O "$out" --quiet -n -%v0 -r3 >/dev/null 2>&1 || rc=$?
test "$rc" -eq 0 || {
echo "FAIL: initial mirror exited $rc"
exit 1
}
test -f "$out/hts-cache/doit.log" || {
echo "FAIL: doit.log not written by the initial mirror"
exit 1
}
# --- 1. no-url reprise re-mirrors cleanly -----------------------------------
# No url on the command line, so the engine loads doit.log and re-inserts the
# recorded arguments (cmdl_ins). -O selects the mirror; argv carries no url.
rc=0
"$bin" -O "$out" --quiet >/dev/null 2>&1 || rc=$?
test "$rc" -eq 0 || {
echo "FAIL: doit.log reprise exited $rc"
exit 1
}
test "$(errors)" = 0 || {
echo "FAIL: doit.log reprise reported errors (a token may have been corrupted)"
grep -iE 'Error:' "$out/hts-log.txt" | head -3
exit 1
}
for suffix in a.html sub/b.html; do
test -n "$(find "$out" -path "*/$suffix" -print -quit)" || {
echo "FAIL: $suffix missing after the no-url reprise"
exit 1
}
done
# --- 2. the reprise re-crawls through the inserted url -----------------------
sleep 1
echo 'NEWCONTENT' >"$site/a.html"
rc=0
"$bin" -O "$out" --quiet >/dev/null 2>&1 || rc=$?
test "$rc" -eq 0 || {
echo "FAIL: second reprise exited $rc"
exit 1
}
grep -q NEWCONTENT "$(find "$out" -path '*/a.html' -print -quit)" || {
echo "FAIL: reprise did not pick up the changed source (inserted url not re-crawled)"
exit 1
}
exit 0

View File

@@ -19,6 +19,7 @@ TESTS = \
01_engine-cache.test \
01_engine-charset.test \
01_engine-cmdline.test \
01_engine-doitlog.test \
01_engine-entities.test \
01_engine-filter.test \
01_engine-hashtable.test \