mirror of
https://github.com/xroche/httrack.git
synced 2026-06-18 00:04:12 +03:00
Compare commits
2 Commits
tests/stri
...
cleanup/cm
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c177923fa1 | ||
|
|
7091f85104 |
@@ -41,25 +41,6 @@ Please visit our Website: http://www.httrack.com
|
||||
|
||||
/* Yield (a) unless it is NULL, in which case yield the empty string "".
   The argument is evaluated twice, so it must be free of side effects. */
#define _NOT_NULL(a) ( ((a) != NULL) ? (a) : "" )
|
||||
|
||||
// COPY OF cmdl_ins in htscoremain.c
|
||||
/* Bytes left in x_argvblk from offset ptr. The offset can in principle outrun
   the block (alias/doit.log expansion), so the macro yields 0 in that case and
   the copy aborts cleanly instead of the subtraction wrapping to a huge
   unbounded size. Both operands are converted to size_t explicitly, so a
   negative ptr becomes a huge value and also yields 0. */
#define cmdl_room(bufsize, ptr) \
  ((size_t) (ptr) < (size_t) (bufsize) \
     ? (size_t) (bufsize) - (size_t) (ptr) \
     : (size_t) 0)
|
||||
// Insert a command in the argc/argv (buff has total capacity bufsize).
// The existing argc entries are shifted up by one, the token is copied at
// buff + ptr (bounded by cmdl_room) and becomes argv[0], ptr advances past the
// copy and its terminating NUL, and argc is incremented.
// Expands to a single statement, so it can be the body of an unbraced if/else.
// argc, argv, buff and ptr are evaluated several times: pass plain lvalues or
// side-effect-free expressions.
#define cmdl_ins(token, argc, argv, buff, bufsize, ptr) \
  do { \
    int cmdl_ins_i_; /* unlikely name: must not capture a caller's variable */ \
    for (cmdl_ins_i_ = (argc); cmdl_ins_i_ > 0; cmdl_ins_i_--) \
      (argv)[cmdl_ins_i_] = (argv)[cmdl_ins_i_ - 1]; \
    (argv)[0] = ((buff) + (ptr)); \
    strlcpybuff((argv)[0], token, cmdl_room(bufsize, ptr)); \
    (ptr) += (int) (strlen((argv)[0]) + 1); \
    (argc)++; \
  } while (0)
|
||||
// END OF COPY OF cmdl_ins in htscoremain.c
|
||||
|
||||
/*
|
||||
Aliases for command-line and config file definitions
|
||||
These definitions can be used:
|
||||
|
||||
@@ -52,6 +52,34 @@ const char *opttype_value(int p);
|
||||
const char *opthelp_value(int p);
|
||||
const char *hts_gethome(void);
|
||||
void expand_home(String * str);
|
||||
|
||||
/* Command-line argv-block builders, shared by htscoremain.c (the CLI parser)
   and htsalias.c (config-file alias expansion). Tokens are packed back-to-back
   into x_argvblk (total capacity bufsize); each argv[] entry points into the
   block. cmdl_room bounds every copy: the running offset ptr can outrun the
   block (alias / doit.log expansion outpacing the +32768 slack), so it yields
   0 rather than a wrapped size_t and the bounded copy aborts cleanly. Both
   operands are converted to size_t explicitly, so a negative ptr also
   yields 0. */
#define cmdl_room(bufsize, ptr) \
  ((size_t) (ptr) < (size_t) (bufsize) \
     ? (size_t) (bufsize) - (size_t) (ptr) \
     : (size_t) 0)
|
||||
|
||||
/* Append a token as a new argv[argc]: the token is copied at buff + ptr
   (bounded by cmdl_room), ptr advances past the copy and its terminating NUL,
   and argc is incremented.
   Expands to a single statement, so it can be the body of an unbraced if/else.
   argc, argv, buff and ptr are evaluated several times: pass plain lvalues or
   side-effect-free expressions. */
#define cmdl_add(token, argc, argv, buff, bufsize, ptr) \
  do { \
    (argv)[argc] = ((buff) + (ptr)); \
    strlcpybuff((argv)[argc], token, cmdl_room(bufsize, ptr)); \
    (ptr) += (int) (strlen((argv)[argc]) + 1); \
    (argc)++; \
  } while (0)
|
||||
|
||||
/* Insert a token at argv[0], shifting the existing argc entries up by one.
   The token is copied at buff + ptr (bounded by cmdl_room), ptr advances past
   the copy and its terminating NUL, and argc is incremented.
   Expands to a single statement, so it can be the body of an unbraced if/else.
   argc, argv, buff and ptr are evaluated several times: pass plain lvalues or
   side-effect-free expressions. */
#define cmdl_ins(token, argc, argv, buff, bufsize, ptr) \
  do { \
    int cmdl_ins_i_; /* unlikely name: must not capture a caller's variable */ \
    for (cmdl_ins_i_ = (argc); cmdl_ins_i_ > 0; cmdl_ins_i_--) \
      (argv)[cmdl_ins_i_] = (argv)[cmdl_ins_i_ - 1]; \
    (argv)[0] = ((buff) + (ptr)); \
    strlcpybuff((argv)[0], token, cmdl_room(bufsize, ptr)); \
    (ptr) += (int) (strlen((argv)[0]) + 1); \
    (argc)++; \
  } while (0)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -69,31 +69,6 @@ Please visit our Website: http://www.httrack.com
|
||||
/* Resolver */
|
||||
extern int IPV6_resolver;
|
||||
|
||||
/* Remaining room in the argv block; 0 once it is exhausted (alias expansion or
   doit.log insertion can outrun the +32768 slack), so the copy aborts cleanly
   instead of the subtraction wrapping to a huge unbounded size. Both operands
   are converted to size_t explicitly, so a negative ptr also yields 0. */
#define cmdl_room(bufsize, ptr) \
  ((size_t) (ptr) < (size_t) (bufsize) \
     ? (size_t) (bufsize) - (size_t) (ptr) \
     : (size_t) 0)
|
||||
|
||||
// Add a command in the argc/argv (buff has total capacity bufsize).
// The token is copied at buff + ptr (bounded by cmdl_room) and becomes
// argv[argc]; ptr then advances by the copied length + 2 (one byte more than
// the terminating NUL), and argc is incremented.
// Expands to a single statement, so it can be the body of an unbraced if/else.
// argc, argv, buff and ptr are evaluated several times: pass plain lvalues or
// side-effect-free expressions.
#define cmdl_add(token, argc, argv, buff, bufsize, ptr) \
  do { \
    (argv)[argc] = ((buff) + (ptr)); \
    strlcpybuff((argv)[argc], token, cmdl_room(bufsize, ptr)); \
    (ptr) += (int) (strlen((argv)[argc]) + 2); \
    (argc)++; \
  } while (0)
|
||||
|
||||
// Insert a command in the argc/argv (buff has total capacity bufsize).
// The existing argc entries are shifted up by one, the token is copied at
// buff + ptr (bounded by cmdl_room) and becomes argv[0]; ptr then advances by
// the copied length + 2 (one byte more than the terminating NUL), and argc is
// incremented.
// Expands to a single statement, so it can be the body of an unbraced if/else.
// argc, argv, buff and ptr are evaluated several times: pass plain lvalues or
// side-effect-free expressions.
#define cmdl_ins(token, argc, argv, buff, bufsize, ptr) \
  do { \
    int cmdl_ins_i_; /* unlikely name: must not capture a caller's variable */ \
    for (cmdl_ins_i_ = (argc); cmdl_ins_i_ > 0; cmdl_ins_i_--) \
      (argv)[cmdl_ins_i_] = (argv)[cmdl_ins_i_ - 1]; \
    (argv)[0] = ((buff) + (ptr)); \
    strlcpybuff((argv)[0], token, cmdl_room(bufsize, ptr)); \
    (ptr) += (int) (strlen((argv)[0]) + 2); \
    (argc)++; \
  } while (0)
|
||||
|
||||
#define htsmain_free() do { \
|
||||
if (url != NULL) { \
|
||||
free(url); \
|
||||
|
||||
92
tests/01_engine-doitlog.test
Normal file
92
tests/01_engine-doitlog.test
Normal file
@@ -0,0 +1,92 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
# doit.log reprise (no network). Re-running httrack in a mirror directory with
# NO url makes the engine read hts-cache/doit.log and re-insert every recorded
# argument into the command line through htscoremain.c's cmdl_ins macro (the
# x_argvblk builder). That path is distinct from the rc-file one in
# 01_engine-rcfile.test (htsalias.c) and from the url-on-command-line update in
# 02_update-cache.test, and nothing else exercises it. Two properties:
#   1. A multi-token reprise re-mirrors cleanly: every token (the url and each
#      option) survives the back-to-back packing, so the no-url run reproduces
#      the file set with no errors. A packing/bound bug corrupts a later token
#      and surfaces as an error or a missing file.
#   2. The reprise actually re-crawls through the inserted url: changing a source
#      file and re-running with no url picks up the new content.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Resolve httrack to an absolute path before we cd: PATH may hold a
# build-relative entry that would not resolve from the temp directory.
bin=$(command -v httrack) || {
    echo "FAIL: httrack not found on PATH"
    exit 1
}
# An already-absolute path is kept as-is; anything else is anchored to the
# physical directory it currently resolves from.
case "$bin" in
/*) ;;
*) bin="$(cd "$(dirname "$bin")" && pwd)/$(basename "$bin")" ;;
esac
|
||||
|
||||
# Private scratch directory, removed on normal exit and on the listed signals.
tmp=$(mktemp -d "${TMPDIR:-/tmp}/httrack_doitlog.XXXXXX") || exit 1
trap 'rm -rf "$tmp"' EXIT HUP INT QUIT PIPE TERM

# Fixture site: an index linking to a.html and sub/b.html, mirrored into $out
# through a file:// url.
site="$tmp/site"
out="$tmp/out"
mkdir -p "$site/sub"
printf '<a href="a.html">a</a> <a href="sub/b.html">b</a>' >"$site/index.html"
echo 'OLDCONTENT' >"$site/a.html"
echo '<p>bbb</p>' >"$site/sub/b.html"
url="file://$site/index.html"
|
||||
|
||||
# count Error: lines in the log (grep -c exits 1 on zero matches: guard it)
# Prints the count on stdout; reads $out/hts-log.txt.
errors() {
    grep -ciE '^[0-9:]*[[:space:]]Error:' "$out/hts-log.txt" || true
}
|
||||
|
||||
# initial mirror with the url and a handful of options, so doit.log records a
# multi-token command line for cmdl_ins to re-insert one token at a time.
rc=0
"$bin" "$url" -O "$out" --quiet -n -%v0 -r3 >/dev/null 2>&1 || rc=$?
test "$rc" -eq 0 || {
    echo "FAIL: initial mirror exited $rc"
    exit 1
}
# The reprise runs below depend on this file being present.
test -f "$out/hts-cache/doit.log" || {
    echo "FAIL: doit.log not written by the initial mirror"
    exit 1
}
|
||||
|
||||
# --- 1. no-url reprise re-mirrors cleanly -----------------------------------
# No url on the command line, so the engine loads doit.log and re-inserts the
# recorded arguments (cmdl_ins). -O selects the mirror; argv carries no url.
rc=0
"$bin" -O "$out" --quiet >/dev/null 2>&1 || rc=$?
test "$rc" -eq 0 || {
    echo "FAIL: doit.log reprise exited $rc"
    exit 1
}
test "$(errors)" = 0 || {
    echo "FAIL: doit.log reprise reported errors (a token may have been corrupted)"
    # Diagnostics only: under set -e / pipefail a failing pipeline here would
    # end the script before the explicit exit below, so its status is ignored.
    grep -iE 'Error:' "$out/hts-log.txt" | head -3 || true
    exit 1
}
# Both linked pages must exist somewhere under the mirror directory.
for suffix in a.html sub/b.html; do
    test -n "$(find "$out" -path "*/$suffix" -print -quit)" || {
        echo "FAIL: $suffix missing after the no-url reprise"
        exit 1
    }
done
|
||||
|
||||
# --- 2. the reprise re-crawls through the inserted url -----------------------
# NOTE(review): the pause presumably keeps the rewrite from landing in the same
# timestamp second as the first mirror -- confirm.
sleep 1
echo 'NEWCONTENT' >"$site/a.html"
rc=0
"$bin" -O "$out" --quiet >/dev/null 2>&1 || rc=$?
test "$rc" -eq 0 || {
    echo "FAIL: second reprise exited $rc"
    exit 1
}
# Locate the mirrored copy first: with no match, grep would be handed an empty
# file name and the failure would be reported as stale content.
mirrored_a=$(find "$out" -path '*/a.html' -print -quit)
test -n "$mirrored_a" || {
    echo "FAIL: a.html missing after the second reprise"
    exit 1
}
grep -q NEWCONTENT "$mirrored_a" || {
    echo "FAIL: reprise did not pick up the changed source (inserted url not re-crawled)"
    exit 1
}
|
||||
|
||||
exit 0
|
||||
@@ -19,6 +19,7 @@ TESTS = \
|
||||
01_engine-cache.test \
|
||||
01_engine-charset.test \
|
||||
01_engine-cmdline.test \
|
||||
01_engine-doitlog.test \
|
||||
01_engine-entities.test \
|
||||
01_engine-filter.test \
|
||||
01_engine-hashtable.test \
|
||||
|
||||
Reference in New Issue
Block a user