Compare commits

...

1 Commits

Author SHA1 Message Date
Xavier Roche
74d6326413 htscore: report why a -%L URL list could not be loaded (#49)
Missing, unreadable, and non-regular -%L files all collapsed into one
reasonless "Could not include URL list: <name>" message, which left the
#49 reporter unable to tell why the list was rejected. Open and stat()
the file explicitly so the log carries the cause: the errno text (no
such file, permission denied), "not a regular file", or "file too
large". The loader keeps the original regular-file guard, so it still
won't open a directory or FIFO.

Covered by an offline file:// test: a readable list loads with a
non-zero count, while a missing file, an unreadable file, and a
directory each fail with a distinct reason instead of the bare message.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Xavier Roche <roche@httrack.com>
2026-06-26 20:48:05 +02:00
3 changed files with 92 additions and 13 deletions

View File

@@ -736,26 +736,39 @@ int httpmirror(char *url1, httrackp * opt) {
/* OPTIMIZED for fast load */
if (StringNotEmpty(opt->filelist)) {
char *filelist_buff = NULL;
const size_t filelist_sz = off_t_to_size_t(fsize(StringBuff(opt->filelist)));
size_t filelist_sz = 0;
const char *filelist_err = NULL; /* failure reason, NULL on success */
const off_t fs = fsize(StringBuff(opt->filelist));
if (filelist_sz != (size_t) -1) {
if (fs < 0) {
/* fsize() hides the cause; redo stat() for a precise errno (#49) */
struct stat st;
filelist_err = stat(StringBuff(opt->filelist), &st) != 0
? strerror(errno)
: "not a regular file";
} else if ((filelist_sz = off_t_to_size_t(fs)) == (size_t) -1) {
filelist_err = "file too large";
filelist_sz = 0;
} else {
FILE *fp = fopen(StringBuff(opt->filelist), "rb");
if (fp) {
if (fp == NULL) {
filelist_err = strerror(errno);
} else {
filelist_buff = malloct(filelist_sz + 1);
if (filelist_buff) {
if (fread(filelist_buff, 1, filelist_sz, fp) != filelist_sz) {
freet(filelist_buff);
filelist_buff = NULL;
} else {
*(filelist_buff + filelist_sz) = '\0';
}
if (filelist_buff == NULL) {
filelist_err = "out of memory";
} else if (fread(filelist_buff, 1, filelist_sz, fp) != filelist_sz) {
freet(filelist_buff);
filelist_err = "read error";
} else {
filelist_buff[filelist_sz] = '\0';
}
fclose(fp);
}
}
if (filelist_buff) {
if (filelist_buff != NULL) {
int filelist_ptr = 0;
int n = 0;
char BIGSTK line[HTS_URLMAXSIZE * 2];
@@ -780,8 +793,8 @@ int httpmirror(char *url1, httrackp * opt) {
// Free buffer
freet(filelist_buff);
} else {
hts_log_print(opt, LOG_ERROR, "Could not include URL list: %s",
StringBuff(opt->filelist));
hts_log_print(opt, LOG_ERROR, "Could not include URL list \"%s\": %s",
StringBuff(opt->filelist), filelist_err);
}
}

View File

@@ -0,0 +1,65 @@
#!/bin/bash
#
# -%L URL-list loading (#49): a readable list is honored; an unusable one fails
# with the reason (errno / not-a-regular-file), not a bare "Could not include
# URL list". Offline: file:// fixture, no server. Asserts on httrack's own
# strings and the message shape, so it is locale-independent.
set -euo pipefail

# Private scratch directory holding the fixture page, the URL lists and one
# output directory per scenario.
tmp=$(mktemp -d "${TMPDIR:-/tmp}/httrack_filelist.XXXXXX") || exit 1

# Cleanup runs once, from the EXIT trap. A trapped signal does not terminate
# the shell by itself, so the signal traps exit explicitly (which in turn fires
# the EXIT trap) instead of deleting $tmp and letting the script carry on.
trap 'rm -rf "$tmp"' EXIT
trap 'exit 1' HUP INT QUIT PIPE TERM

# Minimal page that the readable URL list points at.
echo '<html><body>hi</body></html>' >"$tmp/index.html"
# Run httrack against one -%L target.
#   $1  output directory (wiped and recreated on every call)
#   $2  path handed to -%L
# httrack's exit status is deliberately ignored; callers assert on the log
# instead. On return, the global LOG holds the path $1/hts-log.txt.
run() {
  local outdir="$1"
  local urllist="$2"

  rm -rf "$outdir"
  mkdir -p "$outdir"
  httrack -O "$outdir" --quiet -n "-%L" "$urllist" >"$outdir/.stdout" 2>&1 || :
  LOG="$outdir/hts-log.txt"
}
# Abort the test: print "FAIL: <reason>", dump the log of the most recent run
# (path taken from the global LOG) for diagnosis, then exit with status 1.
#   $1  human-readable reason
fail() {
  local reason="$1"

  echo "FAIL: $reason"
  cat "$LOG"
  exit 1
}
# Positive assertion: the extended regex $1 must match at least one line of
# the current log ($LOG); otherwise the test is aborted through fail().
loghas() {
  if ! grep -Eq "$1" "$LOG"; then
    fail "expected /$1/ in $LOG"
  fi
}
# Negative assertion: the extended regex $1 must NOT match any line of the
# current log ($LOG); a match aborts the test through fail().
#
# grep exits 0 on a match, 1 on no match, and >1 on error (for example a
# missing or unreadable log). Only status 1 proves the pattern is absent, so
# an error status is reported as a failure rather than passing silently.
lognot() {
  local rc=0

  grep -Eq "$1" "$LOG" || rc=$?
  case "$rc" in
    0) fail "unexpected /$1/ in $LOG" ;;
    1) ;; # pattern genuinely absent
    *) fail "could not search $LOG for /$1/ (grep exit status $rc)" ;;
  esac
}
# --- Scenario 1: readable list ----------------------------------------------
# The single URL in the list is loaded and counted; the count must be non-zero.
urls="$tmp/urls.txt"
printf 'file://%s/index.html\n' "$tmp" >"$urls"
run "$tmp/ok" "$urls"
loghas '[1-9][0-9]* links added from'

# --- Scenario 2: missing file -----------------------------------------------
# Expect the quoted name followed by a non-empty reason, and never the old
# reasonless "Could not include URL list: <name>". The reason must come from
# the stat() errno rather than the directory fallback literal, which guards
# against the errno lookup being dropped.
run "$tmp/miss" "$tmp/nope.txt"
loghas 'Could not include URL list "[^"]+": .+'
lognot 'Could not include URL list: '
lognot 'not a regular file'

# --- Scenario 3: directory --------------------------------------------------
# Rejected with httrack's own literal reason, hence locale-independent.
adir="$tmp/adir"
mkdir -p "$adir"
run "$tmp/dir" "$adir"
loghas 'Could not include URL list "[^"]+": not a regular file'

# --- Scenario 4: unreadable regular file ------------------------------------
# Exercises the fopen() errno arm, which is distinct from the directory
# branch. Root ignores mode 000, so the scenario is skipped when run as root.
if [ "$(id -u)" -ne 0 ]; then
  noperm="$tmp/noperm.txt"
  : >"$noperm"
  chmod 000 "$noperm"
  run "$tmp/perm" "$noperm"
  # Restore the mode before asserting.
  chmod 644 "$noperm"
  loghas 'Could not include URL list "[^"]+": .+'
  lognot 'not a regular file'
fi

exit 0

View File

@@ -34,6 +34,7 @@ TESTS = \
01_engine-dns.test \
01_engine-doitlog.test \
01_engine-entities.test \
01_engine-filelist.test \
01_engine-filter.test \
01_engine-hashtable.test \
01_engine-idna.test \