mirror of
https://github.com/xroche/httrack.git
synced 2026-06-27 12:37:05 +03:00
Compare commits
1 Commits
master
...
fix/fileli
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
74d6326413 |
@@ -736,26 +736,39 @@ int httpmirror(char *url1, httrackp * opt) {
|
||||
/* OPTIMIZED for fast load */
|
||||
if (StringNotEmpty(opt->filelist)) {
|
||||
char *filelist_buff = NULL;
|
||||
const size_t filelist_sz = off_t_to_size_t(fsize(StringBuff(opt->filelist)));
|
||||
size_t filelist_sz = 0;
|
||||
const char *filelist_err = NULL; /* failure reason, NULL on success */
|
||||
const off_t fs = fsize(StringBuff(opt->filelist));
|
||||
|
||||
if (filelist_sz != (size_t) -1) {
|
||||
if (fs < 0) {
|
||||
/* fsize() hides the cause; redo stat() for a precise errno (#49) */
|
||||
struct stat st;
|
||||
filelist_err = stat(StringBuff(opt->filelist), &st) != 0
|
||||
? strerror(errno)
|
||||
: "not a regular file";
|
||||
} else if ((filelist_sz = off_t_to_size_t(fs)) == (size_t) -1) {
|
||||
filelist_err = "file too large";
|
||||
filelist_sz = 0;
|
||||
} else {
|
||||
FILE *fp = fopen(StringBuff(opt->filelist), "rb");
|
||||
|
||||
if (fp) {
|
||||
if (fp == NULL) {
|
||||
filelist_err = strerror(errno);
|
||||
} else {
|
||||
filelist_buff = malloct(filelist_sz + 1);
|
||||
if (filelist_buff) {
|
||||
if (fread(filelist_buff, 1, filelist_sz, fp) != filelist_sz) {
|
||||
freet(filelist_buff);
|
||||
filelist_buff = NULL;
|
||||
} else {
|
||||
*(filelist_buff + filelist_sz) = '\0';
|
||||
}
|
||||
if (filelist_buff == NULL) {
|
||||
filelist_err = "out of memory";
|
||||
} else if (fread(filelist_buff, 1, filelist_sz, fp) != filelist_sz) {
|
||||
freet(filelist_buff);
|
||||
filelist_err = "read error";
|
||||
} else {
|
||||
filelist_buff[filelist_sz] = '\0';
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
|
||||
if (filelist_buff) {
|
||||
if (filelist_buff != NULL) {
|
||||
int filelist_ptr = 0;
|
||||
int n = 0;
|
||||
char BIGSTK line[HTS_URLMAXSIZE * 2];
|
||||
@@ -780,8 +793,8 @@ int httpmirror(char *url1, httrackp * opt) {
|
||||
// Free buffer
|
||||
freet(filelist_buff);
|
||||
} else {
|
||||
hts_log_print(opt, LOG_ERROR, "Could not include URL list: %s",
|
||||
StringBuff(opt->filelist));
|
||||
hts_log_print(opt, LOG_ERROR, "Could not include URL list \"%s\": %s",
|
||||
StringBuff(opt->filelist), filelist_err);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
65
tests/01_engine-filelist.test
Normal file
65
tests/01_engine-filelist.test
Normal file
@@ -0,0 +1,65 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# -%L URL-list loading (#49): a readable list is honored; an unusable one fails
|
||||
# with the reason (errno / not-a-regular-file), not a bare "Could not include
|
||||
# URL list". Offline: file:// fixture, no server. Asserts on httrack's own
|
||||
# strings and the message shape, so it is locale-independent.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
tmp=$(mktemp -d "${TMPDIR:-/tmp}/httrack_filelist.XXXXXX") || exit 1
|
||||
trap 'rm -rf "$tmp"' EXIT HUP INT QUIT PIPE TERM
|
||||
|
||||
echo '<html><body>hi</body></html>' >"$tmp/index.html"
|
||||
|
||||
# Run httrack with "$2" as the -%L URL list, using "$1" as a freshly recreated
# output directory. httrack's exit status is deliberately ignored: callers
# assert on the log instead. Sets the global LOG to "$1/hts-log.txt", the file
# the loghas/lognot/fail helpers read.
run() {
  local out="$1"
  local list="$2"

  rm -rf "$out"
  mkdir -p "$out"
  if ! httrack -O "$out" --quiet -n "-%L" "$list" >"$out/.stdout" 2>&1; then
    : # a non-zero exit is tolerated here
  fi
  LOG="$out/hts-log.txt"
}
|
||||
|
||||
# Abort the test: print "FAIL: <message>" for "$1", dump the file named by the
# global LOG, then exit with status 1.
fail() {
  printf 'FAIL: %s\n' "$1"
  cat "$LOG"
  exit 1
}
|
||||
# Assert that the file named by LOG contains a match for the extended regex
# "$1"; otherwise fail the test with a message naming the pattern and the log.
loghas() {
  local pattern="$1"

  if ! grep -Eq "$pattern" "$LOG"; then
    fail "expected /$pattern/ in $LOG"
  fi
}
|
||||
# Assert that the file named by LOG contains NO match for the extended regex
# "$1"; a match fails the test with a message naming the pattern and the log.
lognot() {
  local pattern="$1"

  ! grep -Eq "$pattern" "$LOG" || fail "unexpected /$pattern/ in $LOG"
}
|
||||
|
||||
# readable list: its one URL is loaded and counted (count must be non-zero)
|
||||
printf 'file://%s/index.html\n' "$tmp" >"$tmp/urls.txt"
|
||||
run "$tmp/ok" "$tmp/urls.txt"
|
||||
loghas '[1-9][0-9]* links added from'
|
||||
|
||||
# missing file: quoted name + a non-empty reason, never the old reasonless
|
||||
# "Could not include URL list: <name>". The reason is the stat() errno, not the
|
||||
# directory fallback literal (guards against dropping the errno lookup).
|
||||
run "$tmp/miss" "$tmp/nope.txt"
|
||||
loghas 'Could not include URL list "[^"]+": .+'
|
||||
lognot 'Could not include URL list: '
|
||||
lognot 'not a regular file'
|
||||
|
||||
# a directory is rejected with our own reason (locale-independent)
|
||||
mkdir -p "$tmp/adir"
|
||||
run "$tmp/dir" "$tmp/adir"
|
||||
loghas 'Could not include URL list "[^"]+": not a regular file'
|
||||
|
||||
# unreadable regular file: the fopen() errno arm fires, distinct from the
|
||||
# directory branch. Root bypasses mode 000, so skip it there.
|
||||
if test "$(id -u)" -ne 0; then
|
||||
: >"$tmp/noperm.txt"
|
||||
chmod 000 "$tmp/noperm.txt"
|
||||
run "$tmp/perm" "$tmp/noperm.txt"
|
||||
chmod 644 "$tmp/noperm.txt"
|
||||
loghas 'Could not include URL list "[^"]+": .+'
|
||||
lognot 'not a regular file'
|
||||
fi
|
||||
|
||||
exit 0
|
||||
@@ -34,6 +34,7 @@ TESTS = \
|
||||
01_engine-dns.test \
|
||||
01_engine-doitlog.test \
|
||||
01_engine-entities.test \
|
||||
01_engine-filelist.test \
|
||||
01_engine-filter.test \
|
||||
01_engine-hashtable.test \
|
||||
01_engine-idna.test \
|
||||
|
||||
Reference in New Issue
Block a user