mirror of
https://github.com/xroche/httrack.git
synced 2026-06-19 16:53:18 +03:00
Compare commits
6 Commits
feature/ap
...
fix/copy-h
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4549ec3695 | ||
|
|
ac56c31b24 | ||
|
|
ee6beeeb7d | ||
|
|
6788bda380 | ||
|
|
7ead8d595e | ||
|
|
93f502990c |
@@ -3702,7 +3702,9 @@ HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to) {
|
||||
if (from->maxsoc > 0)
|
||||
to->maxsoc = from->maxsoc;
|
||||
|
||||
if (from->nearlink > -1)
|
||||
/* hts_boolean/enum fields are unsigned (GCC), so a bare `> -1` unset-guard
|
||||
is always false; cast to int to keep the -1 "unset" sentinel test. */
|
||||
if ((int) from->nearlink > -1)
|
||||
to->nearlink = from->nearlink;
|
||||
|
||||
if (from->timeout > -1)
|
||||
@@ -3729,10 +3731,10 @@ HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to) {
|
||||
if (from->hostcontrol > -1)
|
||||
to->hostcontrol = from->hostcontrol;
|
||||
|
||||
if (from->errpage > -1)
|
||||
if ((int) from->errpage > -1)
|
||||
to->errpage = from->errpage;
|
||||
|
||||
if (from->parseall > -1)
|
||||
if ((int) from->parseall > -1)
|
||||
to->parseall = from->parseall;
|
||||
|
||||
// test all: bit 8 of travel
|
||||
|
||||
@@ -1991,7 +1991,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
case 'v':
|
||||
opt->verbosedisplay = 2;
|
||||
if (isdigit((unsigned char) *(com + 1))) {
|
||||
sscanf(com + 1, "%d", &opt->verbosedisplay);
|
||||
sscanf(com + 1, "%d", (int *) &opt->verbosedisplay);
|
||||
while(isdigit((unsigned char) *(com + 1)))
|
||||
com++;
|
||||
}
|
||||
@@ -2006,7 +2006,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
case 'N':
|
||||
opt->savename_delayed = 2;
|
||||
if (isdigit((unsigned char) *(com + 1))) {
|
||||
sscanf(com + 1, "%d", &opt->savename_delayed);
|
||||
sscanf(com + 1, "%d", (int *) &opt->savename_delayed);
|
||||
while(isdigit((unsigned char) *(com + 1)))
|
||||
com++;
|
||||
}
|
||||
@@ -3096,6 +3096,41 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
htsmain_free();
|
||||
return 0;
|
||||
break;
|
||||
case '9': { // copy_htsopt selftest: httrack -#9
|
||||
httrackp *from = hts_create_opt();
|
||||
httrackp *to = hts_create_opt();
|
||||
int err = 0;
|
||||
|
||||
/* from-values differ from both the to-values and the
|
||||
hts_create_opt() defaults (nearlink FALSE, errpage/parseall
|
||||
TRUE), so a copy that no-ops or just resets to defaults is
|
||||
caught too, not only the unsigned-guard bug. */
|
||||
from->retry = 7; /* int field: positive control */
|
||||
to->retry = 0;
|
||||
from->nearlink = HTS_TRUE;
|
||||
to->nearlink = HTS_FALSE;
|
||||
from->errpage = HTS_FALSE;
|
||||
to->errpage = HTS_TRUE;
|
||||
from->parseall = HTS_FALSE;
|
||||
to->parseall = HTS_TRUE;
|
||||
|
||||
copy_htsopt(from, to);
|
||||
|
||||
if (to->retry != 7)
|
||||
err = 1;
|
||||
if (to->nearlink != HTS_TRUE)
|
||||
err = 1;
|
||||
if (to->errpage != HTS_FALSE)
|
||||
err = 1;
|
||||
if (to->parseall != HTS_FALSE)
|
||||
err = 1;
|
||||
|
||||
hts_free_opt(from);
|
||||
hts_free_opt(to);
|
||||
printf("copy-htsopt: %s\n", err ? "FAIL" : "OK");
|
||||
htsmain_free();
|
||||
return err;
|
||||
} break;
|
||||
case '!':
|
||||
HTS_PANIC_PRINTF
|
||||
("Option #! is disabled for security reasons");
|
||||
|
||||
38
src/htsopt.h
38
src/htsopt.h
@@ -342,17 +342,37 @@ typedef enum hts_seeker {
|
||||
HTS_SEEKER_UP = 1 << 1 /**< may ascend to parent directories */
|
||||
} hts_seeker;
|
||||
|
||||
/* Link-following scope, stored in the low byte of opt->travel. */
|
||||
/* opt->travel: link-following scope in the low byte, flags OR'd in above it. */
|
||||
typedef enum hts_travel_scope {
|
||||
HTS_TRAVEL_SAME_ADDRESS = 0, /**< stay on the same address (host) */
|
||||
HTS_TRAVEL_SAME_DOMAIN = 1, /**< stay on the same principal domain */
|
||||
HTS_TRAVEL_SAME_TLD = 2, /**< stay on the same TLD (e.g. .com) */
|
||||
HTS_TRAVEL_EVERYWHERE = 7 /**< follow links anywhere on the web */
|
||||
HTS_TRAVEL_EVERYWHERE = 7, /**< follow links anywhere on the web */
|
||||
HTS_TRAVEL_TEST_ALL = 1 << 8 /**< also test forbidden URLs (-t) */
|
||||
} hts_travel_scope;
|
||||
|
||||
/* Flags OR'd into opt->travel above the scope value. */
|
||||
#define HTS_TRAVEL_SCOPE_MASK 0xff /**< mask selecting the scope value */
|
||||
#define HTS_TRAVEL_TEST_ALL (1 << 8) /**< also test forbidden URLs (-t) */
|
||||
/* Mask selecting the scope value out of opt->travel. */
|
||||
#define HTS_TRAVEL_SCOPE_MASK 0xff
|
||||
|
||||
/* Level of detail of the text progress display, stored in
   opt->verbosedisplay. */
typedef enum hts_verbosedisplay {
  HTS_VERBOSE_NONE = 0,   /**< animated progress display off (default) */
  HTS_VERBOSE_SIMPLE = 1, /**< minimal progress, kept on a single line */
  HTS_VERBOSE_FULL = 2    /**< complete animated progress */
} hts_verbosedisplay;
|
||||
|
||||
/* Policy for delaying file-type resolution, stored in
   opt->savename_delayed. */
typedef enum hts_savename_delayed {
  HTS_SAVENAME_DELAYED_NONE = 0, /**< type is resolved immediately */
  HTS_SAVENAME_DELAYED_SOFT = 1, /**< type check delayed only when unknown */
  HTS_SAVENAME_DELAYED_HARD = 2  /**< type check always delayed (default) */
} hts_savename_delayed;
|
||||
|
||||
/* Host-banning triggers: independent bit flags combined into the
   opt->hostcontrol bitmask. */
typedef enum hts_hostcontrol {
  HTS_HOSTCONTROL_BAN_TIMEOUT = 0x01, /**< ban a host that times out */
  HTS_HOSTCONTROL_BAN_SLOW = 0x02     /**< ban a host that is too slow */
} hts_hostcontrol;
|
||||
|
||||
#ifndef HTS_DEF_FWSTRUCT_lien_buffers
|
||||
#define HTS_DEF_FWSTRUCT_lien_buffers
|
||||
@@ -386,7 +406,7 @@ struct httrackp {
|
||||
hts_urlmode
|
||||
urlmode; /**< saved-link rewriting style (relative, absolute, etc.) */
|
||||
hts_boolean no_type_change; // do not change file type according to MIME
|
||||
int debug; /**< debug logging level */
|
||||
hts_log_type debug; /**< debug logging level */
|
||||
int getmode; /**< what to fetch (HTML, images, ...) bitmask */
|
||||
FILE *log; /**< informational log stream; NULL mutes it */
|
||||
FILE *errlog; /**< error log stream; NULL mutes it */
|
||||
@@ -414,7 +434,7 @@ struct httrackp {
|
||||
int savename_type; /**< saved-name layout (original tree, flat, ...) */
|
||||
String
|
||||
savename_userdef; /**< user-defined name template (e.g. %h%p/%n%q.%t) */
|
||||
int savename_delayed; // delayed type check
|
||||
hts_savename_delayed savename_delayed; /**< delayed type-check policy */
|
||||
hts_boolean
|
||||
delayed_cached; // delayed type check can be cached to speedup updates
|
||||
hts_boolean mimehtml; /**< produce a single MIME/MHTML archive */
|
||||
@@ -430,7 +450,7 @@ struct httrackp {
|
||||
hts_boolean makestat; /**< maintain a transfer-statistics log */
|
||||
hts_boolean maketrack; /**< maintain an operations-statistics log */
|
||||
int parsejava; /**< Java/JS parsing mode; see htsparsejava_flags */
|
||||
int hostcontrol; /**< drop hosts that are too slow, etc. */
|
||||
int hostcontrol; /**< ban slow/timing-out hosts; see hts_hostcontrol bits */
|
||||
hts_boolean errpage; /**< generate an error page on 404 and similar */
|
||||
hts_boolean
|
||||
check_type; /**< probe unknown-type links (cgi/asp/dir) and follow moves
|
||||
@@ -455,7 +475,7 @@ struct httrackp {
|
||||
parseall; /**< parse aggressively, including unknown tags with links */
|
||||
hts_boolean parsedebug; /**< parser debug mode */
|
||||
hts_boolean norecatch; /**< do not re-fetch files the user deleted locally */
|
||||
int verbosedisplay; /**< animated text progress display */
|
||||
hts_verbosedisplay verbosedisplay; /**< animated text progress display */
|
||||
String footer; /**< footer/info line injected into pages */
|
||||
int maxcache; /**< in-memory cache backing limit (bytes) */
|
||||
// int maxcache_anticipate; // maximum links to anticipate (upper bound)
|
||||
|
||||
@@ -3722,7 +3722,8 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
|
||||
//case -1: can_retry=1; break;
|
||||
case STATUSCODE_TIMEOUT:
|
||||
if (opt->hostcontrol) { // timeout et retry épuisés
|
||||
if ((opt->hostcontrol & 1) && (heap(ptr)->retry <= 0)) {
|
||||
if ((opt->hostcontrol & HTS_HOSTCONTROL_BAN_TIMEOUT) &&
|
||||
(heap(ptr)->retry <= 0)) {
|
||||
hts_log_print(opt, LOG_DEBUG, "Link banned: %s%s", urladr(), urlfil());
|
||||
host_ban(opt, ptr, sback, jump_identification_const(urladr()));
|
||||
hts_log_print(opt, LOG_DEBUG,
|
||||
@@ -3735,7 +3736,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
|
||||
break;
|
||||
case STATUSCODE_SLOW:
|
||||
if ((opt->hostcontrol) && (heap(ptr)->retry <= 0)) { // too slow
|
||||
if (opt->hostcontrol & 2) {
|
||||
if (opt->hostcontrol & HTS_HOSTCONTROL_BAN_SLOW) {
|
||||
hts_log_print(opt, LOG_DEBUG, "Link banned: %s%s", urladr(), urlfil());
|
||||
host_ban(opt, ptr, sback, jump_identification_const(urladr()));
|
||||
hts_log_print(opt, LOG_DEBUG,
|
||||
|
||||
17
tests/01_engine-copyopt.test
Executable file
17
tests/01_engine-copyopt.test
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
#
# Regression guard for the unsigned-enum sentinel trap: copy_htsopt's
# `if (from->X > -1)` guard is always false for unsigned hts_boolean fields, so
# they silently stop being copied. Driven by the in-process 'httrack -#9' test.
# Keep POSIX-portable (harness runs it via $(BASH), a plain /bin/sh on macOS).

set -eu

# A trailing token is required; a bare '-#9' falls through to the usage screen.
#
# Capture the exit status explicitly. Under `set -e`, a failing command
# substitution in a bare assignment aborts the script on the spot, so the
# diagnostic below would never run and the selftest's output would be lost.
status=0
out=$(httrack -#9 run) || status=$?

# Exact-match the success line so a fall-through to usage can't pass the test,
# and also require a zero exit status from the selftest itself.
if test "$status" -ne 0 || test "$out" != "copy-htsopt: OK"; then
  echo "expected 'copy-htsopt: OK', got: $out (exit status $status)" >&2
  exit 1
fi
|
||||
@@ -24,6 +24,7 @@ TESTS = \
|
||||
01_engine-cache-golden.test \
|
||||
01_engine-charset.test \
|
||||
01_engine-cmdline.test \
|
||||
01_engine-copyopt.test \
|
||||
01_engine-doitlog.test \
|
||||
01_engine-entities.test \
|
||||
01_engine-filter.test \
|
||||
|
||||
Reference in New Issue
Block a user