mirror of
https://github.com/xroche/httrack.git
synced 2026-06-19 00:33:03 +03:00
Compare commits
12 Commits
feature/ap
...
feature/ap
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1b440c44b5 | ||
|
|
ac6dd1a570 | ||
|
|
4549ec3695 | ||
|
|
ac56c31b24 | ||
|
|
ee6beeeb7d | ||
|
|
6788bda380 | ||
|
|
7ead8d595e | ||
|
|
93f502990c | ||
|
|
0f4b2596b2 | ||
|
|
4a676bb5e1 | ||
|
|
36b4e834b8 | ||
|
|
bbb423f025 |
@@ -3838,7 +3838,7 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
|
||||
/* funny log for commandline users */
|
||||
//if (!opt->quiet) {
|
||||
// petite animation
|
||||
if (opt->verbosedisplay == 1) {
|
||||
if (opt->verbosedisplay == HTS_VERBOSE_SIMPLE) {
|
||||
if (back[i].status == STATUS_READY) {
|
||||
if (back[i].r.statuscode == HTTP_OK)
|
||||
printf("* %s%s (" LLintP " bytes) - OK" VT_CLREOL "\r",
|
||||
|
||||
@@ -135,7 +135,8 @@ HTSEXT_API T_SOC catch_url_init(int *port, /* 128 bytes */ char *adr) {
|
||||
// returns 0 if error
|
||||
// url: buffer where URL must be stored - or ip:port in case of failure
|
||||
// data: 32Kb
|
||||
HTSEXT_API int catch_url(T_SOC soc, char *url, char *method, char *data) {
|
||||
HTSEXT_API hts_boolean catch_url(T_SOC soc, char *url, char *method,
|
||||
char *data) {
|
||||
int retour = 0;
|
||||
|
||||
// connexion (accept)
|
||||
|
||||
@@ -2585,7 +2585,7 @@ static int mkdir_compat(const char *pathname) {
|
||||
|
||||
/* path must end with "/" or with the finename (/tmp/bar/ or /tmp/bar/foo.zip) */
|
||||
/* Note: preserve errno */
|
||||
HTSEXT_API int dir_exists(const char *path) {
|
||||
HTSEXT_API hts_boolean dir_exists(const char *path) {
|
||||
const int err = errno;
|
||||
STRUCT_STAT st;
|
||||
char BIGSTK file[HTS_URLMAXSIZE * 2];
|
||||
@@ -3342,7 +3342,8 @@ int back_fill(struct_back * sback, httrackp * opt, cache_back * cache,
|
||||
int ptr, int numero_passe) {
|
||||
int n = back_pluggable_sockets(sback, opt);
|
||||
|
||||
if (opt->savename_delayed == 2 && !opt->delayed_cached) /* cancel (always delayed) */
|
||||
if (opt->savename_delayed == HTS_SAVENAME_DELAYED_HARD &&
|
||||
!opt->delayed_cached) /* cancel (always delayed) */
|
||||
return 0;
|
||||
if (n > 0) {
|
||||
int p;
|
||||
@@ -3646,7 +3647,7 @@ HTSEXT_API int hts_setpause(httrackp * opt, int p) {
|
||||
}
|
||||
|
||||
// ask for termination
|
||||
HTSEXT_API int hts_request_stop(httrackp * opt, int force) {
|
||||
HTSEXT_API int hts_request_stop(httrackp *opt, hts_boolean force) {
|
||||
if (opt != NULL) {
|
||||
hts_log_print(opt, LOG_ERROR, "Exit requested by shell or user");
|
||||
hts_mutexlock(&opt->state.lock);
|
||||
@@ -3656,7 +3657,7 @@ HTSEXT_API int hts_request_stop(httrackp * opt, int force) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
HTSEXT_API int hts_has_stopped(httrackp * opt) {
|
||||
HTSEXT_API hts_boolean hts_has_stopped(httrackp *opt) {
|
||||
int ended;
|
||||
hts_mutexlock(&opt->state.lock);
|
||||
ended = opt->state.is_ended;
|
||||
@@ -3678,12 +3679,12 @@ HTSEXT_API int hts_has_stopped(httrackp * opt) {
|
||||
//}
|
||||
// ajout d'URL
|
||||
// -1 : erreur
|
||||
HTSEXT_API int hts_addurl(httrackp * opt, char **url) {
|
||||
HTSEXT_API hts_boolean hts_addurl(httrackp *opt, char **url) {
|
||||
if (url)
|
||||
opt->state._hts_addurl = url;
|
||||
return (opt->state._hts_addurl != NULL);
|
||||
}
|
||||
HTSEXT_API int hts_resetaddurl(httrackp * opt) {
|
||||
HTSEXT_API hts_boolean hts_resetaddurl(httrackp *opt) {
|
||||
opt->state._hts_addurl = NULL;
|
||||
return (opt->state._hts_addurl != NULL);
|
||||
}
|
||||
@@ -3702,7 +3703,9 @@ HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to) {
|
||||
if (from->maxsoc > 0)
|
||||
to->maxsoc = from->maxsoc;
|
||||
|
||||
if (from->nearlink > -1)
|
||||
/* hts_boolean/enum fields are unsigned (GCC), so a bare `> -1` unset-guard
|
||||
is always false; cast to int to keep the -1 "unset" sentinel test. */
|
||||
if ((int) from->nearlink > -1)
|
||||
to->nearlink = from->nearlink;
|
||||
|
||||
if (from->timeout > -1)
|
||||
@@ -3729,10 +3732,10 @@ HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to) {
|
||||
if (from->hostcontrol > -1)
|
||||
to->hostcontrol = from->hostcontrol;
|
||||
|
||||
if (from->errpage > -1)
|
||||
if ((int) from->errpage > -1)
|
||||
to->errpage = from->errpage;
|
||||
|
||||
if (from->parseall > -1)
|
||||
if ((int) from->parseall > -1)
|
||||
to->parseall = from->parseall;
|
||||
|
||||
// test all: bit 8 de travel
|
||||
@@ -3844,7 +3847,7 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
|
||||
a = opt->savename_type;
|
||||
b = opt->savename_83;
|
||||
opt->savename_type = 0;
|
||||
opt->savename_83 = 0;
|
||||
opt->savename_83 = HTS_SAVENAME_83_LONG;
|
||||
// note: adr,fil peuvent être patchés
|
||||
r =
|
||||
url_savename(&afs, NULL, NULL, NULL, opt, sback, cache, hashptr, ptr, numero_passe,
|
||||
|
||||
@@ -612,12 +612,12 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
/* Terminal is a tty, may ask questions and display funny information */
|
||||
if (isatty(1)) {
|
||||
opt->quiet = 0;
|
||||
opt->verbosedisplay = 1;
|
||||
opt->verbosedisplay = HTS_VERBOSE_SIMPLE;
|
||||
}
|
||||
/* Not a tty, no stdin input or funny output! */
|
||||
else {
|
||||
opt->quiet = 1;
|
||||
opt->verbosedisplay = 0;
|
||||
opt->verbosedisplay = HTS_VERBOSE_NONE;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1783,7 +1783,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
break;
|
||||
//
|
||||
case 'b':
|
||||
sscanf(com + 1, "%d", &opt->accept_cookie);
|
||||
sscanf(com + 1, "%d", (int *) &opt->accept_cookie);
|
||||
while(isdigit((unsigned char) *(com + 1)))
|
||||
com++;
|
||||
break;
|
||||
@@ -1815,24 +1815,22 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
com++;
|
||||
}
|
||||
break;
|
||||
case 'L':
|
||||
{
|
||||
sscanf(com + 1, "%d", &opt->savename_83);
|
||||
switch (opt->savename_83) {
|
||||
case 0: // 8-3 (ISO9660 L1)
|
||||
opt->savename_83 = 1;
|
||||
break;
|
||||
case 1:
|
||||
opt->savename_83 = 0;
|
||||
break;
|
||||
default: // 2 == ISO9660 (ISO9660 L2)
|
||||
opt->savename_83 = 2;
|
||||
break;
|
||||
}
|
||||
while(isdigit((unsigned char) *(com + 1)))
|
||||
com++;
|
||||
case 'L': {
|
||||
sscanf(com + 1, "%d", (int *) &opt->savename_83);
|
||||
switch (opt->savename_83) {
|
||||
case 0: // 8-3 (ISO9660 L1)
|
||||
opt->savename_83 = HTS_SAVENAME_83_DOS;
|
||||
break;
|
||||
case 1:
|
||||
opt->savename_83 = HTS_SAVENAME_83_LONG;
|
||||
break;
|
||||
default: // 2 == ISO9660 (ISO9660 L2)
|
||||
opt->savename_83 = HTS_SAVENAME_83_ISO9660;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
while (isdigit((unsigned char) *(com + 1)))
|
||||
com++;
|
||||
} break;
|
||||
case 's':
|
||||
if (isdigit((unsigned char) *(com + 1))) {
|
||||
sscanf(com + 1, "%d", (int *) &opt->robots);
|
||||
@@ -1845,12 +1843,12 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
#endif
|
||||
break;
|
||||
case 'o':
|
||||
sscanf(com + 1, "%d", &opt->errpage);
|
||||
sscanf(com + 1, "%d", (int *) &opt->errpage);
|
||||
while(isdigit((unsigned char) *(com + 1)))
|
||||
com++;
|
||||
break;
|
||||
case 'u':
|
||||
sscanf(com + 1, "%d", &opt->check_type);
|
||||
sscanf(com + 1, "%d", (int *) &opt->check_type);
|
||||
while(isdigit((unsigned char) *(com + 1)))
|
||||
com++;
|
||||
break;
|
||||
@@ -1917,7 +1915,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
case 'I':
|
||||
opt->kindex = 1;
|
||||
if (isdigit((unsigned char) *(com + 1))) {
|
||||
sscanf(com + 1, "%d", &opt->kindex);
|
||||
sscanf(com + 1, "%d", (int *) &opt->kindex);
|
||||
while(isdigit((unsigned char) *(com + 1)))
|
||||
com++;
|
||||
}
|
||||
@@ -1989,9 +1987,9 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
}
|
||||
break; // url hack
|
||||
case 'v':
|
||||
opt->verbosedisplay = 2;
|
||||
opt->verbosedisplay = HTS_VERBOSE_FULL;
|
||||
if (isdigit((unsigned char) *(com + 1))) {
|
||||
sscanf(com + 1, "%d", &opt->verbosedisplay);
|
||||
sscanf(com + 1, "%d", (int *) &opt->verbosedisplay);
|
||||
while(isdigit((unsigned char) *(com + 1)))
|
||||
com++;
|
||||
}
|
||||
@@ -2004,9 +2002,9 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
}
|
||||
break;
|
||||
case 'N':
|
||||
opt->savename_delayed = 2;
|
||||
opt->savename_delayed = HTS_SAVENAME_DELAYED_HARD;
|
||||
if (isdigit((unsigned char) *(com + 1))) {
|
||||
sscanf(com + 1, "%d", &opt->savename_delayed);
|
||||
sscanf(com + 1, "%d", (int *) &opt->savename_delayed);
|
||||
while(isdigit((unsigned char) *(com + 1)))
|
||||
com++;
|
||||
}
|
||||
@@ -3096,6 +3094,41 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
htsmain_free();
|
||||
return 0;
|
||||
break;
|
||||
case '9': { // copy_htsopt selftest: httrack -#9
|
||||
httrackp *from = hts_create_opt();
|
||||
httrackp *to = hts_create_opt();
|
||||
int err = 0;
|
||||
|
||||
/* from-values differ from both the to-values and the
|
||||
hts_create_opt() defaults (nearlink FALSE, errpage/parseall
|
||||
TRUE), so a copy that no-ops or just resets to defaults is
|
||||
caught too, not only the unsigned-guard bug. */
|
||||
from->retry = 7; /* int field: positive control */
|
||||
to->retry = 0;
|
||||
from->nearlink = HTS_TRUE;
|
||||
to->nearlink = HTS_FALSE;
|
||||
from->errpage = HTS_FALSE;
|
||||
to->errpage = HTS_TRUE;
|
||||
from->parseall = HTS_FALSE;
|
||||
to->parseall = HTS_TRUE;
|
||||
|
||||
copy_htsopt(from, to);
|
||||
|
||||
if (to->retry != 7)
|
||||
err = 1;
|
||||
if (to->nearlink != HTS_TRUE)
|
||||
err = 1;
|
||||
if (to->errpage != HTS_FALSE)
|
||||
err = 1;
|
||||
if (to->parseall != HTS_FALSE)
|
||||
err = 1;
|
||||
|
||||
hts_free_opt(from);
|
||||
hts_free_opt(to);
|
||||
printf("copy-htsopt: %s\n", err ? "FAIL" : "OK");
|
||||
htsmain_free();
|
||||
return err;
|
||||
} break;
|
||||
case '!':
|
||||
HTS_PANIC_PRINTF
|
||||
("Option #! is disabled for security reasons");
|
||||
|
||||
@@ -242,6 +242,14 @@ Please visit our Website: http://www.httrack.com
|
||||
#define HTS_NOPARAM "(none)"
|
||||
#define HTS_NOPARAM2 "\"(none)\""
|
||||
|
||||
/* Boolean flag for option fields and API yes/no returns. An enum (not C bool)
|
||||
so it stays int-sized: option fields keep the httrackp layout/ABI, and a
|
||||
return type stays compatible with the int it replaces. */
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_boolean
|
||||
#define HTS_DEF_DEFSTRUCT_hts_boolean
|
||||
typedef enum hts_boolean { HTS_FALSE = 0, HTS_TRUE = 1 } hts_boolean;
|
||||
#endif
|
||||
|
||||
/* Larger/smaller of two values. Macros: arguments are evaluated twice. */
|
||||
#define maximum(A,B) ( (A) > (B) ? (A) : (B) )
|
||||
|
||||
|
||||
105
src/htslib.c
105
src/htslib.c
@@ -3646,8 +3646,9 @@ HTSEXT_API char *unescape_http(char *const catbuff, const size_t size, const cha
|
||||
// DOES NOT DECODE %25 (part of CHAR_DELIM)
|
||||
// no_high & 1: decode high chars
|
||||
// no_high & 2: decode space
|
||||
HTSEXT_API char *unescape_http_unharm(char *const catbuff, const size_t size,
|
||||
const char *s, const int no_high) {
|
||||
HTSEXT_API char *unescape_http_unharm(char *const catbuff, const size_t size,
|
||||
const char *s,
|
||||
const hts_boolean no_high) {
|
||||
size_t i, j;
|
||||
|
||||
RUNTIME_TIME_CHECK_SIZE(size);
|
||||
@@ -3931,8 +3932,8 @@ void hts_replace(char *s, char from, char to) {
|
||||
|
||||
// guess a local file's mime type (e.g. fil="toto.gif" -> s="image/gif")
|
||||
// returns 1 if a type was written to s, 0 otherwise
|
||||
int guess_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
||||
const char *fil) {
|
||||
hts_boolean guess_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
||||
const char *fil) {
|
||||
return get_httptype_sized(opt, s, ssize, fil, 1);
|
||||
}
|
||||
|
||||
@@ -3945,8 +3946,8 @@ void guess_httptype(httrackp * opt, char *s, const char *fil) {
|
||||
// write the mime type for fil into s (capacity ssize)
|
||||
// flag: 1 to always return a type (the "application/..." / octet-stream
|
||||
// fallback) returns 1 if a type was written to s, 0 otherwise
|
||||
HTSEXT_API int get_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
||||
const char *fil, int flag) {
|
||||
HTSEXT_API hts_boolean get_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
||||
const char *fil, hts_boolean flag) {
|
||||
// userdef overrides get_httptype (a rule with an empty value, e.g. "--assume
|
||||
// cgi=", matches but writes nothing: report it as "no type" like the old
|
||||
// code, whose callers tested strnotempty(s))
|
||||
@@ -4196,7 +4197,7 @@ HTSEXT_API int is_userknowntype(httrackp * opt, const char *fil) {
|
||||
|
||||
// page dynamique?
|
||||
// is_dyntype(get_ext("foo.asp"))
|
||||
HTSEXT_API int is_dyntype(const char *fil) {
|
||||
HTSEXT_API hts_boolean is_dyntype(const char *fil) {
|
||||
int j = 0;
|
||||
|
||||
if (!fil)
|
||||
@@ -4214,7 +4215,7 @@ HTSEXT_API int is_dyntype(const char *fil) {
|
||||
|
||||
// types critiques qui ne doivent pas être changés car renvoyés par des serveurs qui ne
|
||||
// connaissent pas le type
|
||||
int may_unknown(httrackp * opt, const char *st) {
|
||||
hts_boolean may_unknown(httrackp *opt, const char *st) {
|
||||
int j = 0;
|
||||
|
||||
// types média
|
||||
@@ -5236,7 +5237,8 @@ HTSEXT_API int hts_uninit_module(void) {
|
||||
}
|
||||
|
||||
// legacy. do not use
|
||||
HTSEXT_API int hts_log(httrackp * opt, const char *prefix, const char *msg) {
|
||||
HTSEXT_API hts_boolean hts_log(httrackp *opt, const char *prefix,
|
||||
const char *msg) {
|
||||
if (opt->log != NULL) {
|
||||
fspc(opt, opt->log, prefix);
|
||||
fprintf(opt->log, "%s" LF, msg);
|
||||
@@ -5435,14 +5437,14 @@ HTSEXT_API httrackp *hts_create_opt(void) {
|
||||
/* default settings */
|
||||
|
||||
opt->wizard = HTS_WIZARD_AUTO; // wizard automatique
|
||||
opt->quiet = 0; // questions
|
||||
opt->quiet = HTS_FALSE;
|
||||
//
|
||||
opt->travel = HTS_TRAVEL_SAME_ADDRESS; // même adresse
|
||||
opt->depth = 9999; // mirror total par défaut
|
||||
opt->extdepth = 0; // mais pas à l'extérieur
|
||||
opt->seeker = HTS_SEEKER_DOWN; // down
|
||||
opt->urlmode = HTS_URLMODE_RELATIVE; // relatif par défaut
|
||||
opt->no_type_change = 0; // change file types
|
||||
opt->no_type_change = HTS_FALSE;
|
||||
opt->debug = LOG_NOTICE; // small log
|
||||
opt->getmode = HTS_GETMODE_HTML | HTS_GETMODE_NONHTML;
|
||||
opt->maxsite = -1; // taille max site (aucune)
|
||||
@@ -5450,53 +5452,56 @@ HTSEXT_API httrackp *hts_create_opt(void) {
|
||||
opt->maxfile_html = -1; // idem pour html
|
||||
opt->maxsoc = 4; // nbre socket max
|
||||
opt->fragment = -1; // pas de fragmentation
|
||||
opt->nearlink = 0; // ne pas prendre les liens non-html "adjacents"
|
||||
opt->makeindex = 1; // faire un index
|
||||
opt->kindex = 0; // index 'keyword'
|
||||
opt->delete_old = 1; // effacer anciens fichiers
|
||||
opt->background_on_suspend = 1; // Background the process if Control Z calls signal suspend.
|
||||
opt->makestat = 0; // pas de fichier de stats
|
||||
opt->maketrack = 0; // ni de tracking
|
||||
opt->nearlink = HTS_FALSE;
|
||||
opt->makeindex = HTS_TRUE;
|
||||
opt->kindex = HTS_FALSE;
|
||||
opt->delete_old = HTS_TRUE;
|
||||
opt->background_on_suspend = HTS_TRUE;
|
||||
opt->makestat = HTS_FALSE;
|
||||
opt->maketrack = HTS_FALSE;
|
||||
opt->timeout = 120; // timeout par défaut (2 minutes)
|
||||
opt->cache = HTS_CACHE_PRIORITY; // cache prioritaire
|
||||
opt->shell = 0; // pas de shell par defaut
|
||||
opt->shell = HTS_FALSE;
|
||||
opt->proxy.active = 0; // pas de proxy
|
||||
opt->user_agent_send = 1; // envoyer un user-agent
|
||||
opt->user_agent_send = HTS_TRUE;
|
||||
StringCopy(opt->user_agent,
|
||||
"Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)");
|
||||
StringCopy(opt->referer, "");
|
||||
StringCopy(opt->from, "");
|
||||
opt->savename_83 = 0; // noms longs par défaut
|
||||
opt->savename_83 = HTS_SAVENAME_83_LONG; // long names by default
|
||||
opt->savename_type = 0; // avec structure originale
|
||||
opt->savename_delayed = 2; // hard delayed type (default)
|
||||
opt->delayed_cached = 1; // cached delayed type (default)
|
||||
opt->mimehtml = 0; // pas MIME-html
|
||||
opt->savename_delayed =
|
||||
HTS_SAVENAME_DELAYED_HARD; // always delay the type check (default)
|
||||
opt->delayed_cached = HTS_TRUE;
|
||||
opt->mimehtml = HTS_FALSE;
|
||||
opt->parsejava = HTSPARSE_DEFAULT; // parser classes
|
||||
opt->hostcontrol = 0; // PAS de control host pour timeout et traffic jammer
|
||||
opt->retry = 2; // 2 retry par défaut
|
||||
opt->errpage = 1; // copier ou générer une page d'erreur en cas d'erreur (404 etc.)
|
||||
opt->check_type = 1; // vérifier type si inconnu (cgi,asp..) SAUF / considéré comme html
|
||||
opt->all_in_cache = 0; // ne pas tout stocker en cache
|
||||
opt->errpage = HTS_TRUE;
|
||||
// d'erreur (404 etc.)
|
||||
opt->check_type = HTS_TRUE;
|
||||
// considéré comme html
|
||||
opt->all_in_cache = HTS_FALSE;
|
||||
opt->robots = HTS_ROBOTS_ALWAYS; // traiter les robots.txt
|
||||
opt->external = 0; // liens externes normaux
|
||||
opt->passprivacy = 0; // mots de passe dans les fichiers
|
||||
opt->includequery = 1; // include query-string par défaut
|
||||
opt->mirror_first_page = 0; // pas mode mirror links
|
||||
opt->accept_cookie = 1; // gérer les cookies
|
||||
opt->external = HTS_FALSE;
|
||||
opt->passprivacy = HTS_FALSE;
|
||||
opt->includequery = HTS_TRUE;
|
||||
opt->mirror_first_page = HTS_FALSE;
|
||||
opt->accept_cookie = HTS_TRUE;
|
||||
opt->cookie = NULL;
|
||||
opt->http10 = 0; // laisser http/1.1
|
||||
opt->nokeepalive = 0; // pas keep-alive
|
||||
opt->nocompression = 0; // pas de compression
|
||||
opt->tolerant = 0; // ne pas accepter content-length incorrect
|
||||
opt->parseall = 1; // tout parser (tags inconnus, par exemple)
|
||||
opt->parsedebug = 0; // pas de mode débuggage
|
||||
opt->norecatch = 0; // ne pas reprendre les fichiers effacés par l'utilisateur
|
||||
opt->verbosedisplay = 0; // pas d'animation texte
|
||||
opt->sizehack = 0; // size hack
|
||||
opt->urlhack = 1; // url hack (normalizer)
|
||||
opt->http10 = HTS_FALSE;
|
||||
opt->nokeepalive = HTS_FALSE;
|
||||
opt->nocompression = HTS_FALSE;
|
||||
opt->tolerant = HTS_FALSE;
|
||||
opt->parseall = HTS_TRUE;
|
||||
opt->parsedebug = HTS_FALSE;
|
||||
opt->norecatch = HTS_FALSE;
|
||||
opt->verbosedisplay = HTS_VERBOSE_NONE; // no text animation
|
||||
opt->sizehack = HTS_FALSE;
|
||||
opt->urlhack = HTS_TRUE;
|
||||
StringCopy(opt->footer, HTS_DEFAULT_FOOTER);
|
||||
opt->ftp_proxy = 1; // proxy http pour ftp
|
||||
opt->convert_utf8 = 1; // convert html to UTF-8
|
||||
opt->ftp_proxy = HTS_TRUE;
|
||||
opt->convert_utf8 = HTS_TRUE;
|
||||
StringCopy(opt->filelist, "");
|
||||
StringCopy(opt->lang_iso, "en, *");
|
||||
StringCopy(opt->accept,
|
||||
@@ -5507,9 +5512,9 @@ HTSEXT_API httrackp *hts_create_opt(void) {
|
||||
//
|
||||
opt->log = stdout;
|
||||
opt->errlog = stderr;
|
||||
opt->flush = 1; // flush sur les fichiers log
|
||||
//opt->aff_progress=0;
|
||||
opt->keyboard = 0;
|
||||
opt->flush = HTS_TRUE;
|
||||
// opt->aff_progress=0;
|
||||
opt->keyboard = HTS_FALSE;
|
||||
//
|
||||
StringCopy(opt->path_html, "");
|
||||
StringCopy(opt->path_html_utf8, "");
|
||||
@@ -5526,10 +5531,10 @@ HTSEXT_API httrackp *hts_create_opt(void) {
|
||||
opt->waittime = -1; // wait until.. hh*3600+mm*60+ss
|
||||
//
|
||||
opt->exec = "";
|
||||
opt->is_update = 0; // not an update (yet)
|
||||
opt->dir_topindex = 0; // do not built top index (yet)
|
||||
opt->is_update = HTS_FALSE;
|
||||
opt->dir_topindex = HTS_FALSE;
|
||||
//
|
||||
opt->bypass_limits = 0; // enforce limits by default
|
||||
opt->bypass_limits = HTS_FALSE;
|
||||
opt->state.stop = 0; // stopper
|
||||
opt->state.exit_xh = 0; // abort
|
||||
//
|
||||
|
||||
@@ -184,10 +184,11 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
|
||||
/* 8-3 ? */
|
||||
switch (opt->savename_83) {
|
||||
case 1: // 8-3
|
||||
case HTS_SAVENAME_83_DOS: // 8-3
|
||||
max_char = 8;
|
||||
break;
|
||||
case 2: // Level 2 File names may be up to 31 characters.
|
||||
case HTS_SAVENAME_83_ISO9660: // Level 2 File names may be up to 31
|
||||
// characters.
|
||||
max_char = 31;
|
||||
break;
|
||||
default:
|
||||
@@ -324,7 +325,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
}
|
||||
|
||||
/* replace shtml to html.. */
|
||||
if (opt->savename_delayed == 2)
|
||||
if (opt->savename_delayed == HTS_SAVENAME_DELAYED_HARD)
|
||||
is_html = -1; /* ALWAYS delay type */
|
||||
else
|
||||
is_html = ishtml(opt, fil);
|
||||
@@ -363,7 +364,9 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
) {
|
||||
// tester type avec requète HEAD si on ne connait pas le type du fichier
|
||||
if (!((opt->check_type == 1) && (fil[strlen(fil) - 1] == '/'))) // slash doit être html?
|
||||
if (opt->savename_delayed == 2 || (ishtest = ishtml(opt, fil)) < 0) { // on ne sait pas si c'est un html ou un fichier..
|
||||
if (opt->savename_delayed == HTS_SAVENAME_DELAYED_HARD ||
|
||||
(ishtest = ishtml(opt, fil)) <
|
||||
0) { // unsure whether it's html or a file
|
||||
// lire dans le cache
|
||||
htsblk r = cache_read_including_broken(opt, cache, adr, fil); // test uniquement
|
||||
|
||||
@@ -393,11 +396,12 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
}
|
||||
#endif
|
||||
//
|
||||
} else if (opt->savename_delayed != 2 && is_userknowntype(opt, fil)) { /* PATCH BY BRIAN SCHRÖDER.
|
||||
Lookup mimetype not only by extension,
|
||||
but also by filename */
|
||||
/* Note: "foo.cgi => text/html" means that foo.cgi shall have the text/html MIME file type,
|
||||
that is, ".html" */
|
||||
} else if (opt->savename_delayed != HTS_SAVENAME_DELAYED_HARD &&
|
||||
is_userknowntype(opt, fil)) { /* PATCH BY BRIAN SCHRÖDER.
|
||||
Lookup mimetype not only by extension,
|
||||
but also by filename */
|
||||
/* Note: "foo.cgi => text/html" means that foo.cgi shall have the
|
||||
text/html MIME file type, that is, ".html" */
|
||||
char BIGSTK mime[1024];
|
||||
|
||||
mime[0] = ext[0] = '\0';
|
||||
@@ -408,9 +412,13 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
}
|
||||
}
|
||||
}
|
||||
// note: if savename_delayed is enabled, the naming will be temporary (and slightly invalid!)
|
||||
// note: if we are about to stop (opt->state.stop), back_add() will fail later
|
||||
else if (opt->savename_delayed != 0 && !opt->state.stop) {
|
||||
// note: if savename_delayed is enabled, the naming will be temporary
|
||||
// (and slightly invalid!)
|
||||
//
|
||||
// note: if we are about to stop (opt->state.stop), back_add() will
|
||||
// fail later
|
||||
else if (opt->savename_delayed != HTS_SAVENAME_DELAYED_NONE &&
|
||||
!opt->state.stop) {
|
||||
// Check if the file is ready in backing. We basically take the same logic as later.
|
||||
// FIXME: we should cleanup and factorize this unholy mess
|
||||
if (headers != NULL && headers->status >= 0 && !is_redirect) {
|
||||
@@ -698,7 +706,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
}
|
||||
// restaurer
|
||||
opt->state._hts_in_html_parsing = hihp;
|
||||
} // caché?
|
||||
} // caché?
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1190,7 +1198,8 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
// Not used anymore unless non-delayed types.
|
||||
// de même en cas de manque d'extension on en place une de manière forcée..
|
||||
// cela évite les /chez/toto et les /chez/toto/index.html incompatibles
|
||||
if (opt->savename_type != -1 && opt->savename_delayed != 2) {
|
||||
if (opt->savename_type != -1 &&
|
||||
opt->savename_delayed != HTS_SAVENAME_DELAYED_HARD) {
|
||||
char *a = afs->save + strlen(afs->save) - 1;
|
||||
|
||||
while((a > afs->save) && (*a != '.') && (*a != '/'))
|
||||
@@ -1236,31 +1245,21 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
size_t i;
|
||||
for(i = 0 ; afs->save[i] != '\0' ; i++) {
|
||||
unsigned char c = (unsigned char) afs->save[i];
|
||||
if (c < 32 // control
|
||||
|| c == 127 // unwise
|
||||
|| c == '~' // unix unwise
|
||||
|| c == '\\' // windows separator
|
||||
|| c == ':' // windows forbidden
|
||||
|| c == '*' // windows forbidden
|
||||
|| c == '?' // windows forbidden
|
||||
|| c == '\"' // windows forbidden
|
||||
|| c == '<' // windows forbidden
|
||||
|| c == '>' // windows forbidden
|
||||
|| c == '|' // windows forbidden
|
||||
//|| c == '@' // ?
|
||||
||
|
||||
(
|
||||
opt->savename_83 == 2 // CDROM
|
||||
&&
|
||||
(
|
||||
c == '-'
|
||||
|| c == '='
|
||||
|| c == '+'
|
||||
)
|
||||
)
|
||||
)
|
||||
{
|
||||
afs->save[i] = '_';
|
||||
if (c < 32 // control
|
||||
|| c == 127 // unwise
|
||||
|| c == '~' // unix unwise
|
||||
|| c == '\\' // windows separator
|
||||
|| c == ':' // windows forbidden
|
||||
|| c == '*' // windows forbidden
|
||||
|| c == '?' // windows forbidden
|
||||
|| c == '\"' // windows forbidden
|
||||
|| c == '<' // windows forbidden
|
||||
|| c == '>' // windows forbidden
|
||||
|| c == '|' // windows forbidden
|
||||
//|| c == '@' // ?
|
||||
|| (opt->savename_83 == HTS_SAVENAME_83_ISO9660 // CDROM
|
||||
&& (c == '-' || c == '=' || c == '+'))) {
|
||||
afs->save[i] = '_';
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1521,7 +1520,8 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
char *a = afs->save + strlen(afs->save) - 1;
|
||||
char *b;
|
||||
int n = 2;
|
||||
char collisionSeparator = ((opt->savename_83 != 2) ? '-' : '_');
|
||||
char collisionSeparator =
|
||||
((opt->savename_83 != HTS_SAVENAME_83_ISO9660) ? '-' : '_');
|
||||
|
||||
tempo[0] = '\0';
|
||||
|
||||
|
||||
130
src/htsopt.h
130
src/htsopt.h
@@ -342,17 +342,44 @@ typedef enum hts_seeker {
|
||||
HTS_SEEKER_UP = 1 << 1 /**< may ascend to parent directories */
|
||||
} hts_seeker;
|
||||
|
||||
/* Link-following scope, stored in the low byte of opt->travel. */
|
||||
/* opt->travel: link-following scope in the low byte, flags OR'd in above it. */
|
||||
typedef enum hts_travel_scope {
|
||||
HTS_TRAVEL_SAME_ADDRESS = 0, /**< stay on the same address (host) */
|
||||
HTS_TRAVEL_SAME_DOMAIN = 1, /**< stay on the same principal domain */
|
||||
HTS_TRAVEL_SAME_TLD = 2, /**< stay on the same TLD (e.g. .com) */
|
||||
HTS_TRAVEL_EVERYWHERE = 7 /**< follow links anywhere on the web */
|
||||
HTS_TRAVEL_EVERYWHERE = 7, /**< follow links anywhere on the web */
|
||||
HTS_TRAVEL_TEST_ALL = 1 << 8 /**< also test forbidden URLs (-t) */
|
||||
} hts_travel_scope;
|
||||
|
||||
/* Flags OR'd into opt->travel above the scope value. */
|
||||
#define HTS_TRAVEL_SCOPE_MASK 0xff /**< mask selecting the scope value */
|
||||
#define HTS_TRAVEL_TEST_ALL (1 << 8) /**< also test forbidden URLs (-t) */
|
||||
/* Mask selecting the scope value out of opt->travel. */
|
||||
#define HTS_TRAVEL_SCOPE_MASK 0xff
|
||||
|
||||
/* Text progress display detail (opt->verbosedisplay). */
|
||||
typedef enum hts_verbosedisplay {
|
||||
HTS_VERBOSE_NONE = 0, /**< no animated progress display (default) */
|
||||
HTS_VERBOSE_SIMPLE = 1, /**< minimal single-line progress */
|
||||
HTS_VERBOSE_FULL = 2 /**< full animated progress */
|
||||
} hts_verbosedisplay;
|
||||
|
||||
/* Delayed file-type resolution policy (opt->savename_delayed). */
|
||||
typedef enum hts_savename_delayed {
|
||||
HTS_SAVENAME_DELAYED_NONE = 0, /**< resolve the type immediately */
|
||||
HTS_SAVENAME_DELAYED_SOFT = 1, /**< delay the type check when unknown */
|
||||
HTS_SAVENAME_DELAYED_HARD = 2 /**< always delay the type check (default) */
|
||||
} hts_savename_delayed;
|
||||
|
||||
/* Saved-name length layout (opt->savename_83). */
|
||||
typedef enum hts_savename_83 {
|
||||
HTS_SAVENAME_83_LONG = 0, /**< long file names (default) */
|
||||
HTS_SAVENAME_83_DOS = 1, /**< DOS 8.3 names (ISO9660 level 1) */
|
||||
HTS_SAVENAME_83_ISO9660 = 2 /**< ISO9660 level 2 names (up to 31 chars) */
|
||||
} hts_savename_83;
|
||||
|
||||
/* Host-banning triggers (opt->hostcontrol bitmask). */
|
||||
typedef enum hts_hostcontrol {
|
||||
HTS_HOSTCONTROL_BAN_TIMEOUT = 1 << 0, /**< ban a timing-out host */
|
||||
HTS_HOSTCONTROL_BAN_SLOW = 1 << 1 /**< ban a too-slow host */
|
||||
} hts_hostcontrol;
|
||||
|
||||
#ifndef HTS_DEF_FWSTRUCT_lien_buffers
|
||||
#define HTS_DEF_FWSTRUCT_lien_buffers
|
||||
@@ -378,15 +405,15 @@ struct httrackp {
|
||||
size_t size_httrackp; /**< size of this structure (version/ABI guard) */
|
||||
/* */
|
||||
hts_wizard wizard; /**< interactive wizard level (none/ask/auto) */
|
||||
int flush; /**< fflush() log files after each write */
|
||||
hts_boolean flush; /**< fflush() log files after each write */
|
||||
int travel; /**< link-following scope (same domain, etc.) */
|
||||
int seeker; /**< allowed direction: go up and/or down the tree */
|
||||
int depth; /**< maximum recursion depth (-rN) */
|
||||
int extdepth; /**< maximum recursion depth outside the start domain */
|
||||
hts_urlmode
|
||||
urlmode; /**< saved-link rewriting style (relative, absolute, etc.) */
|
||||
int no_type_change; // do not change file type according to MIME
|
||||
int debug; /**< debug logging level */
|
||||
hts_boolean no_type_change; // do not change file type according to MIME
|
||||
hts_log_type debug; /**< debug logging level */
|
||||
int getmode; /**< what to fetch (HTML, images, ...) bitmask */
|
||||
FILE *log; /**< informational log stream; NULL mutes it */
|
||||
FILE *errlog; /**< error log stream; NULL mutes it */
|
||||
@@ -395,10 +422,11 @@ struct httrackp {
|
||||
LLint maxfile_html; /**< max bytes per HTML file */
|
||||
int maxsoc; /**< max simultaneous sockets (-cN) */
|
||||
LLint fragment; /**< split site after this many bytes */
|
||||
int nearlink; /**< also fetch images/data adjacent to a page but off-site */
|
||||
int makeindex; /**< build a top-level index.html */
|
||||
int kindex; /**< build a keyword index */
|
||||
int delete_old; /**< delete locally obsolete files after update */
|
||||
hts_boolean
|
||||
nearlink; /**< also fetch images/data adjacent to a page but off-site */
|
||||
hts_boolean makeindex; /**< build a top-level index.html */
|
||||
hts_boolean kindex; /**< build a keyword index */
|
||||
hts_boolean delete_old; /**< delete locally obsolete files after update */
|
||||
int timeout; /**< connection timeout in seconds */
|
||||
int rateout; /**< minimum transfer rate (bytes/s) before abort */
|
||||
int maxtime; /**< max total mirror duration in seconds */
|
||||
@@ -407,16 +435,18 @@ struct httrackp {
|
||||
int waittime; /**< scheduled start time (wall-clock seconds) */
|
||||
hts_cachemode cache; /**< cache generation mode */
|
||||
// int aff_progress; // progress bar
|
||||
int shell; /**< driven by a shell over stdin/stdout pipes */
|
||||
hts_boolean shell; /**< driven by a shell over stdin/stdout pipes */
|
||||
t_proxy proxy; /**< proxy configuration */
|
||||
int savename_83; /**< force 8.3 (DOS) file names */
|
||||
hts_savename_83
|
||||
savename_83; /**< saved-name length layout (long/DOS/ISO9660) */
|
||||
int savename_type; /**< saved-name layout (original tree, flat, ...) */
|
||||
String
|
||||
savename_userdef; /**< user-defined name template (e.g. %h%p/%n%q.%t) */
|
||||
int savename_delayed; // delayed type check
|
||||
int delayed_cached; // delayed type check can be cached to speedup updates
|
||||
int mimehtml; /**< produce a single MIME/MHTML archive */
|
||||
int user_agent_send; /**< send a User-Agent header */
|
||||
hts_savename_delayed savename_delayed; /**< delayed type-check policy */
|
||||
hts_boolean
|
||||
delayed_cached; // delayed type check can be cached to speedup updates
|
||||
hts_boolean mimehtml; /**< produce a single MIME/MHTML archive */
|
||||
hts_boolean user_agent_send; /**< send a User-Agent header */
|
||||
String user_agent; /**< User-Agent value (e.g. httrack/1.0) */
|
||||
String referer; /**< Referer value to send */
|
||||
String from; /**< From value to send */
|
||||
@@ -425,37 +455,39 @@ struct httrackp {
|
||||
String path_html_utf8; /**< output directory for the mirror, UTF-8 form */
|
||||
String path_bin; /**< directory for HTML templates */
|
||||
int retry; /**< extra retries on a failed transfer */
|
||||
int makestat; /**< maintain a transfer-statistics log */
|
||||
int maketrack; /**< maintain an operations-statistics log */
|
||||
hts_boolean makestat; /**< maintain a transfer-statistics log */
|
||||
hts_boolean maketrack; /**< maintain an operations-statistics log */
|
||||
int parsejava; /**< Java/JS parsing mode; see htsparsejava_flags */
|
||||
int hostcontrol; /**< drop hosts that are too slow, etc. */
|
||||
int errpage; /**< generate an error page on 404 and similar */
|
||||
int check_type; /**< probe unknown-type links (cgi/asp/dir) and follow moves
|
||||
*/
|
||||
int all_in_cache; /**< keep all retrieved data in the cache */
|
||||
int hostcontrol; /**< ban slow/timing-out hosts; see hts_hostcontrol bits */
|
||||
hts_boolean errpage; /**< generate an error page on 404 and similar */
|
||||
hts_boolean
|
||||
check_type; /**< probe unknown-type links (cgi/asp/dir) and follow moves
|
||||
*/
|
||||
hts_boolean all_in_cache; /**< keep all retrieved data in the cache */
|
||||
hts_robots robots; /**< robots.txt handling level */
|
||||
int external; /**< render external links as error pages */
|
||||
int passprivacy; /**< strip passwords from external links */
|
||||
int includequery; /**< include the query string in saved names */
|
||||
int mirror_first_page; /**< only mirror the links of the first page */
|
||||
hts_boolean external; /**< render external links as error pages */
|
||||
hts_boolean passprivacy; /**< strip passwords from external links */
|
||||
hts_boolean includequery; /**< include the query string in saved names */
|
||||
hts_boolean mirror_first_page; /**< only mirror the links of the first page */
|
||||
String sys_com; /**< system command to run */
|
||||
int sys_com_exec; /**< actually execute sys_com */
|
||||
int accept_cookie; /**< accept and send cookies */
|
||||
hts_boolean sys_com_exec; /**< actually execute sys_com */
|
||||
hts_boolean accept_cookie; /**< accept and send cookies */
|
||||
t_cookie *cookie; /**< cookie store */
|
||||
int http10; /**< force HTTP/1.0 */
|
||||
int nokeepalive; /**< disable keep-alive */
|
||||
int nocompression; /**< disable content compression */
|
||||
int sizehack; /**< treat same-size response as "updated" */
|
||||
int urlhack; // force "url normalization" to avoid loops
|
||||
int tolerant; /**< accept an incorrect Content-Length */
|
||||
int parseall; /**< parse aggressively, including unknown tags with links */
|
||||
int parsedebug; /**< parser debug mode */
|
||||
int norecatch; /**< do not re-fetch files the user deleted locally */
|
||||
int verbosedisplay; /**< animated text progress display */
|
||||
hts_boolean http10; /**< force HTTP/1.0 */
|
||||
hts_boolean nokeepalive; /**< disable keep-alive */
|
||||
hts_boolean nocompression; /**< disable content compression */
|
||||
hts_boolean sizehack; /**< treat same-size response as "updated" */
|
||||
hts_boolean urlhack; // force "url normalization" to avoid loops
|
||||
hts_boolean tolerant; /**< accept an incorrect Content-Length */
|
||||
hts_boolean
|
||||
parseall; /**< parse aggressively, including unknown tags with links */
|
||||
hts_boolean parsedebug; /**< parser debug mode */
|
||||
hts_boolean norecatch; /**< do not re-fetch files the user deleted locally */
|
||||
hts_verbosedisplay verbosedisplay; /**< animated text progress display */
|
||||
String footer; /**< footer/info line injected into pages */
|
||||
int maxcache; /**< in-memory cache backing limit (bytes) */
|
||||
// int maxcache_anticipate; // maximum links to anticipate (upper bound)
|
||||
int ftp_proxy; /**< use the HTTP proxy for FTP too */
|
||||
hts_boolean ftp_proxy; /**< use the HTTP proxy for FTP too */
|
||||
String filelist; /**< file listing URLs to include */
|
||||
String urllist; /**< file listing filters to include */
|
||||
htsfilters filters; /**< filter pointers (+/-pattern rules) */
|
||||
@@ -469,20 +501,20 @@ struct httrackp {
|
||||
String headers; // Additional headers
|
||||
String mimedefs; // ext1=mimetype1\next2=mimetype2..
|
||||
String mod_blacklist; /**< blacklisted modules */
|
||||
int convert_utf8; // filenames UTF-8 conversion (3.46)
|
||||
hts_boolean convert_utf8; // filenames UTF-8 conversion (3.46)
|
||||
//
|
||||
int maxlink; /**< max number of links */
|
||||
int maxfilter; /**< max number of filters */
|
||||
//
|
||||
const char *exec; /**< path of the running executable */
|
||||
//
|
||||
int quiet; /**< suppress non-wizard questions */
|
||||
int keyboard; /**< poll stdin for keyboard input */
|
||||
int bypass_limits; // bypass built-in limits
|
||||
int background_on_suspend; // background process on suspend signal
|
||||
hts_boolean quiet; /**< suppress non-wizard questions */
|
||||
hts_boolean keyboard; /**< poll stdin for keyboard input */
|
||||
hts_boolean bypass_limits; // bypass built-in limits
|
||||
hts_boolean background_on_suspend; // background process on suspend signal
|
||||
//
|
||||
int is_update; /**< this run is an update (show "File updated...") */
|
||||
int dir_topindex; /**< rebuild the top index afterwards */
|
||||
hts_boolean is_update; /**< this run is an update (show "File updated...") */
|
||||
hts_boolean dir_topindex; /**< rebuild the top index afterwards */
|
||||
//
|
||||
// callbacks
|
||||
t_hts_htmlcheck_callbacks
|
||||
|
||||
@@ -3722,7 +3722,8 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
|
||||
//case -1: can_retry=1; break;
|
||||
case STATUSCODE_TIMEOUT:
|
||||
if (opt->hostcontrol) { // timeout et retry épuisés
|
||||
if ((opt->hostcontrol & 1) && (heap(ptr)->retry <= 0)) {
|
||||
if ((opt->hostcontrol & HTS_HOSTCONTROL_BAN_TIMEOUT) &&
|
||||
(heap(ptr)->retry <= 0)) {
|
||||
hts_log_print(opt, LOG_DEBUG, "Link banned: %s%s", urladr(), urlfil());
|
||||
host_ban(opt, ptr, sback, jump_identification_const(urladr()));
|
||||
hts_log_print(opt, LOG_DEBUG,
|
||||
@@ -3735,7 +3736,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
|
||||
break;
|
||||
case STATUSCODE_SLOW:
|
||||
if ((opt->hostcontrol) && (heap(ptr)->retry <= 0)) { // too slow
|
||||
if (opt->hostcontrol & 2) {
|
||||
if (opt->hostcontrol & HTS_HOSTCONTROL_BAN_SLOW) {
|
||||
hts_log_print(opt, LOG_DEBUG, "Link banned: %s%s", urladr(), urlfil());
|
||||
host_ban(opt, ptr, sback, jump_identification_const(urladr()));
|
||||
hts_log_print(opt, LOG_DEBUG,
|
||||
@@ -4261,10 +4262,10 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
|
||||
char com[256];
|
||||
|
||||
linput(stdin, com, 200);
|
||||
if (opt->verbosedisplay == 2)
|
||||
opt->verbosedisplay = 1;
|
||||
if (opt->verbosedisplay == HTS_VERBOSE_FULL)
|
||||
opt->verbosedisplay = HTS_VERBOSE_SIMPLE;
|
||||
else
|
||||
opt->verbosedisplay = 2;
|
||||
opt->verbosedisplay = HTS_VERBOSE_FULL;
|
||||
/* Info for wrappers */
|
||||
hts_log_print(opt, LOG_INFO, "engine: change-options");
|
||||
RUN_CALLBACK0(opt, chopt);
|
||||
@@ -4374,7 +4375,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
|
||||
printf("%c\x0d", ("/-\\|")[roll]);
|
||||
fflush(stdout);
|
||||
}
|
||||
} else if (opt->verbosedisplay == 1) {
|
||||
} else if (opt->verbosedisplay == HTS_VERBOSE_SIMPLE) {
|
||||
if (b >= 0) {
|
||||
if (back[b].r.statuscode == HTTP_OK)
|
||||
printf("%d/%d: %s%s (" LLintP " bytes) - OK\33[K\r", ptr, opt->lien_tot,
|
||||
@@ -4465,8 +4466,8 @@ int hts_wait_delayed(htsmoduleStruct * str, lien_adrfilsave *afs,
|
||||
char in_error_msg[32];
|
||||
|
||||
// resolve unresolved type
|
||||
if (opt->savename_delayed != 0 && *forbidden_url == 0 && IS_DELAYED_EXT(afs->save)
|
||||
&& !opt->state.stop) {
|
||||
if (opt->savename_delayed != HTS_SAVENAME_DELAYED_NONE &&
|
||||
*forbidden_url == 0 && IS_DELAYED_EXT(afs->save) && !opt->state.stop) {
|
||||
int loops;
|
||||
int continue_loop;
|
||||
|
||||
@@ -4850,7 +4851,7 @@ int hts_wait_delayed(htsmoduleStruct * str, lien_adrfilsave *afs,
|
||||
}
|
||||
}
|
||||
|
||||
} // delayed type check ?
|
||||
} // delayed type check ?
|
||||
|
||||
ENGINE_SAVE_CONTEXT_BASE();
|
||||
|
||||
|
||||
@@ -1213,7 +1213,7 @@ HTSEXT_API find_handle hts_findfirst(char *path) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
HTSEXT_API int hts_findnext(find_handle find) {
|
||||
HTSEXT_API hts_boolean hts_findnext(find_handle find) {
|
||||
if (find) {
|
||||
#ifdef _WIN32
|
||||
if ((FindNextFileA(find->handle, &find->hdata)))
|
||||
@@ -1273,7 +1273,7 @@ HTSEXT_API int hts_findgetsize(find_handle find) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
HTSEXT_API int hts_findisdir(find_handle find) {
|
||||
HTSEXT_API hts_boolean hts_findisdir(find_handle find) {
|
||||
if (find) {
|
||||
if (!hts_findissystem(find)) {
|
||||
#ifdef _WIN32
|
||||
@@ -1287,7 +1287,7 @@ HTSEXT_API int hts_findisdir(find_handle find) {
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
HTSEXT_API int hts_findisfile(find_handle find) {
|
||||
HTSEXT_API hts_boolean hts_findisfile(find_handle find) {
|
||||
if (find) {
|
||||
if (!hts_findissystem(find)) {
|
||||
#ifdef _WIN32
|
||||
@@ -1301,7 +1301,7 @@ HTSEXT_API int hts_findisfile(find_handle find) {
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
HTSEXT_API int hts_findissystem(find_handle find) {
|
||||
HTSEXT_API hts_boolean hts_findissystem(find_handle find) {
|
||||
if (find) {
|
||||
#ifdef _WIN32
|
||||
if (find->hdata.
|
||||
|
||||
@@ -108,15 +108,15 @@ HTSEXT_API int hts_buildtopindex(httrackp * opt, const char *path,
|
||||
// Portable directory find functions
|
||||
// Directory find functions
|
||||
HTSEXT_API find_handle hts_findfirst(char *path);
|
||||
HTSEXT_API int hts_findnext(find_handle find);
|
||||
HTSEXT_API hts_boolean hts_findnext(find_handle find);
|
||||
HTSEXT_API int hts_findclose(find_handle find);
|
||||
|
||||
//
|
||||
HTSEXT_API char *hts_findgetname(find_handle find);
|
||||
HTSEXT_API int hts_findgetsize(find_handle find);
|
||||
HTSEXT_API int hts_findisdir(find_handle find);
|
||||
HTSEXT_API int hts_findisfile(find_handle find);
|
||||
HTSEXT_API int hts_findissystem(find_handle find);
|
||||
HTSEXT_API hts_boolean hts_findisdir(find_handle find);
|
||||
HTSEXT_API hts_boolean hts_findisfile(find_handle find);
|
||||
HTSEXT_API hts_boolean hts_findissystem(find_handle find);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -206,7 +206,8 @@ HTSEXT_API htsErrorCallback hts_get_error_callback(void);
|
||||
/* Logging */
|
||||
/** Legacy: write prefix then msg to opt->log. Returns 0 if written, 1 if
|
||||
opt->log is NULL. Prefer hts_log_print(). */
|
||||
HTSEXT_API int hts_log(httrackp * opt, const char *prefix, const char *msg);
|
||||
HTSEXT_API hts_boolean hts_log(httrackp *opt, const char *prefix,
|
||||
const char *msg);
|
||||
|
||||
/** printf-style log at level @p type (an hts_log_type, optionally |LOG_ERRNO).
|
||||
Forwards to the registered log callback, and when the level is <= opt->debug
|
||||
@@ -313,7 +314,8 @@ HTSEXT_API T_SOC catch_url_init(int *port, char *adr);
|
||||
"ip:port". The buffers are caller-allocated and not bounds-checked: @p data
|
||||
must be CATCH_URL_DATA_SIZE bytes, and @p url / @p method must fit the
|
||||
captured request line. */
|
||||
HTSEXT_API int catch_url(T_SOC soc, char *url, char *method, char *data);
|
||||
HTSEXT_API hts_boolean catch_url(T_SOC soc, char *url, char *method,
|
||||
char *data);
|
||||
|
||||
/* State */
|
||||
/** Whether the engine is parsing HTML. Returns 0 if not, otherwise the percent
|
||||
@@ -334,10 +336,10 @@ HTSEXT_API int hts_is_exiting(httrackp * opt);
|
||||
caller-owned, NULL-terminated array of strings; the engine stores the
|
||||
pointer without copying, so the array and its strings must stay valid until
|
||||
the engine consumes them. @return nonzero if a list is now set. */
|
||||
HTSEXT_API int hts_addurl(httrackp * opt, char **url);
|
||||
HTSEXT_API hts_boolean hts_addurl(httrackp *opt, char **url);
|
||||
|
||||
/** Clear any pending add-URL list set by hts_addurl(). Always returns 0. */
|
||||
HTSEXT_API int hts_resetaddurl(httrackp * opt);
|
||||
HTSEXT_API hts_boolean hts_resetaddurl(httrackp *opt);
|
||||
|
||||
/** Apply the runtime-tunable options from @p from onto @p to, to adjust a live
|
||||
mirror. Only fields set to a non-sentinel value are copied; the rest of @p
|
||||
@@ -356,7 +358,7 @@ HTSEXT_API int hts_setpause(httrackp * opt, int);
|
||||
lock, so it is safe to call from another thread). @p force is currently
|
||||
ignored.
|
||||
@return 0; no-op if @p opt is NULL. */
|
||||
HTSEXT_API int hts_request_stop(httrackp * opt, int force);
|
||||
HTSEXT_API int hts_request_stop(httrackp *opt, hts_boolean force);
|
||||
|
||||
/** Queue a single in-progress file, by URL, to be cancelled by the engine.
|
||||
@p url is copied internally. Takes the state lock, so it is thread-safe.
|
||||
@@ -373,7 +375,7 @@ HTSEXT_API void hts_cancel_parsing(httrackp * opt);
|
||||
|
||||
/** Nonzero once the mirror has fully ended. Read under the engine state lock,
|
||||
so safe to poll from another thread. Wait for this before hts_free_opt(). */
|
||||
HTSEXT_API int hts_has_stopped(httrackp * opt);
|
||||
HTSEXT_API hts_boolean hts_has_stopped(httrackp *opt);
|
||||
|
||||
/* Tools */
|
||||
/** Ensure the directory chain leading to @p path exists, creating missing
|
||||
@@ -390,7 +392,7 @@ HTSEXT_API int structcheck_utf8(const char *path);
|
||||
/** Whether the directory containing @p path exists. The basename is stripped
|
||||
first, so passing a file path tests its parent directory. @return 1 if it is
|
||||
a directory, 0 otherwise. */
|
||||
HTSEXT_API int dir_exists(const char *path);
|
||||
HTSEXT_API hts_boolean dir_exists(const char *path);
|
||||
|
||||
/** Write the HTTP reason phrase for @p statuscode into @p msg, a caller buffer
|
||||
of at least 64 bytes. For an unknown code a non-empty @p msg is kept,
|
||||
@@ -573,14 +575,15 @@ HTSEXT_API char *unescape_http(char *const catbuff, const size_t size, const cha
|
||||
must-avoid escapes are kept encoded, and %25 is never decoded). @p no_high &
|
||||
1 also decodes high (>= 128) bytes; @p no_high & 2 also decodes an escaped
|
||||
space. Returns @p catbuff. */
|
||||
HTSEXT_API char *unescape_http_unharm(char *const catbuff, const size_t size, const char *s, const int no_high);
|
||||
HTSEXT_API char *unescape_http_unharm(char *const catbuff, const size_t size,
|
||||
const char *s, const hts_boolean no_high);
|
||||
|
||||
/** Determine the MIME type of local file name @p fil into @p s (capacity
|
||||
@p ssize): user --assume rules, then ".html", then the built-in extension
|
||||
table. @p flag != 0 forces a fallback type. @return 1 if a type was written,
|
||||
0 otherwise. */
|
||||
HTSEXT_API int get_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
||||
const char *fil, int flag);
|
||||
HTSEXT_API hts_boolean get_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
||||
const char *fil, hts_boolean flag);
|
||||
|
||||
/** @deprecated Use get_httptype_sized(). Assumes @p s has at least
|
||||
HTS_MIMETYPE_SIZE capacity. */
|
||||
@@ -600,7 +603,7 @@ HTSEXT_API int is_userknowntype(httrackp * opt, const char *fil);
|
||||
|
||||
/** 1 if @p fil, an extension such as "asp" or "php" (not a full filename), is a
|
||||
known dynamic-page type, else 0. */
|
||||
HTSEXT_API int is_dyntype(const char *fil);
|
||||
HTSEXT_API hts_boolean is_dyntype(const char *fil);
|
||||
|
||||
/** Extract the extension of @p fil (text after the last '.', stopping at '?')
|
||||
into caller scratch @p catbuff (capacity @p size) and return it. Returns ""
|
||||
@@ -610,12 +613,12 @@ HTSEXT_API const char *get_ext(char *catbuff, size_t size, const char *fil);
|
||||
|
||||
/** 1 if MIME type @p st must not be reclassified or renamed (hypertext types
|
||||
and a built-in keep-list of commonly mislabeled types), else 0. */
|
||||
HTSEXT_API int may_unknown(httrackp * opt, const char *st);
|
||||
HTSEXT_API hts_boolean may_unknown(httrackp *opt, const char *st);
|
||||
|
||||
/** Guess the MIME type of local file @p fil into @p s (capacity @p ssize),
|
||||
always producing a type. @return 1 if a type was written. */
|
||||
HTSEXT_API int guess_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
||||
const char *fil);
|
||||
HTSEXT_API hts_boolean guess_httptype_sized(httrackp *opt, char *s,
|
||||
size_t ssize, const char *fil);
|
||||
|
||||
/** @deprecated Use guess_httptype_sized(). Assumes @p s has at least
|
||||
HTS_MIMETYPE_SIZE capacity. */
|
||||
@@ -677,7 +680,7 @@ HTSEXT_API find_handle hts_findfirst(char *path);
|
||||
|
||||
/** Advance to the next directory entry. Returns 1 if an entry is available, 0
|
||||
at end of directory. */
|
||||
HTSEXT_API int hts_findnext(find_handle find);
|
||||
HTSEXT_API hts_boolean hts_findnext(find_handle find);
|
||||
|
||||
/** Close the iteration and free @p find. Always returns 0; NULL is accepted. */
|
||||
HTSEXT_API int hts_findclose(find_handle find);
|
||||
@@ -692,16 +695,16 @@ HTSEXT_API int hts_findgetsize(find_handle find);
|
||||
|
||||
/** 1 if the current entry is a directory, else 0 (a system/special entry, see
|
||||
hts_findissystem(), reports 0). */
|
||||
HTSEXT_API int hts_findisdir(find_handle find);
|
||||
HTSEXT_API hts_boolean hts_findisdir(find_handle find);
|
||||
|
||||
/** 1 if the current entry is a regular file, else 0 (a system/special entry,
|
||||
see hts_findissystem(), reports 0). */
|
||||
HTSEXT_API int hts_findisfile(find_handle find);
|
||||
HTSEXT_API hts_boolean hts_findisfile(find_handle find);
|
||||
|
||||
/** 1 if the current entry is a special/system entry to skip: "." or "..", on
|
||||
POSIX also device/fifo/socket nodes, on Windows also system, hidden or
|
||||
temporary entries. Else 0. */
|
||||
HTSEXT_API int hts_findissystem(find_handle find);
|
||||
HTSEXT_API hts_boolean hts_findissystem(find_handle find);
|
||||
|
||||
/* UTF-8 aware FILE API */
|
||||
/* On non-Windows these macros resolve directly to the POSIX calls. On Windows
|
||||
|
||||
@@ -288,7 +288,7 @@ static void __cdecl htsshow_uninit(t_hts_callbackarg * carg) {
|
||||
}
|
||||
static int __cdecl htsshow_start(t_hts_callbackarg * carg, httrackp * opt) {
|
||||
use_show = 0;
|
||||
if (opt->verbosedisplay == 2) {
|
||||
if (opt->verbosedisplay == HTS_VERBOSE_FULL) {
|
||||
use_show = 1;
|
||||
vt_clear();
|
||||
}
|
||||
@@ -852,7 +852,7 @@ static void sig_doback(int blind) { // mettre en backing
|
||||
if (global_opt != NULL) {
|
||||
// suppress logging and asking lousy questions
|
||||
global_opt->quiet = 1;
|
||||
global_opt->verbosedisplay = 0;
|
||||
global_opt->verbosedisplay = HTS_VERBOSE_NONE;
|
||||
}
|
||||
|
||||
if (!blind)
|
||||
|
||||
17
tests/01_engine-copyopt.test
Executable file
17
tests/01_engine-copyopt.test
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Regression guard for the unsigned-enum sentinel trap: copy_htsopt's
|
||||
# `if (from->X > -1)` guard is always false for unsigned hts_boolean fields, so
|
||||
# they silently stop being copied. Driven by the in-process 'httrack -#9' test.
|
||||
# Keep POSIX-portable (harness runs it via $(BASH), a plain /bin/sh on macOS).
|
||||
|
||||
set -eu
|
||||
|
||||
# A trailing token is required; a bare '-#9' falls through to the usage screen.
|
||||
out=$(httrack -#9 run)
|
||||
|
||||
# Exact-match the success line so a fall-through to usage can't pass the test.
|
||||
test "$out" = "copy-htsopt: OK" || {
|
||||
echo "expected 'copy-htsopt: OK', got: $out" >&2
|
||||
exit 1
|
||||
}
|
||||
@@ -24,6 +24,7 @@ TESTS = \
|
||||
01_engine-cache-golden.test \
|
||||
01_engine-charset.test \
|
||||
01_engine-cmdline.test \
|
||||
01_engine-copyopt.test \
|
||||
01_engine-doitlog.test \
|
||||
01_engine-entities.test \
|
||||
01_engine-filter.test \
|
||||
|
||||
Reference in New Issue
Block a user