mirror of
https://github.com/xroche/httrack.git
synced 2026-06-30 14:05:47 +03:00
Compare commits
3 Commits
string-mac
...
inplace-es
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
417a60d6a3 | ||
|
|
db9ec2cc3b | ||
|
|
6a9ab2a11f |
@@ -406,29 +406,40 @@ void hts_invalidate_link(httrackp * opt, int lpos) {
|
||||
opt->liens[lpos]->pass2 = -1;
|
||||
}
|
||||
|
||||
|
||||
#define HT_INDEX_END do { \
|
||||
if (!makeindex_done) { \
|
||||
if (makeindex_fp) { \
|
||||
char BIGSTK tempo[1024]; \
|
||||
if (makeindex_links == 1) { \
|
||||
char BIGSTK link_escaped[HTS_URLMAXSIZE*2]; \
|
||||
escape_uri_utf(makeindex_firstlink, link_escaped, sizeof(link_escaped)); \
|
||||
snprintf(tempo,sizeof(tempo),"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF, link_escaped); \
|
||||
} else \
|
||||
tempo[0]='\0'; \
|
||||
hts_template_format(makeindex_fp,template_footer, \
|
||||
"<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->", \
|
||||
tempo, /* EOF */ NULL \
|
||||
); \
|
||||
fflush(makeindex_fp); \
|
||||
fclose(makeindex_fp); /* à ne pas oublier sinon on passe une nuit blanche */ \
|
||||
makeindex_fp=NULL; \
|
||||
usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt),OPT_GET_BUFF_SIZE(opt),StringBuff(opt->path_html_utf8),"index.html"),"",""); \
|
||||
} \
|
||||
} \
|
||||
makeindex_done=1; /* ok c'est fait */ \
|
||||
} while(0)
|
||||
// Write the makeindex footer (refresh meta when makeindex_links==1), close
|
||||
// the file, then run usercommand.
|
||||
void hts_finish_makeindex(httrackp *opt, int *makeindex_done,
|
||||
FILE **makeindex_fp, int makeindex_links,
|
||||
const char *makeindex_firstlink,
|
||||
const char *template_footer, const char *adr,
|
||||
const char *fil) {
|
||||
if (!*makeindex_done) {
|
||||
if (*makeindex_fp) {
|
||||
char BIGSTK tempo[1024];
|
||||
if (makeindex_links == 1) {
|
||||
char BIGSTK link_escaped[HTS_URLMAXSIZE * 2];
|
||||
escape_uri_utf(makeindex_firstlink, link_escaped, sizeof(link_escaped));
|
||||
snprintf(tempo, sizeof(tempo),
|
||||
"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">" CRLF,
|
||||
link_escaped);
|
||||
} else
|
||||
tempo[0] = '\0';
|
||||
hts_template_format(*makeindex_fp, template_footer,
|
||||
"<!-- Mirror and index made by HTTrack Website "
|
||||
"Copier/" HTTRACK_VERSION " " HTTRACK_AFF_AUTHORS
|
||||
" -->",
|
||||
tempo, /* EOF */ NULL);
|
||||
fflush(*makeindex_fp);
|
||||
fclose(*makeindex_fp);
|
||||
*makeindex_fp = NULL;
|
||||
usercommand(opt, 0, NULL,
|
||||
fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_html_utf8), "index.html"),
|
||||
adr, fil);
|
||||
}
|
||||
}
|
||||
*makeindex_done = 1;
|
||||
}
|
||||
|
||||
/* does it look like XML ? (SVG et al.) */
|
||||
static int look_like_xml(const char *s) {
|
||||
@@ -2044,7 +2055,8 @@ int httpmirror(char *url1, httrackp * opt) {
|
||||
/*
|
||||
Ensure the index is being closed
|
||||
*/
|
||||
HT_INDEX_END;
|
||||
hts_finish_makeindex(opt, &makeindex_done, &makeindex_fp, makeindex_links,
|
||||
makeindex_firstlink, template_footer, "", "");
|
||||
|
||||
/*
|
||||
updating-a-remotely-deleted-website hack
|
||||
|
||||
@@ -362,6 +362,14 @@ void usercommand(httrackp * opt, int exe, const char *cmd, const char *file,
|
||||
|
||||
void usercommand_exe(const char *cmd, const char *file);
|
||||
|
||||
// Finish the makeindex index.html (footer + refresh meta), run usercommand.
|
||||
// Updates *makeindex_done/*makeindex_fp in place; adr/fil are the mode strings.
|
||||
void hts_finish_makeindex(httrackp *opt, int *makeindex_done,
|
||||
FILE **makeindex_fp, int makeindex_links,
|
||||
const char *makeindex_firstlink,
|
||||
const char *template_footer, const char *adr,
|
||||
const char *fil);
|
||||
|
||||
int filters_init(char ***ptrfilters, int maxfilter, int filterinc);
|
||||
|
||||
int fspc(httrackp * opt, FILE * fp, const char *type);
|
||||
|
||||
42
src/htslib.c
42
src/htslib.c
@@ -4131,25 +4131,33 @@ DECLARE_APPEND_ESCAPE_VERSION(escape_uri)
|
||||
|
||||
#undef DECLARE_APPEND_ESCAPE_VERSION
|
||||
|
||||
// Same as above, but in-place
|
||||
#undef DECLARE_INPLACE_ESCAPE_VERSION
|
||||
#define DECLARE_INPLACE_ESCAPE_VERSION(NAME) \
|
||||
HTSEXT_API size_t inplace_ ##NAME(char *const dest, const size_t size) { \
|
||||
char buffer[256]; \
|
||||
const size_t len = strnlen(dest, size); \
|
||||
const int in_buffer = len + 1 < sizeof(buffer); \
|
||||
char *src = in_buffer ? buffer : malloct(len + 1); \
|
||||
size_t ret; \
|
||||
assertf(src != NULL); \
|
||||
assertf(len < size); \
|
||||
memcpy(src, dest, len + 1); \
|
||||
ret = NAME(src, dest, size); \
|
||||
if (!in_buffer) { \
|
||||
freet(src); \
|
||||
} \
|
||||
return ret; \
|
||||
// Signature shared by the escape_*() functions: escape `src` into `dest`
// (capacity `size` bytes) and return a size_t result.
typedef size_t (*escape_fn_t)(const char *src, char *dest, size_t size);

// In-place escaping: copy the current content of dest aside, then run
// `escape` from that copy back into dest.
//
//   dest    NUL-terminated string to rewrite; must be terminated within `size`
//   size    capacity of dest in bytes
//   escape  escaper to apply
//
// Returns whatever `escape` returns. Asserts when dest has no terminator
// within `size` bytes or when the temporary allocation fails.
static size_t inplace_escape(char *const dest, const size_t size,
                             escape_fn_t escape) {
  char buffer[256];
  const size_t len = strnlen(dest, size);
  char *src;
  size_t ret;

  /* Reject unterminated input before sizing or allocating anything. */
  assertf(len < size);

  /* Content plus terminator may fill the stack buffer completely; only
     longer strings need a heap copy. */
  if (len + 1 <= sizeof(buffer)) {
    src = buffer;
  } else {
    src = malloct(len + 1);
    assertf(src != NULL);
  }

  memcpy(src, dest, len + 1);
  ret = escape(src, dest, size);

  if (src != buffer) {
    freet(src);
  }
  return ret;
}
|
||||
|
||||
// Thin exported wrappers binding inplace_escape() to each escaper (ABI).
|
||||
#undef DECLARE_INPLACE_ESCAPE_VERSION
|
||||
#define DECLARE_INPLACE_ESCAPE_VERSION(NAME) \
|
||||
HTSEXT_API size_t inplace_##NAME(char *const dest, const size_t size) { \
|
||||
return inplace_escape(dest, size, NAME); \
|
||||
}
|
||||
|
||||
DECLARE_INPLACE_ESCAPE_VERSION(escape_in_url)
|
||||
DECLARE_INPLACE_ESCAPE_VERSION(escape_spc_url)
|
||||
DECLARE_INPLACE_ESCAPE_VERSION(escape_uri_utf)
|
||||
|
||||
@@ -167,30 +167,6 @@ Please visit our Website: http://www.httrack.com
|
||||
}
|
||||
#define HT_ADD_FOP
|
||||
|
||||
// COPY IN HTSCORE.C
|
||||
#define HT_INDEX_END do { \
|
||||
if (!makeindex_done) { \
|
||||
if (makeindex_fp) { \
|
||||
char BIGSTK tempo[1024]; \
|
||||
if (makeindex_links == 1) { \
|
||||
char BIGSTK link_escaped[HTS_URLMAXSIZE*2]; \
|
||||
escape_uri_utf(makeindex_firstlink, link_escaped, sizeof(link_escaped)); \
|
||||
snprintf(tempo,sizeof(tempo),"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,link_escaped); \
|
||||
} else \
|
||||
tempo[0]='\0'; \
|
||||
hts_template_format(makeindex_fp,template_footer, \
|
||||
"<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->", \
|
||||
tempo, /* EOF */ NULL \
|
||||
); \
|
||||
fflush(makeindex_fp); \
|
||||
fclose(makeindex_fp); /* à ne pas oublier sinon on passe une nuit blanche */ \
|
||||
makeindex_fp=NULL; \
|
||||
usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_html_utf8),"index.html"),"primary","primary"); \
|
||||
} \
|
||||
} \
|
||||
makeindex_done=1; /* ok c'est fait */ \
|
||||
} while(0)
|
||||
|
||||
#define ENGINE_DEFINE_CONTEXT() \
|
||||
ENGINE_DEFINE_CONTEXT_BASE(); \
|
||||
/* */ \
|
||||
@@ -709,7 +685,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
}
|
||||
|
||||
} else if (heap(ptr)->depth < opt->depth) { // on a sauté level1+1 et level1
|
||||
HT_INDEX_END;
|
||||
hts_finish_makeindex(opt, &makeindex_done, &makeindex_fp,
|
||||
makeindex_links, makeindex_firstlink,
|
||||
template_footer, "primary", "primary");
|
||||
}
|
||||
} // if (opt->makeindex)
|
||||
}
|
||||
|
||||
@@ -524,6 +524,41 @@ static int string_safety_selftests(void) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* StringCatN/StringSetLength must eval SIZE once: (n_eval++, V) leaves
|
||||
n_eval == 2 on a double-eval macro. */
|
||||
{
|
||||
String s = STRING_EMPTY;
|
||||
int n_eval = 0;
|
||||
|
||||
StringCat(s, "hello");
|
||||
StringCatN(s, "world", (n_eval++, 3)); /* strlen>SIZE so the clamp runs */
|
||||
if (n_eval != 1 || strcmp(StringBuff(s), "hellowor") != 0) {
|
||||
StringFree(s);
|
||||
return 1;
|
||||
}
|
||||
|
||||
n_eval = 0;
|
||||
StringSetLength(s, (n_eval++, 5));
|
||||
if (n_eval != 1 || StringLength(s) != 5) {
|
||||
StringFree(s);
|
||||
return 1;
|
||||
}
|
||||
StringFree(s);
|
||||
}
|
||||
|
||||
/* StringSubRW still reads/writes after dropping its duplicate definition. */
|
||||
{
|
||||
String s = STRING_EMPTY;
|
||||
|
||||
StringCat(s, "abc");
|
||||
StringSubRW(s, 1) = 'X';
|
||||
if (StringSub(s, 1) != 'X' || strcmp(StringBuff(s), "aXc") != 0) {
|
||||
StringFree(s);
|
||||
return 1;
|
||||
}
|
||||
StringFree(s);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1305,6 +1340,90 @@ static int st_urlhack(httrackp *opt, int argc, char **argv) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// hts_finish_makeindex writes the footer, emits the refresh meta only when
|
||||
// makeindex_links==1, and clears *fp / sets *done. argv[0] is a writable dir.
|
||||
static int st_makeindex(httrackp *opt, int argc, char **argv) {
|
||||
char path[HTS_URLMAXSIZE];
|
||||
char buf[4096];
|
||||
FILE *fp;
|
||||
size_t n;
|
||||
int done;
|
||||
|
||||
assertf(argc >= 1);
|
||||
snprintf(path, sizeof(path), "%s/index.html", argv[0]);
|
||||
|
||||
/* single first link: footer + a refresh meta carrying the escaped URL */
|
||||
done = 0;
|
||||
fp = fopen(path, "wb");
|
||||
assertf(fp != NULL);
|
||||
hts_finish_makeindex(opt, &done, &fp, 1, "http://example.com/a b", "%s%s", "",
|
||||
"");
|
||||
assertf(fp == NULL); /* the function closed and cleared it */
|
||||
assertf(done != 0);
|
||||
fp = fopen(path, "rb");
|
||||
assertf(fp != NULL);
|
||||
n = fread(buf, 1, sizeof(buf) - 1, fp);
|
||||
fclose(fp);
|
||||
buf[n] = '\0';
|
||||
assertf(strstr(buf, "Mirror and index made by HTTrack") != NULL);
|
||||
assertf(strstr(buf, "Refresh") != NULL);
|
||||
assertf(strstr(buf, "example.com") != NULL);
|
||||
|
||||
/* no single link: footer only, no refresh meta */
|
||||
done = 0;
|
||||
fp = fopen(path, "wb");
|
||||
assertf(fp != NULL);
|
||||
hts_finish_makeindex(opt, &done, &fp, 0, NULL, "%s%s", "", "");
|
||||
assertf(fp == NULL);
|
||||
assertf(done != 0);
|
||||
fp = fopen(path, "rb");
|
||||
assertf(fp != NULL);
|
||||
n = fread(buf, 1, sizeof(buf) - 1, fp);
|
||||
fclose(fp);
|
||||
buf[n] = '\0';
|
||||
assertf(strstr(buf, "Mirror and index made by HTTrack") != NULL);
|
||||
assertf(strstr(buf, "Refresh") == NULL);
|
||||
|
||||
UNLINK(path);
|
||||
printf("makeindex self-test OK\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Each inplace_escape_*() must equal escape_*() on a copy. */
|
||||
static int st_inplace_escape(httrackp *opt, int argc, char **argv) {
|
||||
/* >255 bytes forces the helper's malloct path, not the stack buffer */
|
||||
static char longstr[600];
|
||||
static const char *const samples[] = {
|
||||
"", "abc", "a b/c?d=e&f", "h\x8ello w\x94rld",
|
||||
"a%b\"c<d>", "/path to/file", longstr};
|
||||
static size_t (*const inplace[])(char *, size_t) = {
|
||||
inplace_escape_in_url, inplace_escape_spc_url, inplace_escape_uri_utf,
|
||||
inplace_escape_check_url, inplace_escape_uri};
|
||||
static size_t (*const plain[])(const char *, char *, size_t) = {
|
||||
escape_in_url, escape_spc_url, escape_uri_utf, escape_check_url,
|
||||
escape_uri};
|
||||
size_t i, f;
|
||||
|
||||
(void) opt;
|
||||
(void) argc;
|
||||
(void) argv;
|
||||
|
||||
memset(longstr, 'a', sizeof(longstr) - 1);
|
||||
for (f = 0; f < sizeof(inplace) / sizeof(inplace[0]); f++) {
|
||||
for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
|
||||
char ref[4096], work[4096];
|
||||
size_t rret, iret;
|
||||
rret = plain[f](samples[i], ref, sizeof(ref));
|
||||
strcpybuff(work, samples[i]);
|
||||
iret = inplace[f](work, sizeof(work));
|
||||
assertf(iret == rret);
|
||||
assertf(strcmp(work, ref) == 0);
|
||||
}
|
||||
}
|
||||
printf("inplace-escape self-test OK\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Default User-Agent: honest HTTrack token, no resurrected Windows 98. */
|
||||
static int st_useragent(httrackp *opt, int argc, char **argv) {
|
||||
const char *ua = StringBuff(opt->user_agent);
|
||||
@@ -1621,6 +1740,10 @@ static const struct selftest_entry {
|
||||
{"dns", "", "DNS resolver/cache self-test", st_dns},
|
||||
{"cookies", "", "cookie request-header self-test", st_cookies},
|
||||
{"useragent", "", "default User-Agent self-test", st_useragent},
|
||||
{"makeindex", "[dir]", "hts_finish_makeindex footer/refresh self-test",
|
||||
st_makeindex},
|
||||
{"inplace-escape", "", "inplace_escape_* vs escape_* equivalence self-test",
|
||||
st_inplace_escape},
|
||||
{"status", "", "HTTP status code -> reason phrase self-test", st_status},
|
||||
{"acceptencoding", "[dir]",
|
||||
"Accept-Encoding advertises gzip+deflate, both decode", st_acceptencoding},
|
||||
|
||||
@@ -121,9 +121,6 @@ struct String {
|
||||
/** Byte at POS (read/write). No bounds check; POS must be < StringLength. **/
|
||||
#define StringSubRW(BLK, POS) (StringBuffRW(BLK)[POS])
|
||||
|
||||
/** Subcharacter (read/write) **/
|
||||
#define StringSubRW(BLK, POS) (StringBuffRW(BLK)[POS])
|
||||
|
||||
/** Byte POS positions from the end (read). POS==1 is the last byte. **/
|
||||
#define StringRight(BLK, POS) (StringBuff(BLK)[StringLength(BLK) - POS])
|
||||
|
||||
@@ -191,8 +188,9 @@ HTS_STATIC char *StringBuffN_(String *blk, int size) {
|
||||
asserts SIZE fits the existing content; does not (re)allocate. **/
|
||||
#define StringSetLength(BLK, SIZE) \
|
||||
do { \
|
||||
if (SIZE >= 0) { \
|
||||
(BLK).length_ = SIZE; \
|
||||
const int len__ = (SIZE); /* signed: negative means strlen(buffer_) */ \
|
||||
if (len__ >= 0) { \
|
||||
(BLK).length_ = len__; \
|
||||
} else { \
|
||||
(BLK).length_ = strlen((BLK).buffer_); \
|
||||
} \
|
||||
@@ -308,10 +306,11 @@ HTS_STATIC void StringAttach(String *blk, char **str) {
|
||||
#define StringCatN(BLK, STR, SIZE) \
|
||||
do { \
|
||||
const char *str__ = (STR); \
|
||||
const size_t usize__ = (SIZE); \
|
||||
if (str__ != NULL) { \
|
||||
size_t size__ = strlen(str__); \
|
||||
if (size__ > (SIZE)) { \
|
||||
size__ = (SIZE); \
|
||||
if (size__ > usize__) { \
|
||||
size__ = usize__; \
|
||||
} \
|
||||
StringMemcat(BLK, str__, size__); \
|
||||
} \
|
||||
|
||||
7
tests/01_engine-inplace-escape.test
Executable file
7
tests/01_engine-inplace-escape.test
Executable file
@@ -0,0 +1,7 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# inplace_escape_*() must match escape_*() on a copy: guards the shared helper.
|
||||
httrack -O /dev/null -#test=inplace-escape run | grep -q "inplace-escape self-test OK"
|
||||
12
tests/01_engine-makeindex.test
Executable file
12
tests/01_engine-makeindex.test
Executable file
@@ -0,0 +1,12 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# hts_finish_makeindex writes the footer and gates the refresh meta on a single
|
||||
# first link (guards the macro->function extraction).
|
||||
dir=$(mktemp -d)
|
||||
trap 'rm -rf "$dir"' EXIT
|
||||
|
||||
httrack -O /dev/null -#test=makeindex "$dir" run |
|
||||
grep -q "makeindex self-test OK"
|
||||
@@ -36,6 +36,8 @@ TESTS = \
|
||||
01_engine-filter.test \
|
||||
01_engine-hashtable.test \
|
||||
01_engine-idna.test \
|
||||
01_engine-inplace-escape.test \
|
||||
01_engine-makeindex.test \
|
||||
01_engine-mime.test \
|
||||
01_engine-parse.test \
|
||||
01_engine-pause.test \
|
||||
|
||||
Reference in New Issue
Block a user