mirror of
https://github.com/xroche/httrack.git
synced 2026-07-02 23:24:03 +03:00
Compare commits
3 Commits
phase0-net
...
htsparse-t
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b804ee2da1 | ||
|
|
20317cb85b | ||
|
|
98e382390b |
@@ -478,4 +478,8 @@ void voidf(void);
|
||||
/* HTML marker comment marking where the top index is spliced. */
|
||||
#define HTS_TOPINDEX "TOP_INDEX_HTTRACK"
|
||||
|
||||
/* Worst-case byte expansion HT_ADD_HTMLESCAPED* must reserve per escaper. */
|
||||
#define HTS_HTMLESCAPE_MAXEXP 5 /* escape_for_html_print: '&'->"&amp;" */
|
||||
#define HTS_HTMLESCAPE_FULL_MAXEXP 6 /* _full: high byte->"&#xHH;" */
|
||||
|
||||
#endif
|
||||
|
||||
@@ -77,13 +77,14 @@ Please visit our Website: http://www.httrack.com
|
||||
/** Append to the output buffer the string 'A'. **/
|
||||
#define HT_ADD(A) TypedArrayAppend(output_buffer, A, strlen(A))
|
||||
|
||||
/** Append to the output buffer the string 'A', html-escaped. **/
|
||||
#define HT_ADD_HTMLESCAPED_ANY(A, FUNCTION) do { \
|
||||
/* clang-format off: an edit realigns all backslashes, churning the macro. */
|
||||
/* clang-format off */
|
||||
/** Append 'A' to the output buffer, html-escaped; FACTOR = max byte expansion. **/
|
||||
#define HT_ADD_HTMLESCAPED_ANY(A, FUNCTION, FACTOR) do { \
|
||||
if ((opt->getmode & 1) != 0 && ptr>0) { \
|
||||
const char *const str_ = (A); \
|
||||
size_t size_; \
|
||||
/* &amp; is the maximum expansion */ \
|
||||
TypedArrayEnsureRoom(output_buffer, strlen(str_) * 5 + 1024); \
|
||||
TypedArrayEnsureRoom(output_buffer, strlen(str_) * (FACTOR) + 1024); \
|
||||
size_ = FUNCTION(str_, &TypedArrayTail(output_buffer), \
|
||||
TypedArrayRoom(output_buffer)); \
|
||||
TypedArraySize(output_buffer) += size_; \
|
||||
@@ -91,17 +92,22 @@ Please visit our Website: http://www.httrack.com
|
||||
} while(0)
|
||||
|
||||
/** Append to the output buffer the string 'A', html-escaped for &. **/
|
||||
#define HT_ADD_HTMLESCAPED(A) HT_ADD_HTMLESCAPED_ANY(A, escape_for_html_print)
|
||||
#define HT_ADD_HTMLESCAPED(A) \
|
||||
HT_ADD_HTMLESCAPED_ANY(A, escape_for_html_print, HTS_HTMLESCAPE_MAXEXP)
|
||||
|
||||
/**
|
||||
* Append to the output buffer the string 'A', html-escaped for & and
|
||||
* Append to the output buffer the string 'A', html-escaped for & and
|
||||
* high chars.
|
||||
**/
|
||||
#define HT_ADD_HTMLESCAPED_FULL(A) HT_ADD_HTMLESCAPED_ANY(A, escape_for_html_print_full)
|
||||
#define HT_ADD_HTMLESCAPED_FULL(A) \
|
||||
HT_ADD_HTMLESCAPED_ANY(A, escape_for_html_print_full, HTS_HTMLESCAPE_FULL_MAXEXP)
|
||||
/* clang-format on */
|
||||
|
||||
// does nothing
|
||||
#define XH_uninit do {} while(0)
|
||||
|
||||
/* clang-format off: an edit realigns all backslashes, churning the macro. */
|
||||
/* clang-format off */
|
||||
#define HT_ADD_END { \
|
||||
int ok=0;\
|
||||
if (TypedArraySize(output_buffer) != 0) { \
|
||||
@@ -123,6 +129,7 @@ Please visit our Website: http://www.httrack.com
|
||||
} else {\
|
||||
ok=0;\
|
||||
} \
|
||||
freet(mbuff);\
|
||||
}\
|
||||
if (!ok) { \
|
||||
file_notify(opt,urladr(), urlfil(), savename(), 1, 1, r->notmodified); \
|
||||
@@ -165,6 +172,7 @@ Please visit our Website: http://www.httrack.com
|
||||
} \
|
||||
TypedArrayFree(output_buffer); \
|
||||
}
|
||||
/* clang-format on */
|
||||
#define HT_ADD_FOP
|
||||
|
||||
#define ENGINE_DEFINE_CONTEXT() \
|
||||
@@ -193,6 +201,9 @@ Please visit our Website: http://www.httrack.com
|
||||
HTS_UNUSED TStamp makestat_time = stre->makestat_time; \
|
||||
HTS_UNUSED FILE* makestat_fp = stre->makestat_fp
|
||||
|
||||
/* clang-format off: an edit realigns all backslashes, churning the macro. */
|
||||
/* clang-format off */
|
||||
/* Load-once: re-reading resets makestat_time (mutated locally, never SAVEd). */
|
||||
#define ENGINE_SET_CONTEXT() \
|
||||
ENGINE_SET_CONTEXT_BASE(); \
|
||||
/* */ \
|
||||
@@ -203,9 +214,8 @@ Please visit our Website: http://www.httrack.com
|
||||
makeindex_fp = *stre->makeindex_fp_; \
|
||||
makeindex_links = *stre->makeindex_links_; \
|
||||
/* */ \
|
||||
stat_fragment = *stre->stat_fragment_; \
|
||||
makestat_time = stre->makestat_time; \
|
||||
makestat_fp = stre->makestat_fp
|
||||
stat_fragment = *stre->stat_fragment_
|
||||
/* clang-format on */
|
||||
|
||||
#define ENGINE_LOAD_CONTEXT() \
|
||||
ENGINE_DEFINE_CONTEXT()
|
||||
|
||||
@@ -1424,6 +1424,50 @@ static int st_inplace_escape(httrackp *opt, int argc, char **argv) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Pin HTS_HTMLESCAPE*_MAXEXP to each escaper's true max byte expansion. */
|
||||
static int st_escape_room(httrackp *opt, int argc, char **argv) {
|
||||
/* N > 1023: where 6n outgrows the old 5n+1024 reservation */
|
||||
enum { N = 2000 };
|
||||
|
||||
char *src = malloct(N + 1);
|
||||
char *dst;
|
||||
size_t room, got;
|
||||
(void) opt;
|
||||
(void) argc;
|
||||
(void) argv;
|
||||
|
||||
/* _full worst case: a high byte expands to "&#xHH;" (6 bytes) */
|
||||
memset(src, 0xE9, N);
|
||||
src[N] = '\0';
|
||||
room = (size_t) N * HTS_HTMLESCAPE_FULL_MAXEXP + 1024;
|
||||
dst = malloct(room);
|
||||
got = escape_for_html_print_full(src, dst, room);
|
||||
assertf(got == (size_t) N * HTS_HTMLESCAPE_FULL_MAXEXP);
|
||||
assertf(strlen(dst) == got);
|
||||
freet(dst);
|
||||
|
||||
/* one factor short overflows (returns size), truncating the page: the bug */
|
||||
room = (size_t) N * (HTS_HTMLESCAPE_FULL_MAXEXP - 1) + 1024;
|
||||
dst = malloct(room);
|
||||
got = escape_for_html_print_full(src, dst, room);
|
||||
assertf(got == room);
|
||||
freet(dst);
|
||||
|
||||
/* plain escaper worst case: '&' -> "&amp;" (5); high bytes stay verbatim */
|
||||
memset(src, '&', N);
|
||||
src[N] = '\0';
|
||||
room = (size_t) N * HTS_HTMLESCAPE_MAXEXP + 1024;
|
||||
dst = malloct(room);
|
||||
got = escape_for_html_print(src, dst, room);
|
||||
assertf(got == (size_t) N * HTS_HTMLESCAPE_MAXEXP);
|
||||
assertf(strlen(dst) == got);
|
||||
freet(dst);
|
||||
|
||||
freet(src);
|
||||
printf("escape-room self-test OK\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Default User-Agent: honest HTTrack token, no resurrected Windows 98. */
|
||||
static int st_useragent(httrackp *opt, int argc, char **argv) {
|
||||
const char *ua = StringBuff(opt->user_agent);
|
||||
@@ -1744,6 +1788,8 @@ static const struct selftest_entry {
|
||||
st_makeindex},
|
||||
{"inplace-escape", "", "inplace_escape_* vs escape_* equivalence self-test",
|
||||
st_inplace_escape},
|
||||
{"escape-room", "", "HT_ADD_HTMLESCAPED* reservation-factor self-test",
|
||||
st_escape_room},
|
||||
{"status", "", "HTTP status code -> reason phrase self-test", st_status},
|
||||
{"acceptencoding", "[dir]",
|
||||
"Accept-Encoding advertises gzip+deflate, both decode", st_acceptencoding},
|
||||
|
||||
7
tests/01_engine-escape-room.test
Normal file
7
tests/01_engine-escape-room.test
Normal file
@@ -0,0 +1,7 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# HT_ADD_HTMLESCAPED* must reserve the escaper's worst case (6 for _full).
|
||||
httrack -O /dev/null -#test=escape-room run | grep -q "escape-room self-test OK"
|
||||
@@ -36,6 +36,7 @@ TESTS = \
|
||||
01_engine-filter.test \
|
||||
01_engine-hashtable.test \
|
||||
01_engine-idna.test \
|
||||
01_engine-escape-room.test \
|
||||
01_engine-inplace-escape.test \
|
||||
01_engine-makeindex.test \
|
||||
01_engine-mime.test \
|
||||
|
||||
Reference in New Issue
Block a user