mirror of
https://github.com/xroche/httrack.git
synced 2026-06-18 00:04:12 +03:00
Compare commits
4 Commits
tests/stri
...
tests/cach
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8246c7bbcd | ||
|
|
f13f90e9c4 | ||
|
|
c177923fa1 | ||
|
|
7091f85104 |
@@ -41,25 +41,6 @@ Please visit our Website: http://www.httrack.com
|
||||
|
||||
/* Map a NULL string pointer to "" so the result can always be used as a C
   string. The argument is parenthesised so that an expression argument (for
   example a conditional) is compared as a whole; it is evaluated twice, so do
   not pass an expression with side effects. */
#define _NOT_NULL(a) (((a) != NULL) ? (a) : "")
|
||||
|
||||
// COPY OF cmdl_ins in htscoremain.c
|
||||
/* Bytes left in x_argvblk from offset ptr. The offset can in principle outrun
|
||||
the block (alias/doit.log expansion), so the copy aborts cleanly instead of
|
||||
the subtraction wrapping to a huge unbounded size. */
|
||||
#define cmdl_room(bufsize, ptr) \
|
||||
((ptr) < (size_t) (bufsize) ? (size_t) (bufsize) - (ptr) : 0)
|
||||
// Insert a command in the argc/argv (buff has total capacity bufsize)
|
||||
#define cmdl_ins(token, argc, argv, buff, bufsize, ptr) \
|
||||
{ \
|
||||
int i; \
|
||||
for (i = argc; i > 0; i--) \
|
||||
argv[i] = argv[i - 1]; \
|
||||
} \
|
||||
argv[0] = (buff + ptr); \
|
||||
strlcpybuff(argv[0], token, cmdl_room(bufsize, ptr)); \
|
||||
ptr += (int) (strlen(argv[0]) + 1); \
|
||||
argc++
|
||||
// END OF COPY OF cmdl_ins in htscoremain.c
|
||||
|
||||
/*
|
||||
Aliases for command-line and config file definitions
|
||||
These definitions can be used:
|
||||
|
||||
@@ -52,6 +52,34 @@ const char *opttype_value(int p);
|
||||
const char *opthelp_value(int p);
|
||||
const char *hts_gethome(void);
|
||||
void expand_home(String * str);
|
||||
|
||||
/* Command-line argv-block builders, shared by htscoremain.c (the CLI parser)
|
||||
and htsalias.c (config-file alias expansion). Tokens are packed back-to-back
|
||||
into x_argvblk (total capacity bufsize); each argv[] entry points into the
|
||||
block. cmdl_room bounds every copy: the running offset ptr can outrun the
|
||||
block (alias / doit.log expansion outpacing the +32768 slack), so it yields
|
||||
0 rather than a wrapped size_t and the bounded copy aborts cleanly. */
|
||||
/* Free bytes in a block of capacity bufsize at offset ptr; 0 (never a wrapped
   difference) once ptr has reached or passed the end. Both arguments are
   evaluated twice. */
#define cmdl_room(bufsize, ptr) \
  ((size_t) (bufsize) > (ptr) ? (size_t) (bufsize) - (ptr) : 0)
|
||||
|
||||
/* Append a token as a new argv[argc].
   The token is copied into buff at offset ptr (bounded by cmdl_room), argv[argc]
   is pointed at that copy, ptr advances past the copy and its terminating NUL,
   and argc is incremented. argc and ptr must be modifiable lvalues.
   Wrapped in do { } while (0) so the expansion is a single statement and stays
   correct under an unbraced if/else; use it as a statement ending in ';'. */
#define cmdl_add(token, argc, argv, buff, bufsize, ptr)          \
  do {                                                           \
    (argv)[(argc)] = ((buff) + (ptr));                           \
    strlcpybuff((argv)[(argc)], token, cmdl_room(bufsize, ptr)); \
    (ptr) += (int) (strlen((argv)[(argc)]) + 1);                 \
    (argc)++;                                                    \
  } while (0)
|
||||
|
||||
/* Insert a token at argv[0], shifting the existing argc entries up by one.
   The token is copied into buff at offset ptr (bounded by cmdl_room), argv[0]
   is pointed at that copy, ptr advances past the copy and its terminating NUL,
   and argc is incremented. argv must have room for argc + 1 entries; argc and
   ptr must be modifiable lvalues.
   Wrapped in do { } while (0) so the expansion is a single statement and stays
   correct under an unbraced if/else; the loop index has a macro-private name so
   it cannot capture a caller variable named i. */
#define cmdl_ins(token, argc, argv, buff, bufsize, ptr)        \
  do {                                                         \
    int cmdl_ins_i_;                                           \
    for (cmdl_ins_i_ = (argc); cmdl_ins_i_ > 0; cmdl_ins_i_--) \
      (argv)[cmdl_ins_i_] = (argv)[cmdl_ins_i_ - 1];           \
    (argv)[0] = ((buff) + (ptr));                              \
    strlcpybuff((argv)[0], token, cmdl_room(bufsize, ptr));    \
    (ptr) += (int) (strlen((argv)[0]) + 1);                    \
    (argc)++;                                                  \
  } while (0)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -52,6 +52,9 @@ Please visit our Website: http://www.httrack.com
|
||||
|
||||
#define SELFTEST_VOLUME 3000 /* number of small entries in the scale pass */
|
||||
|
||||
/* prefix on assertion failures; set per entry point (-#A vs -#B) */
|
||||
static const char *selftest_tag = "cache-selftest";
|
||||
|
||||
/* Open a cache session. A write session (ro=0) rotates new.zip -> old.zip and
|
||||
opens a fresh new.zip; a read session (ro=1) opens new.zip in place. */
|
||||
static void selftest_open(cache_back *cache, httrackp *opt, int ro) {
|
||||
@@ -95,15 +98,13 @@ static void selftest_close(cache_back *cache) {
|
||||
that exits right after (same choice as the other -# cache subcommands) */
|
||||
}
|
||||
|
||||
/* Store one entry. The body is copied into a private buffer (any size), so
|
||||
callers may pass const data and cache_add never sees a cast-away qualifier;
|
||||
it consumes everything synchronously, so the copy is freed on return. */
|
||||
/* Store one entry; the body is copied so callers may pass const data. */
|
||||
static void store_entry(httrackp *opt, cache_back *cache, const char *adr,
|
||||
const char *fil, const char *save, int statuscode,
|
||||
const char *msg, const char *contenttype,
|
||||
const char *charset, const char *lastmodified,
|
||||
const char *etag, const char *location,
|
||||
const char *body, size_t body_len) {
|
||||
const char *cdispo, const char *body, size_t body_len) {
|
||||
htsblk r;
|
||||
char locbuf[HTS_URLMAXSIZE * 2];
|
||||
char *bodycopy = NULL;
|
||||
@@ -116,32 +117,30 @@ static void store_entry(httrackp *opt, cache_back *cache, const char *adr,
|
||||
strcpybuff(r.charset, charset);
|
||||
strcpybuff(r.lastmodified, lastmodified);
|
||||
strcpybuff(r.etag, etag);
|
||||
strcpybuff(r.cdispo, cdispo);
|
||||
strcpybuff(locbuf, location);
|
||||
r.location = locbuf;
|
||||
r.is_write = 0;
|
||||
/* an empty body must be a NULL pointer: cache_add rejects a non-NULL
|
||||
pointer with size 0 */
|
||||
/* an empty body must be NULL: cache_add rejects non-NULL with size 0 */
|
||||
if (body_len != 0) {
|
||||
bodycopy = malloct(body_len);
|
||||
memcpy(bodycopy, body, body_len);
|
||||
r.adr = bodycopy;
|
||||
}
|
||||
/* all_in_cache=1: keep the body in the ZIP whatever the content-type,
|
||||
so the read path never depends on a file on disk */
|
||||
/* all_in_cache=1: body stays in the ZIP, so reads never need a disk file */
|
||||
cache_add(opt, cache, &r, adr, fil, save, 1, NULL);
|
||||
if (bodycopy != NULL) {
|
||||
freet(bodycopy);
|
||||
}
|
||||
}
|
||||
|
||||
/* Read one entry back and check every field. Returns the number of
|
||||
mismatches (0 == success). */
|
||||
/* Read one entry back and check every field. Returns the mismatch count. */
|
||||
static int check_entry(httrackp *opt, cache_back *cache, const char *adr,
|
||||
const char *fil, int statuscode, const char *msg,
|
||||
const char *contenttype, const char *charset,
|
||||
const char *lastmodified, const char *etag,
|
||||
const char *location, const char *body,
|
||||
size_t body_len) {
|
||||
const char *location, const char *cdispo,
|
||||
const char *body, size_t body_len) {
|
||||
int fail = 0;
|
||||
char *locbuf = malloct(HTS_URLMAXSIZE * 2);
|
||||
htsblk r;
|
||||
@@ -153,15 +152,14 @@ static int check_entry(httrackp *opt, cache_back *cache, const char *adr,
|
||||
#define CHECK_STR(field, want) \
|
||||
do { \
|
||||
if (strcmp((field), (want)) != 0) { \
|
||||
fprintf(stderr, \
|
||||
"cache-selftest: %s%s: " #field " is '%s', expected '%s'\n", \
|
||||
adr, fil, (field), (want)); \
|
||||
fprintf(stderr, "%s: %s%s: " #field " is '%s', expected '%s'\n", \
|
||||
selftest_tag, adr, fil, (field), (want)); \
|
||||
fail++; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
if (r.statuscode != statuscode) {
|
||||
fprintf(stderr, "cache-selftest: %s%s: statuscode is %d, expected %d\n",
|
||||
fprintf(stderr, "%s: %s%s: statuscode is %d, expected %d\n", selftest_tag,
|
||||
adr, fil, r.statuscode, statuscode);
|
||||
fail++;
|
||||
}
|
||||
@@ -171,24 +169,23 @@ static int check_entry(httrackp *opt, cache_back *cache, const char *adr,
|
||||
CHECK_STR(r.lastmodified, lastmodified);
|
||||
CHECK_STR(r.etag, etag);
|
||||
CHECK_STR(locbuf, location);
|
||||
CHECK_STR(r.cdispo, cdispo);
|
||||
|
||||
if (r.size != (LLint) body_len) {
|
||||
fprintf(stderr, "cache-selftest: %s%s: size is " LLintP ", expected %d\n",
|
||||
fprintf(stderr, "%s: %s%s: size is " LLintP ", expected %d\n", selftest_tag,
|
||||
adr, fil, (LLint) r.size, (int) body_len);
|
||||
fail++;
|
||||
} else if (body_len != 0 &&
|
||||
(r.adr == NULL || memcmp(r.adr, body, body_len) != 0)) {
|
||||
fprintf(stderr, "cache-selftest: %s%s: body mismatch\n", adr, fil);
|
||||
fprintf(stderr, "%s: %s%s: body mismatch\n", selftest_tag, adr, fil);
|
||||
fail++;
|
||||
}
|
||||
|
||||
/* The loaded body must be NUL-terminated at [size]: cache_readex's strlen()
|
||||
consumers (htscore.c:1046, htscache.c) rely on it, and a missing
|
||||
terminator is a heap over-read. The buffer is malloc(size + slack), so
|
||||
reading [size] is in bounds. */
|
||||
/* loaded body must be NUL-terminated at [size] for cache_readex's strlen()
|
||||
consumers; buffer is malloc(size + slack) so [size] is in bounds */
|
||||
if (r.adr != NULL && r.adr[r.size] != '\0') {
|
||||
fprintf(stderr, "cache-selftest: %s%s: body not NUL-terminated at [size]\n",
|
||||
adr, fil);
|
||||
fprintf(stderr, "%s: %s%s: body not NUL-terminated at [size]\n",
|
||||
selftest_tag, adr, fil);
|
||||
fail++;
|
||||
}
|
||||
|
||||
@@ -378,27 +375,29 @@ int cache_selftests(httrackp *opt, const char *dir) {
|
||||
/* pass 1: create everything in a single write session */
|
||||
selftest_open_for_write(&cache, opt);
|
||||
|
||||
/* edge cases: normal HTML page */
|
||||
/* edge cases (cdispo "" except where noted): normal HTML page */
|
||||
store_entry(opt, &cache, "example.com", "/", "example.com/index.html", 200,
|
||||
"OK", "text/html", "utf-8", "Mon, 01 Jan 2024 00:00:00 GMT",
|
||||
"etag-normal", "", body_index, strlen(body_index));
|
||||
"etag-normal", "", "", body_index, strlen(body_index));
|
||||
/* redirect: empty body, empty optional fields, near-limit location */
|
||||
store_entry(opt, &cache, "example.com", "/moved", "example.com/moved.html",
|
||||
301, "Moved Permanently", "text/html", "", "", "", location_long,
|
||||
NULL, 0);
|
||||
/* non-HTML content-type kept in cache via all_in_cache, near-limit etag */
|
||||
"", NULL, 0);
|
||||
/* non-HTML content-type, near-limit etag */
|
||||
store_entry(opt, &cache, "example.com", "/api", "example.com/api.json", 200,
|
||||
"OK", "application/json", "utf-8",
|
||||
"Tue, 02 Jan 2024 12:00:00 GMT", etag_long, "", body_api,
|
||||
"Tue, 02 Jan 2024 12:00:00 GMT", etag_long, "", "", body_api,
|
||||
strlen(body_api));
|
||||
/* binary body */
|
||||
/* binary body, with a Content-Disposition */
|
||||
store_entry(opt, &cache, "example.com", "/logo", "example.com/logo.png", 200,
|
||||
"OK", "image/png", "", "", "etag-bin", "", binary_body,
|
||||
"OK", "image/png", "", "", "etag-bin", "",
|
||||
"attachment; filename=\"logo.png\"", binary_body,
|
||||
sizeof(binary_body));
|
||||
/* error status with a body and a location (non-2xx codes are cached too) */
|
||||
/* error status with a body and a location */
|
||||
store_entry(opt, &cache, "example.com", "/gone", "example.com/gone.html", 404,
|
||||
"Not Found", "text/html", "utf-8", "", "etag-404",
|
||||
"https://example.com/where-it-went", body_404, strlen(body_404));
|
||||
"https://example.com/where-it-went", "", body_404,
|
||||
strlen(body_404));
|
||||
|
||||
/* scale: a few thousand small entries */
|
||||
for (i = 0; i < SELFTEST_VOLUME; i++) {
|
||||
@@ -408,7 +407,7 @@ int cache_selftests(httrackp *opt, const char *dir) {
|
||||
sprintf(save, "example.com/v/%05d.html", i);
|
||||
sprintf(body, "<html>volume entry %d</html>", i);
|
||||
store_entry(opt, &cache, "example.com", fil, save, 200, "OK", "text/html",
|
||||
"utf-8", "", "", "", body, strlen(body));
|
||||
"utf-8", "", "", "", "", body, strlen(body));
|
||||
}
|
||||
|
||||
/* compression: a few large bodies */
|
||||
@@ -418,7 +417,7 @@ int cache_selftests(httrackp *opt, const char *dir) {
|
||||
sprintf(fil, "/big/%d.bin", i);
|
||||
sprintf(save, "example.com/big/%d.bin", i);
|
||||
store_entry(opt, &cache, "example.com", fil, save, 200, "OK",
|
||||
"application/octet-stream", "", "", "", "", large_body[i],
|
||||
"application/octet-stream", "", "", "", "", "", large_body[i],
|
||||
large_size[i]);
|
||||
}
|
||||
|
||||
@@ -427,22 +426,24 @@ int cache_selftests(httrackp *opt, const char *dir) {
|
||||
/* pass 2: read back and verify everything round-tripped */
|
||||
selftest_open_for_read(&cache, opt);
|
||||
|
||||
failures += check_entry(opt, &cache, "example.com", "/", 200, "OK",
|
||||
"text/html", "utf-8", "Mon, 01 Jan 2024 00:00:00 GMT",
|
||||
"etag-normal", "", body_index, strlen(body_index));
|
||||
failures +=
|
||||
check_entry(opt, &cache, "example.com", "/", 200, "OK", "text/html",
|
||||
"utf-8", "Mon, 01 Jan 2024 00:00:00 GMT", "etag-normal", "",
|
||||
"", body_index, strlen(body_index));
|
||||
failures += check_entry(opt, &cache, "example.com", "/moved", 301,
|
||||
"Moved Permanently", "text/html", "", "", "",
|
||||
location_long, NULL, 0);
|
||||
location_long, "", NULL, 0);
|
||||
failures +=
|
||||
check_entry(opt, &cache, "example.com", "/api", 200, "OK",
|
||||
"application/json", "utf-8", "Tue, 02 Jan 2024 12:00:00 GMT",
|
||||
etag_long, "", body_api, strlen(body_api));
|
||||
etag_long, "", "", body_api, strlen(body_api));
|
||||
failures +=
|
||||
check_entry(opt, &cache, "example.com", "/logo", 200, "OK", "image/png",
|
||||
"", "", "etag-bin", "", binary_body, sizeof(binary_body));
|
||||
"", "", "etag-bin", "", "attachment; filename=\"logo.png\"",
|
||||
binary_body, sizeof(binary_body));
|
||||
failures += check_entry(opt, &cache, "example.com", "/gone", 404, "Not Found",
|
||||
"text/html", "utf-8", "", "etag-404",
|
||||
"https://example.com/where-it-went", body_404,
|
||||
"https://example.com/where-it-went", "", body_404,
|
||||
strlen(body_404));
|
||||
|
||||
for (i = 0; i < SELFTEST_VOLUME; i++) {
|
||||
@@ -452,7 +453,7 @@ int cache_selftests(httrackp *opt, const char *dir) {
|
||||
sprintf(body, "<html>volume entry %d</html>", i);
|
||||
failures +=
|
||||
check_entry(opt, &cache, "example.com", fil, 200, "OK", "text/html",
|
||||
"utf-8", "", "", "", body, strlen(body));
|
||||
"utf-8", "", "", "", "", body, strlen(body));
|
||||
}
|
||||
|
||||
for (i = 0; i < large_count; i++) {
|
||||
@@ -460,7 +461,7 @@ int cache_selftests(httrackp *opt, const char *dir) {
|
||||
|
||||
sprintf(fil, "/big/%d.bin", i);
|
||||
failures += check_entry(opt, &cache, "example.com", fil, 200, "OK",
|
||||
"application/octet-stream", "", "", "", "",
|
||||
"application/octet-stream", "", "", "", "", "",
|
||||
large_body[i], large_size[i]);
|
||||
}
|
||||
|
||||
@@ -470,7 +471,7 @@ int cache_selftests(httrackp *opt, const char *dir) {
|
||||
selftest_open_for_write(&cache, opt);
|
||||
store_entry(opt, &cache, "example.com", "/", "example.com/index.html", 200,
|
||||
"OK", "text/html", "iso-8859-1", "Wed, 03 Jan 2024 09:30:00 GMT",
|
||||
"etag-updated", "", body_updated, strlen(body_updated));
|
||||
"etag-updated", "", "", body_updated, strlen(body_updated));
|
||||
selftest_close(&cache);
|
||||
|
||||
/* pass 4: re-read and confirm the updated value, not the old one */
|
||||
@@ -478,7 +479,7 @@ int cache_selftests(httrackp *opt, const char *dir) {
|
||||
failures +=
|
||||
check_entry(opt, &cache, "example.com", "/", 200, "OK", "text/html",
|
||||
"iso-8859-1", "Wed, 03 Jan 2024 09:30:00 GMT", "etag-updated",
|
||||
"", body_updated, strlen(body_updated));
|
||||
"", "", body_updated, strlen(body_updated));
|
||||
selftest_close(&cache);
|
||||
|
||||
/* pass 5: the disk-fallback read path (X-In-Cache: 0, body on disk) */
|
||||
@@ -490,3 +491,97 @@ int cache_selftests(httrackp *opt, const char *dir) {
|
||||
|
||||
return failures;
|
||||
}
|
||||
|
||||
/* Golden fixture: a small frozen cache read back to guard the read path and ZIP
|
||||
format. The table is the contract; tests/fixtures/cache-golden/.../new.zip is
|
||||
a witness written once via `httrack -#B <dir> regen`. Bodies stay in the ZIP
|
||||
(all_in_cache=1), so a read needs only new.zip -- fully portable. */
|
||||
|
||||
/* Binary fixture body: 16 bytes with embedded NULs and bytes >= 0x80, to
   exercise the binary-safe read path. */
static const char golden_binary[] = {
    'P',         'N',         'G',  '\0',
    '\r',        '\n',        (char) 0xFF, (char) 0x80,
    '\0',        '\0',        'e',  'n',
    'd',         (char) 0xCA, (char) 0xFE, '\n'};
|
||||
|
||||
/* One expected cache entry. The member order is significant: the fixture table
   initialises entries positionally. */
typedef struct {
  const char *adr;
  const char *fil;
  const char *save;
  const char *msg;
  const char *contenttype;
  const char *charset;
  const char *lastmodified;
  const char *etag;
  const char *location;
  const char *cdispo;
  const char *body;
  size_t body_len; /* number of bytes at body */
  int statuscode;
} golden_entry;
|
||||
|
||||
/* string-literal body + length (drops the terminator); the binary array passes
|
||||
its length explicitly, every byte counts */
|
||||
#define GBODY(s) (s), (sizeof(s) - 1)
|
||||
|
||||
static const golden_entry golden_entries[] = {
|
||||
/* normal HTML page */
|
||||
{"example.com", "/", "example.com/index.html", "OK", "text/html", "utf-8",
|
||||
"Mon, 01 Jan 2024 00:00:00 GMT", "etag-normal", "", "",
|
||||
GBODY("<html><body>hello</body></html>"), 200},
|
||||
/* redirect: empty body and optionals, a Location */
|
||||
{"example.com", "/moved", "example.com/moved.html", "Moved Permanently",
|
||||
"text/html", "", "", "", "https://example.com/new-home", "", NULL, 0, 301},
|
||||
/* non-HTML content */
|
||||
{"example.com", "/api", "example.com/api.json", "OK", "application/json",
|
||||
"utf-8", "Tue, 02 Jan 2024 12:00:00 GMT", "etag-api", "", "",
|
||||
GBODY("{\"k\":\"v\"}"), 200},
|
||||
/* binary body with a Content-Disposition */
|
||||
{"example.com", "/logo", "example.com/logo.png", "OK", "image/png", "", "",
|
||||
"etag-bin", "", "attachment; filename=\"logo.png\"", golden_binary,
|
||||
sizeof(golden_binary), 200},
|
||||
/* error status with a body and a Location */
|
||||
{"example.com", "/gone", "example.com/gone.html", "Not Found", "text/html",
|
||||
"utf-8", "", "etag-404", "https://example.com/where-it-went", "",
|
||||
GBODY("<html><body>404 Not Found</body></html>"), 404},
|
||||
};
|
||||
|
||||
#define GOLDEN_COUNT (sizeof(golden_entries) / sizeof(golden_entries[0]))
|
||||
|
||||
/* Configure opt for a cache session rooted at dir: the path is copied, given a
   trailing '/' when it is non-empty and lacks one, stored as the log and HTML
   paths, and opt->cache is set to 1. */
static void golden_setup(httrackp *opt, const char *dir) {
  char root[HTS_URLMAXSIZE];
  size_t len;

  strcpybuff(root, dir);
  len = strlen(root);
  /* an empty path is left empty rather than turned into "/" */
  if (len != 0 && root[len - 1] != '/') {
    strcatbuff(root, "/");
  }

  StringCopy(opt->path_log, root);
  StringCopy(opt->path_html, root);
  StringCopy(opt->path_html_utf8, root);
  opt->cache = 1;
}
|
||||
|
||||
/* Verify the golden cache fixture under dir against golden_entries.
   When regen is non-zero the fixture is first rewritten from the table in a
   write session; the read pass then checks every entry with check_entry.
   Assertion messages are tagged "cache-golden" via selftest_tag.
   Returns the sum of the mismatch counts reported by check_entry. */
int cache_golden_selftest(httrackp *opt, const char *dir, int regen) {
  const golden_entry *const last = golden_entries + GOLDEN_COUNT;
  const golden_entry *e;
  cache_back cache;
  int failures = 0;

  selftest_tag = "cache-golden";
  golden_setup(opt, dir);

  /* write pass, regeneration only: without regen the read pass below sees
     whatever new.zip is already present under dir */
  if (regen) {
    selftest_open_for_write(&cache, opt);
    for (e = golden_entries; e != last; e++) {
      store_entry(opt, &cache, e->adr, e->fil, e->save, e->statuscode, e->msg,
                  e->contenttype, e->charset, e->lastmodified, e->etag,
                  e->location, e->cdispo, e->body, e->body_len);
    }
    selftest_close(&cache);
  }

  /* read pass: every table row must decode field- and byte-exact */
  selftest_open_for_read(&cache, opt);
  for (e = golden_entries; e != last; e++) {
    failures +=
        check_entry(opt, &cache, e->adr, e->fil, e->statuscode, e->msg,
                    e->contenttype, e->charset, e->lastmodified, e->etag,
                    e->location, e->cdispo, e->body, e->body_len);
  }
  selftest_close(&cache);

  return failures;
}
|
||||
|
||||
@@ -44,6 +44,14 @@ typedef struct httrackp httrackp;
|
||||
Returns the number of failed checks (0 == success). */
|
||||
int cache_selftests(httrackp *opt, const char *dir);
|
||||
|
||||
/* Read a committed (frozen) cache fixture under <dir>/hts-cache/new.zip and
|
||||
assert a fixed set of entries decodes field- and byte-exact. Unlike
|
||||
cache_selftests (write-then-read with the same build, a round-trip), this
|
||||
reads bytes an earlier build froze, so it catches read-path / format drift.
|
||||
regen!=0 first rewrites the fixture from the same table (to regenerate the
|
||||
committed file, never by the test). Returns the failed-check count. */
|
||||
int cache_golden_selftest(httrackp *opt, const char *dir, int regen);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -69,31 +69,6 @@ Please visit our Website: http://www.httrack.com
|
||||
/* Resolver */
|
||||
extern int IPV6_resolver;
|
||||
|
||||
/* Remaining room in the argv block; 0 once it is exhausted (alias expansion or
|
||||
doit.log insertion can outrun the +32768 slack), so the copy aborts cleanly
|
||||
instead of the subtraction wrapping to a huge unbounded size. */
|
||||
#define cmdl_room(bufsize, ptr) \
|
||||
((ptr) < (size_t) (bufsize) ? (size_t) (bufsize) - (ptr) : 0)
|
||||
|
||||
// Add a command in the argc/argv (buff has total capacity bufsize)
|
||||
#define cmdl_add(token, argc, argv, buff, bufsize, ptr) \
|
||||
argv[argc] = (buff + ptr); \
|
||||
strlcpybuff(argv[argc], token, cmdl_room(bufsize, ptr)); \
|
||||
ptr += (int) (strlen(argv[argc]) + 2); \
|
||||
argc++
|
||||
|
||||
// Insert a command in the argc/argv (buff has total capacity bufsize)
|
||||
#define cmdl_ins(token, argc, argv, buff, bufsize, ptr) \
|
||||
{ \
|
||||
int i; \
|
||||
for (i = argc; i > 0; i--) \
|
||||
argv[i] = argv[i - 1]; \
|
||||
} \
|
||||
argv[0] = (buff + ptr); \
|
||||
strlcpybuff(argv[0], token, cmdl_room(bufsize, ptr)); \
|
||||
ptr += (int) (strlen(argv[0]) + 2); \
|
||||
argc++
|
||||
|
||||
#define htsmain_free() do { \
|
||||
if (url != NULL) { \
|
||||
free(url); \
|
||||
@@ -2465,6 +2440,22 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case 'B': // golden cache fixture read: httrack -#B <dir> [regen]
|
||||
if (na + 1 < argc) {
|
||||
const int regen =
|
||||
(na + 2 < argc && strcmp(argv[na + 2], "regen") == 0);
|
||||
const int err =
|
||||
cache_golden_selftest(opt, argv[na + 1], regen);
|
||||
|
||||
printf("cache-golden: %s\n", err ? "FAIL" : "OK");
|
||||
htsmain_free();
|
||||
return err;
|
||||
} else {
|
||||
fprintf(stderr, "Option #B requires a directory argument\n");
|
||||
htsmain_free();
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case 'C': // list cache files : httrack -#C '*spid*.gif' will attempt to find the matching file
|
||||
{
|
||||
int hasFilter = 0;
|
||||
|
||||
48
tests/01_engine-cache-golden.test
Normal file
48
tests/01_engine-cache-golden.test
Normal file
@@ -0,0 +1,48 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Keep this POSIX-portable: the harness runs it via $(BASH), which is a plain
|
||||
# POSIX /bin/sh on some platforms (e.g. macOS), so avoid bashisms and GNU-only
|
||||
# tool flags despite the #!/bin/bash above.
|
||||
|
||||
# Golden cache-format regression test (driven by 'httrack -#B <dir>').
|
||||
#
|
||||
# 01_engine-cache.test writes the cache with the same build it reads back (a
|
||||
# round-trip), so it cannot catch a read-path or ZIP-format regression where
|
||||
# writer and reader drift together. This reads a *committed* cache frozen by an
|
||||
# earlier build and asserts a fixed set of entries still decodes field- and
|
||||
# byte-exact.
|
||||
#
|
||||
# Regenerate the fixture after a deliberate format change with
|
||||
# 'httrack -#B <dir> regen', then copy <dir>/hts-cache/new.zip over the
|
||||
# committed file.
|
||||
|
||||
set -eu
|
||||
|
||||
: "${top_srcdir:=..}"
|
||||
fixture="$top_srcdir/tests/fixtures/cache-golden"
|
||||
|
||||
test -e "$fixture/hts-cache/new.zip" || {
|
||||
echo "missing committed cache fixture: $fixture/hts-cache/new.zip" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
dir=$(mktemp -d)
|
||||
trap 'rm -rf "$dir"' EXIT
|
||||
|
||||
# Read against a private copy so the source tree is never touched (a read
|
||||
# session does not write, but copying keeps the test hermetic). Create the dir
|
||||
# with mkdir so it is writable for the cleanup trap: under "make distcheck" the
|
||||
# srcdir is read-only, and "cp -r" of that directory would carry its read-only
|
||||
# mode over and defeat the rm -rf.
|
||||
mkdir -p "$dir/hts-cache"
|
||||
cp "$fixture/hts-cache/new.zip" "$dir/hts-cache/new.zip"
|
||||
|
||||
# Capture the exit status instead of letting "set -e" abort on the assignment:
# a failing run would otherwise end the script right here, before the
# diagnostic below is printed (its stdout is captured, so nothing is shown).
rc=0
out=$(httrack -#B "$dir") || rc=$?

# Require a zero exit AND the exact success line: the read must have found and
# verified every entry, not merely failed to enter the mode (a bad -#B falls
# through to the usage screen, which also exits non-zero but never prints this).
test "$rc" -eq 0 && test "$out" = "cache-golden: OK" || {
  echo "expected 'cache-golden: OK', got: $out (exit status $rc)" >&2
  exit 1
}
|
||||
92
tests/01_engine-doitlog.test
Normal file
92
tests/01_engine-doitlog.test
Normal file
@@ -0,0 +1,92 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
# doit.log reprise (no network). Re-running httrack in a mirror directory with
|
||||
# NO url makes the engine read hts-cache/doit.log and re-insert every recorded
|
||||
# argument into the command line through htscoremain.c's cmdl_ins macro (the
|
||||
# x_argvblk builder). That path is distinct from the rc-file one in
|
||||
# 01_engine-rcfile.test (htsalias.c) and from the url-on-command-line update in
|
||||
# 02_update-cache.test, and nothing else exercises it. Two properties:
|
||||
# 1. A multi-token reprise re-mirrors cleanly: every token (the url and each
|
||||
# option) survives the back-to-back packing, so the no-url run reproduces
|
||||
# the file set with no errors. A packing/bound bug corrupts a later token
|
||||
# and surfaces as an error or a missing file.
|
||||
# 2. The reprise actually re-crawls through the inserted url: changing a source
|
||||
# file and re-running with no url picks up the new content.
|
||||
|
||||
set -eu
# pipefail is not available in every sh the harness may run this with, and an
# unknown "set -o" option is a fatal error there; probe in a subshell and enable
# it only where it is supported.
if (set -o pipefail) 2>/dev/null; then
  set -o pipefail
fi
|
||||
|
||||
# Resolve httrack to an absolute path before we cd: PATH may hold a
|
||||
# build-relative entry that would not resolve from the temp directory.
|
||||
bin=$(command -v httrack) || {
|
||||
echo "FAIL: httrack not found on PATH"
|
||||
exit 1
|
||||
}
|
||||
case "$bin" in
|
||||
/*) ;;
|
||||
*) bin="$(cd "$(dirname "$bin")" && pwd)/$(basename "$bin")" ;;
|
||||
esac
|
||||
|
||||
tmp=$(mktemp -d "${TMPDIR:-/tmp}/httrack_doitlog.XXXXXX") || exit 1
|
||||
trap 'rm -rf "$tmp"' EXIT HUP INT QUIT PIPE TERM
|
||||
|
||||
site="$tmp/site"
|
||||
out="$tmp/out"
|
||||
mkdir -p "$site/sub"
|
||||
printf '<a href="a.html">a</a> <a href="sub/b.html">b</a>' >"$site/index.html"
|
||||
echo 'OLDCONTENT' >"$site/a.html"
|
||||
echo '<p>bbb</p>' >"$site/sub/b.html"
|
||||
url="file://$site/index.html"
|
||||
|
||||
# count Error: lines in the log (grep -c exits 1 on zero matches: guard it)
|
||||
errors() { grep -ciE '^[0-9:]*[[:space:]]Error:' "$out/hts-log.txt" || true; }
|
||||
|
||||
# initial mirror with the url and a handful of options, so doit.log records a
|
||||
# multi-token command line for cmdl_ins to re-insert one token at a time.
|
||||
rc=0
|
||||
"$bin" "$url" -O "$out" --quiet -n -%v0 -r3 >/dev/null 2>&1 || rc=$?
|
||||
test "$rc" -eq 0 || {
|
||||
echo "FAIL: initial mirror exited $rc"
|
||||
exit 1
|
||||
}
|
||||
test -f "$out/hts-cache/doit.log" || {
|
||||
echo "FAIL: doit.log not written by the initial mirror"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# --- 1. no-url reprise re-mirrors cleanly -----------------------------------
|
||||
# No url on the command line, so the engine loads doit.log and re-inserts the
|
||||
# recorded arguments (cmdl_ins). -O selects the mirror; argv carries no url.
|
||||
rc=0
|
||||
"$bin" -O "$out" --quiet >/dev/null 2>&1 || rc=$?
|
||||
test "$rc" -eq 0 || {
|
||||
echo "FAIL: doit.log reprise exited $rc"
|
||||
exit 1
|
||||
}
|
||||
test "$(errors)" = 0 || {
|
||||
echo "FAIL: doit.log reprise reported errors (a token may have been corrupted)"
|
||||
grep -iE 'Error:' "$out/hts-log.txt" | head -3
|
||||
exit 1
|
||||
}
|
||||
for suffix in a.html sub/b.html; do
|
||||
test -n "$(find "$out" -path "*/$suffix" -print -quit)" || {
|
||||
echo "FAIL: $suffix missing after the no-url reprise"
|
||||
exit 1
|
||||
}
|
||||
done
|
||||
|
||||
# --- 2. the reprise re-crawls through the inserted url -----------------------
|
||||
sleep 1
|
||||
echo 'NEWCONTENT' >"$site/a.html"
|
||||
rc=0
|
||||
"$bin" -O "$out" --quiet >/dev/null 2>&1 || rc=$?
|
||||
test "$rc" -eq 0 || {
|
||||
echo "FAIL: second reprise exited $rc"
|
||||
exit 1
|
||||
}
|
||||
grep -q NEWCONTENT "$(find "$out" -path '*/a.html' -print -quit)" || {
|
||||
echo "FAIL: reprise did not pick up the changed source (inserted url not re-crawled)"
|
||||
exit 1
|
||||
}
|
||||
|
||||
exit 0
|
||||
@@ -1,4 +1,8 @@
|
||||
EXTRA_DIST = $(TESTS) crawl-test.sh run-all-tests.sh check-network.sh
|
||||
# Committed binary fixture read by 01_engine-cache-golden.test. List it
|
||||
# explicitly: automake does not expand wildcards in EXTRA_DIST, so a glob would
|
||||
# silently drop it from the dist tarball and break "make distcheck".
|
||||
EXTRA_DIST = $(TESTS) crawl-test.sh run-all-tests.sh check-network.sh \
|
||||
fixtures/cache-golden/hts-cache/new.zip
|
||||
|
||||
TESTS_ENVIRONMENT =
|
||||
TESTS_ENVIRONMENT += PATH=$(top_builddir)/src$(PATH_SEPARATOR)$$PATH
|
||||
@@ -17,8 +21,10 @@ TEST_LOG_COMPILER = $(BASH)
|
||||
TESTS = \
|
||||
00_runnable.test \
|
||||
01_engine-cache.test \
|
||||
01_engine-cache-golden.test \
|
||||
01_engine-charset.test \
|
||||
01_engine-cmdline.test \
|
||||
01_engine-doitlog.test \
|
||||
01_engine-entities.test \
|
||||
01_engine-filter.test \
|
||||
01_engine-hashtable.test \
|
||||
|
||||
BIN
tests/fixtures/cache-golden/hts-cache/new.zip
vendored
Normal file
BIN
tests/fixtures/cache-golden/hts-cache/new.zip
vendored
Normal file
Binary file not shown.
Reference in New Issue
Block a user