mirror of
https://github.com/xroche/httrack.git
synced 2026-07-05 16:44:55 +03:00
Compare commits
10 Commits
3.49.11
...
p2-4-cache
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cf5ccdc65b | ||
|
|
715ef6c069 | ||
|
|
6eb5ec4a57 | ||
|
|
107e92a558 | ||
|
|
c089612e2e | ||
|
|
0ad0019716 | ||
|
|
a6ed343dfe | ||
|
|
f785286c87 | ||
|
|
440a8603a9 | ||
|
|
4979e58dc0 |
9
debian/changelog
vendored
9
debian/changelog
vendored
@@ -10,6 +10,15 @@ httrack (3.49.11-1) unstable; urgency=medium
|
||||
|
||||
-- Xavier Roche <xavier@debian.org> Sun, 05 Jul 2026 00:03:18 +0200
|
||||
|
||||
httrack (3.49.10-2) unstable; urgency=medium
|
||||
|
||||
* Fix FTBFS: tests/28_local-pause failed instead of skipping when python3 is
|
||||
absent (the local-server tests need python3, which the buildds lack). Add
|
||||
patches/skip-local-pause-test-without-python3.patch to guard the test on
|
||||
python3 up front, like its siblings, so it skips cleanly.
|
||||
|
||||
-- Xavier Roche <xavier@debian.org> Sun, 28 Jun 2026 20:18:46 +0200
|
||||
|
||||
httrack (3.49.10-1) unstable; urgency=medium
|
||||
|
||||
* New upstream release: new download-pacing and URL-handling options plus a
|
||||
|
||||
@@ -40,6 +40,7 @@ Please visit our Website: http://www.httrack.com
|
||||
#include "htscore.h"
|
||||
#include "htsbasenet.h"
|
||||
#include "htsmd5.h"
|
||||
#include <limits.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "htszlib.h"
|
||||
@@ -768,6 +769,15 @@ static htsblk cache_readex_new(httrackp * opt, cache_back * cache,
|
||||
strlcpybuff(return_save, previous_save, HTS_URLMAXSIZE * 2);
|
||||
}
|
||||
|
||||
/* A tampered X-Size must be rejected before the size-driven malloc.
|
||||
The alloc casts to int (malloct((int) r.size + 1)), so bound it to
|
||||
[0, INT_MAX): a negative value, or a positive one whose (int) cast
|
||||
truncates negative, would otherwise wrap to a huge allocation. */
|
||||
if (r.size < 0 || r.size >= INT_MAX) {
|
||||
r.statuscode = STATUSCODE_INVALID;
|
||||
strcpybuff(r.msg, "Cache Read Error : Bad Size");
|
||||
}
|
||||
|
||||
/* Complete fields */
|
||||
r.totalsize = r.size;
|
||||
r.adr = NULL;
|
||||
@@ -794,7 +804,8 @@ static htsblk cache_readex_new(httrackp * opt, cache_back * cache,
|
||||
} // otherwise, the ZIP file is supposed to be consistent with data.
|
||||
}
|
||||
/* Read data ? */
|
||||
else { /* ne pas lire uniquement header */
|
||||
else if (r.statuscode !=
|
||||
STATUSCODE_INVALID) { /* ne pas lire uniquement header */
|
||||
int ok = 0;
|
||||
|
||||
#if HTS_DIRECTDISK
|
||||
@@ -1420,6 +1431,86 @@ static int hts_rename(httrackp * opt, const char *a, const char *b) {
|
||||
return rename(a, b);
|
||||
}
|
||||
|
||||
/* Pathname of a file inside the mirror dir (rotating concat buffer). */
|
||||
static char *reconcile_path(httrackp *opt, const char *name) {
|
||||
return fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log), name);
|
||||
}
|
||||
|
||||
/* Interrupted-run heuristic: prefer the old generation when the new cache
|
||||
stalled below NEW_TINY while the old one grew past OLD_SOLID (historical
|
||||
arbitrary thresholds). */
|
||||
#define CACHE_RECONCILE_NEW_TINY 32768
|
||||
#define CACHE_RECONCILE_OLD_SOLID 65536
|
||||
|
||||
/* Replace the new-generation file by the old one, when the old one exists. */
|
||||
static void reconcile_promote(httrackp *opt, const char *oldname,
|
||||
const char *newname) {
|
||||
if (fexist(reconcile_path(opt, oldname))) {
|
||||
remove(reconcile_path(opt, newname));
|
||||
rename(reconcile_path(opt, oldname), reconcile_path(opt, newname));
|
||||
}
|
||||
}
|
||||
|
||||
/* Non-zero when both files of the legacy old generation (old.dat and
   old.ndx) are present. */
static int reconcile_has_old_legacy(httrackp *opt) {
  return fexist(reconcile_path(opt, "hts-cache/old.dat")) &&
         fexist(reconcile_path(opt, "hts-cache/old.ndx"));
}

/* Promote the legacy pair: old.dat first, then old.ndx. */
static void reconcile_promote_legacy(httrackp *opt) {
  reconcile_promote(opt, "hts-cache/old.dat", "hts-cache/new.dat");
  reconcile_promote(opt, "hts-cache/old.ndx", "hts-cache/new.ndx");
}

/* Interrupted-run size rule: non-zero when `newname` exists but stalled
   below CACHE_RECONCILE_NEW_TINY while `oldname` exists, is larger than
   CACHE_RECONCILE_OLD_SOLID and is larger than the new file.
   Both files must exist: fsize() is -1 for a missing file, which would
   spuriously pass the "< TINY" comparison and overwrite a solid old
   generation that the PROMOTE/ROLLBACK policies should keep. */
static int reconcile_new_stalled(httrackp *opt, const char *oldname,
                                 const char *newname) {
  if (!fexist(reconcile_path(opt, newname)) ||
      !fexist(reconcile_path(opt, oldname)))
    return 0;
  return fsize(reconcile_path(opt, newname)) < CACHE_RECONCILE_NEW_TINY &&
         fsize(reconcile_path(opt, oldname)) > CACHE_RECONCILE_OLD_SOLID &&
         fsize(reconcile_path(opt, oldname)) >
           fsize(reconcile_path(opt, newname));
}

/* Reconcile the hts-cache/ generations (new.* vs old.*) according to `mode`:
   - CACHE_RECONCILE_PROMOTE: the previous run rotated new.* to old.* and died
     before writing; bring the old generation back, whichever format it uses
     (zip, or the legacy dat/ndx pair).
   - CACHE_RECONCILE_INTERRUPTED: only when the cache is enabled (opt->cache)
     and hts-in_progress.lock exists; prefer the old generation when the new
     one is suspiciously small next to it.
   - CACHE_RECONCILE_ROLLBACK: nothing was transferred; restore the previous
     generation and its lst/txt sidecars.
   Any other mode value does nothing. */
void hts_cache_reconcile(httrackp *opt, hts_cache_reconcile_mode mode) {
  if (mode == CACHE_RECONCILE_PROMOTE) {
    /* zip format: promote only when there is no new.zip at all */
    if (!fexist(reconcile_path(opt, "hts-cache/new.zip")))
      reconcile_promote(opt, "hts-cache/old.zip", "hts-cache/new.zip");

    /* legacy format: an incomplete new pair yields to a complete old pair */
    {
      const int new_pair_complete =
        fexist(reconcile_path(opt, "hts-cache/new.dat")) &&
        fexist(reconcile_path(opt, "hts-cache/new.ndx"));

      if (!new_pair_complete && reconcile_has_old_legacy(opt))
        reconcile_promote_legacy(opt);
    }
  } else if (mode == CACHE_RECONCILE_INTERRUPTED) {
    if (opt->cache && fexist(reconcile_path(opt, "hts-in_progress.lock"))) {
      if (reconcile_new_stalled(opt, "hts-cache/old.zip", "hts-cache/new.zip"))
        reconcile_promote(opt, "hts-cache/old.zip", "hts-cache/new.zip");

      /* the legacy pair follows the same size rule, judged on the dat file */
      if (reconcile_has_old_legacy(opt) &&
          reconcile_new_stalled(opt, "hts-cache/old.dat", "hts-cache/new.dat"))
        reconcile_promote_legacy(opt);
    }
  } else if (mode == CACHE_RECONCILE_ROLLBACK) {
    reconcile_promote(opt, "hts-cache/old.zip", "hts-cache/new.zip");
    if (reconcile_has_old_legacy(opt))
      reconcile_promote_legacy(opt);
    /* sidecars are restored whatever the cache format */
    reconcile_promote(opt, "hts-cache/old.lst", "hts-cache/new.lst");
    reconcile_promote(opt, "hts-cache/old.txt", "hts-cache/new.txt");
  }
}
|
||||
|
||||
// renvoyer uniquement en tête, ou NULL si erreur
|
||||
// return NULL upon error, and set -1 to r.statuscode
|
||||
htsblk *cache_header(httrackp * opt, cache_back * cache, const char *adr,
|
||||
|
||||
@@ -78,6 +78,17 @@ htsblk *cache_header(httrackp * opt, cache_back * cache, const char *adr,
|
||||
const char *fil, htsblk * r);
|
||||
void cache_init(cache_back * cache, httrackp * opt);
|
||||
|
||||
/* Policy selector for hts_cache_reconcile(): decides which hts-cache/
   generation (new.* or old.*) is authoritative. */
typedef enum {
  /* no new cache: promote the old generation */
  CACHE_RECONCILE_PROMOTE = 0,
  /* aborted run: keep the larger generation */
  CACHE_RECONCILE_INTERRUPTED = 1,
  /* nothing transferred: restore the old one */
  CACHE_RECONCILE_ROLLBACK = 2
} hts_cache_reconcile_mode;
|
||||
|
||||
/* Reconcile the on-disk cache generations according to mode; a no-op when
|
||||
the involved files are absent. */
|
||||
void hts_cache_reconcile(httrackp *opt, hts_cache_reconcile_mode mode);
|
||||
|
||||
int cache_writedata(FILE * cache_ndx, FILE * cache_dat, const char *str1,
|
||||
const char *str2, char *outbuff, int len);
|
||||
int cache_readdata(cache_back * cache, const char *str1, const char *str2,
|
||||
|
||||
@@ -716,3 +716,398 @@ int cache_golden_selftest(httrackp *opt, const char *dir, int regen) {
|
||||
|
||||
return failures;
|
||||
}
|
||||
|
||||
/* --- hts_cache_reconcile() policies -------------------------------------- */
|
||||
|
||||
/* Every file the reconcile policies may read or produce, relative to the
   mirror directory; the whole set is wiped between self-test cases. */
static const char *const reconcile_files[] = {
  "hts-cache/new.zip",
  "hts-cache/old.zip",
  "hts-cache/new.dat",
  "hts-cache/old.dat",
  "hts-cache/new.ndx",
  "hts-cache/old.ndx",
  "hts-cache/new.lst",
  "hts-cache/old.lst",
  "hts-cache/new.txt",
  "hts-cache/old.txt",
  "hts-in_progress.lock"
};
|
||||
|
||||
/* Self-test helper: pathname of `name` under opt->path_log, written into the
   concat buffer obtained through OPT_GET_BUFF (use the result immediately). */
static char *reconcile_st_path(httrackp *opt, const char *name) {
  const char *const base = StringBuff(opt->path_log);

  return fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), base, name);
}
|
||||
|
||||
/* Delete every fixture listed in reconcile_files; files that are already
   absent are ignored (remove() failures are not checked). */
static void reconcile_wipe(httrackp *opt) {
  const size_t count = sizeof(reconcile_files) / sizeof(reconcile_files[0]);
  const char *const *entry;

  for (entry = reconcile_files; entry != reconcile_files + count; entry++) {
    remove(reconcile_st_path(opt, *entry));
  }
}
|
||||
|
||||
/* Create a filler file of exactly `size` bytes. */
|
||||
static void reconcile_put(httrackp *opt, const char *name, size_t size) {
|
||||
FILE *const fp = fopen(reconcile_st_path(opt, name), "wb");
|
||||
static const char filler[1024] = {'x'};
|
||||
|
||||
assertf(fp != NULL);
|
||||
while (size > 0) {
|
||||
const size_t n = size > sizeof(filler) ? sizeof(filler) : size;
|
||||
|
||||
assertf(fwrite(filler, 1, n, fp) == n);
|
||||
size -= n;
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
/* Expect `name` to weigh `size` bytes, or be absent when size == -1. */
|
||||
static int reconcile_expect(httrackp *opt, const char *name, off_t size,
|
||||
const char *what) {
|
||||
const off_t got = fsize(reconcile_st_path(opt, name));
|
||||
|
||||
if (got != size) {
|
||||
fprintf(stderr, "cache-reconcile: %s: %s is %d bytes, expected %d\n", what,
|
||||
name, (int) got, (int) size);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Exercise every hts_cache_reconcile() policy on filler files created under
   <dir>. Each scenario wipes the fixtures, lays out the files it needs, runs
   one policy and checks the resulting sizes (-1 meaning "absent").
   Returns the number of failed expectations. */
int cache_reconcile_selftest(httrackp *opt, const char *dir) {
  /* sizes chosen around the interrupted-run thresholds
     (new < 32768, old > 65536) */
  enum { TINY = 1024, MID = 40000, SOLID = 131072 };
  int failures = 0;
  const char *cachedir;

  golden_setup(opt, dir);
  cachedir = reconcile_st_path(opt, "hts-cache");
#ifdef _WIN32
  mkdir(cachedir);
#else
  mkdir(cachedir, HTS_PROTECT_FOLDER);
#endif

  /* --- PROMOTE ----------------------------------------------------------- */

  /* a zip old generation takes the place of a missing new one */
  reconcile_wipe(opt);
  reconcile_put(opt, "hts-cache/old.zip", SOLID);
  hts_cache_reconcile(opt, CACHE_RECONCILE_PROMOTE);
  failures += reconcile_expect(opt, "hts-cache/new.zip", SOLID, "promote-zip");
  failures += reconcile_expect(opt, "hts-cache/old.zip", -1, "promote-zip");

  /* an existing new.zip is left untouched */
  reconcile_wipe(opt);
  reconcile_put(opt, "hts-cache/new.zip", TINY);
  reconcile_put(opt, "hts-cache/old.zip", SOLID);
  hts_cache_reconcile(opt, CACHE_RECONCILE_PROMOTE);
  failures +=
    reconcile_expect(opt, "hts-cache/new.zip", TINY, "promote-zip-noop");
  failures +=
    reconcile_expect(opt, "hts-cache/old.zip", SOLID, "promote-zip-noop");

  /* a pure-legacy old generation is promoted too (this path used to be dead
     when no zip cache existed) */
  reconcile_wipe(opt);
  reconcile_put(opt, "hts-cache/old.dat", SOLID);
  reconcile_put(opt, "hts-cache/old.ndx", TINY);
  hts_cache_reconcile(opt, CACHE_RECONCILE_PROMOTE);
  failures += reconcile_expect(opt, "hts-cache/new.dat", SOLID, "promote-dat");
  failures += reconcile_expect(opt, "hts-cache/new.ndx", TINY, "promote-dat");
  failures += reconcile_expect(opt, "hts-cache/old.dat", -1, "promote-dat");

  /* a half-written legacy new pair is replaced by the old pair */
  reconcile_wipe(opt);
  reconcile_put(opt, "hts-cache/new.dat", TINY);
  reconcile_put(opt, "hts-cache/old.dat", SOLID);
  reconcile_put(opt, "hts-cache/old.ndx", TINY);
  hts_cache_reconcile(opt, CACHE_RECONCILE_PROMOTE);
  failures +=
    reconcile_expect(opt, "hts-cache/new.dat", SOLID, "promote-dat-partial");
  failures +=
    reconcile_expect(opt, "hts-cache/new.ndx", TINY, "promote-dat-partial");

  /* --- INTERRUPTED ------------------------------------------------------- */

  /* without the lock file nothing happens */
  reconcile_wipe(opt);
  reconcile_put(opt, "hts-cache/new.zip", TINY);
  reconcile_put(opt, "hts-cache/old.zip", SOLID);
  hts_cache_reconcile(opt, CACHE_RECONCILE_INTERRUPTED);
  failures +=
    reconcile_expect(opt, "hts-cache/new.zip", TINY, "interrupted-nolock");

  /* an absent new.zip must NOT promote old.zip (fsize() == -1 would
     spuriously pass "< TINY"); the solid old generation is left for ROLLBACK */
  reconcile_wipe(opt);
  reconcile_put(opt, "hts-in_progress.lock", 0);
  reconcile_put(opt, "hts-cache/old.zip", SOLID);
  hts_cache_reconcile(opt, CACHE_RECONCILE_INTERRUPTED);
  failures +=
    reconcile_expect(opt, "hts-cache/new.zip", -1, "interrupted-nonew");
  failures +=
    reconcile_expect(opt, "hts-cache/old.zip", SOLID, "interrupted-nonew");

  /* a stalled tiny new.zip loses to a solid old.zip (used to be dead for zip
     caches: the arm was gated on a legacy new.dat) */
  reconcile_wipe(opt);
  reconcile_put(opt, "hts-in_progress.lock", 0);
  reconcile_put(opt, "hts-cache/new.zip", TINY);
  reconcile_put(opt, "hts-cache/old.zip", SOLID);
  hts_cache_reconcile(opt, CACHE_RECONCILE_INTERRUPTED);
  failures +=
    reconcile_expect(opt, "hts-cache/new.zip", SOLID, "interrupted-zip");
  failures += reconcile_expect(opt, "hts-cache/old.zip", -1, "interrupted-zip");

  /* old generation below the confidence threshold: keep new */
  reconcile_wipe(opt);
  reconcile_put(opt, "hts-in_progress.lock", 0);
  reconcile_put(opt, "hts-cache/new.zip", TINY);
  reconcile_put(opt, "hts-cache/old.zip", MID);
  hts_cache_reconcile(opt, CACHE_RECONCILE_INTERRUPTED);
  failures +=
    reconcile_expect(opt, "hts-cache/new.zip", TINY, "interrupted-smallold");

  /* new generation big enough to be trusted: keep it */
  reconcile_wipe(opt);
  reconcile_put(opt, "hts-in_progress.lock", 0);
  reconcile_put(opt, "hts-cache/new.zip", MID);
  reconcile_put(opt, "hts-cache/old.zip", SOLID);
  hts_cache_reconcile(opt, CACHE_RECONCILE_INTERRUPTED);
  failures +=
    reconcile_expect(opt, "hts-cache/new.zip", MID, "interrupted-bignew");

  /* the legacy pair obeys the same size rule (used to be dead code) */
  reconcile_wipe(opt);
  reconcile_put(opt, "hts-in_progress.lock", 0);
  reconcile_put(opt, "hts-cache/new.dat", TINY);
  reconcile_put(opt, "hts-cache/new.ndx", TINY);
  reconcile_put(opt, "hts-cache/old.dat", SOLID);
  reconcile_put(opt, "hts-cache/old.ndx", MID);
  hts_cache_reconcile(opt, CACHE_RECONCILE_INTERRUPTED);
  failures +=
    reconcile_expect(opt, "hts-cache/new.dat", SOLID, "interrupted-dat");
  failures +=
    reconcile_expect(opt, "hts-cache/new.ndx", MID, "interrupted-dat");

  /* --- ROLLBACK ---------------------------------------------------------- */

  /* the old zip generation is restored (a zip cache used to lose its only
     good generation here) */
  reconcile_wipe(opt);
  reconcile_put(opt, "hts-cache/new.zip", TINY);
  reconcile_put(opt, "hts-cache/old.zip", SOLID);
  hts_cache_reconcile(opt, CACHE_RECONCILE_ROLLBACK);
  failures += reconcile_expect(opt, "hts-cache/new.zip", SOLID, "rollback-zip");
  failures += reconcile_expect(opt, "hts-cache/old.zip", -1, "rollback-zip");

  /* sidecars are restored regardless of the cache format */
  reconcile_wipe(opt);
  reconcile_put(opt, "hts-cache/new.lst", TINY);
  reconcile_put(opt, "hts-cache/old.lst", MID);
  reconcile_put(opt, "hts-cache/old.txt", MID);
  hts_cache_reconcile(opt, CACHE_RECONCILE_ROLLBACK);
  failures += reconcile_expect(opt, "hts-cache/new.lst", MID, "rollback-lst");
  failures += reconcile_expect(opt, "hts-cache/new.txt", MID, "rollback-txt");

  /* full legacy generation including sidecars (historical behavior) */
  reconcile_wipe(opt);
  reconcile_put(opt, "hts-cache/new.dat", TINY);
  reconcile_put(opt, "hts-cache/new.ndx", TINY);
  reconcile_put(opt, "hts-cache/old.dat", SOLID);
  reconcile_put(opt, "hts-cache/old.ndx", MID);
  reconcile_put(opt, "hts-cache/old.lst", MID);
  reconcile_put(opt, "hts-cache/old.txt", MID);
  hts_cache_reconcile(opt, CACHE_RECONCILE_ROLLBACK);
  failures += reconcile_expect(opt, "hts-cache/new.dat", SOLID, "rollback-dat");
  failures += reconcile_expect(opt, "hts-cache/new.ndx", MID, "rollback-dat");
  failures += reconcile_expect(opt, "hts-cache/new.lst", MID, "rollback-dat");
  failures += reconcile_expect(opt, "hts-cache/new.txt", MID, "rollback-dat");

  /* nothing to restore: the new generation stays */
  reconcile_wipe(opt);
  reconcile_put(opt, "hts-cache/new.zip", TINY);
  hts_cache_reconcile(opt, CACHE_RECONCILE_ROLLBACK);
  failures += reconcile_expect(opt, "hts-cache/new.zip", TINY, "rollback-noop");

  /* leave no fixture behind */
  reconcile_wipe(opt);
  return failures;
}
|
||||
|
||||
/* --- read-side corruption injection --------------------------------------- */
|
||||
|
||||
/* canary read back intact after each corruption; victim gets the byte surgery
|
||||
*/
|
||||
#define CORRUPT_ADR "corrupt.example.com"
|
||||
static char corrupt_body_a[33 + 1];
|
||||
static char corrupt_body_b[44 + 1];
|
||||
|
||||
/* Write a fresh two-entry cache: /canary.html then /victim.html. */
|
||||
static void corrupt_build(httrackp *opt) {
|
||||
cache_back cache;
|
||||
|
||||
memset(corrupt_body_a, 'a', sizeof(corrupt_body_a) - 1);
|
||||
memset(corrupt_body_b, 'b', sizeof(corrupt_body_b) - 1);
|
||||
remove(reconcile_st_path(opt, "hts-cache/new.zip"));
|
||||
remove(reconcile_st_path(opt, "hts-cache/old.zip"));
|
||||
selftest_open_for_write(&cache, opt);
|
||||
store_entry(opt, &cache, CORRUPT_ADR, "/canary.html", "canary.html", 200,
|
||||
"OK", "text/html", "utf-8", "", "", "", "", corrupt_body_a,
|
||||
strlen(corrupt_body_a));
|
||||
store_entry(opt, &cache, CORRUPT_ADR, "/victim.html", "victim.html", 200,
|
||||
"OK", "text/html", "utf-8", "", "", "", "", corrupt_body_b,
|
||||
strlen(corrupt_body_b));
|
||||
selftest_close(&cache);
|
||||
}
|
||||
|
||||
/* Like corrupt_build, but the victim carries a 20-char Etag whose header line
|
||||
is later overwritten with a forged oversized X-Size (same byte length). */
|
||||
static void corrupt_build_etag(httrackp *opt) {
|
||||
cache_back cache;
|
||||
|
||||
memset(corrupt_body_a, 'a', sizeof(corrupt_body_a) - 1);
|
||||
memset(corrupt_body_b, 'b', sizeof(corrupt_body_b) - 1);
|
||||
remove(reconcile_st_path(opt, "hts-cache/new.zip"));
|
||||
remove(reconcile_st_path(opt, "hts-cache/old.zip"));
|
||||
selftest_open_for_write(&cache, opt);
|
||||
store_entry(opt, &cache, CORRUPT_ADR, "/canary.html", "canary.html", 200,
|
||||
"OK", "text/html", "utf-8", "", "", "", "", corrupt_body_a,
|
||||
strlen(corrupt_body_a));
|
||||
store_entry(opt, &cache, CORRUPT_ADR, "/victim.html", "victim.html", 200,
|
||||
"OK", "text/html", "utf-8", "", "AAAAAAAAAAAAAAAAAAAA", "", "",
|
||||
corrupt_body_b, strlen(corrupt_body_b));
|
||||
selftest_close(&cache);
|
||||
}
|
||||
|
||||
/* Patch the nth of total occurrences of pat (same-length rep) in new.zip. */
|
||||
static void corrupt_patch(httrackp *opt, const char *pat, size_t patlen,
|
||||
const char *rep, size_t nth, size_t total) {
|
||||
LLint fsz = 0;
|
||||
char *data = readfile2(reconcile_st_path(opt, "hts-cache/new.zip"), &fsz);
|
||||
const size_t n = (size_t) fsz;
|
||||
size_t k, hits = 0, at = 0;
|
||||
FILE *fp;
|
||||
|
||||
assertf(data != NULL);
|
||||
for (k = 0; k + patlen <= n; k++) {
|
||||
if (memcmp(data + k, pat, patlen) == 0) {
|
||||
hits++;
|
||||
if (hits == nth)
|
||||
at = k;
|
||||
}
|
||||
}
|
||||
assertf(hits == total);
|
||||
memcpy(data + at, rep, patlen);
|
||||
fp = fopen(reconcile_st_path(opt, "hts-cache/new.zip"), "wb");
|
||||
assertf(fp != NULL);
|
||||
assertf(fwrite(data, 1, n, fp) == n);
|
||||
fclose(fp);
|
||||
freet(data);
|
||||
}
|
||||
|
||||
/* Garbage the first bytes of the victim's deflated data (2nd local header). */
|
||||
static void corrupt_victim_body(httrackp *opt) {
|
||||
LLint fsz = 0;
|
||||
char *data = readfile2(reconcile_st_path(opt, "hts-cache/new.zip"), &fsz);
|
||||
const size_t n = (size_t) fsz;
|
||||
size_t k, hits = 0, off = 0;
|
||||
FILE *fp;
|
||||
|
||||
assertf(data != NULL);
|
||||
for (k = 0; k + 4 <= n; k++) {
|
||||
if (memcmp(data + k, "PK\x03\x04", 4) == 0 && ++hits == 2) {
|
||||
const size_t namelen =
|
||||
(unsigned char) data[k + 26] | ((unsigned char) data[k + 27] << 8);
|
||||
const size_t extralen =
|
||||
(unsigned char) data[k + 28] | ((unsigned char) data[k + 29] << 8);
|
||||
|
||||
off = k + 30 + namelen + extralen;
|
||||
}
|
||||
}
|
||||
assertf(hits == 2);
|
||||
assertf(off != 0 && off + 4 <= n);
|
||||
memset(data + off, 0xFF, 4);
|
||||
fp = fopen(reconcile_st_path(opt, "hts-cache/new.zip"), "wb");
|
||||
assertf(fp != NULL);
|
||||
assertf(fwrite(data, 1, n, fp) == n);
|
||||
fclose(fp);
|
||||
freet(data);
|
||||
}
|
||||
|
||||
/* Read the corrupt /victim.html and, in the SAME read session, the intact
|
||||
/canary.html: the victim must be rejected (wantmsg pins which path) and the
|
||||
canary must still decode byte-exact, proving one bad entry never taints a
|
||||
sibling read. */
|
||||
static int corrupt_expect_victim(httrackp *opt, const char *wantmsg,
|
||||
const char *what) {
|
||||
cache_back cache;
|
||||
htsblk v, c;
|
||||
char BIGSTK lv[HTS_URLMAXSIZE * 2];
|
||||
char BIGSTK lc[HTS_URLMAXSIZE * 2];
|
||||
int fail = 0;
|
||||
|
||||
selftest_open_for_read(&cache, opt);
|
||||
lv[0] = lc[0] = '\0';
|
||||
v = cache_readex(opt, &cache, CORRUPT_ADR, "/victim.html", "", lv, NULL, 1);
|
||||
if (v.statuscode != STATUSCODE_INVALID) {
|
||||
fprintf(stderr, "%s: %s: victim: statuscode is %d, expected %d\n",
|
||||
selftest_tag, what, v.statuscode, STATUSCODE_INVALID);
|
||||
fail++;
|
||||
}
|
||||
if (wantmsg != NULL && strcmp(v.msg, wantmsg) != 0) {
|
||||
fprintf(stderr, "%s: %s: victim: msg is '%s', expected '%s'\n",
|
||||
selftest_tag, what, v.msg, wantmsg);
|
||||
fail++;
|
||||
}
|
||||
c = cache_readex(opt, &cache, CORRUPT_ADR, "/canary.html", "", lc, NULL, 1);
|
||||
if (c.statuscode != 200 || c.adr == NULL ||
|
||||
c.size != (LLint) strlen(corrupt_body_a) ||
|
||||
memcmp(c.adr, corrupt_body_a, strlen(corrupt_body_a)) != 0) {
|
||||
fprintf(stderr, "%s: %s: canary tainted (status %d)\n", selftest_tag, what,
|
||||
c.statuscode);
|
||||
fail++;
|
||||
}
|
||||
if (v.adr != NULL)
|
||||
freet(v.adr);
|
||||
if (c.adr != NULL)
|
||||
freet(c.adr);
|
||||
selftest_close(&cache);
|
||||
return fail;
|
||||
}
|
||||
|
||||
/* One zip corruption case: build, patch, then check victim+canary in-session.
|
||||
*/
|
||||
static int corrupt_case_zip(httrackp *opt, const char *pat, const char *rep,
|
||||
size_t nth, size_t total, const char *wantmsg,
|
||||
const char *what) {
|
||||
corrupt_build(opt);
|
||||
corrupt_patch(opt, pat, strlen(pat), rep, nth, total);
|
||||
return corrupt_expect_victim(opt, wantmsg, what);
|
||||
}
|
||||
|
||||
/* Inject read-side corruption into a freshly built zip cache under <dir> and
   check that each case is rejected with the expected message while the
   sibling canary entry still reads back intact.
   Returns the failed-check count. */
int cache_corruption_selftest(httrackp *opt, const char *dir) {
  /* Same-length pattern substitutions, run in this order. */
  static const struct {
    const char *pat;     /* bytes to look for in new.zip */
    const char *rep;     /* same-length replacement */
    size_t nth;          /* 1-based occurrence to patch */
    size_t total;        /* occurrences expected in the file */
    const char *wantmsg; /* message the victim read must report */
    const char *what;    /* case label for diagnostics */
  } zip_cases[] = {
    {"X-Size: 44", "X-Size: 99", 1, 1, "Cache Read Error : Read Data",
     "oversized X-Size"},
    {"X-Size: 44", "X-Size: -4", 1, 1, "Cache Read Error : Bad Size",
     "negative X-Size"},
    /* both entries carry the line; the victim's is the second */
    {"X-In-Cache: 1", "X-In-Cache: 0", 2, 2,
     "Previous cache file not found (empty filename)", "blanked X-In-Cache"},
    /* smashed local file header: the entry is dropped at index load */
    {"PK\x03\x04", "XK\x03\x04", 2, 2, "File Cache Entry Not Found",
     "smashed local header"},
  };
  int failures = 0;
  size_t i;

  selftest_tag = "cache-corrupt";
  golden_setup(opt, dir);

  for (i = 0; i < sizeof(zip_cases) / sizeof(zip_cases[0]); i++) {
    failures += corrupt_case_zip(opt, zip_cases[i].pat, zip_cases[i].rep,
                                 zip_cases[i].nth, zip_cases[i].total,
                                 zip_cases[i].wantmsg, zip_cases[i].what);
  }

  /* garbage at the start of the victim's compressed data */
  corrupt_build(opt);
  corrupt_victim_body(opt);
  failures += corrupt_expect_victim(opt, "Cache Read Error : Read Data",
                                    "garbled deflate stream");

  /* An X-Size above INT_MAX is positive as a 64-bit value (so it slips past a
     bare sign check) but truncates negative in the (int) cast used by the
     allocation: a wraparound alloc. cache_add asserts that sizes fit an int,
     so such a value can only reach the reader from a corrupt or foreign
     cache. It is injected by overwriting the victim's long Etag line with a
     forged X-Size line of the same length (the parser keeps the last X-Size
     it sees), which leaves the zip byte-length and offsets intact. */
  corrupt_build_etag(opt);
  corrupt_patch(opt, "Etag: AAAAAAAAAAAAAAAAAAAA", 26,
                "X-Size: 2147483648AAAAAAAA", 1, 1);
  failures += corrupt_expect_victim(opt, "Cache Read Error : Bad Size",
                                    "X-Size above INT_MAX");

  return failures;
}
|
||||
|
||||
@@ -56,6 +56,15 @@ int cache_golden_selftest(httrackp *opt, const char *dir, int regen);
|
||||
crashing. Returns the failed-check count. */
|
||||
int cache_write_failure_selftest(httrackp *opt, const char *dir);
|
||||
|
||||
/* Exercise the hts_cache_reconcile() generation policies on file fixtures
|
||||
under <dir>. Returns the failed-check count. */
|
||||
int cache_reconcile_selftest(httrackp *opt, const char *dir);
|
||||
|
||||
/* Inject read-side corruption (zip byte surgery: bad size, header, deflate)
|
||||
under <dir> and assert every case degrades to STATUSCODE_INVALID without
|
||||
tainting a sibling entry. */
|
||||
int cache_corruption_selftest(httrackp *opt, const char *dir);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -2137,47 +2137,7 @@ int httpmirror(char *url1, httrackp * opt) {
|
||||
hts_log_print(opt, LOG_NOTICE,
|
||||
"No data seems to have been transferred during this session! : restoring previous one!");
|
||||
XH_uninit;
|
||||
if ((fexist
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log), "hts-cache/old.dat")))
|
||||
&&
|
||||
(fexist
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.ndx")))) {
|
||||
remove(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/new.dat"));
|
||||
remove(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/new.ndx"));
|
||||
remove(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/new.lst"));
|
||||
remove(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/new.txt"));
|
||||
rename(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.dat"), fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/new.dat"));
|
||||
rename(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.ndx"), fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/new.ndx"));
|
||||
rename(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.lst"), fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/new.lst"));
|
||||
rename(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.txt"), fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/new.txt"));
|
||||
}
|
||||
hts_cache_reconcile(opt, CACHE_RECONCILE_ROLLBACK);
|
||||
opt->state.exit_xh = 2; /* interrupted (no connection detected) */
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -544,69 +544,11 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
}
|
||||
}
|
||||
|
||||
// Existence d'un cache - pas de new mais un old.. renommer
|
||||
// No new cache but an old one? promote it
|
||||
#if DEBUG_STEPS
|
||||
printf("Checking cache\n");
|
||||
#endif
|
||||
if (!fexist
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log), "hts-cache/new.zip"))) {
|
||||
if (fexist
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log), "hts-cache/old.zip"))) {
|
||||
rename(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/old.zip"), fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/new.zip"));
|
||||
}
|
||||
} else
|
||||
if ((!fexist
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log), "hts-cache/new.dat")))
|
||||
||
|
||||
(!fexist
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/new.ndx")))) {
|
||||
if ((fexist
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log), "hts-cache/old.dat")))
|
||||
&&
|
||||
(fexist
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/old.ndx")))) {
|
||||
remove(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/new.dat"));
|
||||
remove(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/new.ndx"));
|
||||
//remove(fconcat(StringBuff(opt->path_log),"hts-cache/new.lst"));
|
||||
rename(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/old.dat"), fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/new.dat"));
|
||||
rename(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.ndx"), fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/new.ndx"));
|
||||
//rename(fconcat(StringBuff(opt->path_log),"hts-cache/old.lst"),fconcat(StringBuff(opt->path_log),"hts-cache/new.lst"));
|
||||
}
|
||||
}
|
||||
hts_cache_reconcile(opt, CACHE_RECONCILE_PROMOTE);
|
||||
|
||||
/* Interrupted mirror detected */
|
||||
if (!opt->quiet) {
|
||||
@@ -2554,109 +2496,8 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
printf("Cache & log settings\n");
|
||||
#endif
|
||||
|
||||
// on utilise le cache..
|
||||
// en cas de présence des deux versions, garder la version la plus avancée,
|
||||
// cad la version contenant le plus de fichiers
|
||||
if (opt->cache) {
|
||||
if (fexist(fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log), "hts-in_progress.lock"))) { // problemes..
|
||||
if (fexist
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/new.dat"))) {
|
||||
if (fexist
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.zip"))) {
|
||||
if (fsize
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/new.zip")) < 32768) {
|
||||
if (fsize
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.zip")) > 65536) {
|
||||
if (fsize
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.zip")) > fsize(fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->
|
||||
path_log),
|
||||
"hts-cache/new.zip")))
|
||||
{
|
||||
remove(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/new.zip"));
|
||||
rename(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.zip"), fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/new.zip"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else
|
||||
if (fexist
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/new.dat"))
|
||||
&&
|
||||
fexist(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/new.ndx"))) {
|
||||
if (fexist
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.dat"))
|
||||
&&
|
||||
fexist(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.ndx"))) {
|
||||
// switcher si new<32Ko et old>65Ko (tailles arbitraires) ?
|
||||
// ce cas est peut être une erreur ou un crash d'un miroir ancien, prendre
|
||||
// alors l'ancien cache
|
||||
if (fsize
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/new.dat")) < 32768) {
|
||||
if (fsize
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.dat")) > 65536) {
|
||||
if (fsize
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.dat")) > fsize(fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->
|
||||
path_log),
|
||||
"hts-cache/new.dat")))
|
||||
{
|
||||
remove(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/new.dat"));
|
||||
remove(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/new.ndx"));
|
||||
rename(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.dat"), fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/new.dat"));
|
||||
rename(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/old.ndx"), fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/new.ndx"));
|
||||
//} else { // ne rien faire
|
||||
// remove("hts-cache/old.dat");
|
||||
// remove("hts-cache/old.ndx");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// If both cache generations exist, keep the most complete one
|
||||
hts_cache_reconcile(opt, CACHE_RECONCILE_INTERRUPTED);
|
||||
// Débuggage des en têtes
|
||||
if (_DEBUG_HEAD) {
|
||||
ioinfo =
|
||||
|
||||
@@ -1347,6 +1347,30 @@ static int st_cache_writefail(httrackp *opt, int argc, char **argv) {
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Self-test entry point for "cache-corrupt" (listed in the selftest table
 * with a "<dir>" argument).
 *
 * argv[0] is the directory handed to cache_corruption_selftest(). With no
 * argument, a usage error is written to stderr and 1 is returned. Otherwise
 * the verdict line "cache-corrupt: OK" or "cache-corrupt: FAIL" is printed on
 * stdout and the helper's result is returned unchanged (non-zero = failure).
 */
static int st_cache_corrupt(httrackp *opt, int argc, char **argv) {
|
||||
int err;
|
||||
|
||||
if (argc < 1) {
|
||||
fprintf(stderr, "cache-corrupt: needs a directory\n");
|
||||
return 1;
|
||||
}
|
||||
err = cache_corruption_selftest(opt, argv[0]);
|
||||
/* tests/01_zlib-cache-corrupt.test matches this verdict line verbatim. */
printf("cache-corrupt: %s\n", err ? "FAIL" : "OK");
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Self-test entry point for "reconcile" (listed in the selftest table with a
 * "<dir>" argument).
 *
 * argv[0] is the directory handed to cache_reconcile_selftest(). With no
 * argument, a usage error is written to stderr and 1 is returned. Otherwise
 * the verdict line is printed on stdout and the helper's result is returned
 * unchanged (non-zero = failure).
 *
 * Note the two prefixes differ: the usage error says "reconcile:" while the
 * verdict says "cache-reconcile:". tests/01_engine-reconcile.test matches the
 * verdict string verbatim, so it must not be changed casually.
 */
static int st_reconcile(httrackp *opt, int argc, char **argv) {
|
||||
int err;
|
||||
|
||||
if (argc < 1) {
|
||||
fprintf(stderr, "reconcile: needs a directory\n");
|
||||
return 1;
|
||||
}
|
||||
err = cache_reconcile_selftest(opt, argv[0]);
|
||||
printf("cache-reconcile: %s\n", err ? "FAIL" : "OK");
|
||||
return err;
|
||||
}
|
||||
|
||||
static int st_dns(httrackp *opt, int argc, char **argv) {
|
||||
const int err = dns_selftests(opt);
|
||||
|
||||
@@ -2119,6 +2143,10 @@ static const struct selftest_entry {
|
||||
st_cache_golden},
|
||||
{"cache-writefail", "<dir>", "cache write-failure handling self-test",
|
||||
st_cache_writefail},
|
||||
{"reconcile", "<dir>", "cache generation reconcile policy self-test",
|
||||
st_reconcile},
|
||||
{"cache-corrupt", "<dir>", "cache read-side corruption self-test",
|
||||
st_cache_corrupt},
|
||||
{"dns", "", "DNS resolver/cache self-test", st_dns},
|
||||
{"cookies", "", "cookie request-header self-test", st_cookies},
|
||||
{"useragent", "", "default User-Agent self-test", st_useragent},
|
||||
|
||||
17
tests/01_engine-reconcile.test
Normal file
17
tests/01_engine-reconcile.test
Normal file
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Cache generation reconcile policies (httrack -#test=reconcile <dir>):
|
||||
# promote a stranded old generation, keep the larger one after an aborted
|
||||
# run, and restore the old one when an update transferred nothing.
|
||||
|
||||
# Fail fast: abort on any command error (-e) or unset variable (-u).
set -eu
|
||||
|
||||
# Scratch directory passed to the self-test; the EXIT trap removes it however
# the script ends.
dir=$(mktemp -d)
|
||||
trap 'rm -rf "$dir"' EXIT
|
||||
|
||||
# Capture stdout. Under set -e a non-zero httrack exit aborts right here,
# before the diagnostic below can be printed.
out=$(httrack -#test=reconcile "$dir")
|
||||
|
||||
# stdout must be exactly the verdict line, nothing more.
test "$out" = "cache-reconcile: OK" || {
|
||||
echo "expected 'cache-reconcile: OK', got: $out" >&2
|
||||
exit 1
|
||||
}
|
||||
19
tests/01_zlib-cache-corrupt.test
Normal file
19
tests/01_zlib-cache-corrupt.test
Normal file
@@ -0,0 +1,19 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Read-side cache corruption (httrack -#test=cache-corrupt <dir>): zip byte
|
||||
# surgery (bad/oversized X-Size, blanked X-In-Cache, smashed header, garbled
|
||||
# deflate) must each be rejected per-entry, never crash, never taint the sibling.
|
||||
|
||||
# Fail fast: abort on any command error (-e) or unset variable (-u).
set -eu
|
||||
|
||||
# Scratch directory passed to the self-test; the EXIT trap removes it however
# the script ends.
dir=$(mktemp -d)
|
||||
trap 'rm -rf "$dir"' EXIT
|
||||
|
||||
# the smashed-header case logs expected "Corrupted cache entry" warnings on
|
||||
# stdout; the verdict is the last line
|
||||
# stderr is discarded. pipefail is not enabled, so the pipeline's status is
# tail's: httrack's own exit code is not checked here and the verdict string
# compared below is the only gate.
out=$(httrack -#test=cache-corrupt "$dir" 2>/dev/null | tail -n1)
|
||||
|
||||
test "$out" = "cache-corrupt: OK" || {
|
||||
echo "expected 'cache-corrupt: OK', got: $out" >&2
|
||||
exit 1
|
||||
}
|
||||
12
tests/37_local-cache-outage.test
Normal file
12
tests/37_local-cache-outage.test
Normal file
@@ -0,0 +1,12 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# An update run against a dead server must not destroy the cache: the no-data
|
||||
# rollback restores the previous hts-cache generation (zip caches lost it).
|
||||
|
||||
# Fail fast: abort on any command error (-e) or unset variable (-u).
set -eu
|
||||
|
||||
# Default top_srcdir to .. when the caller did not provide it (unset or empty).
: "${top_srcdir:=..}"
|
||||
|
||||
# --rerun-dead makes local-crawl.sh re-run the crawl with the server stopped
# and check that the previous cache generation was restored.
# NOTE(review): --errors and --found are audit options of local-crawl.sh,
# presumably the expected error count and a mirrored file that must exist;
# confirm against that script.
bash "$top_srcdir/tests/local-crawl.sh" --errors 0 --rerun-dead \
|
||||
--found 'simple/basic.html' \
|
||||
httrack 'BASEURL/simple/basic.html'
|
||||
14
tests/38_local-update-304.test
Normal file
14
tests/38_local-update-304.test
Normal file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# An all-304 update of a tiny site (headers under the 32K rollback threshold)
|
||||
# is a healthy run: it must not trip the no-data rollback as a fake outage.
|
||||
|
||||
# Fail fast: abort on any command error (-e) or unset variable (-u).
set -eu
|
||||
|
||||
# Default top_srcdir to .. when the caller did not provide it (unset or empty).
: "${top_srcdir:=..}"
|
||||
|
||||
# --rerun makes local-crawl.sh run httrack a second time (update pass) before
# auditing. The rollback notice named in --log-not-found is the same string
# the --rerun-dead pass requires, so its absence here is the actual assertion.
# NOTE(review): --log-found/--log-not-found presumably match against
# hts-log.txt, and --found presumably requires a mirrored file; confirm
# against local-crawl.sh.
bash "$top_srcdir/tests/local-crawl.sh" --errors 0 --rerun \
|
||||
--log-found 'no files updated' \
|
||||
--log-not-found 'No data seems to have been transferred' \
|
||||
--found 'mini304/index.html' --found 'mini304/page.html' \
|
||||
httrack 'BASEURL/mini304/index.html'
|
||||
@@ -48,6 +48,7 @@ TESTS = \
|
||||
01_engine-parse.test \
|
||||
01_engine-pause.test \
|
||||
01_engine-rcfile.test \
|
||||
01_engine-reconcile.test \
|
||||
01_engine-redirect.test \
|
||||
01_engine-relative.test \
|
||||
01_engine-robots.test \
|
||||
@@ -63,6 +64,7 @@ TESTS = \
|
||||
01_engine-useragent.test \
|
||||
01_zlib-acceptencoding.test \
|
||||
01_zlib-cache.test \
|
||||
01_zlib-cache-corrupt.test \
|
||||
01_zlib-cache-golden.test \
|
||||
01_zlib-cache-writefail.test \
|
||||
01_zlib-savename-cached.test \
|
||||
@@ -99,6 +101,8 @@ TESTS = \
|
||||
33_local-delayed.test \
|
||||
34_local-maxtime.test \
|
||||
35_local-maxsize.test \
|
||||
36_local-bigcrawl.test
|
||||
36_local-bigcrawl.test \
|
||||
37_local-cache-outage.test \
|
||||
38_local-update-304.test
|
||||
|
||||
CLEANFILES = check-network_sh.cache
|
||||
|
||||
@@ -25,6 +25,8 @@
|
||||
# --cookie writes a Netscape cookies.txt (scoped to the discovered host:port,
|
||||
# which the ephemeral port forces into the cookie domain) and passes it to
|
||||
# httrack via --cookies-file, to exercise preloaded cookies.
|
||||
# --rerun-dead re-runs with the server stopped: the no-data rollback must
|
||||
# restore the previous hts-cache generation byte-identical.
|
||||
|
||||
set -u
|
||||
|
||||
@@ -37,6 +39,7 @@ key="${testdir}/server.key"
|
||||
tls=
|
||||
verbose=
|
||||
rerun=
|
||||
rerun_dead=
|
||||
tmpdir=
|
||||
serverpid=
|
||||
crawlpid=
|
||||
@@ -102,7 +105,8 @@ nargs=$#
|
||||
while test "$pos" -lt "$nargs"; do
|
||||
case "${args[$pos]}" in
|
||||
--debug) verbose=1 ;;
|
||||
--rerun) rerun=1 ;; # run httrack a second time (update pass) before auditing
|
||||
--rerun) rerun=1 ;; # run httrack a second time (update pass) before auditing
|
||||
--rerun-dead) rerun_dead=1 ;; # re-run with the server stopped (cache rollback)
|
||||
--no-purge)
|
||||
nopurge=1
|
||||
audit+=("--no-purge")
|
||||
@@ -241,6 +245,43 @@ if test -n "$rerun"; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# --- optional dead pass: server stopped, the cache must survive the rollback --
|
||||
if test -n "$rerun_dead"; then
|
||||
# Snapshot the healthy cache and log: the cache copy is the reference for the
# byte-for-byte comparison below, the log copy is restored at the end.
zip="${out}/hts-cache/new.zip"
|
||||
test -s "$zip" || die "no cache was written by the first pass"
|
||||
cp "$zip" "${tmpdir}/cache-before.zip"
|
||||
cp "${out}/hts-log.txt" "${tmpdir}/log-before.txt"
|
||||
# Stop the test server and reap it; failures of kill/wait are silenced.
kill "$serverpid" 2>/dev/null
|
||||
wait "$serverpid" 2>/dev/null
|
||||
# NOTE(review): cleared presumably so later cleanup does not signal a dead
# pid again; confirm against the script's exit handler.
serverpid=
|
||||
info "re-running httrack against the stopped server"
|
||||
# Run in the background and record the pid; console output goes to a separate
# "${log}.dead" file so the first pass's console log is left untouched.
httrack -O "$out" --user-agent="httrack $ver local ($(uname -omrs))" \
|
||||
"${moreargs[@]}" "${hts[@]}" >"${log}.dead" 2>&1 &
|
||||
crawlpid=$!
|
||||
# "|| true": the crawl's exit status is deliberately ignored; the checks
# below judge the outcome instead.
wait "$crawlpid" || true
|
||||
crawlpid=
|
||||
result "OK (dead pass ran)"
|
||||
# The dead pass must have gone through the no-data rollback, not bailed out
|
||||
# before the mirror loop (which would leave the cache trivially untouched).
|
||||
info "checking the dead pass hit the rollback"
|
||||
# grep -a: treat the log as text even if it holds binary bytes; -q: status only.
if grep -aq "No data seems to have been transferred" "${out}/hts-log.txt"; then
|
||||
result "OK"
|
||||
else
|
||||
result "rollback notice not found in hts-log.txt"
|
||||
exit 1
|
||||
fi
|
||||
info "checking the previous cache generation was restored"
|
||||
# Both conditions are required: new.zip identical to the snapshot AND no
# old.zip left in hts-cache.
if cmp -s "$zip" "${tmpdir}/cache-before.zip" &&
|
||||
test ! -e "${out}/hts-cache/old.zip"; then
|
||||
result "OK"
|
||||
else
|
||||
result "new.zip differs from the pre-outage cache (or old.zip left behind)"
|
||||
exit 1
|
||||
fi
|
||||
# Audits below describe the healthy crawl, not the dead pass.
|
||||
cp "${tmpdir}/log-before.txt" "${out}/hts-log.txt"
|
||||
fi
|
||||
|
||||
# --- discover the single host root (127.0.0.1_<port> or 127.0.0.1) -----------
|
||||
hostroot=
|
||||
for cand in "${out}/127.0.0.1_${port}" "${out}/127.0.0.1"; do
|
||||
|
||||
@@ -831,6 +831,16 @@ class Handler(SimpleHTTPRequestHandler):
|
||||
def route_redir_target(self):
|
||||
self.send_raw(b"<html><body>redirect target</body></html>\n", "text/html")
|
||||
|
||||
# --- /mini304/: tiny fully-cacheable site (an update gets only 304s) ---
|
||||
# Handler for "/mini304/index.html" (see the route table): sends a minimal
# HTML page whose only link is the relative href "page.html".
# NOTE(review): the "update gets only 304s" behaviour presumably comes from
# big_send (validators / conditional-request handling); confirm there.
def route_mini304_index(self):
|
||||
self.big_send(
|
||||
b'<html><body>\n\t<a href="page.html">page</a>\n</body></html>\n',
|
||||
"text/html",
|
||||
)
|
||||
|
||||
# Handler for "/mini304/page.html" (see the route table): sends a fixed,
# link-free HTML body as text/html through big_send, the same helper the
# mini304 index route uses.
def route_mini304_page(self):
|
||||
self.big_send(b"<html><body>tiny cacheable page</body></html>\n", "text/html")
|
||||
|
||||
# --- delayed-type degenerate paths (issues #5/#107) --------------------
|
||||
def route_delayed_index(self):
|
||||
self.send_html(
|
||||
@@ -993,6 +1003,8 @@ class Handler(SimpleHTTPRequestHandler):
|
||||
"/redir/index.html": route_redir_index,
|
||||
"/redir/go.php": route_redir_go,
|
||||
"/redir/target.html": route_redir_target,
|
||||
"/mini304/index.html": route_mini304_index,
|
||||
"/mini304/page.html": route_mini304_page,
|
||||
}
|
||||
|
||||
# --- /big/ seeded pseudo-site ------------------------------------------
|
||||
|
||||
@@ -211,7 +211,9 @@ main() {
|
||||
# lintian ourselves below as the real gate.
|
||||
local -a debuild_opts=(--no-lintian)
|
||||
local -a build_opts=()
|
||||
[[ $source_only -eq 1 ]] && build_opts+=(-S)
|
||||
# -d: a source build runs no debhelper, so don't require Build-Depends
|
||||
# locally (the buildds and the --sbuild gate enforce them).
|
||||
[[ $source_only -eq 1 ]] && build_opts+=(-S -d)
|
||||
if [[ $unsigned -eq 1 ]]; then
|
||||
build_opts+=(-us -uc)
|
||||
else
|
||||
@@ -234,12 +236,15 @@ main() {
|
||||
|
||||
# The real lintian gate (debuild only reports, it does not fail on tags).
|
||||
# --profile debian: CI runners are Ubuntu, whose vendor data would wrongly
|
||||
# reject the Debian "unstable" distribution. newer-standards-version only
|
||||
# means the local lintian is older than the buildds', not a package
|
||||
# defect, so suppress it. set -e turns any error/warning tag into a failure.
|
||||
# reject the Debian "unstable" distribution. Suppressed tags are stale-local-
|
||||
# lintian skew, not package defects: newer-standards-version, and
|
||||
# recommended-field (old lintian still wants the Priority field the sid
|
||||
# lintian in CI accepts dropping). set -e turns any error/warning tag into
|
||||
# a failure.
|
||||
info "running lintian gate (--fail-on=error,warning)"
|
||||
lintian --profile debian -I -i --fail-on=error,warning \
|
||||
--suppress-tags newer-standards-version "${changes[@]}"
|
||||
--suppress-tags newer-standards-version,recommended-field \
|
||||
"${changes[@]}"
|
||||
|
||||
dcmd cp -- "${changes[@]}" "$outdir/"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user