mirror of
https://github.com/xroche/httrack.git
synced 2026-06-14 22:33:54 +03:00
Compare commits
6 Commits
cleanup/ht
...
test/cache
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a52a2b146c | ||
|
|
226a38d3d0 | ||
|
|
1e463f65a5 | ||
|
|
09ed9968cd | ||
|
|
ad6915e3cc | ||
|
|
4a5580dec0 |
@@ -102,7 +102,8 @@ int cookie_add(t_cookie * cookie, const char *cook_name, const char *cook_value,
|
||||
strcatbuff(cook, "\n");
|
||||
if (!((strlen(cookie->data) + strlen(cook)) < cookie->max_len))
|
||||
return -1; // impossible d'ajouter
|
||||
cookie_insert(insert, cook);
|
||||
cookie_insert(insert, cookie->max_len - (size_t) (insert - cookie->data),
|
||||
cook);
|
||||
#if DEBUG_COOK
|
||||
printf("add_new cookie: name=\"%s\" value=\"%s\" domain=\"%s\" path=\"%s\"\n",
|
||||
cook_name, cook_value, domain, path);
|
||||
@@ -118,7 +119,7 @@ int cookie_del(t_cookie * cookie, const char *cook_name, const char *domain, con
|
||||
b = cookie_find(cookie->data, cook_name, domain, path);
|
||||
if (b) {
|
||||
a = cookie_nextfield(b);
|
||||
cookie_delete(b, a - b);
|
||||
cookie_delete(b, cookie->max_len - (size_t) (b - cookie->data), a - b);
|
||||
#if DEBUG_COOK
|
||||
printf("deleted old cookie: %s %s %s\n", cook_name, domain, path);
|
||||
#endif
|
||||
@@ -336,41 +337,44 @@ int cookie_save(t_cookie * cookie, const char *name) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// insertion chaine ins avant s
|
||||
void cookie_insert(char *s, const char *ins) {
|
||||
// Insert string ins before s. s_size is the capacity of the buffer at s.
|
||||
void cookie_insert(char *s, size_t s_size, const char *ins) {
|
||||
char *buff;
|
||||
|
||||
if (strnotempty(s) == 0) { // rien à faire, juste concat
|
||||
strcatbuff(s, ins);
|
||||
if (strnotempty(s) == 0) { // nothing there yet: just concatenate
|
||||
strlcatbuff(s, ins, s_size);
|
||||
} else {
|
||||
buff = (char *) malloct(strlen(s) + 1);
|
||||
if (buff) {
|
||||
strcpybuff(buff, s); // copie temporaire
|
||||
strcpybuff(s, ins); // insérer
|
||||
strcatbuff(s, buff); // copier
|
||||
strlcpybuff(buff, s, strlen(s) + 1); // temporary copy of s
|
||||
strlcpybuff(s, ins, s_size); // write ins
|
||||
strlcatbuff(s, buff, s_size); // then the saved content
|
||||
freet(buff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// destruction chaine dans s position pos
|
||||
void cookie_delete(char *s, size_t pos) {
|
||||
// Delete the substring of s at position pos. s_size is the capacity at s.
|
||||
void cookie_delete(char *s, size_t s_size, size_t pos) {
|
||||
char *buff;
|
||||
|
||||
if (strnotempty(s + pos) == 0) { // rien à faire, effacer
|
||||
if (strnotempty(s + pos) == 0) { // nothing after pos: truncate
|
||||
s[0] = '\0';
|
||||
} else {
|
||||
buff = (char *) malloct(strlen(s + pos) + 1);
|
||||
if (buff) {
|
||||
strcpybuff(buff, s + pos); // copie temporaire
|
||||
strcpybuff(s, buff); // copier
|
||||
strlcpybuff(buff, s + pos, strlen(s + pos) + 1); // temporary copy
|
||||
strlcpybuff(s, buff, s_size); // overwrite from start
|
||||
freet(buff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// renvoie champ param de la chaine cookie_base
|
||||
// ex: cookie_get("ceci est<tab>un<tab>exemple",1) renvoi "un"
|
||||
// Return field <param> (0-based, tab-separated) of the cookie line cookie_base,
|
||||
// into buffer. ex: cookie_get("ceci est<tab>un<tab>exemple", 1) returns "un".
|
||||
// buffer must hold at least COOKIE_FIELD_BUFFER_SIZE bytes (all callers use
|
||||
// char[8192]).
|
||||
#define COOKIE_FIELD_BUFFER_SIZE 8192
|
||||
const char *cookie_get(char *buffer, const char *cookie_base, int param) {
|
||||
const char *limit;
|
||||
|
||||
@@ -394,11 +398,11 @@ const char *cookie_get(char *buffer, const char *cookie_base, int param) {
|
||||
if (cookie_base) {
|
||||
if (cookie_base < limit) {
|
||||
const char *a = cookie_base;
|
||||
htsbuff b = htsbuff_ptr(buffer, COOKIE_FIELD_BUFFER_SIZE);
|
||||
|
||||
while((*a) && (*a != '\t') && (*a != '\n'))
|
||||
a++;
|
||||
buffer[0] = '\0';
|
||||
strncatbuff(buffer, cookie_base, (int) (a - cookie_base));
|
||||
htsbuff_catn(&b, cookie_base, (size_t) (a - cookie_base));
|
||||
return buffer;
|
||||
} else
|
||||
return "";
|
||||
@@ -458,11 +462,13 @@ char *bauth_check(t_cookie * cookie, const char *adr, const char *fil) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Build the auth prefix (host + path, query stripped) into prefix.
|
||||
Callers pass a buffer of HTS_URLMAXSIZE * 2 bytes. */
|
||||
char *bauth_prefix(char *prefix, const char *adr, const char *fil) {
|
||||
char *a;
|
||||
|
||||
strcpybuff(prefix, jump_identification_const(adr));
|
||||
strcatbuff(prefix, fil);
|
||||
strlcpybuff(prefix, jump_identification_const(adr), HTS_URLMAXSIZE * 2);
|
||||
strlcatbuff(prefix, fil, HTS_URLMAXSIZE * 2);
|
||||
a = strchr(prefix, '?');
|
||||
if (a)
|
||||
*a = '\0';
|
||||
|
||||
@@ -67,8 +67,8 @@ int cookie_add(t_cookie * cookie, const char *cook_name, const char *cook_valu
|
||||
int cookie_del(t_cookie * cookie, const char *cook_name, const char *domain, const char *path);
|
||||
int cookie_load(t_cookie * cookie, const char *path, const char *name);
|
||||
int cookie_save(t_cookie * cookie, const char *name);
|
||||
void cookie_insert(char *s, const char *ins);
|
||||
void cookie_delete(char *s, size_t pos);
|
||||
void cookie_insert(char *s, size_t s_size, const char *ins);
|
||||
void cookie_delete(char *s, size_t s_size, size_t pos);
|
||||
const char *cookie_get(char *buffer, const char *cookie_base, int param);
|
||||
char *cookie_find(char *s, const char *cook_name, const char *domain, const char *path);
|
||||
char *cookie_nextfield(char *a);
|
||||
|
||||
@@ -196,12 +196,13 @@ struct cache_back_zip_entry {
|
||||
int compressionMethod;
|
||||
};
|
||||
|
||||
#define ZIP_READFIELD_STRING(line, value, refline, refvalue) do { \
|
||||
if (line[0] != '\0' && strfield2(line, refline)) { \
|
||||
strcpybuff(refvalue, value); \
|
||||
line[0] = '\0'; \
|
||||
} \
|
||||
} while(0)
|
||||
#define ZIP_READFIELD_STRING(line, value, refline, refvalue, refvalue_size) \
|
||||
do { \
|
||||
if (line[0] != '\0' && strfield2(line, refline)) { \
|
||||
strlcpybuff(refvalue, value, refvalue_size); \
|
||||
line[0] = '\0'; \
|
||||
} \
|
||||
} while (0)
|
||||
#define ZIP_READFIELD_INT(line, value, refline, refvalue) do { \
|
||||
if (line[0] != '\0' && strfield2(line, refline)) { \
|
||||
int intval = 0; \
|
||||
@@ -643,7 +644,7 @@ static htsblk cache_readex_new(httrackp * opt, cache_back * cache,
|
||||
} else {
|
||||
r.location = location_default;
|
||||
}
|
||||
strcpybuff(r.location, "");
|
||||
r.location[0] = '\0';
|
||||
strcpybuff(buff, adr);
|
||||
strcatbuff(buff, fil);
|
||||
hash_pos_return = coucal_read(cache->hashtable, buff, &hash_pos);
|
||||
@@ -706,17 +707,25 @@ static htsblk cache_readex_new(httrackp * opt, cache_back * cache,
|
||||
value++;
|
||||
ZIP_READFIELD_INT(line, value, "X-In-Cache", dataincache);
|
||||
ZIP_READFIELD_INT(line, value, "X-Statuscode", r.statuscode);
|
||||
ZIP_READFIELD_STRING(line, value, "X-StatusMessage", r.msg); // msg
|
||||
ZIP_READFIELD_STRING(line, value, "X-StatusMessage", r.msg,
|
||||
sizeof(r.msg));
|
||||
ZIP_READFIELD_LLINT(line, value, "X-Size", r.size); // size
|
||||
ZIP_READFIELD_STRING(line, value, "Content-Type", r.contenttype); // contenttype
|
||||
ZIP_READFIELD_STRING(line, value, "X-Charset", r.charset); // contenttype
|
||||
ZIP_READFIELD_STRING(line, value, "Last-Modified", r.lastmodified); // last-modified
|
||||
ZIP_READFIELD_STRING(line, value, "Etag", r.etag); // Etag
|
||||
ZIP_READFIELD_STRING(line, value, "Location", r.location); // 'location' pour moved
|
||||
ZIP_READFIELD_STRING(line, value, "Content-Disposition", r.cdispo); // Content-disposition
|
||||
ZIP_READFIELD_STRING(line, value, "Content-Type", r.contenttype,
|
||||
sizeof(r.contenttype));
|
||||
ZIP_READFIELD_STRING(line, value, "X-Charset", r.charset,
|
||||
sizeof(r.charset));
|
||||
ZIP_READFIELD_STRING(line, value, "Last-Modified", r.lastmodified,
|
||||
sizeof(r.lastmodified));
|
||||
ZIP_READFIELD_STRING(line, value, "Etag", r.etag, sizeof(r.etag));
|
||||
// r.location is a char* pointing into a HTS_URLMAXSIZE*2 buffer
|
||||
ZIP_READFIELD_STRING(line, value, "Location", r.location,
|
||||
HTS_URLMAXSIZE * 2);
|
||||
ZIP_READFIELD_STRING(line, value, "Content-Disposition", r.cdispo,
|
||||
sizeof(r.cdispo));
|
||||
//ZIP_READFIELD_STRING(line, value, "X-Addr", ..); // Original address
|
||||
//ZIP_READFIELD_STRING(line, value, "X-Fil", ..); // Original URI filename
|
||||
ZIP_READFIELD_STRING(line, value, "X-Save", previous_save_); // Original save filename
|
||||
ZIP_READFIELD_STRING(line, value, "X-Save", previous_save_,
|
||||
sizeof(previous_save_));
|
||||
}
|
||||
} while(offset < readSizeHeader && !lineEof);
|
||||
//totalHeader = offset;
|
||||
@@ -733,7 +742,7 @@ static htsblk cache_readex_new(httrackp * opt, cache_back * cache,
|
||||
}
|
||||
}
|
||||
if (return_save != NULL) {
|
||||
strcpybuff(return_save, previous_save);
|
||||
strlcpybuff(return_save, previous_save, HTS_URLMAXSIZE * 2);
|
||||
}
|
||||
|
||||
/* Complete fields */
|
||||
@@ -1025,7 +1034,7 @@ static htsblk cache_readex_old(httrackp * opt, cache_back * cache,
|
||||
} else {
|
||||
r.location = location_default;
|
||||
}
|
||||
strcpybuff(r.location, "");
|
||||
r.location[0] = '\0';
|
||||
#if HTS_FAST_CACHE
|
||||
strcpybuff(buff, adr);
|
||||
strcatbuff(buff, fil);
|
||||
@@ -1111,7 +1120,7 @@ static htsblk cache_readex_old(httrackp * opt, cache_back * cache,
|
||||
previous_save[0] = '\0';
|
||||
cache_rstr(cache->olddat, previous_save); // save
|
||||
if (return_save != NULL) {
|
||||
strcpybuff(return_save, previous_save);
|
||||
strlcpybuff(return_save, previous_save, HTS_URLMAXSIZE * 2);
|
||||
}
|
||||
}
|
||||
if (cache->version >= 5) {
|
||||
@@ -2088,7 +2097,7 @@ char *readfile_or(const char *fil, const char *defaultdata) {
|
||||
char *adr = malloct(strlen(defaultdata) + 1);
|
||||
|
||||
if (adr) {
|
||||
strcpybuff(adr, defaultdata);
|
||||
strlcpybuff(adr, defaultdata, strlen(defaultdata) + 1);
|
||||
return adr;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,6 +40,7 @@ Please visit our Website: http://www.httrack.com
|
||||
#include "htscore.h"
|
||||
#include "htsdefines.h"
|
||||
#include "htsalias.h"
|
||||
#include "htsbauth.h"
|
||||
#include "htswrap.h"
|
||||
#include "htsmodules.h"
|
||||
#include "htszlib.h"
|
||||
@@ -138,6 +139,19 @@ static void basic_selftests(void) {
|
||||
fil_normalized(source, buffer);
|
||||
// MD5 selftests
|
||||
md5selftest();
|
||||
// cookie_get field extraction (tab-separated, 0-based)
|
||||
{
|
||||
char cbuf[8192];
|
||||
|
||||
assertf(strcmp(cookie_get(cbuf, "a\tb\tc", 0), "a") == 0);
|
||||
assertf(strcmp(cookie_get(cbuf, "a\tb\tc", 1), "b") == 0);
|
||||
assertf(strcmp(cookie_get(cbuf, "a\tb\tc", 2), "c") == 0);
|
||||
// multi-char fields catch length/boundary bugs that 1-char fields hide
|
||||
assertf(strcmp(cookie_get(cbuf, "host\tx\t/path/to", 0), "host") == 0);
|
||||
assertf(strcmp(cookie_get(cbuf, "host\tx\t/path/to", 2), "/path/to") == 0);
|
||||
assertf(strcmp(cookie_get(cbuf, "a\t\tc", 1), "") == 0); // empty field
|
||||
assertf(strcmp(cookie_get(cbuf, "a\tb\tc", 9), "") == 0); // beyond last
|
||||
}
|
||||
}
|
||||
|
||||
/* Self-tests for the htssafe.h bounded string ops (driven by httrack -#8).
|
||||
|
||||
62
tests/02_update-cache.test
Executable file
62
tests/02_update-cache.test
Executable file
@@ -0,0 +1,62 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
# Update path: re-mirroring a site reads the cache (cache_readex) to decide what
|
||||
# is up to date -- a path the one-shot crawl tests never exercise. Offline
|
||||
# (file://), so it always runs.
|
||||
#
|
||||
# 1. mirror, then re-mirror unchanged -> the cache-read pass must complete clean
|
||||
# (guards against a crash/abort/error in cache_readex).
|
||||
# 2. change a source file, re-mirror -> the update must pick up the new content
|
||||
# (guards the update decision that reads the cached metadata).
|
||||
|
||||
set -eu
|
||||
|
||||
site=$(mktemp -d)
|
||||
out=$(mktemp -d)
|
||||
trap 'rm -rf "$site" "$out"' EXIT
|
||||
|
||||
cat >"$site/index.html" <<EOF
|
||||
<a href="a.html">a</a> <a href="sub/b.html">b</a>
|
||||
EOF
|
||||
echo 'OLDCONTENT' >"$site/a.html"
|
||||
mkdir -p "$site/sub"
|
||||
echo '<p>bbb</p>' >"$site/sub/b.html"
|
||||
|
||||
url="file://$site/index.html"
|
||||
|
||||
# count Error: lines in the log (grep -c exits 1 on zero matches: guard it)
|
||||
errors() { grep -ciE '^[0-9:]*[[:space:]]Error:' "$out/hts-log.txt" || true; }
|
||||
|
||||
# 1. fresh mirror writes the cache
|
||||
httrack "$url" -O "$out" -q -%v0 -r3 >/dev/null 2>&1
|
||||
test -e "$out/hts-cache/new.zip" || {
|
||||
echo "no cache was written" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
# 2. re-mirror unchanged: the update reads the cache and must complete cleanly
|
||||
httrack "$url" -O "$out" -q -%v0 -r3 >/dev/null 2>&1
|
||||
test "$(errors)" = 0 || {
|
||||
echo "update (unchanged) reported errors" >&2
|
||||
exit 1
|
||||
}
|
||||
for suffix in a.html sub/b.html; do
|
||||
find "$out" -path "*/$suffix" | grep -q . || {
|
||||
echo "missing $suffix after update" >&2
|
||||
exit 1
|
||||
}
|
||||
done
|
||||
|
||||
# 3. change a source file: the update must pick up the new content
|
||||
sleep 1
|
||||
echo 'NEWCONTENT' >"$site/a.html"
|
||||
httrack "$url" -O "$out" -q -%v0 -r3 >/dev/null 2>&1
|
||||
test "$(errors)" = 0 || {
|
||||
echo "update (changed) reported errors" >&2
|
||||
exit 1
|
||||
}
|
||||
grep -q NEWCONTENT "$(find "$out" -path '*/a.html')" || {
|
||||
echo "update did not pick up the changed source" >&2
|
||||
exit 1
|
||||
}
|
||||
@@ -22,6 +22,7 @@ TESTS = \
|
||||
01_engine-simplify.test \
|
||||
01_engine-strsafe.test \
|
||||
02_manpage-regen.test \
|
||||
02_update-cache.test \
|
||||
10_crawl-simple.test \
|
||||
11_crawl-cookies.test \
|
||||
11_crawl-idna.test \
|
||||
|
||||
Reference in New Issue
Block a user