Compare commits

..

3 Commits

Author SHA1 Message Date
Xavier Roche
0bea390973 Add HTTRACK_DEBUG_RESOLVE and a deterministic connect-fallback test
Exercising the connect fallback needs a host that resolves to a dead address
first and a live one next, deterministically and offline. A true SYN
black-hole can't be simulated without root, but a refused address can.

HTTRACK_DEBUG_RESOLVE="host:ip[,ip...]" pins a host's resolution to a fixed
address list (curl --resolve style), reusing the PR1 resolver seam: an
addrinfo backend that synthesizes the listed addresses for the named host and
delegates other hosts to libc (copying into its own allocations so one
freeaddrinfo frees both). It is a debug/test hook, inert unless the env var is
set, and IPv6-build-only like the rest of the resolver list.

The new local crawl test binds the server to 127.0.0.1 and resolves a host to
127.0.0.2 (refused) then 127.0.0.1: the mirror only succeeds via the fallback.
A V6_SUPPORT substitution (mirroring HTTPS_SUPPORT) lets it skip on non-IPv6
builds.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Xavier Roche <roche@httrack.com>
2026-06-22 20:43:51 +02:00
Xavier Roche
67af1c2f0b Fall back to the next address when a connect fails or stalls
A slot connected to a single resolved address and waited the full slot
timeout (default 120s) if that address was dead -- a blackholed IPv6 on a
dual-stack host would stall the whole mirror. With the cache now holding the
full address list, retry the next address instead of failing.

In back_wait, a connecting slot probes the resolved address count once, then:
on a refused/failed connect (a new SO_ERROR check at connect completion, since
a failed non-blocking connect is reported writable too) it falls back
immediately; on a stalled connect it falls back after a short per-candidate
deadline (min(timeout, 10s)) rather than the full timeout. The last candidate
keeps the full timeout, so single-address hosts are unchanged. Per-slot state
(current index, count, connect start) lives in struct_back, parallel to the
slot array -- no htsblk/lien_back layout change, so the ABI is untouched.

back_connect_fallback_due() (the deadline decision) and newhttp_addr()'s
address selection are unit-tested through the DNS mock.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Xavier Roche <roche@httrack.com>
2026-06-22 20:37:28 +02:00
Xavier Roche
542d6a56b5 Resolve hosts to multiple addresses and cache the full list
The DNS cache kept a single address per host and the resolver copied only
the head of getaddrinfo's result, discarding the rest. That leaves no
fallback when the chosen address is unreachable (e.g. a blackholed IPv6 on a
dual-stack host) -- the root of the "stuck on connect" stalls.

Widen t_dnscache to hold up to HTS_MAXADDRNUM addresses in resolver order and
walk ai_next when resolving. New hts_dns_resolve_all() exposes the list;
hts_dns_resolve2() still returns the first address, so existing callers are
unchanged. newhttp_addr() connects to a chosen candidate index (newhttp() is
the index-0 wrapper), for the connect-fallback path added next.

No ABI change: t_dnscache is engine-internal (httrackp holds only a pointer;
no plugin reads its fields) and the htsblk/lien_back layout is untouched.

The DNS self-test now covers the list path: count, resolver order, the
family filter, and clamping past HTS_MAXADDRNUM.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Xavier Roche <roche@httrack.com>
2026-06-22 19:19:48 +02:00
3 changed files with 67 additions and 47 deletions

View File

@@ -4766,51 +4766,64 @@ int hts_read(htsblk * r, char *buff, int size) {
// -- Gestion cache DNS --
// 'RX98
// Free a DNS cache record (coucal value handler).
static void hts_cache_value_free(coucal_opaque arg, coucal_value value) {
void *record = value.ptr;
(void) arg;
freet(record);
}
// opt's DNS cache hashtable, created on first use. Records (t_dnscache*) are
// owned by the table and freed by hts_cache_value_free on coucal_delete.
coucal hts_cache(httrackp *opt) {
// 'capsule' contenant uniquement le cache
t_dnscache *hts_cache(httrackp * opt) {
assertf(opt != NULL);
if (opt->state.dns_cache == NULL) {
coucal cache = coucal_new(0);
coucal_set_name(cache, "dns_cache");
coucal_value_set_value_handler(cache, hts_cache_value_free, NULL);
opt->state.dns_cache = cache;
opt->state.dns_cache = (t_dnscache *) malloct(sizeof(t_dnscache));
memset(opt->state.dns_cache, 0, sizeof(t_dnscache));
}
assertf(opt->state.dns_cache != NULL);
/* first entry is NULL */
assertf(opt->state.dns_cache->iadr == NULL);
return opt->state.dns_cache;
}
// MUST BE LOCKED (coucal is not internally serialized vs FTP/web threads)
// Free DNS cache.
void hts_cache_free(t_dnscache *const root) {
if (root != NULL) {
t_dnscache *cache;
for(cache = root; cache != NULL; ) {
t_dnscache *const next = cache->next;
cache->next = NULL;
freet(cache);
cache = next;
}
}
}
// lock le cache dns pour tout opération d'ajout
// plus prudent quand plusieurs threads peuvent écrire dedans..
// -1: status? 0: libérer 1:locker
// MUST BE LOCKED
// Look up iadr in the DNS cache, filling out[0..min(count,max)-1].
// Returns: -1 not yet tested; 0 negative-cached (not in DNS); >0 address count.
static int hts_ghbn_all(coucal cache, const char *const iadr,
static int hts_ghbn_all(const t_dnscache *cache, const char *const iadr,
SOCaddr *const out, const int max) {
void *ptr;
assertf(out != NULL);
assertf(iadr != NULL);
if (*iadr == '\0') {
return -1;
}
if (coucal_read_pvoid(cache, iadr, &ptr)) { // ok trouvé
const t_dnscache *const record = (const t_dnscache *) ptr;
int i;
/* first entry is empty */
if (cache->iadr == NULL) {
cache = cache->next;
}
for(; cache != NULL; cache = cache->next) {
assertf(cache != NULL);
assertf(cache->iadr != NULL);
assertf(cache->iadr == (const char*) cache + sizeof(t_dnscache));
if (strcmp(cache->iadr, iadr) == 0) { // ok trouvé
int i;
assertf(record->host_count <= HTS_MAXADDRNUM);
for (i = 0; i < record->host_count && i < max; i++) {
assertf(record->host_length[i] <= sizeof(record->host_addr[i]));
SOCaddr_copyaddr2(out[i], record->host_addr[i], record->host_length[i]);
assertf(cache->host_count <= HTS_MAXADDRNUM);
for (i = 0; i < cache->host_count && i < max; i++) {
assertf(cache->host_length[i] <= sizeof(cache->host_addr[i]));
SOCaddr_copyaddr2(out[i], cache->host_addr[i], cache->host_length[i]);
}
return cache->host_count;
}
return record->host_count;
}
return -1;
}
@@ -5075,7 +5088,7 @@ static int hts_dns_resolve_list_(httrackp *opt, const char *_iadr,
SOCaddr *const out, const int max,
const char **error) {
char BIGSTK iadr[HTS_URLMAXSIZE * 2];
coucal cache = hts_cache(opt); // le cache dns
t_dnscache *cache = hts_cache(opt); // adresse du cache
int count;
assertf(opt != NULL);
@@ -5096,10 +5109,13 @@ static int hts_dns_resolve_list_(httrackp *opt, const char *_iadr,
if (count >= 0) { // cache hit (0 == negative-cached)
return count;
} else { // non présent dans le cache dns, tester
const size_t iadr_len = strlen(iadr) + 1;
SOCaddr resolved[HTS_MAXADDRNUM];
t_dnscache *record;
int i;
// find queue
for(; cache->next != NULL; cache = cache->next) ;
#if DEBUGDNS
printf("resolving (not cached) %s\n", iadr);
#endif
@@ -5110,18 +5126,22 @@ static int hts_dns_resolve_list_(httrackp *opt, const char *_iadr,
DEBUG_W("gethostbyname done\n");
#endif
/* attempt to store new entry (coucal owns it and dups the host key) */
record = malloct(sizeof(t_dnscache));
if (record != NULL) {
memset(record, 0, sizeof(*record));
record->host_count = count;
/* attempt to store new entry */
cache->next = malloct(sizeof(t_dnscache) + iadr_len);
if (cache->next != NULL) {
t_dnscache *const next = cache->next;
char *const block = (char*) cache->next;
char *const str = block + sizeof(t_dnscache);
memcpy(str, iadr, iadr_len);
next->iadr = str;
next->host_count = count;
for (i = 0; i < count; i++) {
record->host_length[i] = SOCaddr_size(resolved[i]);
assertf(record->host_length[i] <= sizeof(record->host_addr[i]));
memcpy(record->host_addr[i], &SOCaddr_sockaddr(resolved[i]),
record->host_length[i]);
next->host_length[i] = SOCaddr_size(resolved[i]);
assertf(next->host_length[i] <= sizeof(next->host_addr[i]));
memcpy(next->host_addr[i], &SOCaddr_sockaddr(resolved[i]),
next->host_length[i]);
}
coucal_add_pvoid(cache, iadr, record);
next->next = NULL;
}
/* copy result to caller (cache store may have failed; result still valid)
@@ -5992,14 +6012,14 @@ HTSEXT_API void hts_free_opt(httrackp * opt) {
/* Cache */
if (opt->state.dns_cache != NULL) {
coucal root;
t_dnscache *root;
hts_mutexlock(&opt->state.lock);
root = opt->state.dns_cache;
opt->state.dns_cache = NULL;
hts_mutexrelease(&opt->state.lock);
coucal_delete(&root); // frees records via hts_cache_value_free
hts_cache_free(root);
}
/* Cancel chain */

View File

@@ -147,8 +147,9 @@ struct OLD_htsblk {
#define HTS_DEF_FWSTRUCT_t_dnscache
typedef struct t_dnscache t_dnscache;
#endif
// One DNS cache record, stored as a coucal value keyed by hostname.
struct t_dnscache {
struct t_dnscache *next;
const char *iadr;
// resolved addresses, in resolver (RFC 6724) order; host_count==0 means the
// name does not resolve (negative cache). host_count<=HTS_MAXADDRNUM.
int host_count;
@@ -244,9 +245,8 @@ HTSEXT_API int check_hostname_dns(const char *const hostname);
int ftp_available(void);
#if HTS_DNSCACHE
/* Return opt's DNS cache hashtable (hostname -> t_dnscache record), creating it
on first use. Records are owned by the table and freed on coucal_delete. */
coucal hts_cache(httrackp *opt);
void hts_cache_free(t_dnscache *const cache);
t_dnscache *hts_cache(httrackp * opt);
#endif
// outils divers

View File

@@ -241,7 +241,7 @@ struct htsoptstate {
char *userhttptype;
int verif_backblue_done; /**< backblue.gif/fade.gif already emitted */
int verif_external_status;
coucal dns_cache; /**< DNS resolution cache: hostname -> t_dnscache record */
t_dnscache *dns_cache; /**< DNS resolution cache */
int dns_cache_nthreads; /**< number of in-flight DNS resolver threads */
/* HTML parsing state */
char _hts_errmsg[HTS_CDLMAXSIZE + 256]; /**< last engine error message */