Compare commits

..

1 Commits

Author SHA1 Message Date
Xavier Roche
1ba5ed461d Silence coucal hashtable stats on the default log handler
coucal_delete() logs a per-table stats summary at info level. For tables
without their own handler (webhttrack's NewLangStr/NewLangStrKeys), these
went through default_coucal_loghandler, which printed every level, so
plain `webhttrack` startup dumped two "hashtable ... summary:" lines to
the console.

Drop info-and-below messages there unless debugging is on (hts_dgb_init,
i.e. HTS_LOG / hts_debug); warnings and critical errors still always
print.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Xavier Roche <roche@httrack.com>
2026-06-22 09:00:35 +02:00
14 changed files with 125 additions and 1053 deletions

View File

@@ -215,12 +215,9 @@ AC_SUBST(OPENSSL_LIBS)
fi
### Support IPv6
V6_SUPPORT=no
AC_CHECK_LIB(c, getaddrinfo, [V6_FLAG="-DINET6"
V6_SUPPORT=yes
AC_DEFINE(HTS_INET6, 1, [Check for IPv6])], AC_MSG_WARN([*** IPv6 not found IPv6 compatibility disabled]))
AC_SUBST(V6_FLAG)
AC_SUBST(V6_SUPPORT)
### Check for LFS
AC_CHECK_LIB(c, fopen64, [LFS_FLAG="-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE"

View File

@@ -56,7 +56,7 @@ whttrackrundir = $(bindir)
whttrackrun_SCRIPTS = webhttrack
libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \
htscache_selftest.c htsdns_selftest.c \
htscache_selftest.c \
htscatchurl.c htsfilters.c htsftp.c htshash.c coucal/coucal.c \
htshelp.c htslib.c htscoremain.c \
htsname.c htsrobots.c htstools.c htswizard.c \
@@ -66,7 +66,7 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \
md5.c \
minizip/ioapi.c minizip/mztools.c minizip/unzip.c minizip/zip.c \
hts-indextmpl.h htsalias.h htsback.h htsbase.h htssafe.h \
htsbasenet.h htsbauth.h htscache.h htscache_selftest.h htsdns_selftest.h htscatchurl.h \
htsbasenet.h htsbauth.h htscache.h htscache_selftest.h htscatchurl.h \
htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \
htsfilters.h htsftp.h htsglobal.h htshash.h coucal/coucal.h \
htshelp.h htsindex.h htslib.h htsmd5.h \

View File

@@ -73,8 +73,6 @@ struct_back *back_new(httrackp *opt, int back_max) {
sback->count = back_max;
sback->lnk = (lien_back *) calloct((back_max + 1), sizeof(lien_back));
sback->connect_fallback = (hts_connect_fallback *) calloct(
(back_max + 1), sizeof(hts_connect_fallback));
sback->ready = coucal_new(0);
hts_set_hash_handler(sback->ready, opt);
coucal_set_name(sback->ready, "back_new");
@@ -85,7 +83,6 @@ struct_back *back_new(httrackp *opt, int back_max) {
sback->lnk[i].r.location = sback->lnk[i].location_buffer;
sback->lnk[i].status = STATUS_FREE;
sback->lnk[i].r.soc = INVALID_SOCKET;
sback->connect_fallback[i].addr_count = -1; // not yet probed
}
return sback;
}
@@ -96,7 +93,6 @@ void back_free(struct_back ** sback) {
freet((*sback)->lnk);
(*sback)->lnk = NULL;
}
freet((*sback)->connect_fallback);
if ((*sback)->ready != NULL) {
coucal_delete(&(*sback)->ready);
(*sback)->ready_size_bytes = 0;
@@ -106,72 +102,6 @@ void back_free(struct_back ** sback) {
}
}
/* Cap, in seconds, on how long one connect candidate may stay pending while
   another resolved address is still available. It replaces the full slot
   timeout for every candidate except the last one, which keeps the full
   timeout. */
#define HTS_CONNECT_FALLBACK_TIMEOUT 10

/* Tell whether a connecting slot should abandon its current address.
   addr_index: 0-based index of the candidate being connected.
   addr_count: number of resolved candidates.
   elapsed:    seconds spent on the current candidate.
   timeout:    slot timeout in seconds; <= 0 means no timeout management.
   Returns 1 when a further candidate exists and elapsed has reached
   min(timeout, HTS_CONNECT_FALLBACK_TIMEOUT), 0 otherwise. */
int back_connect_fallback_due(int addr_index, int addr_count, int elapsed,
                              int timeout) {
  const int has_next_candidate = addr_index + 1 < addr_count;

  // last (or only) candidate, or timeouts disabled: never force a fallback
  if (!has_next_candidate || timeout <= 0) {
    return 0;
  }
  // a slot timeout shorter than the cap becomes the deadline
  if (timeout < HTS_CONNECT_FALLBACK_TIMEOUT) {
    return elapsed >= timeout;
  }
  return elapsed >= HTS_CONNECT_FALLBACK_TIMEOUT;
}
/* Read the pending connect result (SO_ERROR) of a non-blocking socket that
   select() reported ready. A failed connect is reported writable as well, so
   this is how success is told from failure without blocking.
   Returns 0 when connected, the connect errno (> 0) when the connect failed,
   or -1 when getsockopt() itself failed. */
static int connect_socket_error(T_SOC soc) {
  int pending_error = 0;
  socklen_t optlen = (socklen_t) sizeof(pending_error);
  const int rc =
      getsockopt(soc, SOL_SOCKET, SO_ERROR, (char *) &pending_error, &optlen);

  return (rc == 0) ? pending_error : -1;
}
/* Move connecting slot i on to its next resolved address.
   The current socket, if any, is closed and a non-blocking connect to the
   following candidate is started; on success the slot stays in
   STATUS_CONNECTING with its fallback bookkeeping and timeout refreshed.
   Returns 1 when a new connect was started, 0 when no candidate remains or
   the new socket could not be opened (the caller then fails the slot; the
   old socket has already been closed in that second case). */
static int back_connect_next(httrackp *opt, struct_back *sback, int i) {
  hts_connect_fallback *const fallback = &sback->connect_fallback[i];
  lien_back *const slot = &sback->lnk[i];
  const int candidate = fallback->addr_index + 1;
  T_SOC new_soc;

  // nothing left to try
  if (candidate >= fallback->addr_count) {
    return 0;
  }

  // drop the socket of the candidate being abandoned
  if (slot->r.soc != INVALID_SOCKET) {
    deletehttp(&slot->r);
    slot->r.soc = INVALID_SOCKET;
  }

  new_soc = newhttp_addr(opt, slot->url_adr, &slot->r, -1, 0, candidate, NULL);
  if (new_soc == INVALID_SOCKET) {
    return 0;
  }

  // record the new attempt and restart the timeout clock
  slot->r.soc = new_soc;
  fallback->addr_index = candidate;
  fallback->connect_start = time_local();
  if (slot->timeout > 0) {
    slot->timeout_refresh = fallback->connect_start;
  }
  slot->status = STATUS_CONNECTING;
  hts_log_print(opt, LOG_DEBUG,
                "connect failed, trying next address (%d/%d) for %s",
                candidate + 1, fallback->addr_count, slot->url_adr);
  return 1;
}
void back_delete_all(httrackp * opt, cache_back * cache, struct_back * sback) {
if (sback != NULL) {
int i;
@@ -1981,11 +1911,8 @@ int back_add(struct_back * sback, httrackp * opt, cache_back * cache, const char
// ouvrir liaison, envoyer requète
// ne pas traiter ou recevoir l'en tête immédiatement
hts_init_htsblk(&back[p].r);
// memset(&(back[p].r), 0, sizeof(htsblk));
//memset(&(back[p].r), 0, sizeof(htsblk));
back[p].r.location = back[p].location_buffer;
// fresh connect: address list not yet probed, start at the first
sback->connect_fallback[p].addr_index = 0;
sback->connect_fallback[p].addr_count = -1;
// recopier proxy
if ((back[p].r.req.proxy.active = opt->proxy.active)) {
if (StringBuff(opt->proxy.bindhost) != NULL)
@@ -2442,25 +2369,21 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
// en cas de gestion du connect préemptif
#if HTS_XCONN
if (back[i].status == STATUS_CONNECTING) { // connexion
// a connecting slot always carries a live socket; guard anyway so a
// stray INVALID_SOCKET can never reach FD_SET (mirrors the recv branch)
if (back[i].r.soc != INVALID_SOCKET) {
do_wait = 1;
do_wait = 1;
// noter socket write
FD_SET(back[i].r.soc, &fds_c);
// noter socket write
FD_SET(back[i].r.soc, &fds_c);
// noter socket erreur
FD_SET(back[i].r.soc, &fds_e);
// noter socket erreur
FD_SET(back[i].r.soc, &fds_e);
// calculer max
if (max_c) {
max_c = 0;
nfds = back[i].r.soc;
} else if (back[i].r.soc > nfds) {
// ID socket la plus élevée
nfds = back[i].r.soc;
}
// calculer max
if (max_c) {
max_c = 0;
nfds = back[i].r.soc;
} else if (back[i].r.soc > nfds) {
// ID socket la plus élevée
nfds = back[i].r.soc;
}
} else
@@ -2594,20 +2517,8 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
}
// ---- FLAG WRITE MIS A UN?: POUR LE CONNECT
if (back[i].status == STATUS_CONNECTING) { // attendre connect
hts_connect_fallback *const cf = &sback->connect_fallback[i];
int dispo = 0;
// probe the resolved address list once per fresh connect (cache hit:
// the host was resolved when this connect was opened)
if (cf->addr_count < 0 && back[i].r.soc != INVALID_SOCKET &&
!back[i].r.is_file) {
SOCaddr scratch[HTS_MAXADDRNUM];
cf->addr_count = hts_dns_resolve_all(opt, back[i].url_adr, scratch,
HTS_MAXADDRNUM, NULL);
cf->connect_start = time_local();
}
// vérifier l'existance de timeout-check
if (!gestion_timeout)
if (back[i].timeout > 0)
@@ -2615,20 +2526,7 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
// connecté?
dispo = FD_ISSET(back[i].r.soc, &fds_c);
if (dispo) { // socket ready: connect() finished (ok or failed)
// a refused/failed connect is reported writable too; probe SO_ERROR
// and, on failure, fall back to the next address (or fail the slot)
if (connect_socket_error(back[i].r.soc) != 0) {
if (!back_connect_next(opt, sback, i)) {
deletehttp(&back[i].r);
back[i].r.soc = INVALID_SOCKET;
back[i].r.statuscode = STATUSCODE_CONNERROR;
strcpybuff(back[i].r.msg, "Connect Error");
back[i].status = STATUS_READY;
back_set_finished(sback, i);
}
continue; // reconnected (stay connecting) or failed
}
if (dispo) { // ok connected!!
busy_state = 1;
#if HTS_USEOPENSSL
@@ -3986,29 +3884,6 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
if (back[i].status > 0) { // réception/connexion/..
if (back[i].timeout > 0) {
// a stuck connect with a fallback address: retry the next one well
// before the full timeout (dead IPv6 on a dual-stack host, ...)
if (back[i].status == STATUS_CONNECTING) {
const hts_connect_fallback *const cf =
&sback->connect_fallback[i];
if (back_connect_fallback_due(cf->addr_index, cf->addr_count,
(int) (act - cf->connect_start),
back[i].timeout)) {
if (back_connect_next(opt, sback, i)) {
continue; // reconnected to the next candidate
}
// fallback was due but no socket could be opened
// (back_connect_next closed the dead one): stop now rather than
// spin on an invalid fd
back[i].r.soc = INVALID_SOCKET;
back[i].r.statuscode = STATUSCODE_CONNERROR;
strcpybuff(back[i].r.msg, "Connect Error");
back[i].status = STATUS_READY;
back_set_finished(sback, i);
continue;
}
}
//printf("time check %d\n",((int) (act-back[i].timeout_refresh))-back[i].timeout);
if (((int) (act - back[i].timeout_refresh)) >= back[i].timeout) {
hts_log_print(opt, LOG_DEBUG, "connection timed out for %s%s", back[i].url_adr,

View File

@@ -152,15 +152,6 @@ struct lien_adrfilsave {
char save[HTS_URLMAXSIZE * 2]; /**< local save path (with directory) */
};
/** Per-slot connect-fallback bookkeeping (parallel to struct_back.lnk).
Tracks which resolved address the slot is currently connecting to so a
stuck connect can be retried against the next one. One entry exists per
download slot and is indexed by the same slot number as struct_back.lnk. */
typedef struct hts_connect_fallback {
int addr_index; /**< candidate being connected (0-based) */
int addr_count; /**< resolved addresses; -1 = not yet probed */
TStamp connect_start; /**< when the current candidate's connect began (a time_local() value) */
} hts_connect_fallback;
/** The download-slot ring: the set of concurrent transfers in flight.
Allocated/owned by the engine; consumers (status callbacks, the loop)
read it but do not resize or free it. */
@@ -177,7 +168,6 @@ struct struct_back {
int count; /**< number of usable slots (back_max) */
coucal ready; /**< index of slots whose transfer completed */
LLint ready_size_bytes; /**< total bytes buffered in completed slots */
hts_connect_fallback *connect_fallback; /**< per-slot, count+1 entries */
};
typedef struct cache_back_zip_entry cache_back_zip_entry;
@@ -382,13 +372,6 @@ void check_rate(TStamp stat_timestart, int maxrate);
/* Backing (download-slot) scheduler. Operate on the back[] ring (struct_back).
Not thread-safe; call from the single crawl loop. */
/* True if a connecting slot should give up on the current address and try the
next one: a fallback address remains (addr_index+1 < addr_count) and the
candidate has been connecting for at least its deadline, min(timeout, an
internal cap). elapsed/timeout in seconds. Exposed for the -#D self-test. */
int back_connect_fallback_due(int addr_index, int addr_count, int elapsed,
int timeout);
/* How many new sockets may be opened now, honoring maxsoc and the maxconn rate
limit (>=0). _strict ignores reserved-slot headroom; the plain form leaves
room for naming tests and stops at 0 when the stack is nearly full. */

View File

@@ -47,7 +47,6 @@ Please visit our Website: http://www.httrack.com
#include "htscharset.h"
#include "htsencoding.h"
#include "htscache_selftest.h"
#include "htsdns_selftest.h"
#include "htsmd5.h"
#include <ctype.h>
@@ -2461,13 +2460,6 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
return 1;
}
break;
case 'D': { // DNS resolver/cache self-test (mock getaddrinfo)
const int err = dns_selftests(opt);
printf("dns-selftest: %s\n", err ? "FAIL" : "OK");
htsmain_free();
return err;
} break;
case 'C': // list cache files : httrack -#C '*spid*.gif' will attempt to find the matching file
{
int hasFilter = 0;

View File

@@ -1,359 +0,0 @@
/* ------------------------------------------------------------ */
/*
HTTrack Website Copier, Offline Browser for Windows and Unix
Copyright (C) 2026 Xavier Roche and other contributors
SPDX-License-Identifier: GPL-3.0-or-later
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Ethical use: we kindly ask that you NOT use this software to harvest email
addresses or to collect any other private information about people. Doing so
would dishonor our work and waste the many hours we have spent on it.
Please visit our Website: http://www.httrack.com
*/
/* ------------------------------------------------------------ */
/* File: htsdns_selftest.c subroutines: */
/* in-process self-test for the DNS resolver and cache */
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
/* Routes the resolver through a scripted getaddrinfo (hts_resolver_backend)
instead of the network, so resolution and the DNS cache are testable for a
fixed set of scenarios (IPv4/IPv6/dual-stack, errors, family filter,
cache reuse) with no live DNS. */
#define HTS_INTERNAL_BYTECODE
#include "htsdns_selftest.h"
#include "htscore.h"
#include "htslib.h"
#include "htsnet.h"
#include <stdio.h>
#include <string.h>
#if HTS_INET6 != 0
/* IPV6_resolver: 0 = v4+v6, 1 = v4 only, 2 = v6 only (htscoremain -@i). */
extern int IPV6_resolver;
/* One scripted host: either a getaddrinfo error, or an ordered address list.
   mock_addr is a single address of that list; mock_host is the host entry
   looked up by name by the mock backend. */
typedef struct mock_addr {
int family; /* AF_INET / AF_INET6 */
unsigned char addr[16]; /* 4 (v4) or 16 (v6) meaningful bytes */
} mock_addr;
typedef struct mock_host {
const char *name; /* host name matched exactly (strcmp) */
int gai_err; /* non-zero: getaddrinfo returns this */
int naddr; /* number of valid entries in addr[] */
mock_addr addr[6]; /* scripted answers, in the order they are returned */
int calls; /* times the backend resolved this host */
} mock_host;
/* Scripted DNS answers served by the mock backend. Fields are, in order:
   name, gai_err, naddr, addr[], calls. The calls counter is mutable state,
   which is why the table is not const. */
static mock_host mock_hosts[] = {
{"v4only.test", 0, 1, {{AF_INET, {1, 2, 3, 4}}}, 0},
{"v6only.test", 0, 1, {{AF_INET6, {0x20, 0x01, 0x0d, 0xb8, [15] = 1}}}, 0},
/* dual stack, IPv6 first (RFC 6724 order) then IPv4 */
{"dual.test",
0,
2,
{{AF_INET6, {0x20, 0x01, 0x0d, 0xb8, [15] = 2}}, {AF_INET, {5, 6, 7, 8}}},
0},
/* dual stack, IPv4 first: distinguishes "keep the first address" from
"prefer a family", so the selection contract is actually pinned. */
{"dual4.test",
0,
2,
{{AF_INET, {9, 10, 11, 12}},
{AF_INET6, {0x20, 0x01, 0x0d, 0xb8, [15] = 3}}},
0},
/* more addresses than HTS_MAXADDRNUM: the list must clamp to the cap. */
{"many.test",
0,
6,
{{AF_INET, {10, 0, 0, 1}},
{AF_INET, {10, 0, 0, 2}},
{AF_INET, {10, 0, 0, 3}},
{AF_INET, {10, 0, 0, 4}},
{AF_INET, {10, 0, 0, 5}},
{AF_INET, {10, 0, 0, 6}}},
0},
/* scripted resolution failure: getaddrinfo returns EAI_NONAME */
{"nodns.test", EAI_NONAME, 0, {{0}}, 0},
};
static mock_host *mock_find(const char *name) {
for (size_t i = 0; i < sizeof(mock_hosts) / sizeof(mock_hosts[0]); i++) {
if (strcmp(mock_hosts[i].name, name) == 0)
return &mock_hosts[i];
}
return NULL;
}
/* Zero the per-host backend hit counters of every scripted host. */
static void mock_reset_calls(void) {
  size_t remaining = sizeof(mock_hosts) / sizeof(mock_hosts[0]);

  while (remaining-- > 0) {
    mock_hosts[remaining].calls = 0;
  }
}
/* Allocate one addrinfo node for scripted address a. The node owns its
   sockaddr; both are released by mock_freeaddrinfo. Any family other than
   AF_INET is built as an IPv6 sockaddr. */
static struct addrinfo *mock_mkai(const mock_addr *a) {
  struct addrinfo *const node = calloct(1, sizeof(*node));

  node->ai_family = a->family;
  if (a->family != AF_INET) {
    struct sockaddr_in6 *const v6 = calloct(1, sizeof(*v6));

    v6->sin6_family = AF_INET6;
    memcpy(&v6->sin6_addr, a->addr, 16);
    node->ai_addr = (struct sockaddr *) v6;
    node->ai_addrlen = sizeof(*v6);
  } else {
    struct sockaddr_in *const v4 = calloct(1, sizeof(*v4));

    v4->sin_family = AF_INET;
    memcpy(&v4->sin_addr, a->addr, 4);
    node->ai_addr = (struct sockaddr *) v4;
    node->ai_addrlen = sizeof(*v4);
  }
  return node;
}
/* Scripted getaddrinfo() replacement.
   node:    host name looked up in mock_hosts.
   service: ignored.
   hints:   only ai_family is honored (PF_UNSPEC when hints is NULL).
   res:     receives the answer chain, or NULL on failure.
   Returns 0 on success, the host's scripted gai_err, or EAI_NONAME when the
   host is unknown or the family filter leaves no address. Every lookup of a
   known host bumps its calls counter. */
static int mock_getaddrinfo(const char *node, const char *service,
                            const struct addrinfo *hints,
                            struct addrinfo **res) {
  const int family = (hints == NULL) ? PF_UNSPEC : hints->ai_family;
  struct addrinfo *chain = NULL;
  struct addrinfo **link = &chain; /* where the next node gets attached */
  mock_host *host;
  int k;

  (void) service;
  *res = NULL;

  host = mock_find(node);
  if (host == NULL) {
    return EAI_NONAME;
  }
  host->calls++; /* a real backend hit; a cached host skips this */
  if (host->gai_err != 0) {
    return host->gai_err;
  }

  for (k = 0; k < host->naddr; k++) {
    /* honor the requested family (v4/v6 only) */
    if (family == PF_UNSPEC || family == host->addr[k].family) {
      *link = mock_mkai(&host->addr[k]);
      link = &(*link)->ai_next;
    }
  }

  if (chain == NULL) {
    return EAI_NONAME; /* filtered to empty, as the libc resolver does */
  }
  *res = chain;
  return 0;
}
/* Release a chain built by mock_getaddrinfo: each node and the sockaddr it
   owns. A NULL chain is a no-op. */
static void mock_freeaddrinfo(struct addrinfo *res) {
  struct addrinfo *following;

  for (; res != NULL; res = following) {
    following = res->ai_next; /* read the link before the node is freed */
    freet(res->ai_addr);
    freet(res);
  }
}
/* Resolver backend that routes lookups to the scripted table. */
static const hts_resolver_backend mock_backend = {mock_getaddrinfo,
mock_freeaddrinfo};
/* Number of CHECK() failures recorded so far. */
static int failures = 0;
/* Non-fatal assertion: when cond is false, count a failure and report the
   file, line and failed expression text on stderr, then keep going. */
#define CHECK(cond) \
do { \
if (!(cond)) { \
failures++; \
fprintf(stderr, "dns-selftest: FAIL at %s:%d: %s\n", __FILE__, __LINE__, \
#cond); \
} \
} while (0)
/* Resolve via the uncached entry point; return the address family, or
AF_UNSPEC if the host did not resolve. */
static int resolve_family_nocache(const char *host) {
SOCaddr addr;
const char *err = NULL;
if (hts_dns_resolve_nocache2(host, &addr, &err) == NULL)
return AF_UNSPEC;
return SOCaddr_sinfamily(addr);
}
/* Run the DNS resolver/cache self-test against the scripted backend.
   Installs mock_backend, runs the CHECK() scenarios below (each failure is
   counted and reported, none aborts the run), then restores the default
   backend by passing NULL to hts_dns_set_resolver_backend().
   opt: engine options handed to the cached resolver and to newhttp_addr().
   Returns the number of failed checks (0 == success). */
int dns_selftests(httrackp *opt) {
failures = 0;
hts_dns_set_resolver_backend(&mock_backend);
/* IPv4-only / IPv6-only hosts map to the right family. */
IPV6_resolver = 0;
CHECK(resolve_family_nocache("v4only.test") == AF_INET);
CHECK(resolve_family_nocache("v6only.test") == AF_INET6);
/* Dual-stack: the single-address API returns the *first* resolved address.
Both orderings pin selection by position, not a family preference. The
multi-address API (resolve_all, below) exposes the whole list. */
CHECK(resolve_family_nocache("dual.test") == AF_INET6); /* v6 listed first */
CHECK(resolve_family_nocache("dual4.test") == AF_INET); /* v4 listed first */
/* Unknown host does not resolve. */
CHECK(resolve_family_nocache("nodns.test") == AF_UNSPEC);
/* Family filter (-@i4 / -@i6) selects v4 / v6 out of the dual-stack host. */
IPV6_resolver = 1;
CHECK(resolve_family_nocache("dual.test") == AF_INET);
IPV6_resolver = 2;
CHECK(resolve_family_nocache("dual.test") == AF_INET6);
IPV6_resolver = 0;
/* Cached driver resolves a host once and reuses the *same* address. */
mock_reset_calls();
{
SOCaddr a1, a2;
char ip1[64], ip2[64];
const char *err = NULL;
CHECK(hts_dns_resolve2(opt, "v4only.test", &a1, &err) != NULL);
CHECK(hts_dns_resolve2(opt, "v4only.test", &a2, &err) != NULL);
CHECK(mock_find("v4only.test")->calls == 1);
/* the cache returns the right address, not merely a hit for the key */
SOCaddr_inetntoa(ip1, sizeof(ip1), a1);
SOCaddr_inetntoa(ip2, sizeof(ip2), a2);
CHECK(strcmp(ip1, "1.2.3.4") == 0);
CHECK(strcmp(ip1, ip2) == 0);
}
/* A negative result is cached too: a second lookup does not re-resolve. */
{
SOCaddr a1, a2;
const char *err = NULL;
CHECK(hts_dns_resolve2(opt, "nodns.test", &a1, &err) == NULL);
CHECK(hts_dns_resolve2(opt, "nodns.test", &a2, &err) == NULL);
CHECK(mock_find("nodns.test")->calls == 1); /* resolved once, then cached */
}
/* Multi-address resolution: count and order are the connect-fallback
contract. A dead first address is retried against the next, so both must be
exact. */
mock_reset_calls();
{
SOCaddr addrs[HTS_MAXADDRNUM];
char ip[64];
const char *err = NULL;
/* dual-stack, in resolver order: [0]=v6, [1]=v4 */
CHECK(hts_dns_resolve_all(opt, "dual.test", addrs, HTS_MAXADDRNUM, &err) ==
2);
CHECK(SOCaddr_sinfamily(addrs[0]) == AF_INET6);
CHECK(SOCaddr_sinfamily(addrs[1]) == AF_INET);
SOCaddr_inetntoa(ip, sizeof(ip), addrs[1]);
CHECK(strcmp(ip, "5.6.7.8") == 0);
CHECK(mock_find("dual.test")->calls ==
1); /* one backend hit for the list */
/* single-address host: count 1 */
CHECK(hts_dns_resolve_all(opt, "v4only.test", addrs, HTS_MAXADDRNUM,
&err) == 1);
SOCaddr_inetntoa(ip, sizeof(ip), addrs[0]);
CHECK(strcmp(ip, "1.2.3.4") == 0);
/* does-not-resolve: count 0 (negative), no addresses */
CHECK(hts_dns_resolve_all(opt, "nodns.test", addrs, HTS_MAXADDRNUM, &err) ==
0);
/* more than the cap: the kept list is clamped to HTS_MAXADDRNUM, keeping
the FIRST addresses in resolver order (not some other window) */
CHECK(hts_dns_resolve_all(opt, "many.test", addrs, HTS_MAXADDRNUM, &err) ==
HTS_MAXADDRNUM);
SOCaddr_inetntoa(ip, sizeof(ip), addrs[0]);
CHECK(strcmp(ip, "10.0.0.1") == 0);
SOCaddr_inetntoa(ip, sizeof(ip), addrs[HTS_MAXADDRNUM - 1]);
CHECK(strcmp(ip, "10.0.0.4") == 0);
/* family filter still applies through the list path */
IPV6_resolver = 1;
CHECK(hts_dns_resolve_all(opt, "dual4.test", addrs, HTS_MAXADDRNUM, &err) ==
1);
CHECK(SOCaddr_sinfamily(addrs[0]) == AF_INET);
IPV6_resolver = 0;
}
/* newhttp_addr() must connect to the addr_index-th address, not always the
first: this is what back_connect_next relies on to reach the fallback. */
{
htsblk r;
int count = -1;
T_SOC s;
hts_init_htsblk(&r);
s = newhttp_addr(opt, "dual.test", &r, 80, 0, 0, &count);
CHECK(count == 2);
CHECK(SOCaddr_sinfamily(r.address) == AF_INET6); /* index 0 = v6 */
if (s != INVALID_SOCKET)
deletesoc(s);
hts_init_htsblk(&r);
count = -1;
s = newhttp_addr(opt, "dual.test", &r, 80, 0, 1, &count);
CHECK(count == 2);
CHECK(SOCaddr_sinfamily(r.address) == AF_INET); /* index 1 = v4 */
if (s != INVALID_SOCKET)
deletesoc(s);
/* out-of-range index: no address selected (address stays unset) */
hts_init_htsblk(&r);
s = newhttp_addr(opt, "dual.test", &r, 80, 0, 2, NULL);
CHECK(s == INVALID_SOCKET);
if (s != INVALID_SOCKET)
deletesoc(s);
}
/* Connect-fallback decision (consumer of the multi-address list): when a
stuck connect should abandon the current address for the next one. */
{
/* no fallback for the last/only candidate, whatever the elapsed time */
CHECK(back_connect_fallback_due(0, 1, 9999, 120) == 0);
CHECK(back_connect_fallback_due(1, 2, 9999, 120) == 0);
CHECK(back_connect_fallback_due(3, 4, 9999, 120) == 0);
/* fallback available: wait the per-candidate deadline (cap 10s here) */
CHECK(back_connect_fallback_due(0, 2, 9, 120) == 0);
CHECK(back_connect_fallback_due(0, 2, 10, 120) == 1);
CHECK(back_connect_fallback_due(2, 4, 10, 120) == 1);
/* a shorter slot timeout shortens the deadline (min(timeout, cap)) */
CHECK(back_connect_fallback_due(0, 2, 4, 5) == 0);
CHECK(back_connect_fallback_due(0, 2, 5, 5) == 1);
/* no timeout management: never force a fallback */
CHECK(back_connect_fallback_due(0, 2, 9999, 0) == 0);
}
/* restore the default resolver backend before reporting */
hts_dns_set_resolver_backend(NULL);
return failures;
}
#else
/* Stub used when HTS_INET6 is 0: nothing is tested, opt is ignored, and the
   function always reports success (0 failed checks). */
int dns_selftests(httrackp *opt) {
(void) opt;
return 0; /* resolver seam only exists in the IPv6 build */
}
#endif

View File

@@ -1,51 +0,0 @@
/* ------------------------------------------------------------ */
/*
HTTrack Website Copier, Offline Browser for Windows and Unix
Copyright (C) 2026 Xavier Roche and other contributors
SPDX-License-Identifier: GPL-3.0-or-later
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Ethical use: we kindly ask that you NOT use this software to harvest email
addresses or to collect any other private information about people. Doing so
would dishonor our work and waste the many hours we have spent on it.
Please visit our Website: http://www.httrack.com
*/
/* ------------------------------------------------------------ */
/* File: htsdns_selftest.h */
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
#ifndef HTSDNS_SELFTEST_DEFH
#define HTSDNS_SELFTEST_DEFH
#ifdef HTS_INTERNAL_BYTECODE
#ifndef HTS_DEF_FWSTRUCT_httrackp
#define HTS_DEF_FWSTRUCT_httrackp
typedef struct httrackp httrackp;
#endif
/* Drive the DNS resolver and cache through a scripted (mock) getaddrinfo,
asserting address family, single-address selection, negative caching, the
IPv4/IPv6 family filter, and that a cached host is resolved only once.
Returns the number of failed checks (0 == success). */
int dns_selftests(httrackp *opt);
#endif
#endif

View File

@@ -408,10 +408,6 @@ typedef int T_SOC;
/* Buffer size for a printed network address (IPv4 or IPv6, NUL included). */
#define HTS_MAXADDRLEN 64
/* Max resolved addresses kept per host for connect fallback (dead IPv6 etc.).
*/
#define HTS_MAXADDRNUM 4
#ifdef _WIN32
#else
#define __cdecl

View File

@@ -2297,27 +2297,14 @@ htsblk http_test(httrackp * opt, const char *adr, const char *fil, char *loc) {
// peut ouvrir avec des connect() non bloquants: waitconnect=0/1
T_SOC newhttp(httrackp * opt, const char *_iadr, htsblk * retour, int port,
int waitconnect) {
return newhttp_addr(opt, _iadr, retour, port, waitconnect, 0, NULL);
}
T_SOC newhttp_addr(httrackp *opt, const char *_iadr, htsblk *retour, int port,
int waitconnect, int addr_index, int *addr_count) {
T_SOC soc; // descipteur de la socket
if (addr_count != NULL) {
*addr_count = 0;
}
if (strcmp(_iadr, "file://") != 0) { /* non fichier */
SOCaddr server;
SOCaddr addrs[HTS_MAXADDRNUM];
int naddr;
const char *error = "unknown error";
// tester un éventuel id:pass et virer id:pass@ si détecté
const char *const iadr = jump_identification_const(_iadr);
const char *resolve_host = iadr;
char BIGSTK iadr2[HTS_URLMAXSIZE * 2];
SOCaddr_clear(server);
@@ -2339,6 +2326,7 @@ T_SOC newhttp_addr(httrackp *opt, const char *_iadr, htsblk *retour, int port,
#endif
if (a != NULL) {
char BIGSTK iadr2[HTS_URLMAXSIZE * 2];
int i = -1;
iadr2[0] = '\0';
@@ -2349,19 +2337,18 @@ T_SOC newhttp_addr(httrackp *opt, const char *_iadr, htsblk *retour, int port,
// adresse véritable (sans :xx)
strncatbuff(iadr2, iadr, (int) (a - iadr));
resolve_host = iadr2;
}
}
// resolve the full address list and pick the requested candidate; the
// scheduler retries the next index when a connect fails (dead IPv6 etc.)
naddr =
hts_dns_resolve_all(opt, resolve_host, addrs, HTS_MAXADDRNUM, &error);
if (addr_count != NULL) {
*addr_count = naddr;
}
if (addr_index >= 0 && addr_index < naddr) {
SOCaddr_copy_SOCaddr(server, addrs[addr_index]);
// adresse sans le :xx
hts_dns_resolve2(opt, iadr2, &server, &error);
} else {
// adresse normale (port par défaut par la suite)
hts_dns_resolve2(opt, iadr, &server, &error);
}
} else { // port défini
hts_dns_resolve2(opt, iadr, &server, &error);
}
if (!SOCaddr_is_valid(server)) {
@@ -4797,14 +4784,14 @@ void hts_cache_free(t_dnscache *const root) {
// -1: status? 0: libérer 1:locker
// MUST BE LOCKED
// Look up iadr in the DNS cache, filling out[0..min(count,max)-1].
// Returns: -1 not yet tested; 0 negative-cached (not in DNS); >0 address count.
static int hts_ghbn_all(const t_dnscache *cache, const char *const iadr,
SOCaddr *const out, const int max) {
assertf(out != NULL);
// routine pour le cache - retour optionnel à donner à chaque fois
// NULL: nom non encore testé dans le cache
// si h_length==0 alors le nom n'existe pas dans le dns
static SOCaddr* hts_ghbn(const t_dnscache *cache, const char *const iadr, SOCaddr *const addr) {
assertf(addr != NULL);
assertf(iadr != NULL);
if (*iadr == '\0') {
return -1;
return NULL;
}
/* first entry is empty */
if (cache->iadr == NULL) {
@@ -4815,263 +4802,95 @@ static int hts_ghbn_all(const t_dnscache *cache, const char *const iadr,
assertf(cache->iadr != NULL);
assertf(cache->iadr == (const char*) cache + sizeof(t_dnscache));
if (strcmp(cache->iadr, iadr) == 0) { // ok trouvé
int i;
assertf(cache->host_count <= HTS_MAXADDRNUM);
for (i = 0; i < cache->host_count && i < max; i++) {
assertf(cache->host_length[i] <= sizeof(cache->host_addr[i]));
SOCaddr_copyaddr2(out[i], cache->host_addr[i], cache->host_length[i]);
}
return cache->host_count;
}
}
return -1;
}
#if HTS_INET6 != 0
/* Active resolver backend; defaults to the libc resolver. The self-test
reroutes it to script DNS answers in-process (see
hts_dns_set_resolver_backend). */
static const hts_resolver_backend hts_resolver_libc = {getaddrinfo,
freeaddrinfo};
static const hts_resolver_backend *hts_resolver = &hts_resolver_libc;
/* Select the resolver backend used for lookups.
   backend: the backend to install; NULL restores the libc resolver. */
void hts_dns_set_resolver_backend(const hts_resolver_backend *backend) {
  if (backend == NULL) {
    hts_resolver = &hts_resolver_libc;
  } else {
    hts_resolver = backend;
  }
}
/* Debug/test hook: HTTRACK_DEBUG_RESOLVE="host:ip[,ip...]" pins the resolution
of `host` to the listed addresses (curl --resolve style), so the connect
fallback can be exercised deterministically (a dead address first, a live one
next). Any other host resolves normally. Below: an addrinfo backend that owns
its chain (its own freeaddrinfo), so a synthesized and a delegated result
free the same way. */
/* Deep-copy an addrinfo chain into our own allocations.
   Copies ai_family, ai_socktype, ai_protocol, ai_addrlen and the sockaddr
   bytes of every node, preserving order; the remaining fields are left
   zeroed. Returns the head of the copy, or NULL for an empty chain. */
static struct addrinfo *resolver_dup_chain(const struct addrinfo *src) {
  struct addrinfo *copy = NULL;
  struct addrinfo **link = &copy; /* where the next node gets attached */

  while (src != NULL) {
    struct addrinfo *const node = calloct(1, sizeof(*node));

    node->ai_family = src->ai_family;
    node->ai_socktype = src->ai_socktype;
    node->ai_protocol = src->ai_protocol;
    node->ai_addrlen = src->ai_addrlen;
    node->ai_addr = malloct(src->ai_addrlen);
    memcpy(node->ai_addr, src->ai_addr, src->ai_addrlen);

    *link = node;
    link = &node->ai_next;
    src = src->ai_next;
  }
  return copy;
}
/* Build one addrinfo node from an IPv4/IPv6 literal.
   ip:          textual address; anything containing ':' is parsed as IPv6,
                everything else as IPv4.
   want_family: AF_INET, AF_INET6, or PF_UNSPEC for no filtering.
   Returns a node owning its sockaddr, or NULL when the literal is filtered
   out by want_family or does not parse. */
static struct addrinfo *resolver_make_ai(const char *ip, int want_family) {
  const int family = (strchr(ip, ':') != NULL) ? AF_INET6 : AF_INET;
  struct sockaddr_in6 v6;
  struct sockaddr_in v4;
  const void *sa;
  size_t sa_len;
  struct addrinfo *node;

  // family filter first: nothing is parsed for a rejected family
  if (want_family != PF_UNSPEC && want_family != family) {
    return NULL;
  }

  if (family == AF_INET6) {
    memset(&v6, 0, sizeof(v6));
    if (inet_pton(AF_INET6, ip, &v6.sin6_addr) != 1) {
      return NULL;
    }
    v6.sin6_family = AF_INET6;
    sa = &v6;
    sa_len = sizeof(v6);
  } else {
    memset(&v4, 0, sizeof(v4));
    if (inet_pton(AF_INET, ip, &v4.sin_addr) != 1) {
      return NULL;
    }
    v4.sin_family = AF_INET;
    sa = &v4;
    sa_len = sizeof(v4);
  }

  // wrap the parsed sockaddr in a heap-owned addrinfo node
  node = calloct(1, sizeof(*node));
  node->ai_family = family;
  node->ai_addrlen = (socklen_t) sa_len;
  node->ai_addr = malloct(sa_len);
  memcpy(node->ai_addr, sa, sa_len);
  return node;
}
/* Release an addrinfo chain: every node and the sockaddr it owns. A NULL
   chain is a no-op. */
static void override_freeaddrinfo(struct addrinfo *res) {
  struct addrinfo *following;

  for (; res != NULL; res = following) {
    following = res->ai_next; /* read the link before the node is freed */
    freet(res->ai_addr);
    freet(res);
  }
}
static int override_getaddrinfo(const char *node, const char *service,
const struct addrinfo *hints,
struct addrinfo **res) {
const char *const spec = getenv("HTTRACK_DEBUG_RESOLVE");
const int want = (hints != NULL) ? hints->ai_family : PF_UNSPEC;
const char *colon;
*res = NULL;
if (spec != NULL && node != NULL && (colon = strchr(spec, ':')) != NULL &&
(size_t) (colon - spec) == strlen(node) &&
strncmp(spec, node, colon - spec) == 0) {
struct addrinfo *head = NULL, *tail = NULL;
char buf[256];
char *p;
buf[0] = '\0';
strncatbuff(buf, colon + 1, sizeof(buf) - 1);
for (p = strtok(buf, ","); p != NULL; p = strtok(NULL, ",")) {
struct addrinfo *const ai = resolver_make_ai(p, want);
if (ai != NULL) {
if (head == NULL)
head = ai;
else
tail->ai_next = ai;
tail = ai;
if (cache->host_length != 0) { // entrée valide
assertf(cache->host_length <= sizeof(cache->host_addr));
SOCaddr_copyaddr2(*addr, cache->host_addr, cache->host_length);
return addr;
} else { // erreur dans le dns, déja vérifié
SOCaddr_clear(*addr);
return addr;
}
}
if (head == NULL)
return EAI_NONAME;
*res = head;
return 0;
}
return NULL;
}
/* not overridden: delegate to libc, copying into our owned format */
static SOCaddr* hts_dns_resolve_nocache2_(const char *const hostname,
SOCaddr *const addr,
const char **error) {
{
struct addrinfo *sys = NULL;
int gerr = getaddrinfo(node, service, hints, &sys);
if (gerr != 0)
return gerr;
*res = resolver_dup_chain(sys);
freeaddrinfo(sys);
return 0;
}
}
/* Backend installed when HTTRACK_DEBUG_RESOLVE is set: override_getaddrinfo
   paired with its matching override_freeaddrinfo. */
static const hts_resolver_backend hts_resolver_override = {
override_getaddrinfo, override_freeaddrinfo};
/* One-shot installation of the HTTRACK_DEBUG_RESOLVE override backend.
   Only the first call does any work. The override is installed only if the
   libc backend is still the active one (so a backend already set by the
   self-test is left alone) and the environment variable is present. */
static void hts_resolver_check_env(void) {
  static int checked = 0;

  if (checked)
    return;
  checked = 1;

  if (hts_resolver != &hts_resolver_libc)
    return;
  if (getenv("HTTRACK_DEBUG_RESOLVE") == NULL)
    return;
  hts_resolver = &hts_resolver_override;
}
#endif
// Resolve hostname into up to max addresses (resolver/RFC 6724 order), no
// cache. Returns the count copied into out[0..count-1]; 0 = does not resolve.
static int hts_dns_resolve_nocache_list_(const char *const hostname,
SOCaddr *const out, const int max,
const char **error) {
int count = 0;
#if HTS_INET6==0
/* IPv4 resolver */
struct hostent *const hp = gethostbyname(hostname);
/* IPv4 resolver */
struct hostent *const hp = gethostbyname(hostname);
if (hp != NULL) {
char **h;
for (h = hp->h_addr_list; count < max && h != NULL && *h != NULL; h++) {
SOCaddr_clear(out[count]);
SOCaddr_copyaddr2(out[count], *h, hp->h_length);
if (SOCaddr_is_valid(out[count]))
count++;
if (hp != NULL) {
SOCaddr_copyaddr2(addr, hp->h_addr_list[0], hp->h_length);
return SOCaddr_is_valid(addr) ? &addr : NULL;
} else {
SOCaddr_clear(*addr);
}
}
#else
/* IPv6 resolver */
struct addrinfo *res = NULL, *cur;
struct addrinfo hints;
int gerr;
/* IPv6 resolver */
struct addrinfo *res = NULL;
struct addrinfo hints;
int gerr;
hts_resolver_check_env();
memset(&hints, 0, sizeof(hints));
if (IPV6_resolver == 1) // V4 only (for bogus V6 entries)
hints.ai_family = PF_INET;
else if (IPV6_resolver == 2) // V6 only (for testing V6 only)
hints.ai_family = PF_INET6;
else // V4 + V6
hints.ai_family = PF_UNSPEC;
hints.ai_socktype = SOCK_STREAM;
hints.ai_protocol = IPPROTO_TCP;
if ((gerr = hts_resolver->getaddrinfo(hostname, NULL, &hints, &res)) == 0) {
for (cur = res; cur != NULL && count < max; cur = cur->ai_next) {
if (cur->ai_addr != NULL && cur->ai_addrlen != 0) {
SOCaddr_clear(out[count]);
SOCaddr_copyaddr2(out[count], cur->ai_addr, cur->ai_addrlen);
if (SOCaddr_is_valid(out[count]))
count++;
SOCaddr_clear(*addr);
memset(&hints, 0, sizeof(hints));
if (IPV6_resolver == 1) // V4 only (for bogus V6 entries)
hints.ai_family = PF_INET;
else if (IPV6_resolver == 2) // V6 only (for testing V6 only)
hints.ai_family = PF_INET6;
else // V4 + V6
hints.ai_family = PF_UNSPEC;
hints.ai_socktype = SOCK_STREAM;
hints.ai_protocol = IPPROTO_TCP;
if ( ( gerr = getaddrinfo(hostname, NULL, &hints, &res) ) == 0) {
if (res != NULL) {
if (res->ai_addr != NULL && res->ai_addrlen != 0) {
SOCaddr_copyaddr2(*addr, res->ai_addr, res->ai_addrlen);
}
}
} else {
if (error != NULL) {
*error = gai_strerror(gerr);
}
}
} else if (error != NULL) {
*error = gai_strerror(gerr);
}
if (res) {
hts_resolver->freeaddrinfo(res);
}
if (res) {
freeaddrinfo(res);
}
#endif
}
return count;
return SOCaddr_is_valid(*addr) ? addr : NULL;
}
// Strip [] around a literal IPv6 ([3ffe:b80:1234:1::1]) the resolver won't
// take, then resolve into a list. Returns the count.
static int hts_dns_resolve_nocache_list(const char *const hostname,
SOCaddr *const out, const int max,
const char **error) {
if (!strnotempty(hostname) || max <= 0) {
return 0;
HTSEXT_API SOCaddr* hts_dns_resolve_nocache2(const char *const hostname,
SOCaddr *const addr, const char **error) {
/* Protection */
if (!strnotempty(hostname)) {
return NULL;
}
/*
Strip [] if any : [3ffe:b80:1234:1::1]
The resolver doesn't seem to handle IP6 addresses in brackets
*/
if ((hostname[0] == '[') && (hostname[strlen(hostname) - 1] == ']')) {
SOCaddr *ret;
size_t size = strlen(hostname);
char *copy = malloct(size + 1);
int count;
assertf(copy != NULL);
copy[0] = '\0';
strncat(copy, hostname + 1, size - 2);
count = hts_dns_resolve_nocache_list_(copy, out, max, error);
ret = hts_dns_resolve_nocache2_(copy, addr, error);
freet(copy);
return count;
return ret;
} else {
return hts_dns_resolve_nocache_list_(hostname, out, max, error);
return hts_dns_resolve_nocache2_(hostname, addr, error);
}
}
/* Uncached resolution of hostname into a single address.
   addr is cleared first, then at most one address is requested from
   hts_dns_resolve_nocache_list(). Returns addr when an address was stored and
   is valid, NULL otherwise; error is passed through to the list resolver. */
HTSEXT_API SOCaddr *hts_dns_resolve_nocache2(const char *const hostname,
                                             SOCaddr *const addr,
                                             const char **error) {
  int found;

  SOCaddr_clear(*addr);
  found = hts_dns_resolve_nocache_list(hostname, addr, 1, error);
  if (found <= 0)
    return NULL;
  if (!SOCaddr_is_valid(*addr))
    return NULL;
  return addr;
}
/* Convenience wrapper around hts_dns_resolve_nocache2() for callers that do
   not need the resolver error string: passes NULL as the error argument and
   returns that function's result unchanged. */
HTSEXT_API SOCaddr* hts_dns_resolve_nocache(const char *const hostname, SOCaddr *const addr) {
return hts_dns_resolve_nocache2(hostname, addr, NULL);
}
@@ -5082,18 +4901,16 @@ HTSEXT_API int check_hostname_dns(const char *const hostname) {
}
// Needs locking
// Internal DNS cache. Fill out[0..count-1] with up to max addresses for _iadr,
// resolving (and caching the full list) on a miss. Returns the count.
static int hts_dns_resolve_list_(httrackp *opt, const char *_iadr,
SOCaddr *const out, const int max,
const char **error) {
// cache dns interne à HTS // ** FREE A FAIRE sur la chaine
static SOCaddr* hts_dns_resolve_(httrackp * opt, const char *_iadr,
SOCaddr *const addr, const char **error) {
char BIGSTK iadr[HTS_URLMAXSIZE * 2];
t_dnscache *cache = hts_cache(opt); // adresse du cache
int count;
SOCaddr *sa;
assertf(opt != NULL);
assertf(_iadr != NULL);
assertf(out != NULL);
assertf(addr != NULL);
strcpybuff(iadr, jump_identification_const(_iadr));
// couper éventuel :
@@ -5105,13 +4922,11 @@ static int hts_dns_resolve_list_(httrackp *opt, const char *_iadr,
}
/* get IP from the dns cache */
count = hts_ghbn_all(cache, iadr, out, max);
if (count >= 0) { // cache hit (0 == negative-cached)
return count;
} else { // non présent dans le cache dns, tester
sa = hts_ghbn(cache, iadr, addr);
if (sa != NULL) {
return SOCaddr_is_valid(*sa) ? sa : NULL;
} else { // non présent dans le cache dns, tester
const size_t iadr_len = strlen(iadr) + 1;
SOCaddr resolved[HTS_MAXADDRNUM];
int i;
// find queue
for(; cache->next != NULL; cache = cache->next) ;
@@ -5120,7 +4935,7 @@ static int hts_dns_resolve_list_(httrackp *opt, const char *_iadr,
printf("resolving (not cached) %s\n", iadr);
#endif
count = hts_dns_resolve_nocache_list(iadr, resolved, HTS_MAXADDRNUM, error);
sa = hts_dns_resolve_nocache2(iadr, addr, error); // calculer IP host
#if HTS_WIDE_DEBUG
DEBUG_W("gethostbyname done\n");
@@ -5134,45 +4949,28 @@ static int hts_dns_resolve_list_(httrackp *opt, const char *_iadr,
char *const str = block + sizeof(t_dnscache);
memcpy(str, iadr, iadr_len);
next->iadr = str;
next->host_count = count;
for (i = 0; i < count; i++) {
next->host_length[i] = SOCaddr_size(resolved[i]);
assertf(next->host_length[i] <= sizeof(next->host_addr[i]));
memcpy(next->host_addr[i], &SOCaddr_sockaddr(resolved[i]),
next->host_length[i]);
if (sa != NULL) {
next->host_length = SOCaddr_size(*sa);
assertf(next->host_length <= sizeof(next->host_addr));
memcpy(next->host_addr, &SOCaddr_sockaddr(*sa), next->host_length);
} else {
next->host_length = 0; // non existant dans le dns
}
next->next = NULL;
return sa;
}
/* copy result to caller (cache store may have failed; result still valid)
*/
for (i = 0; i < count && i < max; i++) {
SOCaddr_copy_SOCaddr(out[i], resolved[i]);
}
return count;
} // retour hp du cache
/* return result if any */
return sa;
} // retour hp du cache
}
int hts_dns_resolve_all(httrackp *opt, const char *iadr, SOCaddr *out, int max,
const char **error) {
int count;
if (!strnotempty(iadr) || max <= 0) {
return 0;
}
SOCaddr* hts_dns_resolve2(httrackp * opt, const char *_iadr, SOCaddr *const addr, const char **error) {
SOCaddr *ret;
hts_mutexlock(&opt->state.lock);
count = hts_dns_resolve_list_(opt, iadr, out, max, error);
ret = hts_dns_resolve_(opt, _iadr, addr, error);
hts_mutexrelease(&opt->state.lock);
return count;
}
SOCaddr *hts_dns_resolve2(httrackp *opt, const char *_iadr, SOCaddr *const addr,
const char **error) {
SOCaddr_clear(*addr);
if (hts_dns_resolve_all(opt, _iadr, addr, 1, error) > 0) {
return SOCaddr_is_valid(*addr) ? addr : NULL;
}
return NULL;
return ret;
}
SOCaddr* hts_dns_resolve(httrackp * opt, const char *_iadr, SOCaddr *const addr) {

View File

@@ -150,11 +150,8 @@ typedef struct t_dnscache t_dnscache;
struct t_dnscache {
struct t_dnscache *next;
const char *iadr;
// resolved addresses, in resolver (RFC 6724) order; host_count==0 means the
// name does not resolve (negative cache). host_count<=HTS_MAXADDRNUM.
int host_count;
size_t host_length[HTS_MAXADDRNUM]; // sockaddr length of each (16 or 28)
char host_addr[HTS_MAXADDRNUM][HTS_MAXADDRLEN];
size_t host_length; // length ; (4 or 16) ; 0 for error
char host_addr[HTS_MAXADDRLEN];
};
/* Library internal definitions */
@@ -194,13 +191,6 @@ int http_cookie_header_selftest(t_cookie *cookie, const char *domain,
//int newhttp(char* iadr,char* err=NULL);
T_SOC newhttp(httrackp * opt, const char *iadr, htsblk * retour, int port,
int waitconnect);
/* Like newhttp(), but connect to the addr_index-th resolved address of the host
(0-based) instead of always the first; *addr_count, if non-NULL, is set to
the total resolved addresses. newhttp() == newhttp_addr(...,0,NULL). Used by
the slot scheduler to try the next address when a connect fails (dead IPv6
etc.). */
T_SOC newhttp_addr(httrackp *opt, const char *iadr, htsblk *retour, int port,
int waitconnect, int addr_index, int *addr_count);
HTS_INLINE void deletehttp(htsblk * r);
HTS_INLINE int deleteaddr(htsblk * r);
HTS_INLINE void deletesoc(T_SOC soc);
@@ -225,14 +215,9 @@ void treatfirstline(htsblk * retour, const char *rcvd);
// sous-fonctions
LLint http_xfread1(htsblk * r, int bufl);
/* Cached resolver: fill out[0..count-1] with up to max addresses for iadr (in
resolver order), returning the count (0 = does not resolve, negative-cached).
Resolves once per host; later calls read the DNS cache. Must hold no lock
(brackets opt->state.lock itself). */
int hts_dns_resolve_all(httrackp *opt, const char *iadr, SOCaddr *out, int max,
const char **error);
HTS_INLINE SOCaddr *hts_dns_resolve2(httrackp *opt, const char *iadr,
SOCaddr *const addr, const char **error);
HTS_INLINE SOCaddr* hts_dns_resolve2(httrackp * opt, const char *iadr,
SOCaddr *const addr,
const char **error);
HTS_INLINE SOCaddr* hts_dns_resolve(httrackp * opt, const char *iadr,
SOCaddr *const addr);
HTSEXT_API SOCaddr* hts_dns_resolve_nocache2(const char *const hostname,

View File

@@ -304,22 +304,6 @@ static HTS_UNUSED void SOCaddr_inetntoa_(char *namebuf, size_t namebuflen,
/** Length type for socket APIs (getsockname, accept, ...). */
typedef socklen_t SOClen;
#if HTS_INET6 != 0
/** Resolver backend: getaddrinfo/freeaddrinfo as a swappable pair, so the
self-test can script DNS answers (families, multiplicity, errors)
in-process. The free function must match its getaddrinfo (a fake allocates
its own chain), hence the pair. */
typedef struct hts_resolver_backend {
/* Lookup entry point; same parameter list as the libc getaddrinfo(). */
int (*getaddrinfo)(const char *node, const char *service,
const struct addrinfo *hints, struct addrinfo **res);
/* Releases a chain returned through *res by this backend's getaddrinfo. */
void (*freeaddrinfo)(struct addrinfo *res);
} hts_resolver_backend;
/** Install a resolver backend for the process; NULL restores the libc default.
Test-only seam, not thread-safe; callers must serialize against resolves. */
void hts_dns_set_resolver_backend(const hts_resolver_backend *backend);
#endif
#ifdef __cplusplus
}
#endif

View File

@@ -1,15 +0,0 @@
#!/bin/bash
#

set -euo pipefail

# Runs the built-in DNS resolver/cache self-test (mock getaddrinfo, no network:
# address family, single-address selection, the -@i4/-@i6 family filter, and
# cache reuse) and requires its exact one-line verdict.
# "run" is a mandatory trailing token, as for the other -# selftests: without
# it the command line counts as "no arguments" and goes to the usage screen.
expected="dns-selftest: OK"
actual=$(httrack -#D run)

if test "$actual" != "$expected"; then
  echo "expected '$expected', got: $actual" >&2
  exit 1
fi

View File

@@ -1,110 +0,0 @@
#!/bin/bash
#
# A host that resolves to several addresses must fall back to the next one when
# a connect fails, instead of giving up on the first (dead IPv6 on a dual-stack
# host, ...). HTTRACK_DEBUG_RESOLVE pins "deadhost" to a refused address first
# (127.0.0.2, nothing listening) then the live server (127.0.0.1): the crawl
# only succeeds if httrack retries the second address. A second case pins every
# address to a refused one, so the slot must exhaust the list and error out
# (rather than hang or loop).
set -euo pipefail
: "${top_srcdir:=..}"
if test "${V6_SUPPORT:-}" == "no"; then
echo "no IPv6 support (resolver list/override is IPv6-only), skipping"
exit 77
fi
if ! command -v python3 >/dev/null 2>&1; then
echo "python3 missing, skipping"
exit 77
fi
server="$top_srcdir/tests/local-server.py"
root="$top_srcdir/tests/server-root"
tmpdir=$(mktemp -d)
serverpid=
# EXIT handler: stop and reap the background server if one was started, then
# remove the scratch directory. Failures of kill/wait are ignored, and the
# function always returns 0.
cleanup() {
  if [[ -n "$serverpid" ]]; then
    kill "$serverpid" 2>/dev/null || :
    wait "$serverpid" 2>/dev/null || :
  fi
  rm -rf "$tmpdir"
  return 0
}
trap cleanup EXIT
# bind the live server to 127.0.0.1 only, so 127.0.0.2 refuses the connect
python3 "$server" --root "$root" --bind 127.0.0.1 >"$tmpdir/srv.out" 2>"$tmpdir/srv.err" &
serverpid=$!
port=
for _ in $(seq 1 50); do
line=$(head -n1 "$tmpdir/srv.out" 2>/dev/null || true)
if test "${line%% *}" == "PORT"; then
port="${line#PORT }"
break
fi
kill -0 "$serverpid" 2>/dev/null || {
echo "server exited early: $(cat "$tmpdir/srv.err")"
exit 1
}
sleep 0.1
done
test -n "$port" || {
echo "could not discover server port"
exit 1
}
out="$tmpdir/crawl"
HTTRACK_DEBUG_RESOLVE="deadhost:127.0.0.2,127.0.0.1" \
httrack "http://deadhost:$port/simple/basic.html" -O "$out" \
-c1 --robots=0 --timeout=30 --quiet -Z >"$tmpdir/log" 2>&1
log="$out/hts-log.txt"
# the dead address was tried, then the next one (proves the fallback ran)
if ! grep -q "trying next address" "$log"; then
echo "FAIL: no connect fallback happened"
cat "$log"
exit 1
fi
# 0 errors and the file was actually fetched (over the live address)
errs=$(grep -iEc "^[0-9:]*[[:space:]]Error:" "$log" || true)
test "$errs" == "0" || {
echo "FAIL: $errs error(s) reported"
grep -iE "Error:" "$log"
exit 1
}
test -f "$out/deadhost_$port/simple/basic.html" || {
echo "FAIL: basic.html not downloaded via fallback"
find "$out" -type f
exit 1
}
# every address refused: the slot exhausts the list, then errors out (the
# harness timeout would catch a hang/loop; refused connects are instant)
out2="$tmpdir/crawl2"
HTTRACK_DEBUG_RESOLVE="alldead:127.0.0.2,127.0.0.3" \
httrack "http://alldead:$port/simple/basic.html" -O "$out2" \
-c1 --robots=0 --timeout=30 --quiet -Z >"$tmpdir/log2" 2>&1
log2="$out2/hts-log.txt"
grep -q "trying next address" "$log2" || {
echo "FAIL: exhaustion path never tried the fallback address"
cat "$log2"
exit 1
}
grep -iqE "^[0-9:]*[[:space:]]Error:" "$log2" || {
echo "FAIL: all addresses failing did not report an error"
cat "$log2"
exit 1
}
test ! -f "$out2/alldead_$port/simple/basic.html" || {
echo "FAIL: file downloaded despite every address failing"
exit 1
}
echo "OK: connect fallback succeeds, and exhausting all addresses errors out"

View File

@@ -13,7 +13,6 @@ TESTS_ENVIRONMENT += PATH=$(top_builddir)/src$(PATH_SEPARATOR)$$PATH
### TESTS_ENVIRONMENT += $(SHLIBPATH_VAR)="$(top_builddir)/src/$(LT_CV_OBJDIR)$${$(SHLIBPATH_VAR):+$(PATH_SEPARATOR)}$$$(SHLIBPATH_VAR)"
TESTS_ENVIRONMENT += ONLINE_UNIT_TESTS=$(ONLINE_UNIT_TESTS)
TESTS_ENVIRONMENT += HTTPS_SUPPORT=$(HTTPS_SUPPORT)
TESTS_ENVIRONMENT += V6_SUPPORT=$(V6_SUPPORT)
TESTS_ENVIRONMENT += top_srcdir=$(top_srcdir)
TEST_EXTENSIONS = .test
@@ -30,7 +29,6 @@ TESTS = \
01_engine-cmdline.test \
01_engine-cookies.test \
01_engine-copyopt.test \
01_engine-dns.test \
01_engine-doitlog.test \
01_engine-entities.test \
01_engine-filter.test \
@@ -57,7 +55,6 @@ TESTS = \
15_local-types.test \
16_local-assume.test \
17_local-empty-ct.test \
18_local-update.test \
19_local-connect-fallback.test
18_local-update.test
CLEANFILES = check-network_sh.cache