mirror of
https://github.com/xroche/httrack.git
synced 2026-06-23 10:37:50 +03:00
Compare commits
9 Commits
feature/co
...
dns-multia
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0bea390973 | ||
|
|
67af1c2f0b | ||
|
|
542d6a56b5 | ||
|
|
a5c86e7e89 | ||
|
|
54f5717057 | ||
|
|
40fc9de360 | ||
|
|
4614eefefe | ||
|
|
b0e8262db0 | ||
|
|
addbd3136b |
12
configure.ac
12
configure.ac
@@ -1,6 +1,6 @@
|
||||
AC_PREREQ([2.71])
|
||||
|
||||
AC_INIT([httrack], [3.49.8], [roche+packaging@httrack.com], [httrack], [http://www.httrack.com/])
|
||||
AC_INIT([httrack], [3.49.9], [roche+packaging@httrack.com], [httrack], [http://www.httrack.com/])
|
||||
AC_COPYRIGHT([
|
||||
HTTrack Website Copier, Offline Browser for Windows and Unix
|
||||
Copyright (C) 1998-2015 Xavier Roche and other contributors
|
||||
@@ -29,9 +29,10 @@ AC_CONFIG_SRCDIR(src/httrack.c)
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
AC_CONFIG_HEADERS(config.h)
|
||||
AM_INIT_AUTOMAKE([subdir-objects])
|
||||
# 3:0:0: htsblk layout changed (contenttype/charset/contentencoding widened to
|
||||
# 128), an incompatible ABI break, so bump current and reset revision/age.
|
||||
VERSION_INFO="3:0:0"
|
||||
# 3:1:0: 3.49.9 changed code but not the exported interface vs 3.49.8 (same 164
|
||||
# symbols, no struct-layout change), so bump revision only. (3:0:0 was the htsblk
|
||||
# mime-buffer widening, an ABI break that moved the soname .so.2 -> .so.3.)
|
||||
VERSION_INFO="3:1:0"
|
||||
AM_MAINTAINER_MODE
|
||||
AC_USE_SYSTEM_EXTENSIONS
|
||||
|
||||
@@ -214,9 +215,12 @@ AC_SUBST(OPENSSL_LIBS)
|
||||
fi
|
||||
|
||||
### Support IPv6
|
||||
V6_SUPPORT=no
|
||||
AC_CHECK_LIB(c, getaddrinfo, [V6_FLAG="-DINET6"
|
||||
V6_SUPPORT=yes
|
||||
AC_DEFINE(HTS_INET6, 1, [Check for IPv6])], AC_MSG_WARN([*** IPv6 not found IPv6 compatibility disabled]))
|
||||
AC_SUBST(V6_FLAG)
|
||||
AC_SUBST(V6_SUPPORT)
|
||||
|
||||
### Check for LFS
|
||||
AC_CHECK_LIB(c, fopen64, [LFS_FLAG="-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE"
|
||||
|
||||
11
debian/changelog
vendored
11
debian/changelog
vendored
@@ -1,3 +1,14 @@
|
||||
httrack (3.49.9-1) unstable; urgency=medium
|
||||
|
||||
* New upstream release: Content-Type and file-type detection fixes (trust a
|
||||
declared Content-Type over a binary URL extension, honor --assume under the
|
||||
delayed type check, keep a known extension against a bogus or empty
|
||||
Content-Type, and avoid an uninitialised read on an empty Content-Type), and
|
||||
restored C++ source-compatibility of the installed headers so reverse
|
||||
dependencies (httraqt) build again.
|
||||
|
||||
-- Xavier Roche <xavier@debian.org> Sun, 21 Jun 2026 17:59:38 +0200
|
||||
|
||||
httrack (3.49.8-2) unstable; urgency=medium
|
||||
|
||||
* Rename libhttrack2 to libhttrack3 to follow the SONAME, which the 3.49.8
|
||||
|
||||
118
debian/copyright
vendored
118
debian/copyright
vendored
@@ -1,21 +1,109 @@
|
||||
This package was debianized by Xavier Roche <roche@httrack.com> on
|
||||
Fri, 27 Sep 2002 16:42:26 +0200
|
||||
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
||||
Upstream-Name: httrack
|
||||
Upstream-Contact: Xavier Roche <roche@httrack.com>
|
||||
Source: https://www.httrack.com/
|
||||
|
||||
The current Debian maintainer is Xavier Roche <xavier@debian.org>
|
||||
Files: *
|
||||
Copyright: 1998-2026 Xavier Roche and other contributors
|
||||
License: GPL-3+
|
||||
Comment:
|
||||
The engine includes contributions from Yann Philippot (src/htsjava.c,
|
||||
src/htsjava.h). htsbasenet.h links against the system OpenSSL library
|
||||
(originally by Eric Young); no OpenSSL/SSLeay code is bundled here.
|
||||
|
||||
Upstream author: Xavier Roche <roche@httrack.com>
|
||||
Files: src/minizip/*
|
||||
Copyright: 1998-2010 Gilles Vollant
|
||||
2007-2008 Even Rouault
|
||||
2009-2010 Mathias Svensson
|
||||
1990-2000 Info-ZIP
|
||||
License: Zlib
|
||||
Comment:
|
||||
The decryption code in src/minizip/crypt.h and src/minizip/unzip.c derives
|
||||
from the Info-ZIP distribution, distributed under the same terms.
|
||||
|
||||
Copyright: 1998-2014 Xavier Roche and other contributors
|
||||
Files: src/md5.c
|
||||
Copyright: 1993 Colin Plumb
|
||||
License: public-domain-md5
|
||||
This code implements the MD5 message-digest algorithm, due to Ron Rivest.
|
||||
It was written by Colin Plumb in 1993, no copyright is claimed. This code
|
||||
is in the public domain; do with it what you wish.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Files: src/coucal/*
|
||||
Copyright: 2013-2014 Xavier Roche
|
||||
License: BSD-3-clause
|
||||
|
||||
On Debian systems, the complete text of the GNU General Public
|
||||
License version 3 can be found in /usr/share/common-licenses/GPL-3 file.
|
||||
Files: src/coucal/murmurhash3.h*
|
||||
Copyright: Austin Appleby
|
||||
License: public-domain-murmurhash3
|
||||
MurmurHash3 was written by Austin Appleby, and is placed in the public
|
||||
domain. The author hereby disclaims copyright to this source code.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Files: html/server/div/com.httrack.WebHTTrack.metainfo.xml
|
||||
Copyright: 1998-2026 Xavier Roche and other contributors
|
||||
License: FSFAP
|
||||
Copying and distribution of this file, with or without modification, are
|
||||
permitted in any medium without royalty provided the copyright notice and
|
||||
this notice are preserved. This file is offered as-is, without any warranty.
|
||||
|
||||
Files: debian/*
|
||||
Copyright: 2002-2026 Xavier Roche <xavier@debian.org>
|
||||
License: GPL-3+
|
||||
|
||||
License: GPL-3+
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
.
|
||||
On Debian systems, the complete text of the GNU General Public License
|
||||
version 3 can be found in /usr/share/common-licenses/GPL-3.
|
||||
|
||||
License: Zlib
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the
|
||||
use of this software.
|
||||
.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
.
|
||||
1. The origin of this software must not be misrepresented; you must not claim
|
||||
that you wrote the original software. If you use this software in a product,
|
||||
an acknowledgment in the product documentation would be appreciated but is
|
||||
not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
License: BSD-3-clause
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
.
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
3. Neither the name of the copyright holder nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
@@ -4,6 +4,12 @@ HTTrack Website Copier release history:
|
||||
|
||||
This file lists all changes and fixes that have been made for HTTrack
|
||||
|
||||
3.49-9
|
||||
+ Fixed: file-type detection from the Content-Type header: trust a declared type over a binary URL extension, honor --assume under the delayed type check, and keep a known extension against a bogus or empty Content-Type (#267, #29, #56)
|
||||
+ Fixed: an uninitialized-buffer read when the Content-Type is empty (#411)
|
||||
+ Fixed: restored C++ source-compatibility of the installed headers so reverse dependencies (httraqt) build again (#413)
|
||||
+ Changed: multiple internal build, packaging and test-harness improvements
|
||||
|
||||
3.49-8
|
||||
+ New: tunnel HTTPS downloads through the configured HTTP proxy via CONNECT (#85)
|
||||
+ New: parse every candidate URL in <img> and <source> srcset lists (#326)
|
||||
|
||||
@@ -56,7 +56,7 @@ whttrackrundir = $(bindir)
|
||||
whttrackrun_SCRIPTS = webhttrack
|
||||
|
||||
libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \
|
||||
htscache_selftest.c \
|
||||
htscache_selftest.c htsdns_selftest.c \
|
||||
htscatchurl.c htsfilters.c htsftp.c htshash.c coucal/coucal.c \
|
||||
htshelp.c htslib.c htscoremain.c \
|
||||
htsname.c htsrobots.c htstools.c htswizard.c \
|
||||
@@ -66,7 +66,7 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \
|
||||
md5.c \
|
||||
minizip/ioapi.c minizip/mztools.c minizip/unzip.c minizip/zip.c \
|
||||
hts-indextmpl.h htsalias.h htsback.h htsbase.h htssafe.h \
|
||||
htsbasenet.h htsbauth.h htscache.h htscache_selftest.h htscatchurl.h \
|
||||
htsbasenet.h htsbauth.h htscache.h htscache_selftest.h htsdns_selftest.h htscatchurl.h \
|
||||
htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \
|
||||
htsfilters.h htsftp.h htsglobal.h htshash.h coucal/coucal.h \
|
||||
htshelp.h htsindex.h htslib.h htsmd5.h \
|
||||
|
||||
153
src/htsback.c
153
src/htsback.c
@@ -73,6 +73,8 @@ struct_back *back_new(httrackp *opt, int back_max) {
|
||||
|
||||
sback->count = back_max;
|
||||
sback->lnk = (lien_back *) calloct((back_max + 1), sizeof(lien_back));
|
||||
sback->connect_fallback = (hts_connect_fallback *) calloct(
|
||||
(back_max + 1), sizeof(hts_connect_fallback));
|
||||
sback->ready = coucal_new(0);
|
||||
hts_set_hash_handler(sback->ready, opt);
|
||||
coucal_set_name(sback->ready, "back_new");
|
||||
@@ -83,6 +85,7 @@ struct_back *back_new(httrackp *opt, int back_max) {
|
||||
sback->lnk[i].r.location = sback->lnk[i].location_buffer;
|
||||
sback->lnk[i].status = STATUS_FREE;
|
||||
sback->lnk[i].r.soc = INVALID_SOCKET;
|
||||
sback->connect_fallback[i].addr_count = -1; // not yet probed
|
||||
}
|
||||
return sback;
|
||||
}
|
||||
@@ -93,6 +96,7 @@ void back_free(struct_back ** sback) {
|
||||
freet((*sback)->lnk);
|
||||
(*sback)->lnk = NULL;
|
||||
}
|
||||
freet((*sback)->connect_fallback);
|
||||
if ((*sback)->ready != NULL) {
|
||||
coucal_delete(&(*sback)->ready);
|
||||
(*sback)->ready_size_bytes = 0;
|
||||
@@ -102,6 +106,72 @@ void back_free(struct_back ** sback) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Upper bound, in seconds, on how long one connect candidate may stay pending
   while another resolved address is still available. Without it a dead address
   (typically unreachable IPv6 on a dual-stack host) would hold the slot for the
   whole slot timeout (120s by default). The value stays well above a normal
   handshake; the final candidate is not capped and keeps the full timeout. */
#define HTS_CONNECT_FALLBACK_TIMEOUT 10

/* Tell whether a connecting slot should abandon its current address.
   addr_index: 0-based candidate currently being connected.
   addr_count: number of resolved candidates; when addr_index is the last (or
               only) one, or the count is negative, no fallback is reported.
   elapsed:    seconds spent on the current candidate.
   timeout:    slot timeout in seconds; <= 0 means timeouts are not managed.
   Returns 1 when another candidate remains and elapsed has reached
   min(timeout, HTS_CONNECT_FALLBACK_TIMEOUT), 0 otherwise. */
int back_connect_fallback_due(int addr_index, int addr_count, int elapsed,
                              int timeout) {
  const int has_fallback = addr_index + 1 < addr_count;

  if (has_fallback && timeout > 0) {
    int deadline = HTS_CONNECT_FALLBACK_TIMEOUT;

    if (timeout < deadline)
      deadline = timeout; // a short slot timeout lowers the cap further
    return elapsed >= deadline;
  }
  return 0;
}
|
||||
|
||||
/* Pending-connect result for a non-blocking socket reported ready by select():
|
||||
0 = connected, >0 = the connect errno (refused, unreachable, ...), -1 if the
|
||||
probe itself failed. A failed connect is reported writable too, so this is
|
||||
how success is told from failure without blocking. */
|
||||
static int connect_socket_error(T_SOC soc) {
|
||||
int soerr = 0;
|
||||
socklen_t len = (socklen_t) sizeof(soerr);
|
||||
|
||||
if (getsockopt(soc, SOL_SOCKET, SO_ERROR, (char *) &soerr, &len) != 0)
|
||||
return -1;
|
||||
return soerr;
|
||||
}
|
||||
|
||||
/* Retry a stuck/failed connecting slot against its next resolved address.
|
||||
Closes the current socket and starts a non-blocking connect to the next
|
||||
candidate, leaving the slot in STATUS_CONNECTING. Returns 1 if a new connect
|
||||
was started, 0 if no fallback address remains (caller fails the slot). */
|
||||
static int back_connect_next(httrackp *opt, struct_back *sback, int i) {
|
||||
hts_connect_fallback *const cf = &sback->connect_fallback[i];
|
||||
lien_back *const back = sback->lnk;
|
||||
const int next = cf->addr_index + 1;
|
||||
T_SOC soc;
|
||||
|
||||
if (next >= cf->addr_count)
|
||||
return 0;
|
||||
|
||||
if (back[i].r.soc != INVALID_SOCKET) {
|
||||
deletehttp(&back[i].r);
|
||||
back[i].r.soc = INVALID_SOCKET;
|
||||
}
|
||||
soc = newhttp_addr(opt, back[i].url_adr, &back[i].r, -1, 0, next, NULL);
|
||||
if (soc == INVALID_SOCKET)
|
||||
return 0;
|
||||
|
||||
back[i].r.soc = soc;
|
||||
cf->addr_index = next;
|
||||
cf->connect_start = time_local();
|
||||
if (back[i].timeout > 0)
|
||||
back[i].timeout_refresh = cf->connect_start;
|
||||
back[i].status = STATUS_CONNECTING;
|
||||
hts_log_print(opt, LOG_DEBUG,
|
||||
"connect failed, trying next address (%d/%d) for %s", next + 1,
|
||||
cf->addr_count, back[i].url_adr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
void back_delete_all(httrackp * opt, cache_back * cache, struct_back * sback) {
|
||||
if (sback != NULL) {
|
||||
int i;
|
||||
@@ -1911,8 +1981,11 @@ int back_add(struct_back * sback, httrackp * opt, cache_back * cache, const char
|
||||
// ouvrir liaison, envoyer requète
|
||||
// ne pas traiter ou recevoir l'en tête immédiatement
|
||||
hts_init_htsblk(&back[p].r);
|
||||
//memset(&(back[p].r), 0, sizeof(htsblk));
|
||||
// memset(&(back[p].r), 0, sizeof(htsblk));
|
||||
back[p].r.location = back[p].location_buffer;
|
||||
// fresh connect: address list not yet probed, start at the first
|
||||
sback->connect_fallback[p].addr_index = 0;
|
||||
sback->connect_fallback[p].addr_count = -1;
|
||||
// recopier proxy
|
||||
if ((back[p].r.req.proxy.active = opt->proxy.active)) {
|
||||
if (StringBuff(opt->proxy.bindhost) != NULL)
|
||||
@@ -2369,21 +2442,25 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
|
||||
// en cas de gestion du connect préemptif
|
||||
#if HTS_XCONN
|
||||
if (back[i].status == STATUS_CONNECTING) { // connexion
|
||||
do_wait = 1;
|
||||
// a connecting slot always carries a live socket; guard anyway so a
|
||||
// stray INVALID_SOCKET can never reach FD_SET (mirrors the recv branch)
|
||||
if (back[i].r.soc != INVALID_SOCKET) {
|
||||
do_wait = 1;
|
||||
|
||||
// noter socket write
|
||||
FD_SET(back[i].r.soc, &fds_c);
|
||||
// noter socket write
|
||||
FD_SET(back[i].r.soc, &fds_c);
|
||||
|
||||
// noter socket erreur
|
||||
FD_SET(back[i].r.soc, &fds_e);
|
||||
// noter socket erreur
|
||||
FD_SET(back[i].r.soc, &fds_e);
|
||||
|
||||
// calculer max
|
||||
if (max_c) {
|
||||
max_c = 0;
|
||||
nfds = back[i].r.soc;
|
||||
} else if (back[i].r.soc > nfds) {
|
||||
// ID socket la plus élevée
|
||||
nfds = back[i].r.soc;
|
||||
// calculer max
|
||||
if (max_c) {
|
||||
max_c = 0;
|
||||
nfds = back[i].r.soc;
|
||||
} else if (back[i].r.soc > nfds) {
|
||||
// ID socket la plus élevée
|
||||
nfds = back[i].r.soc;
|
||||
}
|
||||
}
|
||||
|
||||
} else
|
||||
@@ -2517,8 +2594,20 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
|
||||
}
|
||||
// ---- FLAG WRITE MIS A UN?: POUR LE CONNECT
|
||||
if (back[i].status == STATUS_CONNECTING) { // attendre connect
|
||||
hts_connect_fallback *const cf = &sback->connect_fallback[i];
|
||||
int dispo = 0;
|
||||
|
||||
// probe the resolved address list once per fresh connect (cache hit:
|
||||
// the host was resolved when this connect was opened)
|
||||
if (cf->addr_count < 0 && back[i].r.soc != INVALID_SOCKET &&
|
||||
!back[i].r.is_file) {
|
||||
SOCaddr scratch[HTS_MAXADDRNUM];
|
||||
|
||||
cf->addr_count = hts_dns_resolve_all(opt, back[i].url_adr, scratch,
|
||||
HTS_MAXADDRNUM, NULL);
|
||||
cf->connect_start = time_local();
|
||||
}
|
||||
|
||||
// vérifier l'existance de timeout-check
|
||||
if (!gestion_timeout)
|
||||
if (back[i].timeout > 0)
|
||||
@@ -2526,7 +2615,20 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
|
||||
|
||||
// connecté?
|
||||
dispo = FD_ISSET(back[i].r.soc, &fds_c);
|
||||
if (dispo) { // ok connected!!
|
||||
if (dispo) { // socket ready: connect() finished (ok or failed)
|
||||
// a refused/failed connect is reported writable too; probe SO_ERROR
|
||||
// and, on failure, fall back to the next address (or fail the slot)
|
||||
if (connect_socket_error(back[i].r.soc) != 0) {
|
||||
if (!back_connect_next(opt, sback, i)) {
|
||||
deletehttp(&back[i].r);
|
||||
back[i].r.soc = INVALID_SOCKET;
|
||||
back[i].r.statuscode = STATUSCODE_CONNERROR;
|
||||
strcpybuff(back[i].r.msg, "Connect Error");
|
||||
back[i].status = STATUS_READY;
|
||||
back_set_finished(sback, i);
|
||||
}
|
||||
continue; // reconnected (stay connecting) or failed
|
||||
}
|
||||
busy_state = 1;
|
||||
|
||||
#if HTS_USEOPENSSL
|
||||
@@ -3884,6 +3986,29 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
|
||||
|
||||
if (back[i].status > 0) { // réception/connexion/..
|
||||
if (back[i].timeout > 0) {
|
||||
// a stuck connect with a fallback address: retry the next one well
|
||||
// before the full timeout (dead IPv6 on a dual-stack host, ...)
|
||||
if (back[i].status == STATUS_CONNECTING) {
|
||||
const hts_connect_fallback *const cf =
|
||||
&sback->connect_fallback[i];
|
||||
|
||||
if (back_connect_fallback_due(cf->addr_index, cf->addr_count,
|
||||
(int) (act - cf->connect_start),
|
||||
back[i].timeout)) {
|
||||
if (back_connect_next(opt, sback, i)) {
|
||||
continue; // reconnected to the next candidate
|
||||
}
|
||||
// fallback was due but no socket could be opened
|
||||
// (back_connect_next closed the dead one): stop now rather than
|
||||
// spin on an invalid fd
|
||||
back[i].r.soc = INVALID_SOCKET;
|
||||
back[i].r.statuscode = STATUSCODE_CONNERROR;
|
||||
strcpybuff(back[i].r.msg, "Connect Error");
|
||||
back[i].status = STATUS_READY;
|
||||
back_set_finished(sback, i);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
//printf("time check %d\n",((int) (act-back[i].timeout_refresh))-back[i].timeout);
|
||||
if (((int) (act - back[i].timeout_refresh)) >= back[i].timeout) {
|
||||
hts_log_print(opt, LOG_DEBUG, "connection timed out for %s%s", back[i].url_adr,
|
||||
|
||||
@@ -3703,9 +3703,9 @@ HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to) {
|
||||
if (from->maxsoc > 0)
|
||||
to->maxsoc = from->maxsoc;
|
||||
|
||||
/* hts_boolean/enum fields are unsigned (GCC), so a bare `> -1` unset-guard
|
||||
is always false; cast to int to keep the -1 "unset" sentinel test. */
|
||||
if ((int) from->nearlink > -1)
|
||||
/* hts_tristate fields use HTS_DEFAULT (-1) for "unspecified": copy_htsopt
|
||||
skips them so the target keeps its value. */
|
||||
if (from->nearlink > -1)
|
||||
to->nearlink = from->nearlink;
|
||||
|
||||
if (from->timeout > -1)
|
||||
@@ -3732,10 +3732,10 @@ HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to) {
|
||||
if (from->hostcontrol > -1)
|
||||
to->hostcontrol = from->hostcontrol;
|
||||
|
||||
if ((int) from->errpage > -1)
|
||||
if (from->errpage > -1)
|
||||
to->errpage = from->errpage;
|
||||
|
||||
if ((int) from->parseall > -1)
|
||||
if (from->parseall > -1)
|
||||
to->parseall = from->parseall;
|
||||
|
||||
// test all: bit 8 de travel
|
||||
|
||||
@@ -152,6 +152,15 @@ struct lien_adrfilsave {
|
||||
char save[HTS_URLMAXSIZE * 2]; /**< local save path (with directory) */
|
||||
};
|
||||
|
||||
/** Per-slot connect-fallback bookkeeping (parallel to struct_back.lnk).
|
||||
Tracks which resolved address the slot is currently connecting to so a
|
||||
stuck connect can be retried against the next one. */
|
||||
typedef struct hts_connect_fallback {
|
||||
int addr_index; /**< candidate being connected (0-based) */
|
||||
int addr_count; /**< resolved addresses; -1 = not yet probed */
|
||||
TStamp connect_start; /**< when the current candidate's connect began */
|
||||
} hts_connect_fallback;
|
||||
|
||||
/** The download-slot ring: the set of concurrent transfers in flight.
|
||||
Allocated/owned by the engine; consumers (status callbacks, the loop)
|
||||
read it but do not resize or free it. */
|
||||
@@ -168,6 +177,7 @@ struct struct_back {
|
||||
int count; /**< number of usable slots (back_max) */
|
||||
coucal ready; /**< index of slots whose transfer completed */
|
||||
LLint ready_size_bytes; /**< total bytes buffered in completed slots */
|
||||
hts_connect_fallback *connect_fallback; /**< per-slot, count+1 entries */
|
||||
};
|
||||
|
||||
typedef struct cache_back_zip_entry cache_back_zip_entry;
|
||||
@@ -372,6 +382,13 @@ void check_rate(TStamp stat_timestart, int maxrate);
|
||||
/* Backing (download-slot) scheduler. Operate on the back[] ring (struct_back).
|
||||
Not thread-safe; call from the single crawl loop. */
|
||||
|
||||
/* True if a connecting slot should give up on the current address and try the
|
||||
next one: a fallback address remains (addr_index+1 < addr_count) and the
|
||||
candidate has been connecting for at least its deadline, min(timeout, an
|
||||
internal cap). elapsed/timeout in seconds. Exposed for the -#D self-test. */
|
||||
int back_connect_fallback_due(int addr_index, int addr_count, int elapsed,
|
||||
int timeout);
|
||||
|
||||
/* How many new sockets may be opened now, honoring maxsoc and the maxconn rate
|
||||
limit (>=0). _strict ignores reserved-slot headroom; the plain form leaves
|
||||
room for naming tests and stops at 0 when the stack is nearly full. */
|
||||
|
||||
@@ -47,6 +47,7 @@ Please visit our Website: http://www.httrack.com
|
||||
#include "htscharset.h"
|
||||
#include "htsencoding.h"
|
||||
#include "htscache_selftest.h"
|
||||
#include "htsdns_selftest.h"
|
||||
#include "htsmd5.h"
|
||||
|
||||
#include <ctype.h>
|
||||
@@ -2460,6 +2461,13 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case 'D': { // DNS resolver/cache self-test (mock getaddrinfo)
|
||||
const int err = dns_selftests(opt);
|
||||
|
||||
printf("dns-selftest: %s\n", err ? "FAIL" : "OK");
|
||||
htsmain_free();
|
||||
return err;
|
||||
} break;
|
||||
case 'C': // list cache files : httrack -#C '*spid*.gif' will attempt to find the matching file
|
||||
{
|
||||
int hasFilter = 0;
|
||||
@@ -3166,6 +3174,16 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
if (to->parseall != HTS_FALSE)
|
||||
err = 1;
|
||||
|
||||
/* HTS_DEFAULT (-1) is "unspecified": copy_htsopt must skip it,
|
||||
leaving the target intact. Only a signed (int-backed) field
|
||||
can hold -1, so this also guards the type against regressing
|
||||
to an unsigned hts_boolean. */
|
||||
from->parseall = HTS_DEFAULT;
|
||||
to->parseall = HTS_TRUE;
|
||||
copy_htsopt(from, to);
|
||||
if (to->parseall != HTS_TRUE)
|
||||
err = 1;
|
||||
|
||||
hts_free_opt(from);
|
||||
hts_free_opt(to);
|
||||
printf("copy-htsopt: %s\n", err ? "FAIL" : "OK");
|
||||
|
||||
359
src/htsdns_selftest.c
Normal file
359
src/htsdns_selftest.c
Normal file
@@ -0,0 +1,359 @@
|
||||
/* ------------------------------------------------------------ */
|
||||
/*
|
||||
HTTrack Website Copier, Offline Browser for Windows and Unix
|
||||
Copyright (C) 2026 Xavier Roche and other contributors
|
||||
|
||||
SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Ethical use: we kindly ask that you NOT use this software to harvest email
|
||||
addresses or to collect any other private information about people. Doing so
|
||||
would dishonor our work and waste the many hours we have spent on it.
|
||||
|
||||
Please visit our Website: http://www.httrack.com
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
/* File: htsdns_selftest.c subroutines: */
|
||||
/* in-process self-test for the DNS resolver and cache */
|
||||
/* Author: Xavier Roche */
|
||||
/* ------------------------------------------------------------ */
|
||||
|
||||
/* Routes the resolver through a scripted getaddrinfo (hts_resolver_backend)
|
||||
instead of the network, so resolution and the DNS cache are testable for a
|
||||
fixed set of scenarios (IPv4/IPv6/dual-stack, errors, family filter,
|
||||
cache reuse) with no live DNS. */
|
||||
|
||||
#define HTS_INTERNAL_BYTECODE
|
||||
|
||||
#include "htsdns_selftest.h"
|
||||
|
||||
#include "htscore.h"
|
||||
#include "htslib.h"
|
||||
#include "htsnet.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#if HTS_INET6 != 0
|
||||
|
||||
/* IPV6_resolver: 0 = v4+v6, 1 = v4 only, 2 = v6 only (htscoremain -@i). */
|
||||
extern int IPV6_resolver;
|
||||
|
||||
/* Scripted resolver data. mock_addr is one address (family plus raw bytes);
   mock_host is one host: either a getaddrinfo error, or an ordered address
   list. */
|
||||
typedef struct mock_addr {
|
||||
int family; /* AF_INET / AF_INET6 */
|
||||
unsigned char addr[16]; /* 4 (v4) or 16 (v6) meaningful bytes */
|
||||
} mock_addr;
|
||||
|
||||
typedef struct mock_host {
|
||||
const char *name;
|
||||
int gai_err; /* non-zero: getaddrinfo returns this */
|
||||
int naddr;
|
||||
mock_addr addr[6];
|
||||
int calls; /* times the backend resolved this host */
|
||||
} mock_host;
|
||||
|
||||
static mock_host mock_hosts[] = {
|
||||
{"v4only.test", 0, 1, {{AF_INET, {1, 2, 3, 4}}}, 0},
|
||||
{"v6only.test", 0, 1, {{AF_INET6, {0x20, 0x01, 0x0d, 0xb8, [15] = 1}}}, 0},
|
||||
/* dual stack, IPv6 first (RFC 6724 order) then IPv4 */
|
||||
{"dual.test",
|
||||
0,
|
||||
2,
|
||||
{{AF_INET6, {0x20, 0x01, 0x0d, 0xb8, [15] = 2}}, {AF_INET, {5, 6, 7, 8}}},
|
||||
0},
|
||||
/* dual stack, IPv4 first: distinguishes "keep the first address" from
|
||||
"prefer a family", so the selection contract is actually pinned. */
|
||||
{"dual4.test",
|
||||
0,
|
||||
2,
|
||||
{{AF_INET, {9, 10, 11, 12}},
|
||||
{AF_INET6, {0x20, 0x01, 0x0d, 0xb8, [15] = 3}}},
|
||||
0},
|
||||
/* more addresses than HTS_MAXADDRNUM: the list must clamp to the cap. */
|
||||
{"many.test",
|
||||
0,
|
||||
6,
|
||||
{{AF_INET, {10, 0, 0, 1}},
|
||||
{AF_INET, {10, 0, 0, 2}},
|
||||
{AF_INET, {10, 0, 0, 3}},
|
||||
{AF_INET, {10, 0, 0, 4}},
|
||||
{AF_INET, {10, 0, 0, 5}},
|
||||
{AF_INET, {10, 0, 0, 6}}},
|
||||
0},
|
||||
{"nodns.test", EAI_NONAME, 0, {{0}}, 0},
|
||||
};
|
||||
|
||||
static mock_host *mock_find(const char *name) {
|
||||
for (size_t i = 0; i < sizeof(mock_hosts) / sizeof(mock_hosts[0]); i++) {
|
||||
if (strcmp(mock_hosts[i].name, name) == 0)
|
||||
return &mock_hosts[i];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void mock_reset_calls(void) {
|
||||
for (size_t i = 0; i < sizeof(mock_hosts) / sizeof(mock_hosts[0]); i++)
|
||||
mock_hosts[i].calls = 0;
|
||||
}
|
||||
|
||||
/* Build one addrinfo node owning its sockaddr (freed by mock_freeaddrinfo). */
|
||||
static struct addrinfo *mock_mkai(const mock_addr *a) {
|
||||
struct addrinfo *ai = calloct(1, sizeof(*ai));
|
||||
|
||||
ai->ai_family = a->family;
|
||||
if (a->family == AF_INET) {
|
||||
struct sockaddr_in *sin = calloct(1, sizeof(*sin));
|
||||
|
||||
sin->sin_family = AF_INET;
|
||||
memcpy(&sin->sin_addr, a->addr, 4);
|
||||
ai->ai_addr = (struct sockaddr *) sin;
|
||||
ai->ai_addrlen = sizeof(*sin);
|
||||
} else {
|
||||
struct sockaddr_in6 *sin6 = calloct(1, sizeof(*sin6));
|
||||
|
||||
sin6->sin6_family = AF_INET6;
|
||||
memcpy(&sin6->sin6_addr, a->addr, 16);
|
||||
ai->ai_addr = (struct sockaddr *) sin6;
|
||||
ai->ai_addrlen = sizeof(*sin6);
|
||||
}
|
||||
return ai;
|
||||
}
|
||||
|
||||
/* Scripted stand-in for getaddrinfo(), driven by the mock_hosts table.
   node:    host name to resolve.
   service: ignored.
   hints:   only ai_family is read; NULL means PF_UNSPEC (no family filter).
   res:     receives the resulting list (free with mock_freeaddrinfo); set to
            NULL on every failure path.
   Returns 0 on success, the host's scripted error, or EAI_NONAME when the host
   is unknown or the family filter leaves no address. Each lookup of a known
   host bumps its `calls` counter. */
static int mock_getaddrinfo(const char *node, const char *service,
                            const struct addrinfo *hints,
                            struct addrinfo **res) {
  struct addrinfo *list = NULL;
  struct addrinfo **link = &list; /* where the next node gets attached */
  mock_host *const host = mock_find(node);
  int family = PF_UNSPEC;

  (void) service;
  if (hints != NULL)
    family = hints->ai_family;
  *res = NULL;

  if (host == NULL)
    return EAI_NONAME;
  host->calls++; /* a real backend hit; a cached host skips this */
  if (host->gai_err != 0)
    return host->gai_err;

  for (int k = 0; k < host->naddr; k++) {
    const mock_addr *const cand = &host->addr[k];

    /* honor the requested family (v4/v6 only), keeping the scripted order */
    if (family == PF_UNSPEC || family == cand->family) {
      *link = mock_mkai(cand);
      link = &(*link)->ai_next;
    }
  }

  if (list == NULL)
    return EAI_NONAME; /* filtered to empty, as the libc resolver does */
  *res = list;
  return 0;
}
|
||||
|
||||
static void mock_freeaddrinfo(struct addrinfo *res) {
|
||||
while (res != NULL) {
|
||||
struct addrinfo *const next = res->ai_next;
|
||||
|
||||
freet(res->ai_addr);
|
||||
freet(res);
|
||||
res = next;
|
||||
}
|
||||
}
|
||||
|
||||
static const hts_resolver_backend mock_backend = {mock_getaddrinfo,
|
||||
mock_freeaddrinfo};
|
||||
|
||||
static int failures = 0;
|
||||
|
||||
#define CHECK(cond) \
|
||||
do { \
|
||||
if (!(cond)) { \
|
||||
failures++; \
|
||||
fprintf(stderr, "dns-selftest: FAIL at %s:%d: %s\n", __FILE__, __LINE__, \
|
||||
#cond); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* Resolve via the uncached entry point; return the address family, or
|
||||
AF_UNSPEC if the host did not resolve. */
|
||||
static int resolve_family_nocache(const char *host) {
|
||||
SOCaddr addr;
|
||||
const char *err = NULL;
|
||||
|
||||
if (hts_dns_resolve_nocache2(host, &addr, &err) == NULL)
|
||||
return AF_UNSPEC;
|
||||
return SOCaddr_sinfamily(addr);
|
||||
}
|
||||
|
||||
/* DNS resolver self-test.
   Installs the mock resolver backend, then checks in order: family mapping of
   the uncached resolver, the -@i4 / -@i6 family filter, positive and negative
   caching (one backend hit per host), the multi-address list (count, order,
   HTS_MAXADDRNUM clamp), newhttp_addr() candidate selection by index, and the
   back_connect_fallback_due() decision table.
   The expected addresses ("1.2.3.4", "5.6.7.8", "10.0.0.1"...) come from the
   mock host table defined elsewhere in this file.
   The libc backend and the caller's IPV6_resolver setting are restored before
   returning. Returns the number of failed checks (0 == success). */
int dns_selftests(httrackp *opt) {
  /* IPV6_resolver lives outside this function and is toggled below: remember
     the caller's value so the run does not clobber its family preference. */
  const int saved_resolver = IPV6_resolver;

  failures = 0;
  hts_dns_set_resolver_backend(&mock_backend);

  /* IPv4-only / IPv6-only hosts map to the right family. */
  IPV6_resolver = 0;
  CHECK(resolve_family_nocache("v4only.test") == AF_INET);
  CHECK(resolve_family_nocache("v6only.test") == AF_INET6);

  /* Dual-stack: the single-address API returns the *first* resolved address.
     Both orderings pin selection by position, not a family preference. The
     multi-address API (resolve_all, below) exposes the whole list. */
  CHECK(resolve_family_nocache("dual.test") == AF_INET6); /* v6 listed first */
  CHECK(resolve_family_nocache("dual4.test") == AF_INET); /* v4 listed first */

  /* Unknown host does not resolve. */
  CHECK(resolve_family_nocache("nodns.test") == AF_UNSPEC);

  /* Family filter (-@i4 / -@i6) selects v4 / v6 out of the dual-stack host. */
  IPV6_resolver = 1;
  CHECK(resolve_family_nocache("dual.test") == AF_INET);
  IPV6_resolver = 2;
  CHECK(resolve_family_nocache("dual.test") == AF_INET6);
  IPV6_resolver = 0;

  /* Cached driver resolves a host once and reuses the *same* address. */
  mock_reset_calls();
  {
    SOCaddr a1, a2;
    char ip1[64], ip2[64];
    const char *err = NULL;

    CHECK(hts_dns_resolve2(opt, "v4only.test", &a1, &err) != NULL);
    CHECK(hts_dns_resolve2(opt, "v4only.test", &a2, &err) != NULL);
    CHECK(mock_find("v4only.test")->calls == 1);
    /* the cache returns the right address, not merely a hit for the key */
    SOCaddr_inetntoa(ip1, sizeof(ip1), a1);
    SOCaddr_inetntoa(ip2, sizeof(ip2), a2);
    CHECK(strcmp(ip1, "1.2.3.4") == 0);
    CHECK(strcmp(ip1, ip2) == 0);
  }

  /* A negative result is cached too: a second lookup does not re-resolve. */
  {
    SOCaddr a1, a2;
    const char *err = NULL;

    CHECK(hts_dns_resolve2(opt, "nodns.test", &a1, &err) == NULL);
    CHECK(hts_dns_resolve2(opt, "nodns.test", &a2, &err) == NULL);
    CHECK(mock_find("nodns.test")->calls == 1); /* resolved once, then cached */
  }

  /* Multi-address resolution: count and order are the connect-fallback
     contract. A dead first address is retried against the next, so both must be
     exact. */
  mock_reset_calls();
  {
    SOCaddr addrs[HTS_MAXADDRNUM];
    char ip[64];
    const char *err = NULL;

    /* dual-stack, in resolver order: [0]=v6, [1]=v4 */
    CHECK(hts_dns_resolve_all(opt, "dual.test", addrs, HTS_MAXADDRNUM, &err) ==
          2);
    CHECK(SOCaddr_sinfamily(addrs[0]) == AF_INET6);
    CHECK(SOCaddr_sinfamily(addrs[1]) == AF_INET);
    SOCaddr_inetntoa(ip, sizeof(ip), addrs[1]);
    CHECK(strcmp(ip, "5.6.7.8") == 0);
    CHECK(mock_find("dual.test")->calls ==
          1); /* one backend hit for the list */

    /* single-address host: count 1 */
    CHECK(hts_dns_resolve_all(opt, "v4only.test", addrs, HTS_MAXADDRNUM,
                              &err) == 1);
    SOCaddr_inetntoa(ip, sizeof(ip), addrs[0]);
    CHECK(strcmp(ip, "1.2.3.4") == 0);

    /* does-not-resolve: count 0 (negative), no addresses */
    CHECK(hts_dns_resolve_all(opt, "nodns.test", addrs, HTS_MAXADDRNUM, &err) ==
          0);

    /* more than the cap: the kept list is clamped to HTS_MAXADDRNUM, keeping
       the FIRST addresses in resolver order (not some other window) */
    CHECK(hts_dns_resolve_all(opt, "many.test", addrs, HTS_MAXADDRNUM, &err) ==
          HTS_MAXADDRNUM);
    SOCaddr_inetntoa(ip, sizeof(ip), addrs[0]);
    CHECK(strcmp(ip, "10.0.0.1") == 0);
    SOCaddr_inetntoa(ip, sizeof(ip), addrs[HTS_MAXADDRNUM - 1]);
    CHECK(strcmp(ip, "10.0.0.4") == 0);

    /* family filter still applies through the list path */
    IPV6_resolver = 1;
    CHECK(hts_dns_resolve_all(opt, "dual4.test", addrs, HTS_MAXADDRNUM, &err) ==
          1);
    CHECK(SOCaddr_sinfamily(addrs[0]) == AF_INET);
    IPV6_resolver = 0;
  }

  /* newhttp_addr() must connect to the addr_index-th address, not always the
     first: this is what back_connect_next relies on to reach the fallback. */
  {
    htsblk r;
    int count = -1;
    T_SOC s;

    hts_init_htsblk(&r);
    s = newhttp_addr(opt, "dual.test", &r, 80, 0, 0, &count);
    CHECK(count == 2);
    CHECK(SOCaddr_sinfamily(r.address) == AF_INET6); /* index 0 = v6 */
    if (s != INVALID_SOCKET)
      deletesoc(s);

    hts_init_htsblk(&r);
    count = -1;
    s = newhttp_addr(opt, "dual.test", &r, 80, 0, 1, &count);
    CHECK(count == 2);
    CHECK(SOCaddr_sinfamily(r.address) == AF_INET); /* index 1 = v4 */
    if (s != INVALID_SOCKET)
      deletesoc(s);

    /* out-of-range index: no address selected (address stays unset) */
    hts_init_htsblk(&r);
    s = newhttp_addr(opt, "dual.test", &r, 80, 0, 2, NULL);
    CHECK(s == INVALID_SOCKET);
    if (s != INVALID_SOCKET)
      deletesoc(s);
  }

  /* Connect-fallback decision (consumer of the multi-address list): when a
     stuck connect should abandon the current address for the next one. */
  {
    /* no fallback for the last/only candidate, whatever the elapsed time */
    CHECK(back_connect_fallback_due(0, 1, 9999, 120) == 0);
    CHECK(back_connect_fallback_due(1, 2, 9999, 120) == 0);
    CHECK(back_connect_fallback_due(3, 4, 9999, 120) == 0);
    /* fallback available: wait the per-candidate deadline (cap 10s here) */
    CHECK(back_connect_fallback_due(0, 2, 9, 120) == 0);
    CHECK(back_connect_fallback_due(0, 2, 10, 120) == 1);
    CHECK(back_connect_fallback_due(2, 4, 10, 120) == 1);
    /* a shorter slot timeout shortens the deadline (min(timeout, cap)) */
    CHECK(back_connect_fallback_due(0, 2, 4, 5) == 0);
    CHECK(back_connect_fallback_due(0, 2, 5, 5) == 1);
    /* no timeout management: never force a fallback */
    CHECK(back_connect_fallback_due(0, 2, 9999, 0) == 0);
  }

  /* hand back the libc resolver and the caller's family preference */
  hts_dns_set_resolver_backend(NULL);
  IPV6_resolver = saved_resolver;
  return failures;
}
|
||||
|
||||
#else
|
||||
|
||||
int dns_selftests(httrackp *opt) {
|
||||
(void) opt;
|
||||
return 0; /* resolver seam only exists in the IPv6 build */
|
||||
}
|
||||
|
||||
#endif
|
||||
51
src/htsdns_selftest.h
Normal file
51
src/htsdns_selftest.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/* ------------------------------------------------------------ */
|
||||
/*
|
||||
HTTrack Website Copier, Offline Browser for Windows and Unix
|
||||
Copyright (C) 2026 Xavier Roche and other contributors
|
||||
|
||||
SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Ethical use: we kindly ask that you NOT use this software to harvest email
|
||||
addresses or to collect any other private information about people. Doing so
|
||||
would dishonor our work and waste the many hours we have spent on it.
|
||||
|
||||
Please visit our Website: http://www.httrack.com
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
/* File: htsdns_selftest.h */
|
||||
/* Author: Xavier Roche */
|
||||
/* ------------------------------------------------------------ */
|
||||
|
||||
#ifndef HTSDNS_SELFTEST_DEFH
|
||||
#define HTSDNS_SELFTEST_DEFH
|
||||
|
||||
#ifdef HTS_INTERNAL_BYTECODE
|
||||
|
||||
#ifndef HTS_DEF_FWSTRUCT_httrackp
|
||||
#define HTS_DEF_FWSTRUCT_httrackp
|
||||
typedef struct httrackp httrackp;
|
||||
#endif
|
||||
|
||||
/* Drive the DNS resolver and cache through a scripted (mock) getaddrinfo,
   asserting address family, single-address selection, negative caching, the
   IPv4/IPv6 family filter, that a cached host is resolved only once, the
   multi-address list (count, order and HTS_MAXADDRNUM clamp), newhttp_addr()
   candidate selection by index, and the connect-fallback decision.
   Returns the number of failed checks (0 == success). */
|
||||
int dns_selftests(httrackp *opt);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -43,8 +43,8 @@ Please visit our Website: http://www.httrack.com
|
||||
configure.ac, decoupled from these). VERSION is the display form, VERSIONID
|
||||
the dotted numeric form, AFF_VERSION the short form shown in footers,
|
||||
LIB_VERSION the data/cache format generation. */
|
||||
#define HTTRACK_VERSION "3.49-8"
|
||||
#define HTTRACK_VERSIONID "3.49.8"
|
||||
#define HTTRACK_VERSION "3.49-9"
|
||||
#define HTTRACK_VERSIONID "3.49.9"
|
||||
#define HTTRACK_AFF_VERSION "3.x"
|
||||
#define HTTRACK_LIB_VERSION "2.0"
|
||||
|
||||
@@ -247,13 +247,23 @@ Please visit our Website: http://www.httrack.com
|
||||
#define HTS_NOPARAM "(none)"
|
||||
#define HTS_NOPARAM2 "\"(none)\""
|
||||
|
||||
/* Boolean flag for option fields and API yes/no returns. An enum (not C bool)
|
||||
so it stays int-sized: option fields keep the httrackp layout/ABI, and a
|
||||
return type stays compatible with the int it replaces. */
|
||||
/* Boolean flag for option fields and API yes/no returns. Int-backed, not an
|
||||
enum: an enum makes C++ reject `field = 1` / `f(0)` on the exported fields
|
||||
and params. Int-sized, so the httrackp layout and the ABI are unchanged. */
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_boolean
|
||||
#define HTS_DEF_DEFSTRUCT_hts_boolean
|
||||
|
||||
typedef enum hts_boolean { HTS_FALSE = 0, HTS_TRUE = 1 } hts_boolean;
|
||||
typedef int hts_boolean;
|
||||
#define HTS_FALSE 0
|
||||
#define HTS_TRUE 1
|
||||
#endif
|
||||
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_tristate
|
||||
#define HTS_DEF_DEFSTRUCT_hts_tristate
|
||||
/* Tri-state hts_boolean: HTS_DEFAULT (-1) = "unspecified" (copy_htsopt leaves
|
||||
the target untouched); HTS_FALSE/HTS_TRUE = off/on. */
|
||||
typedef int hts_tristate;
|
||||
#define HTS_DEFAULT (-1)
|
||||
#endif
|
||||
|
||||
/* Larger/smaller of two values. Macros: arguments are evaluated twice. */
|
||||
@@ -398,6 +408,10 @@ typedef int T_SOC;
|
||||
/* Buffer size for a printed network address (IPv4 or IPv6, NUL included). */
|
||||
#define HTS_MAXADDRLEN 64
|
||||
|
||||
/* Max resolved addresses kept per host for connect fallback (dead IPv6 etc.).
|
||||
*/
|
||||
#define HTS_MAXADDRNUM 4
|
||||
|
||||
#ifdef _WIN32
|
||||
#else
|
||||
#define __cdecl
|
||||
|
||||
423
src/htslib.c
423
src/htslib.c
@@ -2297,14 +2297,27 @@ htsblk http_test(httrackp * opt, const char *adr, const char *fil, char *loc) {
|
||||
// peut ouvrir avec des connect() non bloquants: waitconnect=0/1
|
||||
T_SOC newhttp(httrackp * opt, const char *_iadr, htsblk * retour, int port,
|
||||
int waitconnect) {
|
||||
return newhttp_addr(opt, _iadr, retour, port, waitconnect, 0, NULL);
|
||||
}
|
||||
|
||||
T_SOC newhttp_addr(httrackp *opt, const char *_iadr, htsblk *retour, int port,
|
||||
int waitconnect, int addr_index, int *addr_count) {
|
||||
T_SOC soc; // descipteur de la socket
|
||||
|
||||
if (addr_count != NULL) {
|
||||
*addr_count = 0;
|
||||
}
|
||||
|
||||
if (strcmp(_iadr, "file://") != 0) { /* non fichier */
|
||||
SOCaddr server;
|
||||
SOCaddr addrs[HTS_MAXADDRNUM];
|
||||
int naddr;
|
||||
const char *error = "unknown error";
|
||||
|
||||
// tester un éventuel id:pass et virer id:pass@ si détecté
|
||||
const char *const iadr = jump_identification_const(_iadr);
|
||||
const char *resolve_host = iadr;
|
||||
char BIGSTK iadr2[HTS_URLMAXSIZE * 2];
|
||||
|
||||
SOCaddr_clear(server);
|
||||
|
||||
@@ -2326,7 +2339,6 @@ T_SOC newhttp(httrackp * opt, const char *_iadr, htsblk * retour, int port,
|
||||
#endif
|
||||
|
||||
if (a != NULL) {
|
||||
char BIGSTK iadr2[HTS_URLMAXSIZE * 2];
|
||||
int i = -1;
|
||||
|
||||
iadr2[0] = '\0';
|
||||
@@ -2337,18 +2349,19 @@ T_SOC newhttp(httrackp * opt, const char *_iadr, htsblk * retour, int port,
|
||||
|
||||
// adresse véritable (sans :xx)
|
||||
strncatbuff(iadr2, iadr, (int) (a - iadr));
|
||||
|
||||
// adresse sans le :xx
|
||||
hts_dns_resolve2(opt, iadr2, &server, &error);
|
||||
|
||||
} else {
|
||||
|
||||
// adresse normale (port par défaut par la suite)
|
||||
hts_dns_resolve2(opt, iadr, &server, &error);
|
||||
resolve_host = iadr2;
|
||||
}
|
||||
}
|
||||
|
||||
} else { // port défini
|
||||
hts_dns_resolve2(opt, iadr, &server, &error);
|
||||
// resolve the full address list and pick the requested candidate; the
|
||||
// scheduler retries the next index when a connect fails (dead IPv6 etc.)
|
||||
naddr =
|
||||
hts_dns_resolve_all(opt, resolve_host, addrs, HTS_MAXADDRNUM, &error);
|
||||
if (addr_count != NULL) {
|
||||
*addr_count = naddr;
|
||||
}
|
||||
if (addr_index >= 0 && addr_index < naddr) {
|
||||
SOCaddr_copy_SOCaddr(server, addrs[addr_index]);
|
||||
}
|
||||
|
||||
if (!SOCaddr_is_valid(server)) {
|
||||
@@ -4784,14 +4797,14 @@ void hts_cache_free(t_dnscache *const root) {
|
||||
// -1: status? 0: libérer 1:locker
|
||||
|
||||
// MUST BE LOCKED
|
||||
// routine pour le cache - retour optionnel à donner à chaque fois
|
||||
// NULL: nom non encore testé dans le cache
|
||||
// si h_length==0 alors le nom n'existe pas dans le dns
|
||||
static SOCaddr* hts_ghbn(const t_dnscache *cache, const char *const iadr, SOCaddr *const addr) {
|
||||
assertf(addr != NULL);
|
||||
// Look up iadr in the DNS cache, filling out[0..min(count,max)-1].
|
||||
// Returns: -1 not yet tested; 0 negative-cached (not in DNS); >0 address count.
|
||||
static int hts_ghbn_all(const t_dnscache *cache, const char *const iadr,
|
||||
SOCaddr *const out, const int max) {
|
||||
assertf(out != NULL);
|
||||
assertf(iadr != NULL);
|
||||
if (*iadr == '\0') {
|
||||
return NULL;
|
||||
return -1;
|
||||
}
|
||||
/* first entry is empty */
|
||||
if (cache->iadr == NULL) {
|
||||
@@ -4802,95 +4815,263 @@ static SOCaddr* hts_ghbn(const t_dnscache *cache, const char *const iadr, SOCadd
|
||||
assertf(cache->iadr != NULL);
|
||||
assertf(cache->iadr == (const char*) cache + sizeof(t_dnscache));
|
||||
if (strcmp(cache->iadr, iadr) == 0) { // ok trouvé
|
||||
if (cache->host_length != 0) { // entrée valide
|
||||
assertf(cache->host_length <= sizeof(cache->host_addr));
|
||||
SOCaddr_copyaddr2(*addr, cache->host_addr, cache->host_length);
|
||||
return addr;
|
||||
} else { // erreur dans le dns, déja vérifié
|
||||
SOCaddr_clear(*addr);
|
||||
return addr;
|
||||
int i;
|
||||
|
||||
assertf(cache->host_count <= HTS_MAXADDRNUM);
|
||||
for (i = 0; i < cache->host_count && i < max; i++) {
|
||||
assertf(cache->host_length[i] <= sizeof(cache->host_addr[i]));
|
||||
SOCaddr_copyaddr2(out[i], cache->host_addr[i], cache->host_length[i]);
|
||||
}
|
||||
return cache->host_count;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
static SOCaddr* hts_dns_resolve_nocache2_(const char *const hostname,
|
||||
SOCaddr *const addr,
|
||||
const char **error) {
|
||||
#if HTS_INET6 != 0
|
||||
/* Active resolver backend; defaults to the libc resolver. The self-test
|
||||
reroutes it to script DNS answers in-process (see
|
||||
hts_dns_set_resolver_backend). */
|
||||
static const hts_resolver_backend hts_resolver_libc = {getaddrinfo,
|
||||
freeaddrinfo};
|
||||
static const hts_resolver_backend *hts_resolver = &hts_resolver_libc;
|
||||
|
||||
void hts_dns_set_resolver_backend(const hts_resolver_backend *backend) {
|
||||
hts_resolver = (backend != NULL) ? backend : &hts_resolver_libc;
|
||||
}
|
||||
|
||||
/* Debug/test hook: HTTRACK_DEBUG_RESOLVE="host:ip[,ip...]" pins the resolution
|
||||
of `host` to the listed addresses (curl --resolve style), so the connect
|
||||
fallback can be exercised deterministically (a dead address first, a live one
|
||||
next). Any other host resolves normally. Below: an addrinfo backend that owns
|
||||
its chain (its own freeaddrinfo), so a synthesized and a delegated result
|
||||
free the same way. */
|
||||
|
||||
/* Deep-copy a libc addrinfo chain into our own allocations. */
|
||||
static struct addrinfo *resolver_dup_chain(const struct addrinfo *src) {
|
||||
struct addrinfo *head = NULL, *tail = NULL;
|
||||
|
||||
for (; src != NULL; src = src->ai_next) {
|
||||
struct addrinfo *const ai = calloct(1, sizeof(*ai));
|
||||
|
||||
ai->ai_family = src->ai_family;
|
||||
ai->ai_socktype = src->ai_socktype;
|
||||
ai->ai_protocol = src->ai_protocol;
|
||||
ai->ai_addrlen = src->ai_addrlen;
|
||||
ai->ai_addr = malloct(src->ai_addrlen);
|
||||
memcpy(ai->ai_addr, src->ai_addr, src->ai_addrlen);
|
||||
if (head == NULL)
|
||||
head = ai;
|
||||
else
|
||||
tail->ai_next = ai;
|
||||
tail = ai;
|
||||
}
|
||||
return head;
|
||||
}
|
||||
|
||||
/* Build one addrinfo node from an IPv4/IPv6 literal, or NULL if it does not
|
||||
parse or is filtered out by want_family (AF_INET/AF_INET6/PF_UNSPEC). */
|
||||
static struct addrinfo *resolver_make_ai(const char *ip, int want_family) {
|
||||
struct addrinfo *ai;
|
||||
|
||||
if (strchr(ip, ':') != NULL) { // IPv6 literal
|
||||
struct sockaddr_in6 sa6;
|
||||
|
||||
if (want_family != PF_UNSPEC && want_family != AF_INET6)
|
||||
return NULL;
|
||||
memset(&sa6, 0, sizeof(sa6));
|
||||
if (inet_pton(AF_INET6, ip, &sa6.sin6_addr) != 1)
|
||||
return NULL;
|
||||
sa6.sin6_family = AF_INET6;
|
||||
ai = calloct(1, sizeof(*ai));
|
||||
ai->ai_family = AF_INET6;
|
||||
ai->ai_addrlen = sizeof(sa6);
|
||||
ai->ai_addr = malloct(sizeof(sa6));
|
||||
memcpy(ai->ai_addr, &sa6, sizeof(sa6));
|
||||
} else { // IPv4 literal
|
||||
struct sockaddr_in sa;
|
||||
|
||||
if (want_family != PF_UNSPEC && want_family != AF_INET)
|
||||
return NULL;
|
||||
memset(&sa, 0, sizeof(sa));
|
||||
if (inet_pton(AF_INET, ip, &sa.sin_addr) != 1)
|
||||
return NULL;
|
||||
sa.sin_family = AF_INET;
|
||||
ai = calloct(1, sizeof(*ai));
|
||||
ai->ai_family = AF_INET;
|
||||
ai->ai_addrlen = sizeof(sa);
|
||||
ai->ai_addr = malloct(sizeof(sa));
|
||||
memcpy(ai->ai_addr, &sa, sizeof(sa));
|
||||
}
|
||||
return ai;
|
||||
}
|
||||
|
||||
static void override_freeaddrinfo(struct addrinfo *res) {
|
||||
while (res != NULL) {
|
||||
struct addrinfo *const next = res->ai_next;
|
||||
|
||||
freet(res->ai_addr);
|
||||
freet(res);
|
||||
res = next;
|
||||
}
|
||||
}
|
||||
|
||||
static int override_getaddrinfo(const char *node, const char *service,
|
||||
const struct addrinfo *hints,
|
||||
struct addrinfo **res) {
|
||||
const char *const spec = getenv("HTTRACK_DEBUG_RESOLVE");
|
||||
const int want = (hints != NULL) ? hints->ai_family : PF_UNSPEC;
|
||||
const char *colon;
|
||||
|
||||
*res = NULL;
|
||||
if (spec != NULL && node != NULL && (colon = strchr(spec, ':')) != NULL &&
|
||||
(size_t) (colon - spec) == strlen(node) &&
|
||||
strncmp(spec, node, colon - spec) == 0) {
|
||||
struct addrinfo *head = NULL, *tail = NULL;
|
||||
char buf[256];
|
||||
char *p;
|
||||
|
||||
buf[0] = '\0';
|
||||
strncatbuff(buf, colon + 1, sizeof(buf) - 1);
|
||||
for (p = strtok(buf, ","); p != NULL; p = strtok(NULL, ",")) {
|
||||
struct addrinfo *const ai = resolver_make_ai(p, want);
|
||||
|
||||
if (ai != NULL) {
|
||||
if (head == NULL)
|
||||
head = ai;
|
||||
else
|
||||
tail->ai_next = ai;
|
||||
tail = ai;
|
||||
}
|
||||
}
|
||||
if (head == NULL)
|
||||
return EAI_NONAME;
|
||||
*res = head;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* not overridden: delegate to libc, copying into our owned format */
|
||||
{
|
||||
#if HTS_INET6==0
|
||||
/* IPv4 resolver */
|
||||
struct hostent *const hp = gethostbyname(hostname);
|
||||
struct addrinfo *sys = NULL;
|
||||
int gerr = getaddrinfo(node, service, hints, &sys);
|
||||
|
||||
if (hp != NULL) {
|
||||
SOCaddr_copyaddr2(addr, hp->h_addr_list[0], hp->h_length);
|
||||
return SOCaddr_is_valid(addr) ? &addr : NULL;
|
||||
} else {
|
||||
SOCaddr_clear(*addr);
|
||||
}
|
||||
#else
|
||||
/* IPv6 resolver */
|
||||
struct addrinfo *res = NULL;
|
||||
struct addrinfo hints;
|
||||
int gerr;
|
||||
|
||||
SOCaddr_clear(*addr);
|
||||
memset(&hints, 0, sizeof(hints));
|
||||
if (IPV6_resolver == 1) // V4 only (for bogus V6 entries)
|
||||
hints.ai_family = PF_INET;
|
||||
else if (IPV6_resolver == 2) // V6 only (for testing V6 only)
|
||||
hints.ai_family = PF_INET6;
|
||||
else // V4 + V6
|
||||
hints.ai_family = PF_UNSPEC;
|
||||
hints.ai_socktype = SOCK_STREAM;
|
||||
hints.ai_protocol = IPPROTO_TCP;
|
||||
if ( ( gerr = getaddrinfo(hostname, NULL, &hints, &res) ) == 0) {
|
||||
if (res != NULL) {
|
||||
if (res->ai_addr != NULL && res->ai_addrlen != 0) {
|
||||
SOCaddr_copyaddr2(*addr, res->ai_addr, res->ai_addrlen);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (error != NULL) {
|
||||
*error = gai_strerror(gerr);
|
||||
}
|
||||
}
|
||||
if (res) {
|
||||
freeaddrinfo(res);
|
||||
}
|
||||
#endif
|
||||
if (gerr != 0)
|
||||
return gerr;
|
||||
*res = resolver_dup_chain(sys);
|
||||
freeaddrinfo(sys);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return SOCaddr_is_valid(*addr) ? addr : NULL;
|
||||
}
|
||||
|
||||
HTSEXT_API SOCaddr* hts_dns_resolve_nocache2(const char *const hostname,
|
||||
SOCaddr *const addr, const char **error) {
|
||||
/* Protection */
|
||||
if (!strnotempty(hostname)) {
|
||||
return NULL;
|
||||
}
|
||||
static const hts_resolver_backend hts_resolver_override = {
|
||||
override_getaddrinfo, override_freeaddrinfo};
|
||||
|
||||
/*
|
||||
Strip [] if any : [3ffe:b80:1234:1::1]
|
||||
The resolver doesn't seem to handle IP6 addresses in brackets
|
||||
*/
|
||||
/* Install the env override once, unless a backend was already set (self-test).
|
||||
*/
|
||||
static void hts_resolver_check_env(void) {
|
||||
static int checked = 0;
|
||||
|
||||
if (!checked) {
|
||||
checked = 1;
|
||||
if (hts_resolver == &hts_resolver_libc &&
|
||||
getenv("HTTRACK_DEBUG_RESOLVE") != NULL) {
|
||||
hts_resolver = &hts_resolver_override;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Resolve hostname into up to max addresses (resolver/RFC 6724 order), no
|
||||
// cache. Returns the count copied into out[0..count-1]; 0 = does not resolve.
|
||||
static int hts_dns_resolve_nocache_list_(const char *const hostname,
|
||||
SOCaddr *const out, const int max,
|
||||
const char **error) {
|
||||
int count = 0;
|
||||
|
||||
#if HTS_INET6==0
|
||||
/* IPv4 resolver */
|
||||
struct hostent *const hp = gethostbyname(hostname);
|
||||
|
||||
if (hp != NULL) {
|
||||
char **h;
|
||||
|
||||
for (h = hp->h_addr_list; count < max && h != NULL && *h != NULL; h++) {
|
||||
SOCaddr_clear(out[count]);
|
||||
SOCaddr_copyaddr2(out[count], *h, hp->h_length);
|
||||
if (SOCaddr_is_valid(out[count]))
|
||||
count++;
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* IPv6 resolver */
|
||||
struct addrinfo *res = NULL, *cur;
|
||||
struct addrinfo hints;
|
||||
int gerr;
|
||||
|
||||
hts_resolver_check_env();
|
||||
memset(&hints, 0, sizeof(hints));
|
||||
if (IPV6_resolver == 1) // V4 only (for bogus V6 entries)
|
||||
hints.ai_family = PF_INET;
|
||||
else if (IPV6_resolver == 2) // V6 only (for testing V6 only)
|
||||
hints.ai_family = PF_INET6;
|
||||
else // V4 + V6
|
||||
hints.ai_family = PF_UNSPEC;
|
||||
hints.ai_socktype = SOCK_STREAM;
|
||||
hints.ai_protocol = IPPROTO_TCP;
|
||||
if ((gerr = hts_resolver->getaddrinfo(hostname, NULL, &hints, &res)) == 0) {
|
||||
for (cur = res; cur != NULL && count < max; cur = cur->ai_next) {
|
||||
if (cur->ai_addr != NULL && cur->ai_addrlen != 0) {
|
||||
SOCaddr_clear(out[count]);
|
||||
SOCaddr_copyaddr2(out[count], cur->ai_addr, cur->ai_addrlen);
|
||||
if (SOCaddr_is_valid(out[count]))
|
||||
count++;
|
||||
}
|
||||
}
|
||||
} else if (error != NULL) {
|
||||
*error = gai_strerror(gerr);
|
||||
}
|
||||
if (res) {
|
||||
hts_resolver->freeaddrinfo(res);
|
||||
}
|
||||
#endif
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
// Strip [] around a literal IPv6 ([3ffe:b80:1234:1::1]) the resolver won't
|
||||
// take, then resolve into a list. Returns the count.
|
||||
static int hts_dns_resolve_nocache_list(const char *const hostname,
|
||||
SOCaddr *const out, const int max,
|
||||
const char **error) {
|
||||
if (!strnotempty(hostname) || max <= 0) {
|
||||
return 0;
|
||||
}
|
||||
if ((hostname[0] == '[') && (hostname[strlen(hostname) - 1] == ']')) {
|
||||
SOCaddr *ret;
|
||||
size_t size = strlen(hostname);
|
||||
char *copy = malloct(size + 1);
|
||||
int count;
|
||||
|
||||
assertf(copy != NULL);
|
||||
copy[0] = '\0';
|
||||
strncat(copy, hostname + 1, size - 2);
|
||||
ret = hts_dns_resolve_nocache2_(copy, addr, error);
|
||||
count = hts_dns_resolve_nocache_list_(copy, out, max, error);
|
||||
freet(copy);
|
||||
return ret;
|
||||
return count;
|
||||
} else {
|
||||
return hts_dns_resolve_nocache2_(hostname, addr, error);
|
||||
return hts_dns_resolve_nocache_list_(hostname, out, max, error);
|
||||
}
|
||||
}
|
||||
|
||||
/* Uncached single-address resolution: *addr is cleared, then receives the
   first address resolved for hostname. Returns addr on success, NULL if the
   name does not resolve or the stored address is not valid; error is handed
   through to the list resolver. */
HTSEXT_API SOCaddr *hts_dns_resolve_nocache2(const char *const hostname,
                                             SOCaddr *const addr,
                                             const char **error) {
  int found;

  SOCaddr_clear(*addr);
  /* a one-slot list: only the first address is wanted */
  found = hts_dns_resolve_nocache_list(hostname, addr, 1, error);
  if (found <= 0 || !SOCaddr_is_valid(*addr)) {
    return NULL;
  }
  return addr;
}
|
||||
|
||||
HTSEXT_API SOCaddr* hts_dns_resolve_nocache(const char *const hostname, SOCaddr *const addr) {
|
||||
return hts_dns_resolve_nocache2(hostname, addr, NULL);
|
||||
}
|
||||
@@ -4901,16 +5082,18 @@ HTSEXT_API int check_hostname_dns(const char *const hostname) {
|
||||
}
|
||||
|
||||
// Needs locking
|
||||
// cache dns interne à HTS // ** FREE A FAIRE sur la chaine
|
||||
static SOCaddr* hts_dns_resolve_(httrackp * opt, const char *_iadr,
|
||||
SOCaddr *const addr, const char **error) {
|
||||
// Internal DNS cache. Fill out[0..count-1] with up to max addresses for _iadr,
|
||||
// resolving (and caching the full list) on a miss. Returns the count.
|
||||
static int hts_dns_resolve_list_(httrackp *opt, const char *_iadr,
|
||||
SOCaddr *const out, const int max,
|
||||
const char **error) {
|
||||
char BIGSTK iadr[HTS_URLMAXSIZE * 2];
|
||||
t_dnscache *cache = hts_cache(opt); // adresse du cache
|
||||
SOCaddr *sa;
|
||||
int count;
|
||||
|
||||
assertf(opt != NULL);
|
||||
assertf(_iadr != NULL);
|
||||
assertf(addr != NULL);
|
||||
assertf(out != NULL);
|
||||
|
||||
strcpybuff(iadr, jump_identification_const(_iadr));
|
||||
// couper éventuel :
|
||||
@@ -4922,11 +5105,13 @@ static SOCaddr* hts_dns_resolve_(httrackp * opt, const char *_iadr,
|
||||
}
|
||||
|
||||
/* get IP from the dns cache */
|
||||
sa = hts_ghbn(cache, iadr, addr);
|
||||
if (sa != NULL) {
|
||||
return SOCaddr_is_valid(*sa) ? sa : NULL;
|
||||
} else { // non présent dans le cache dns, tester
|
||||
count = hts_ghbn_all(cache, iadr, out, max);
|
||||
if (count >= 0) { // cache hit (0 == negative-cached)
|
||||
return count;
|
||||
} else { // non présent dans le cache dns, tester
|
||||
const size_t iadr_len = strlen(iadr) + 1;
|
||||
SOCaddr resolved[HTS_MAXADDRNUM];
|
||||
int i;
|
||||
|
||||
// find queue
|
||||
for(; cache->next != NULL; cache = cache->next) ;
|
||||
@@ -4935,7 +5120,7 @@ static SOCaddr* hts_dns_resolve_(httrackp * opt, const char *_iadr,
|
||||
printf("resolving (not cached) %s\n", iadr);
|
||||
#endif
|
||||
|
||||
sa = hts_dns_resolve_nocache2(iadr, addr, error); // calculer IP host
|
||||
count = hts_dns_resolve_nocache_list(iadr, resolved, HTS_MAXADDRNUM, error);
|
||||
|
||||
#if HTS_WIDE_DEBUG
|
||||
DEBUG_W("gethostbyname done\n");
|
||||
@@ -4949,28 +5134,45 @@ static SOCaddr* hts_dns_resolve_(httrackp * opt, const char *_iadr,
|
||||
char *const str = block + sizeof(t_dnscache);
|
||||
memcpy(str, iadr, iadr_len);
|
||||
next->iadr = str;
|
||||
if (sa != NULL) {
|
||||
next->host_length = SOCaddr_size(*sa);
|
||||
assertf(next->host_length <= sizeof(next->host_addr));
|
||||
memcpy(next->host_addr, &SOCaddr_sockaddr(*sa), next->host_length);
|
||||
} else {
|
||||
next->host_length = 0; // non existant dans le dns
|
||||
next->host_count = count;
|
||||
for (i = 0; i < count; i++) {
|
||||
next->host_length[i] = SOCaddr_size(resolved[i]);
|
||||
assertf(next->host_length[i] <= sizeof(next->host_addr[i]));
|
||||
memcpy(next->host_addr[i], &SOCaddr_sockaddr(resolved[i]),
|
||||
next->host_length[i]);
|
||||
}
|
||||
next->next = NULL;
|
||||
return sa;
|
||||
}
|
||||
|
||||
/* return result if any */
|
||||
return sa;
|
||||
} // retour hp du cache
|
||||
/* copy result to caller (cache store may have failed; result still valid)
|
||||
*/
|
||||
for (i = 0; i < count && i < max; i++) {
|
||||
SOCaddr_copy_SOCaddr(out[i], resolved[i]);
|
||||
}
|
||||
return count;
|
||||
} // retour hp du cache
|
||||
}
|
||||
|
||||
SOCaddr* hts_dns_resolve2(httrackp * opt, const char *_iadr, SOCaddr *const addr, const char **error) {
|
||||
SOCaddr *ret;
|
||||
int hts_dns_resolve_all(httrackp *opt, const char *iadr, SOCaddr *out, int max,
|
||||
const char **error) {
|
||||
int count;
|
||||
|
||||
if (!strnotempty(iadr) || max <= 0) {
|
||||
return 0;
|
||||
}
|
||||
hts_mutexlock(&opt->state.lock);
|
||||
ret = hts_dns_resolve_(opt, _iadr, addr, error);
|
||||
count = hts_dns_resolve_list_(opt, iadr, out, max, error);
|
||||
hts_mutexrelease(&opt->state.lock);
|
||||
return ret;
|
||||
return count;
|
||||
}
|
||||
|
||||
/* Cached single-address resolution: *addr is cleared, then receives the first
   address returned by hts_dns_resolve_all() for _iadr. Returns addr on success,
   NULL if the name does not resolve or the stored address is not valid; error
   is handed through to hts_dns_resolve_all(). */
SOCaddr *hts_dns_resolve2(httrackp *opt, const char *_iadr, SOCaddr *const addr,
                          const char **error) {
  int found;

  SOCaddr_clear(*addr);
  /* a one-slot list: only the first address is wanted */
  found = hts_dns_resolve_all(opt, _iadr, addr, 1, error);
  if (found <= 0 || !SOCaddr_is_valid(*addr)) {
    return NULL;
  }
  return addr;
}
|
||||
|
||||
SOCaddr* hts_dns_resolve(httrackp * opt, const char *_iadr, SOCaddr *const addr) {
|
||||
@@ -5305,6 +5507,11 @@ static int get_loglevel_from_coucal(coucal_loglevel level) {
|
||||
static void default_coucal_loghandler(void *arg, coucal_loglevel level,
|
||||
const char* format, va_list args) {
|
||||
|
||||
/* informational chatter (hashtable stats on delete, etc.) only when
|
||||
debugging; keep warnings and critical errors always visible. */
|
||||
if (level > coucal_log_warning && hts_dgb_init <= 0) {
|
||||
return;
|
||||
}
|
||||
if (level <= coucal_log_warning) {
|
||||
fprintf(stderr, "** warning: ");
|
||||
}
|
||||
|
||||
25
src/htslib.h
25
src/htslib.h
@@ -150,8 +150,11 @@ typedef struct t_dnscache t_dnscache;
|
||||
struct t_dnscache {
|
||||
struct t_dnscache *next;
|
||||
const char *iadr;
|
||||
size_t host_length; // length ; (4 or 16) ; 0 for error
|
||||
char host_addr[HTS_MAXADDRLEN];
|
||||
// resolved addresses, in resolver (RFC 6724) order; host_count==0 means the
|
||||
// name does not resolve (negative cache). host_count<=HTS_MAXADDRNUM.
|
||||
int host_count;
|
||||
size_t host_length[HTS_MAXADDRNUM]; // sockaddr length of each (16 or 28)
|
||||
char host_addr[HTS_MAXADDRNUM][HTS_MAXADDRLEN];
|
||||
};
|
||||
|
||||
/* Library internal definitions */
|
||||
@@ -191,6 +194,13 @@ int http_cookie_header_selftest(t_cookie *cookie, const char *domain,
|
||||
//int newhttp(char* iadr,char* err=NULL);
|
||||
T_SOC newhttp(httrackp * opt, const char *iadr, htsblk * retour, int port,
|
||||
int waitconnect);
|
||||
/* Like newhttp(), but connect to the addr_index-th resolved address of the host
|
||||
(0-based) instead of always the first; *addr_count, if non-NULL, is set to
|
||||
the total resolved addresses. newhttp() == newhttp_addr(...,0,NULL). Used by
|
||||
the slot scheduler to try the next address when a connect fails (dead IPv6
|
||||
etc.). */
|
||||
T_SOC newhttp_addr(httrackp *opt, const char *iadr, htsblk *retour, int port,
|
||||
int waitconnect, int addr_index, int *addr_count);
|
||||
HTS_INLINE void deletehttp(htsblk * r);
|
||||
HTS_INLINE int deleteaddr(htsblk * r);
|
||||
HTS_INLINE void deletesoc(T_SOC soc);
|
||||
@@ -215,9 +225,14 @@ void treatfirstline(htsblk * retour, const char *rcvd);
|
||||
|
||||
// sous-fonctions
|
||||
LLint http_xfread1(htsblk * r, int bufl);
|
||||
HTS_INLINE SOCaddr* hts_dns_resolve2(httrackp * opt, const char *iadr,
|
||||
SOCaddr *const addr,
|
||||
const char **error);
|
||||
/* Cached resolver: fill out[0..count-1] with up to max addresses for iadr (in
|
||||
resolver order), returning the count (0 = does not resolve, negative-cached).
|
||||
Resolves once per host; later calls read the DNS cache. Must hold no lock
|
||||
(brackets opt->state.lock itself). */
|
||||
int hts_dns_resolve_all(httrackp *opt, const char *iadr, SOCaddr *out, int max,
|
||||
const char **error);
|
||||
HTS_INLINE SOCaddr *hts_dns_resolve2(httrackp *opt, const char *iadr,
|
||||
SOCaddr *const addr, const char **error);
|
||||
HTS_INLINE SOCaddr* hts_dns_resolve(httrackp * opt, const char *iadr,
|
||||
SOCaddr *const addr);
|
||||
HTSEXT_API SOCaddr* hts_dns_resolve_nocache2(const char *const hostname,
|
||||
|
||||
16
src/htsnet.h
16
src/htsnet.h
@@ -304,6 +304,22 @@ static HTS_UNUSED void SOCaddr_inetntoa_(char *namebuf, size_t namebuflen,
|
||||
/** Length type for socket APIs (getsockname, accept, ...). */
|
||||
typedef socklen_t SOClen;
|
||||
|
||||
#if HTS_INET6 != 0
|
||||
/** Resolver backend: getaddrinfo/freeaddrinfo as a swappable pair, so the
|
||||
self-test can script DNS answers (families, multiplicity, errors)
|
||||
in-process. The free function must match its getaddrinfo (a fake allocates
|
||||
its own chain), hence the pair. */
|
||||
typedef struct hts_resolver_backend {
|
||||
/* lookup entry point; takes the same parameter list as getaddrinfo() */
int (*getaddrinfo)(const char *node, const char *service,
|
||||
const struct addrinfo *hints, struct addrinfo **res);
|
||||
/* releases a result chain; must be the counterpart of the getaddrinfo
   member above, since a fake backend allocates its own chain */
void (*freeaddrinfo)(struct addrinfo *res);
|
||||
} hts_resolver_backend;
|
||||
|
||||
/** Install a resolver backend for the process; NULL restores the libc default.
|
||||
Test-only seam, not thread-safe; callers must serialize against resolves. */
|
||||
void hts_dns_set_resolver_backend(const hts_resolver_backend *backend);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
10
src/htsopt.h
10
src/htsopt.h
@@ -428,11 +428,11 @@ struct httrackp {
|
||||
LLint maxfile_html; /**< max bytes per HTML file */
|
||||
int maxsoc; /**< max simultaneous sockets (-cN) */
|
||||
LLint fragment; /**< split site after this many bytes */
|
||||
hts_boolean
|
||||
hts_tristate
|
||||
nearlink; /**< also fetch images/data adjacent to a page but off-site */
|
||||
hts_boolean makeindex; /**< build a top-level index.html */
|
||||
hts_boolean kindex; /**< build a keyword index */
|
||||
hts_boolean delete_old; /**< delete locally obsolete files after update */
|
||||
hts_tristate delete_old; /**< delete locally obsolete files after update */
|
||||
int timeout; /**< connection timeout in seconds */
|
||||
int rateout; /**< minimum transfer rate (bytes/s) before abort */
|
||||
int maxtime; /**< max total mirror duration in seconds */
|
||||
@@ -465,13 +465,13 @@ struct httrackp {
|
||||
hts_boolean maketrack; /**< maintain an operations-statistics log */
|
||||
int parsejava; /**< Java/JS parsing mode; see htsparsejava_flags */
|
||||
int hostcontrol; /**< ban slow/timing-out hosts; see hts_hostcontrol bits */
|
||||
hts_boolean errpage; /**< generate an error page on 404 and similar */
|
||||
hts_tristate errpage; /**< generate an error page on 404 and similar */
|
||||
hts_boolean
|
||||
check_type; /**< probe unknown-type links (cgi/asp/dir) and follow moves
|
||||
*/
|
||||
hts_boolean all_in_cache; /**< keep all retrieved data in the cache */
|
||||
hts_robots robots; /**< robots.txt handling level */
|
||||
hts_boolean external; /**< render external links as error pages */
|
||||
hts_tristate external; /**< render external links as error pages */
|
||||
hts_boolean passprivacy; /**< strip passwords from external links */
|
||||
hts_boolean includequery; /**< include the query string in saved names */
|
||||
hts_boolean mirror_first_page; /**< only mirror the links of the first page */
|
||||
@@ -485,7 +485,7 @@ struct httrackp {
|
||||
hts_boolean sizehack; /**< treat same-size response as "updated" */
|
||||
hts_boolean urlhack; // force "url normalization" to avoid loops
|
||||
hts_boolean tolerant; /**< accept an incorrect Content-Length */
|
||||
hts_boolean
|
||||
hts_tristate
|
||||
parseall; /**< parse aggressively, including unknown tags with links */
|
||||
hts_boolean parsedebug; /**< parser debug mode */
|
||||
hts_boolean norecatch; /**< do not re-fetch files the user deleted locally */
|
||||
|
||||
15
tests/01_engine-dns.test
Normal file
15
tests/01_engine-dns.test
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# DNS resolver/cache self-test: a mock getaddrinfo (no network) checks address
|
||||
# family, single-address selection, the -@i4/-@i6 family filter, and cache reuse.
|
||||
# The trailing token is required, like the other -# selftests, so a bare command
|
||||
# line isn't treated as "no arguments" and routed to the usage screen.
|
||||
# capture what the self-test prints on stdout
out=$(httrack -#D run)
|
||||
|
||||
# the output must be exactly this one line; anything else fails the test and is
# echoed to stderr for diagnosis
test "$out" = "dns-selftest: OK" || {
|
||||
echo "expected 'dns-selftest: OK', got: $out" >&2
|
||||
exit 1
|
||||
}
|
||||
110
tests/19_local-connect-fallback.test
Normal file
110
tests/19_local-connect-fallback.test
Normal file
@@ -0,0 +1,110 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# A host that resolves to several addresses must fall back to the next one when
|
||||
# a connect fails, instead of giving up on the first (dead IPv6 on a dual-stack
|
||||
# host, ...). HTTRACK_DEBUG_RESOLVE pins "deadhost" to a refused address first
|
||||
# (127.0.0.2, nothing listening) then the live server (127.0.0.1): the crawl
|
||||
# only succeeds if httrack retries the second address. A second case pins every
|
||||
# address to a refused one, so the slot must exhaust the list and error out
|
||||
# (rather than hang or loop).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
: "${top_srcdir:=..}"
|
||||
|
||||
if test "${V6_SUPPORT:-}" == "no"; then
|
||||
echo "no IPv6 support (resolver list/override is IPv6-only), skipping"
|
||||
exit 77
|
||||
fi
|
||||
if ! command -v python3 >/dev/null 2>&1; then
|
||||
echo "python3 missing, skipping"
|
||||
exit 77
|
||||
fi
|
||||
|
||||
server="$top_srcdir/tests/local-server.py"
|
||||
root="$top_srcdir/tests/server-root"
|
||||
tmpdir=$(mktemp -d)
|
||||
serverpid=
|
||||
|
||||
# Exit handler (installed with "trap cleanup EXIT"): stops the background
# server if one was started, then removes the temporary directory.
cleanup() {
|
||||
# serverpid stays empty until the server has been launched
if test -n "$serverpid"; then
|
||||
# "|| true": a failing kill/wait must not abort the handler under "set -e"
kill "$serverpid" 2>/dev/null || true
|
||||
wait "$serverpid" 2>/dev/null || true
|
||||
fi
|
||||
rm -rf "$tmpdir"
|
||||
# always report success from the handler itself
return 0
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# bind the live server to 127.0.0.1 only, so 127.0.0.2 refuses the connect
|
||||
python3 "$server" --root "$root" --bind 127.0.0.1 >"$tmpdir/srv.out" 2>"$tmpdir/srv.err" &
|
||||
serverpid=$!
|
||||
port=
|
||||
# Poll for the server's "PORT <n>" announcement: up to 50 attempts, 0.1s apart.
for _ in $(seq 1 50); do
|
||||
# first line of the server's stdout, or empty if nothing was written yet
line=$(head -n1 "$tmpdir/srv.out" 2>/dev/null || true)
|
||||
# first space-separated word must be the literal PORT marker
if test "${line%% *}" == "PORT"; then
|
||||
# strip the "PORT " prefix, keeping the rest of the line as the port
port="${line#PORT }"
|
||||
break
|
||||
fi
|
||||
# kill -0 only probes the process; if it is gone, show its stderr and fail
kill -0 "$serverpid" 2>/dev/null || {
|
||||
echo "server exited early: $(cat "$tmpdir/srv.err")"
|
||||
exit 1
|
||||
}
|
||||
sleep 0.1
|
||||
done
|
||||
test -n "$port" || {
|
||||
echo "could not discover server port"
|
||||
exit 1
|
||||
}
|
||||
|
||||
out="$tmpdir/crawl"
|
||||
HTTRACK_DEBUG_RESOLVE="deadhost:127.0.0.2,127.0.0.1" \
|
||||
httrack "http://deadhost:$port/simple/basic.html" -O "$out" \
|
||||
-c1 --robots=0 --timeout=30 --quiet -Z >"$tmpdir/log" 2>&1
|
||||
|
||||
log="$out/hts-log.txt"
|
||||
|
||||
# the dead address was tried, then the next one (proves the fallback ran)
|
||||
if ! grep -q "trying next address" "$log"; then
|
||||
echo "FAIL: no connect fallback happened"
|
||||
cat "$log"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 0 errors and the file was actually fetched (over the live address)
|
||||
errs=$(grep -iEc "^[0-9:]*[[:space:]]Error:" "$log" || true)
|
||||
test "$errs" == "0" || {
|
||||
echo "FAIL: $errs error(s) reported"
|
||||
grep -iE "Error:" "$log"
|
||||
exit 1
|
||||
}
|
||||
test -f "$out/deadhost_$port/simple/basic.html" || {
|
||||
echo "FAIL: basic.html not downloaded via fallback"
|
||||
find "$out" -type f
|
||||
exit 1
|
||||
}
|
||||
|
||||
# every address refused: the slot exhausts the list, then errors out (the
|
||||
# harness timeout would catch a hang/loop; refused connects are instant)
|
||||
out2="$tmpdir/crawl2"
|
||||
HTTRACK_DEBUG_RESOLVE="alldead:127.0.0.2,127.0.0.3" \
|
||||
httrack "http://alldead:$port/simple/basic.html" -O "$out2" \
|
||||
-c1 --robots=0 --timeout=30 --quiet -Z >"$tmpdir/log2" 2>&1
|
||||
log2="$out2/hts-log.txt"
|
||||
|
||||
grep -q "trying next address" "$log2" || {
|
||||
echo "FAIL: exhaustion path never tried the fallback address"
|
||||
cat "$log2"
|
||||
exit 1
|
||||
}
|
||||
grep -iqE "^[0-9:]*[[:space:]]Error:" "$log2" || {
|
||||
echo "FAIL: all addresses failing did not report an error"
|
||||
cat "$log2"
|
||||
exit 1
|
||||
}
|
||||
test ! -f "$out2/alldead_$port/simple/basic.html" || {
|
||||
echo "FAIL: file downloaded despite every address failing"
|
||||
exit 1
|
||||
}
|
||||
|
||||
echo "OK: connect fallback succeeds, and exhausting all addresses errors out"
|
||||
@@ -13,6 +13,7 @@ TESTS_ENVIRONMENT += PATH=$(top_builddir)/src$(PATH_SEPARATOR)$$PATH
|
||||
### TESTS_ENVIRONMENT += $(SHLIBPATH_VAR)="$(top_builddir)/src/$(LT_CV_OBJDIR)$${$(SHLIBPATH_VAR):+$(PATH_SEPARATOR)}$$$(SHLIBPATH_VAR)"
|
||||
TESTS_ENVIRONMENT += ONLINE_UNIT_TESTS=$(ONLINE_UNIT_TESTS)
|
||||
TESTS_ENVIRONMENT += HTTPS_SUPPORT=$(HTTPS_SUPPORT)
|
||||
TESTS_ENVIRONMENT += V6_SUPPORT=$(V6_SUPPORT)
|
||||
TESTS_ENVIRONMENT += top_srcdir=$(top_srcdir)
|
||||
|
||||
TEST_EXTENSIONS = .test
|
||||
@@ -29,6 +30,7 @@ TESTS = \
|
||||
01_engine-cmdline.test \
|
||||
01_engine-cookies.test \
|
||||
01_engine-copyopt.test \
|
||||
01_engine-dns.test \
|
||||
01_engine-doitlog.test \
|
||||
01_engine-entities.test \
|
||||
01_engine-filter.test \
|
||||
@@ -55,6 +57,7 @@ TESTS = \
|
||||
15_local-types.test \
|
||||
16_local-assume.test \
|
||||
17_local-empty-ct.test \
|
||||
18_local-update.test
|
||||
18_local-update.test \
|
||||
19_local-connect-fallback.test
|
||||
|
||||
CLEANFILES = check-network_sh.cache
|
||||
|
||||
Reference in New Issue
Block a user