mirror of
https://github.com/xroche/httrack.git
synced 2026-06-23 18:48:30 +03:00
Compare commits
30 Commits
3.49.8
...
dns-multia
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0bea390973 | ||
|
|
67af1c2f0b | ||
|
|
542d6a56b5 | ||
|
|
a5c86e7e89 | ||
|
|
54f5717057 | ||
|
|
40fc9de360 | ||
|
|
4614eefefe | ||
|
|
b0e8262db0 | ||
|
|
addbd3136b | ||
|
|
a64c4cd160 | ||
|
|
1611dbcabf | ||
|
|
099501ee50 | ||
|
|
1b9eefa3b4 | ||
|
|
9c8d3a41eb | ||
|
|
ae77cd9d6d | ||
|
|
51b8dcd81c | ||
|
|
bcce664143 | ||
|
|
7a24add87c | ||
|
|
2308e7bafd | ||
|
|
ef5691fc47 | ||
|
|
0a6eb73903 | ||
|
|
fdb243e5a2 | ||
|
|
f8546e146d | ||
|
|
b7f602f2eb | ||
|
|
550100b56a | ||
|
|
33ddb27243 | ||
|
|
4606dfbf66 | ||
|
|
a6f1b9a3dd | ||
|
|
fb35d6a0f1 | ||
|
|
8a270fec03 |
5
.flake8
Normal file
5
.flake8
Normal file
@@ -0,0 +1,5 @@
|
||||
[flake8]
|
||||
# Match black's formatting so the two tools don't fight.
|
||||
max-line-length = 88
|
||||
# E203/W503 conflict with black's slice and line-break style.
|
||||
extend-ignore = E203, W503
|
||||
3
.github/workflows/ci.yml
vendored
3
.github/workflows/ci.yml
vendored
@@ -227,7 +227,8 @@ jobs:
|
||||
# Validate the Debian packaging via the same script maintainers release with.
|
||||
# One amd64/gcc run is enough: packaging (control/rules/manifest/lintian/quilt
|
||||
# source build) is arch- and compiler-independent, and the build matrix above
|
||||
# already covers compile portability. lintian runs with --fail-on=error.
|
||||
# already covers compile portability. mkdeb.sh runs lintian as an explicit gate
|
||||
# (debuild does not propagate lintian's exit) with --fail-on=error,warning.
|
||||
deb:
|
||||
name: deb package (lintian)
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
12
configure.ac
12
configure.ac
@@ -1,6 +1,6 @@
|
||||
AC_PREREQ([2.71])
|
||||
|
||||
AC_INIT([httrack], [3.49.8], [roche+packaging@httrack.com], [httrack], [http://www.httrack.com/])
|
||||
AC_INIT([httrack], [3.49.9], [roche+packaging@httrack.com], [httrack], [http://www.httrack.com/])
|
||||
AC_COPYRIGHT([
|
||||
HTTrack Website Copier, Offline Browser for Windows and Unix
|
||||
Copyright (C) 1998-2015 Xavier Roche and other contributors
|
||||
@@ -29,9 +29,10 @@ AC_CONFIG_SRCDIR(src/httrack.c)
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
AC_CONFIG_HEADERS(config.h)
|
||||
AM_INIT_AUTOMAKE([subdir-objects])
|
||||
# 3:0:0: htsblk layout changed (contenttype/charset/contentencoding widened to
|
||||
# 128), an incompatible ABI break, so bump current and reset revision/age.
|
||||
VERSION_INFO="3:0:0"
|
||||
# 3:1:0: 3.49.9 changed code but not the exported interface vs 3.49.8 (same 164
|
||||
# symbols, no struct-layout change), so bump revision only. (3:0:0 was the htsblk
|
||||
# mime-buffer widening, an ABI break that moved the soname .so.2 -> .so.3.)
|
||||
VERSION_INFO="3:1:0"
|
||||
AM_MAINTAINER_MODE
|
||||
AC_USE_SYSTEM_EXTENSIONS
|
||||
|
||||
@@ -214,9 +215,12 @@ AC_SUBST(OPENSSL_LIBS)
|
||||
fi
|
||||
|
||||
### Support IPv6
|
||||
V6_SUPPORT=no
|
||||
AC_CHECK_LIB(c, getaddrinfo, [V6_FLAG="-DINET6"
|
||||
V6_SUPPORT=yes
|
||||
AC_DEFINE(HTS_INET6, 1, [Check for IPv6])], AC_MSG_WARN([*** IPv6 not found IPv6 compatibility disabled]))
|
||||
AC_SUBST(V6_FLAG)
|
||||
AC_SUBST(V6_SUPPORT)
|
||||
|
||||
### Check for LFS
|
||||
AC_CHECK_LIB(c, fopen64, [LFS_FLAG="-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE"
|
||||
|
||||
24
debian/changelog
vendored
24
debian/changelog
vendored
@@ -1,3 +1,27 @@
|
||||
httrack (3.49.9-1) unstable; urgency=medium
|
||||
|
||||
* New upstream release: Content-Type and file-type detection fixes (trust a
|
||||
declared Content-Type over a binary URL extension, honor --assume under the
|
||||
delayed type check, keep a known extension against a bogus or empty
|
||||
Content-Type, and avoid an uninitialised read on an empty Content-Type), and
|
||||
restored C++ source-compatibility of the installed headers so reverse
|
||||
dependencies (httraqt) build again.
|
||||
|
||||
-- Xavier Roche <xavier@debian.org> Sun, 21 Jun 2026 17:59:38 +0200
|
||||
|
||||
httrack (3.49.8-2) unstable; urgency=medium
|
||||
|
||||
* Rename libhttrack2 to libhttrack3 to follow the SONAME, which the 3.49.8
|
||||
ABI bump moved to libhttrack.so.3 (package-name-doesnt-match-sonames). In
|
||||
3.49.8-1 the libhttrack2.files glob still matched .so.2, so the runtime
|
||||
libraries fell through into the httrack package and libhttrack2 shipped no
|
||||
library. The new .files uses a .so.3* wildcard so a future SONAME bump no
|
||||
longer silently misplaces the libraries. New binary package, via NEW.
|
||||
* Drop the stale debian/libhttrack-swf1.files: the swf module is no longer
|
||||
built and no libhttrack-swf1 package exists.
|
||||
|
||||
-- Xavier Roche <xavier@debian.org> Sat, 20 Jun 2026 14:42:13 +0200
|
||||
|
||||
httrack (3.49.8-1) unstable; urgency=medium
|
||||
|
||||
* New upstream release: HTTPS-proxy CONNECT tunnelling and wider srcset
|
||||
|
||||
6
debian/control
vendored
6
debian/control
vendored
@@ -58,13 +58,13 @@ Description: webhttrack common files
|
||||
This package is the common files of webhttrack, website copier and
|
||||
mirroring utility
|
||||
|
||||
Package: libhttrack2
|
||||
Package: libhttrack3
|
||||
Architecture: any
|
||||
Multi-Arch: same
|
||||
Section: libs
|
||||
Replaces: libhttrack1
|
||||
Conflicts: libhttrack1
|
||||
Depends: ${misc:Depends}, ${shlibs:Depends}
|
||||
Replaces: libhttrack2, httrack (<< 3.49.8-2~)
|
||||
Breaks: libhttrack2, httrack (<< 3.49.8-2~)
|
||||
Description: Httrack website copier library
|
||||
This package is the library part of httrack, website copier and mirroring
|
||||
utility
|
||||
|
||||
118
debian/copyright
vendored
118
debian/copyright
vendored
@@ -1,21 +1,109 @@
|
||||
This package was debianized by Xavier Roche <roche@httrack.com> on
|
||||
Fri, 27 Sep 2002 16:42:26 +0200
|
||||
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
||||
Upstream-Name: httrack
|
||||
Upstream-Contact: Xavier Roche <roche@httrack.com>
|
||||
Source: https://www.httrack.com/
|
||||
|
||||
The current Debian maintainer is Xavier Roche <xavier@debian.org>
|
||||
Files: *
|
||||
Copyright: 1998-2026 Xavier Roche and other contributors
|
||||
License: GPL-3+
|
||||
Comment:
|
||||
The engine includes contributions from Yann Philippot (src/htsjava.c,
|
||||
src/htsjava.h). htsbasenet.h links against the system OpenSSL library
|
||||
(originally by Eric Young); no OpenSSL/SSLeay code is bundled here.
|
||||
|
||||
Upstream author: Xavier Roche <roche@httrack.com>
|
||||
Files: src/minizip/*
|
||||
Copyright: 1998-2010 Gilles Vollant
|
||||
2007-2008 Even Rouault
|
||||
2009-2010 Mathias Svensson
|
||||
1990-2000 Info-ZIP
|
||||
License: Zlib
|
||||
Comment:
|
||||
The decryption code in src/minizip/crypt.h and src/minizip/unzip.c derives
|
||||
from the Info-ZIP distribution, distributed under the same terms.
|
||||
|
||||
Copyright: 1998-2014 Xavier Roche and other contributors
|
||||
Files: src/md5.c
|
||||
Copyright: 1993 Colin Plumb
|
||||
License: public-domain-md5
|
||||
This code implements the MD5 message-digest algorithm, due to Ron Rivest.
|
||||
It was written by Colin Plumb in 1993, no copyright is claimed. This code
|
||||
is in the public domain; do with it what you wish.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Files: src/coucal/*
|
||||
Copyright: 2013-2014 Xavier Roche
|
||||
License: BSD-3-clause
|
||||
|
||||
On Debian systems, the complete text of the GNU General Public
|
||||
License version 3 can be found in /usr/share/common-licenses/GPL-3 file.
|
||||
Files: src/coucal/murmurhash3.h*
|
||||
Copyright: Austin Appleby
|
||||
License: public-domain-murmurhash3
|
||||
MurmurHash3 was written by Austin Appleby, and is placed in the public
|
||||
domain. The author hereby disclaims copyright to this source code.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Files: html/server/div/com.httrack.WebHTTrack.metainfo.xml
|
||||
Copyright: 1998-2026 Xavier Roche and other contributors
|
||||
License: FSFAP
|
||||
Copying and distribution of this file, with or without modification, are
|
||||
permitted in any medium without royalty provided the copyright notice and
|
||||
this notice are preserved. This file is offered as-is, without any warranty.
|
||||
|
||||
Files: debian/*
|
||||
Copyright: 2002-2026 Xavier Roche <xavier@debian.org>
|
||||
License: GPL-3+
|
||||
|
||||
License: GPL-3+
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
.
|
||||
On Debian systems, the complete text of the GNU General Public License
|
||||
version 3 can be found in /usr/share/common-licenses/GPL-3.
|
||||
|
||||
License: Zlib
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the
|
||||
use of this software.
|
||||
.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
.
|
||||
1. The origin of this software must not be misrepresented; you must not claim
|
||||
that you wrote the original software. If you use this software in a product,
|
||||
an acknowledgment in the product documentation would be appreciated but is
|
||||
not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
License: BSD-3-clause
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
.
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
3. Neither the name of the copyright holder nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
3
debian/httrack-doc.lintian-overrides
vendored
3
debian/httrack-doc.lintian-overrides
vendored
@@ -4,3 +4,6 @@
|
||||
# so the path lives in the display pointer, not the override -- match with '*'.
|
||||
httrack-doc: extra-license-file *
|
||||
httrack-doc: package-contains-documentation-outside-usr-share-doc *
|
||||
# search.sh is a sample CGI shipped alongside the HTML manual, not meant to be
|
||||
# run from the package tree; it stays non-executable by design.
|
||||
httrack-doc: script-not-executable *
|
||||
|
||||
2
debian/libhttrack-swf1.files
vendored
2
debian/libhttrack-swf1.files
vendored
@@ -1,2 +0,0 @@
|
||||
usr/lib/*/libhtsswf.so.1.0.0
|
||||
usr/lib/*/libhtsswf.so.1
|
||||
5
debian/libhttrack2.files
vendored
5
debian/libhttrack2.files
vendored
@@ -1,5 +0,0 @@
|
||||
usr/lib/*/libhttrack.so.2.0.49
|
||||
usr/lib/*/libhttrack.so.2
|
||||
usr/lib/*/libhtsjava.so.2.0.49
|
||||
usr/lib/*/libhtsjava.so.2
|
||||
usr/share/httrack/templates
|
||||
3
debian/libhttrack2.lintian-overrides
vendored
3
debian/libhttrack2.lintian-overrides
vendored
@@ -1,3 +0,0 @@
|
||||
# The shared libraries ship without a versioned symbols control file (ABI is
|
||||
# tracked via the SONAME and a strict =version dependency, see debian/rules).
|
||||
libhttrack2: no-symbols-control-file usr/lib/*
|
||||
3
debian/libhttrack3.files
vendored
Normal file
3
debian/libhttrack3.files
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
usr/lib/*/libhttrack.so.3*
|
||||
usr/lib/*/libhtsjava.so.3*
|
||||
usr/share/httrack/templates
|
||||
3
debian/libhttrack3.lintian-overrides
vendored
Normal file
3
debian/libhttrack3.lintian-overrides
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
# The shared libraries ship without a versioned symbols control file (ABI is
|
||||
# tracked via the SONAME plus a >= upstream-version dependency, see debian/rules).
|
||||
libhttrack3: no-symbols-control-file usr/lib/*
|
||||
2
debian/rules
vendored
2
debian/rules
vendored
@@ -135,7 +135,7 @@ binary-arch: build install
|
||||
dh_makeshlibs -a -X/usr/lib/$(DEB_HOST_MULTIARCH)/httrack/libtest --version-info
|
||||
dh_installdeb -a
|
||||
# we depend on the current version (ABI may change)
|
||||
dh_shlibdeps -a -ldebian/libhttrack2/usr/lib/$(DEB_HOST_MULTIARCH)
|
||||
dh_shlibdeps -a -ldebian/libhttrack3/usr/lib/$(DEB_HOST_MULTIARCH)
|
||||
dh_gencontrol -a
|
||||
dh_md5sums -a
|
||||
dh_builddeb -a
|
||||
|
||||
@@ -4,6 +4,12 @@ HTTrack Website Copier release history:
|
||||
|
||||
This file lists all changes and fixes that have been made for HTTrack
|
||||
|
||||
3.49-9
|
||||
+ Fixed: file-type detection from the Content-Type header: trust a declared type over a binary URL extension, honor --assume under the delayed type check, and keep a known extension against a bogus or empty Content-Type (#267, #29, #56)
|
||||
+ Fixed: an uninitialized-buffer read when the Content-Type is empty (#411)
|
||||
+ Fixed: restored C++ source-compatibility of the installed headers so reverse dependencies (httraqt) build again (#413)
|
||||
+ Changed: multiple internal build, packaging and test-harness improvements
|
||||
|
||||
3.49-8
|
||||
+ New: tunnel HTTPS downloads through the configured HTTP proxy via CONNECT (#85)
|
||||
+ New: parse every candidate URL in <img> and <source> srcset lists (#326)
|
||||
|
||||
@@ -56,7 +56,7 @@ whttrackrundir = $(bindir)
|
||||
whttrackrun_SCRIPTS = webhttrack
|
||||
|
||||
libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \
|
||||
htscache_selftest.c \
|
||||
htscache_selftest.c htsdns_selftest.c \
|
||||
htscatchurl.c htsfilters.c htsftp.c htshash.c coucal/coucal.c \
|
||||
htshelp.c htslib.c htscoremain.c \
|
||||
htsname.c htsrobots.c htstools.c htswizard.c \
|
||||
@@ -66,7 +66,7 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \
|
||||
md5.c \
|
||||
minizip/ioapi.c minizip/mztools.c minizip/unzip.c minizip/zip.c \
|
||||
hts-indextmpl.h htsalias.h htsback.h htsbase.h htssafe.h \
|
||||
htsbasenet.h htsbauth.h htscache.h htscache_selftest.h htscatchurl.h \
|
||||
htsbasenet.h htsbauth.h htscache.h htscache_selftest.h htsdns_selftest.h htscatchurl.h \
|
||||
htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \
|
||||
htsfilters.h htsftp.h htsglobal.h htshash.h coucal/coucal.h \
|
||||
htshelp.h htsindex.h htslib.h htsmd5.h \
|
||||
|
||||
153
src/htsback.c
153
src/htsback.c
@@ -73,6 +73,8 @@ struct_back *back_new(httrackp *opt, int back_max) {
|
||||
|
||||
sback->count = back_max;
|
||||
sback->lnk = (lien_back *) calloct((back_max + 1), sizeof(lien_back));
|
||||
sback->connect_fallback = (hts_connect_fallback *) calloct(
|
||||
(back_max + 1), sizeof(hts_connect_fallback));
|
||||
sback->ready = coucal_new(0);
|
||||
hts_set_hash_handler(sback->ready, opt);
|
||||
coucal_set_name(sback->ready, "back_new");
|
||||
@@ -83,6 +85,7 @@ struct_back *back_new(httrackp *opt, int back_max) {
|
||||
sback->lnk[i].r.location = sback->lnk[i].location_buffer;
|
||||
sback->lnk[i].status = STATUS_FREE;
|
||||
sback->lnk[i].r.soc = INVALID_SOCKET;
|
||||
sback->connect_fallback[i].addr_count = -1; // not yet probed
|
||||
}
|
||||
return sback;
|
||||
}
|
||||
@@ -93,6 +96,7 @@ void back_free(struct_back ** sback) {
|
||||
freet((*sback)->lnk);
|
||||
(*sback)->lnk = NULL;
|
||||
}
|
||||
freet((*sback)->connect_fallback);
|
||||
if ((*sback)->ready != NULL) {
|
||||
coucal_delete(&(*sback)->ready);
|
||||
(*sback)->ready_size_bytes = 0;
|
||||
@@ -102,6 +106,72 @@ void back_free(struct_back ** sback) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Per-candidate connect deadline cap (seconds): a connecting slot with another
   address to try waits at most this long before falling back, instead of the
   full (default 120s) slot timeout. Caps the dead-IPv6 stall while staying well
   above a normal handshake. The last candidate still gets the full timeout.
   Guarded so a build can tune the cap (-DHTS_CONNECT_FALLBACK_TIMEOUT=<n>)
   without patching this file; the default is unchanged. */
#ifndef HTS_CONNECT_FALLBACK_TIMEOUT
#define HTS_CONNECT_FALLBACK_TIMEOUT 10
#endif

/* Decide whether a connecting slot should abandon its current address.

   addr_index  0-based index of the candidate currently being connected
   addr_count  number of resolved candidates (a negative value, i.e. "not yet
               probed", never triggers a fallback)
   elapsed     seconds spent connecting to the current candidate
   timeout     slot timeout in seconds; <= 0 means no timeout management

   Returns 1 when another candidate remains and elapsed has reached
   min(timeout, HTS_CONNECT_FALLBACK_TIMEOUT), 0 otherwise. */
int back_connect_fallback_due(int addr_index, int addr_count, int elapsed,
                              int timeout) {
  int deadline;

  /* last (or only) candidate: nothing to fall back to */
  if (addr_index + 1 >= addr_count)
    return 0;

  /* no timeout management requested: never force a fallback */
  if (timeout <= 0)
    return 0;

  /* the per-candidate deadline never exceeds the slot's own timeout */
  deadline = HTS_CONNECT_FALLBACK_TIMEOUT;
  if (timeout < deadline)
    deadline = timeout;

  return elapsed >= deadline;
}
|
||||
|
||||
/* Pending-connect result for a non-blocking socket reported ready by select():
|
||||
0 = connected, >0 = the connect errno (refused, unreachable, ...), -1 if the
|
||||
probe itself failed. A failed connect is reported writable too, so this is
|
||||
how success is told from failure without blocking. */
|
||||
static int connect_socket_error(T_SOC soc) {
|
||||
int soerr = 0;
|
||||
socklen_t len = (socklen_t) sizeof(soerr);
|
||||
|
||||
if (getsockopt(soc, SOL_SOCKET, SO_ERROR, (char *) &soerr, &len) != 0)
|
||||
return -1;
|
||||
return soerr;
|
||||
}
|
||||
|
||||
/* Retry a stuck/failed connecting slot against its next resolved address.
|
||||
Closes the current socket and starts a non-blocking connect to the next
|
||||
candidate, leaving the slot in STATUS_CONNECTING. Returns 1 if a new connect
|
||||
was started, 0 if no fallback address remains (caller fails the slot). */
|
||||
static int back_connect_next(httrackp *opt, struct_back *sback, int i) {
|
||||
hts_connect_fallback *const cf = &sback->connect_fallback[i];
|
||||
lien_back *const back = sback->lnk;
|
||||
const int next = cf->addr_index + 1;
|
||||
T_SOC soc;
|
||||
|
||||
if (next >= cf->addr_count)
|
||||
return 0;
|
||||
|
||||
if (back[i].r.soc != INVALID_SOCKET) {
|
||||
deletehttp(&back[i].r);
|
||||
back[i].r.soc = INVALID_SOCKET;
|
||||
}
|
||||
soc = newhttp_addr(opt, back[i].url_adr, &back[i].r, -1, 0, next, NULL);
|
||||
if (soc == INVALID_SOCKET)
|
||||
return 0;
|
||||
|
||||
back[i].r.soc = soc;
|
||||
cf->addr_index = next;
|
||||
cf->connect_start = time_local();
|
||||
if (back[i].timeout > 0)
|
||||
back[i].timeout_refresh = cf->connect_start;
|
||||
back[i].status = STATUS_CONNECTING;
|
||||
hts_log_print(opt, LOG_DEBUG,
|
||||
"connect failed, trying next address (%d/%d) for %s", next + 1,
|
||||
cf->addr_count, back[i].url_adr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
void back_delete_all(httrackp * opt, cache_back * cache, struct_back * sback) {
|
||||
if (sback != NULL) {
|
||||
int i;
|
||||
@@ -1911,8 +1981,11 @@ int back_add(struct_back * sback, httrackp * opt, cache_back * cache, const char
|
||||
// ouvrir liaison, envoyer requète
|
||||
// ne pas traiter ou recevoir l'en tête immédiatement
|
||||
hts_init_htsblk(&back[p].r);
|
||||
//memset(&(back[p].r), 0, sizeof(htsblk));
|
||||
// memset(&(back[p].r), 0, sizeof(htsblk));
|
||||
back[p].r.location = back[p].location_buffer;
|
||||
// fresh connect: address list not yet probed, start at the first
|
||||
sback->connect_fallback[p].addr_index = 0;
|
||||
sback->connect_fallback[p].addr_count = -1;
|
||||
// recopier proxy
|
||||
if ((back[p].r.req.proxy.active = opt->proxy.active)) {
|
||||
if (StringBuff(opt->proxy.bindhost) != NULL)
|
||||
@@ -2369,21 +2442,25 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
|
||||
// en cas de gestion du connect préemptif
|
||||
#if HTS_XCONN
|
||||
if (back[i].status == STATUS_CONNECTING) { // connexion
|
||||
do_wait = 1;
|
||||
// a connecting slot always carries a live socket; guard anyway so a
|
||||
// stray INVALID_SOCKET can never reach FD_SET (mirrors the recv branch)
|
||||
if (back[i].r.soc != INVALID_SOCKET) {
|
||||
do_wait = 1;
|
||||
|
||||
// noter socket write
|
||||
FD_SET(back[i].r.soc, &fds_c);
|
||||
// noter socket write
|
||||
FD_SET(back[i].r.soc, &fds_c);
|
||||
|
||||
// noter socket erreur
|
||||
FD_SET(back[i].r.soc, &fds_e);
|
||||
// noter socket erreur
|
||||
FD_SET(back[i].r.soc, &fds_e);
|
||||
|
||||
// calculer max
|
||||
if (max_c) {
|
||||
max_c = 0;
|
||||
nfds = back[i].r.soc;
|
||||
} else if (back[i].r.soc > nfds) {
|
||||
// ID socket la plus élevée
|
||||
nfds = back[i].r.soc;
|
||||
// calculer max
|
||||
if (max_c) {
|
||||
max_c = 0;
|
||||
nfds = back[i].r.soc;
|
||||
} else if (back[i].r.soc > nfds) {
|
||||
// ID socket la plus élevée
|
||||
nfds = back[i].r.soc;
|
||||
}
|
||||
}
|
||||
|
||||
} else
|
||||
@@ -2517,8 +2594,20 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
|
||||
}
|
||||
// ---- FLAG WRITE MIS A UN?: POUR LE CONNECT
|
||||
if (back[i].status == STATUS_CONNECTING) { // attendre connect
|
||||
hts_connect_fallback *const cf = &sback->connect_fallback[i];
|
||||
int dispo = 0;
|
||||
|
||||
// probe the resolved address list once per fresh connect (cache hit:
|
||||
// the host was resolved when this connect was opened)
|
||||
if (cf->addr_count < 0 && back[i].r.soc != INVALID_SOCKET &&
|
||||
!back[i].r.is_file) {
|
||||
SOCaddr scratch[HTS_MAXADDRNUM];
|
||||
|
||||
cf->addr_count = hts_dns_resolve_all(opt, back[i].url_adr, scratch,
|
||||
HTS_MAXADDRNUM, NULL);
|
||||
cf->connect_start = time_local();
|
||||
}
|
||||
|
||||
// vérifier l'existance de timeout-check
|
||||
if (!gestion_timeout)
|
||||
if (back[i].timeout > 0)
|
||||
@@ -2526,7 +2615,20 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
|
||||
|
||||
// connecté?
|
||||
dispo = FD_ISSET(back[i].r.soc, &fds_c);
|
||||
if (dispo) { // ok connected!!
|
||||
if (dispo) { // socket ready: connect() finished (ok or failed)
|
||||
// a refused/failed connect is reported writable too; probe SO_ERROR
|
||||
// and, on failure, fall back to the next address (or fail the slot)
|
||||
if (connect_socket_error(back[i].r.soc) != 0) {
|
||||
if (!back_connect_next(opt, sback, i)) {
|
||||
deletehttp(&back[i].r);
|
||||
back[i].r.soc = INVALID_SOCKET;
|
||||
back[i].r.statuscode = STATUSCODE_CONNERROR;
|
||||
strcpybuff(back[i].r.msg, "Connect Error");
|
||||
back[i].status = STATUS_READY;
|
||||
back_set_finished(sback, i);
|
||||
}
|
||||
continue; // reconnected (stay connecting) or failed
|
||||
}
|
||||
busy_state = 1;
|
||||
|
||||
#if HTS_USEOPENSSL
|
||||
@@ -3884,6 +3986,29 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
|
||||
|
||||
if (back[i].status > 0) { // réception/connexion/..
|
||||
if (back[i].timeout > 0) {
|
||||
// a stuck connect with a fallback address: retry the next one well
|
||||
// before the full timeout (dead IPv6 on a dual-stack host, ...)
|
||||
if (back[i].status == STATUS_CONNECTING) {
|
||||
const hts_connect_fallback *const cf =
|
||||
&sback->connect_fallback[i];
|
||||
|
||||
if (back_connect_fallback_due(cf->addr_index, cf->addr_count,
|
||||
(int) (act - cf->connect_start),
|
||||
back[i].timeout)) {
|
||||
if (back_connect_next(opt, sback, i)) {
|
||||
continue; // reconnected to the next candidate
|
||||
}
|
||||
// fallback was due but no socket could be opened
|
||||
// (back_connect_next closed the dead one): stop now rather than
|
||||
// spin on an invalid fd
|
||||
back[i].r.soc = INVALID_SOCKET;
|
||||
back[i].r.statuscode = STATUSCODE_CONNERROR;
|
||||
strcpybuff(back[i].r.msg, "Connect Error");
|
||||
back[i].status = STATUS_READY;
|
||||
back_set_finished(sback, i);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
//printf("time check %d\n",((int) (act-back[i].timeout_refresh))-back[i].timeout);
|
||||
if (((int) (act - back[i].timeout_refresh)) >= back[i].timeout) {
|
||||
hts_log_print(opt, LOG_DEBUG, "connection timed out for %s%s", back[i].url_adr,
|
||||
|
||||
@@ -3703,9 +3703,9 @@ HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to) {
|
||||
if (from->maxsoc > 0)
|
||||
to->maxsoc = from->maxsoc;
|
||||
|
||||
/* hts_boolean/enum fields are unsigned (GCC), so a bare `> -1` unset-guard
|
||||
is always false; cast to int to keep the -1 "unset" sentinel test. */
|
||||
if ((int) from->nearlink > -1)
|
||||
/* hts_tristate fields use HTS_DEFAULT (-1) for "unspecified": copy_htsopt
|
||||
skips them so the target keeps its value. */
|
||||
if (from->nearlink > -1)
|
||||
to->nearlink = from->nearlink;
|
||||
|
||||
if (from->timeout > -1)
|
||||
@@ -3732,10 +3732,10 @@ HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to) {
|
||||
if (from->hostcontrol > -1)
|
||||
to->hostcontrol = from->hostcontrol;
|
||||
|
||||
if ((int) from->errpage > -1)
|
||||
if (from->errpage > -1)
|
||||
to->errpage = from->errpage;
|
||||
|
||||
if ((int) from->parseall > -1)
|
||||
if (from->parseall > -1)
|
||||
to->parseall = from->parseall;
|
||||
|
||||
// test all: bit 8 de travel
|
||||
|
||||
@@ -152,6 +152,15 @@ struct lien_adrfilsave {
|
||||
char save[HTS_URLMAXSIZE * 2]; /**< local save path (with directory) */
|
||||
};
|
||||
|
||||
/** Per-slot connect-fallback bookkeeping (parallel to struct_back.lnk).
|
||||
Tracks which resolved address the slot is currently connecting to so a
|
||||
stuck connect can be retried against the next one. */
|
||||
typedef struct hts_connect_fallback {
|
||||
int addr_index; /**< candidate being connected (0-based) */
|
||||
int addr_count; /**< resolved addresses; -1 = not yet probed */
|
||||
TStamp connect_start; /**< when the current candidate's connect began */
|
||||
} hts_connect_fallback;
|
||||
|
||||
/** The download-slot ring: the set of concurrent transfers in flight.
|
||||
Allocated/owned by the engine; consumers (status callbacks, the loop)
|
||||
read it but do not resize or free it. */
|
||||
@@ -168,6 +177,7 @@ struct struct_back {
|
||||
int count; /**< number of usable slots (back_max) */
|
||||
coucal ready; /**< index of slots whose transfer completed */
|
||||
LLint ready_size_bytes; /**< total bytes buffered in completed slots */
|
||||
hts_connect_fallback *connect_fallback; /**< per-slot, count+1 entries */
|
||||
};
|
||||
|
||||
typedef struct cache_back_zip_entry cache_back_zip_entry;
|
||||
@@ -372,6 +382,13 @@ void check_rate(TStamp stat_timestart, int maxrate);
|
||||
/* Backing (download-slot) scheduler. Operate on the back[] ring (struct_back).
|
||||
Not thread-safe; call from the single crawl loop. */
|
||||
|
||||
/* True if a connecting slot should give up on the current address and try the
|
||||
next one: a fallback address remains (addr_index+1 < addr_count) and the
|
||||
candidate has been connecting for at least its deadline, min(timeout, an
|
||||
internal cap). elapsed/timeout in seconds. Exposed for the -#D self-test. */
|
||||
int back_connect_fallback_due(int addr_index, int addr_count, int elapsed,
|
||||
int timeout);
|
||||
|
||||
/* How many new sockets may be opened now, honoring maxsoc and the maxconn rate
|
||||
limit (>=0). _strict ignores reserved-slot headroom; the plain form leaves
|
||||
room for naming tests and stops at 0 when the stack is nearly full. */
|
||||
|
||||
@@ -47,6 +47,7 @@ Please visit our Website: http://www.httrack.com
|
||||
#include "htscharset.h"
|
||||
#include "htsencoding.h"
|
||||
#include "htscache_selftest.h"
|
||||
#include "htsdns_selftest.h"
|
||||
#include "htsmd5.h"
|
||||
|
||||
#include <ctype.h>
|
||||
@@ -2460,6 +2461,13 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case 'D': { // DNS resolver/cache self-test (mock getaddrinfo)
|
||||
const int err = dns_selftests(opt);
|
||||
|
||||
printf("dns-selftest: %s\n", err ? "FAIL" : "OK");
|
||||
htsmain_free();
|
||||
return err;
|
||||
} break;
|
||||
case 'C': // list cache files : httrack -#C '*spid*.gif' will attempt to find the matching file
|
||||
{
|
||||
int hasFilter = 0;
|
||||
@@ -2579,7 +2587,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
(r.size >= 0) ? r.size : (-r.size));
|
||||
if (r.contenttype >= 0) {
|
||||
fprintf(stdout, "Content-Type: %s\r\n",
|
||||
r.contenttype);
|
||||
hts_effective_mime(r.contenttype));
|
||||
}
|
||||
if (r.cdispo[0]) {
|
||||
fprintf(stdout, "Content-Disposition: %s\r\n",
|
||||
@@ -3166,6 +3174,16 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
if (to->parseall != HTS_FALSE)
|
||||
err = 1;
|
||||
|
||||
/* HTS_DEFAULT (-1) is "unspecified": copy_htsopt must skip it,
|
||||
leaving the target intact. Only a signed (int-backed) field
|
||||
can hold -1, so this also guards the type against regressing
|
||||
to an unsigned hts_boolean. */
|
||||
from->parseall = HTS_DEFAULT;
|
||||
to->parseall = HTS_TRUE;
|
||||
copy_htsopt(from, to);
|
||||
if (to->parseall != HTS_TRUE)
|
||||
err = 1;
|
||||
|
||||
hts_free_opt(from);
|
||||
hts_free_opt(to);
|
||||
printf("copy-htsopt: %s\n", err ? "FAIL" : "OK");
|
||||
|
||||
359
src/htsdns_selftest.c
Normal file
359
src/htsdns_selftest.c
Normal file
@@ -0,0 +1,359 @@
|
||||
/* ------------------------------------------------------------ */
|
||||
/*
|
||||
HTTrack Website Copier, Offline Browser for Windows and Unix
|
||||
Copyright (C) 2026 Xavier Roche and other contributors
|
||||
|
||||
SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Ethical use: we kindly ask that you NOT use this software to harvest email
|
||||
addresses or to collect any other private information about people. Doing so
|
||||
would dishonor our work and waste the many hours we have spent on it.
|
||||
|
||||
Please visit our Website: http://www.httrack.com
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
/* File: htsdns_selftest.c subroutines: */
|
||||
/* in-process self-test for the DNS resolver and cache */
|
||||
/* Author: Xavier Roche */
|
||||
/* ------------------------------------------------------------ */
|
||||
|
||||
/* Routes the resolver through a scripted getaddrinfo (hts_resolver_backend)
   instead of the network, so resolution and the DNS cache are testable for a
   fixed set of scenarios (IPv4/IPv6/dual-stack, errors, family filter,
   cache reuse, multi-address lists and their clamping, address-index
   selection, connect-fallback decision) with no live DNS. */
|
||||
|
||||
#define HTS_INTERNAL_BYTECODE
|
||||
|
||||
#include "htsdns_selftest.h"
|
||||
|
||||
#include "htscore.h"
|
||||
#include "htslib.h"
|
||||
#include "htsnet.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#if HTS_INET6 != 0
|
||||
|
||||
/* IPV6_resolver: 0 = v4+v6, 1 = v4 only, 2 = v6 only (htscoremain -@i). */
|
||||
extern int IPV6_resolver;
|
||||
|
||||
/* One scripted host: either a getaddrinfo error, or an ordered address list. */
|
||||
/* One scripted address: the family tag plus the raw address bytes in network
   order. */
struct mock_addr {
  int family;             /* AF_INET / AF_INET6 */
  unsigned char addr[16]; /* first 4 bytes used for v4, all 16 for v6 */
};
typedef struct mock_addr mock_addr;
|
||||
|
||||
typedef struct mock_host {
|
||||
const char *name;
|
||||
int gai_err; /* non-zero: getaddrinfo returns this */
|
||||
int naddr;
|
||||
mock_addr addr[6];
|
||||
int calls; /* times the backend resolved this host */
|
||||
} mock_host;
|
||||
|
||||
static mock_host mock_hosts[] = {
|
||||
{"v4only.test", 0, 1, {{AF_INET, {1, 2, 3, 4}}}, 0},
|
||||
{"v6only.test", 0, 1, {{AF_INET6, {0x20, 0x01, 0x0d, 0xb8, [15] = 1}}}, 0},
|
||||
/* dual stack, IPv6 first (RFC 6724 order) then IPv4 */
|
||||
{"dual.test",
|
||||
0,
|
||||
2,
|
||||
{{AF_INET6, {0x20, 0x01, 0x0d, 0xb8, [15] = 2}}, {AF_INET, {5, 6, 7, 8}}},
|
||||
0},
|
||||
/* dual stack, IPv4 first: distinguishes "keep the first address" from
|
||||
"prefer a family", so the selection contract is actually pinned. */
|
||||
{"dual4.test",
|
||||
0,
|
||||
2,
|
||||
{{AF_INET, {9, 10, 11, 12}},
|
||||
{AF_INET6, {0x20, 0x01, 0x0d, 0xb8, [15] = 3}}},
|
||||
0},
|
||||
/* more addresses than HTS_MAXADDRNUM: the list must clamp to the cap. */
|
||||
{"many.test",
|
||||
0,
|
||||
6,
|
||||
{{AF_INET, {10, 0, 0, 1}},
|
||||
{AF_INET, {10, 0, 0, 2}},
|
||||
{AF_INET, {10, 0, 0, 3}},
|
||||
{AF_INET, {10, 0, 0, 4}},
|
||||
{AF_INET, {10, 0, 0, 5}},
|
||||
{AF_INET, {10, 0, 0, 6}}},
|
||||
0},
|
||||
{"nodns.test", EAI_NONAME, 0, {{0}}, 0},
|
||||
};
|
||||
|
||||
static mock_host *mock_find(const char *name) {
|
||||
for (size_t i = 0; i < sizeof(mock_hosts) / sizeof(mock_hosts[0]); i++) {
|
||||
if (strcmp(mock_hosts[i].name, name) == 0)
|
||||
return &mock_hosts[i];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void mock_reset_calls(void) {
|
||||
for (size_t i = 0; i < sizeof(mock_hosts) / sizeof(mock_hosts[0]); i++)
|
||||
mock_hosts[i].calls = 0;
|
||||
}
|
||||
|
||||
/* Build one addrinfo node owning its sockaddr (freed by mock_freeaddrinfo). */
|
||||
static struct addrinfo *mock_mkai(const mock_addr *a) {
|
||||
struct addrinfo *ai = calloct(1, sizeof(*ai));
|
||||
|
||||
ai->ai_family = a->family;
|
||||
if (a->family == AF_INET) {
|
||||
struct sockaddr_in *sin = calloct(1, sizeof(*sin));
|
||||
|
||||
sin->sin_family = AF_INET;
|
||||
memcpy(&sin->sin_addr, a->addr, 4);
|
||||
ai->ai_addr = (struct sockaddr *) sin;
|
||||
ai->ai_addrlen = sizeof(*sin);
|
||||
} else {
|
||||
struct sockaddr_in6 *sin6 = calloct(1, sizeof(*sin6));
|
||||
|
||||
sin6->sin6_family = AF_INET6;
|
||||
memcpy(&sin6->sin6_addr, a->addr, 16);
|
||||
ai->ai_addr = (struct sockaddr *) sin6;
|
||||
ai->ai_addrlen = sizeof(*sin6);
|
||||
}
|
||||
return ai;
|
||||
}
|
||||
|
||||
/* Scripted stand-in for getaddrinfo().

   node:    host name to look up in mock_hosts; NULL or an unscripted name
            yields EAI_NONAME.
   service: ignored.
   hints:   only ai_family is read; PF_UNSPEC (or NULL hints) means "all
            families".
   res:     receives the answer chain (release with mock_freeaddrinfo), or
            NULL on error.

   Returns 0 on success, the host's scripted error, or EAI_NONAME when the
   host is unknown or the family filter leaves no address. */
static int mock_getaddrinfo(const char *node, const char *service,
                            const struct addrinfo *hints,
                            struct addrinfo **res) {
  const int want = (hints != NULL) ? hints->ai_family : PF_UNSPEC;
  struct addrinfo *head = NULL, *tail = NULL;
  mock_host *h;
  int i;

  (void) service;
  *res = NULL;
  /* getaddrinfo() accepts a NULL node; never hand it to strcmp() */
  if (node == NULL)
    return EAI_NONAME;
  h = mock_find(node);
  if (h == NULL)
    return EAI_NONAME;
  h->calls++; /* a real backend hit; a cached host skips this */
  if (h->gai_err != 0)
    return h->gai_err;
  for (i = 0; i < h->naddr; i++) {
    struct addrinfo *ai;

    if (want != PF_UNSPEC && want != h->addr[i].family)
      continue; /* honor the requested family (v4/v6 only) */
    ai = mock_mkai(&h->addr[i]);
    if (head == NULL)
      head = ai;
    else
      tail->ai_next = ai;
    tail = ai;
  }
  if (head == NULL)
    return EAI_NONAME; /* filtered to empty, as the libc resolver does */
  *res = head;
  return 0;
}
|
||||
|
||||
static void mock_freeaddrinfo(struct addrinfo *res) {
|
||||
while (res != NULL) {
|
||||
struct addrinfo *const next = res->ai_next;
|
||||
|
||||
freet(res->ai_addr);
|
||||
freet(res);
|
||||
res = next;
|
||||
}
|
||||
}
|
||||
|
||||
/* Backend handed to hts_dns_set_resolver_backend() for the duration of the
   test: scripted lookup plus the matching release routine. */
static const hts_resolver_backend mock_backend = {
  mock_getaddrinfo,
  mock_freeaddrinfo
};
|
||||
|
||||
/* Number of CHECK() conditions that did not hold in the current run. */
static int failures = 0;

/* Evaluate cond once; when it is false, count a failure and report the source
   location and the condition text on stderr. The run continues either way. */
#define CHECK(cond)                                                            \
  do {                                                                         \
    if (cond)                                                                  \
      break;                                                                   \
    failures++;                                                                \
    fprintf(stderr, "dns-selftest: FAIL at %s:%d: %s\n", __FILE__, __LINE__,   \
            #cond);                                                            \
  } while (0)
|
||||
|
||||
/* Resolve via the uncached entry point; return the address family, or
|
||||
AF_UNSPEC if the host did not resolve. */
|
||||
static int resolve_family_nocache(const char *host) {
|
||||
SOCaddr addr;
|
||||
const char *err = NULL;
|
||||
|
||||
if (hts_dns_resolve_nocache2(host, &addr, &err) == NULL)
|
||||
return AF_UNSPEC;
|
||||
return SOCaddr_sinfamily(addr);
|
||||
}
|
||||
|
||||
/* Run the DNS resolver/cache self-test against the scripted backend.

   opt: options object passed to the cached resolver entry points
        (hts_dns_resolve2, hts_dns_resolve_all, newhttp_addr).

   Returns the number of failed CHECK()s (0 == success). The mock backend is
   installed for the duration of the run and the default backend is put back
   before returning; the global family filter (IPV6_resolver) is restored to
   the value it had on entry. */
int dns_selftests(httrackp *opt) {
  /* the scenarios below drive this global; do not leak that to the caller */
  const int saved_family_filter = IPV6_resolver;

  failures = 0;
  hts_dns_set_resolver_backend(&mock_backend);

  /* IPv4-only / IPv6-only hosts map to the right family. */
  IPV6_resolver = 0;
  CHECK(resolve_family_nocache("v4only.test") == AF_INET);
  CHECK(resolve_family_nocache("v6only.test") == AF_INET6);

  /* Dual-stack: the single-address API returns the *first* resolved address.
     Both orderings pin selection by position, not a family preference. The
     multi-address API (resolve_all, below) exposes the whole list. */
  CHECK(resolve_family_nocache("dual.test") == AF_INET6); /* v6 listed first */
  CHECK(resolve_family_nocache("dual4.test") == AF_INET); /* v4 listed first */

  /* Unknown host does not resolve. */
  CHECK(resolve_family_nocache("nodns.test") == AF_UNSPEC);

  /* Family filter (-@i4 / -@i6) selects v4 / v6 out of the dual-stack host. */
  IPV6_resolver = 1;
  CHECK(resolve_family_nocache("dual.test") == AF_INET);
  IPV6_resolver = 2;
  CHECK(resolve_family_nocache("dual.test") == AF_INET6);
  IPV6_resolver = 0;

  /* Cached driver resolves a host once and reuses the *same* address. */
  mock_reset_calls();
  {
    SOCaddr a1, a2;
    char ip1[64], ip2[64];
    const char *err = NULL;

    CHECK(hts_dns_resolve2(opt, "v4only.test", &a1, &err) != NULL);
    CHECK(hts_dns_resolve2(opt, "v4only.test", &a2, &err) != NULL);
    CHECK(mock_find("v4only.test")->calls == 1);
    /* the cache returns the right address, not merely a hit for the key */
    SOCaddr_inetntoa(ip1, sizeof(ip1), a1);
    SOCaddr_inetntoa(ip2, sizeof(ip2), a2);
    CHECK(strcmp(ip1, "1.2.3.4") == 0);
    CHECK(strcmp(ip1, ip2) == 0);
  }

  /* A negative result is cached too: a second lookup does not re-resolve. */
  {
    SOCaddr a1, a2;
    const char *err = NULL;

    CHECK(hts_dns_resolve2(opt, "nodns.test", &a1, &err) == NULL);
    CHECK(hts_dns_resolve2(opt, "nodns.test", &a2, &err) == NULL);
    CHECK(mock_find("nodns.test")->calls == 1); /* resolved once, then cached */
  }

  /* Multi-address resolution: count and order are the connect-fallback
     contract. A dead first address is retried against the next, so both must
     be exact. */
  mock_reset_calls();
  {
    SOCaddr addrs[HTS_MAXADDRNUM];
    char ip[64];
    const char *err = NULL;

    /* dual-stack, in resolver order: [0]=v6, [1]=v4 */
    CHECK(hts_dns_resolve_all(opt, "dual.test", addrs, HTS_MAXADDRNUM, &err) ==
          2);
    CHECK(SOCaddr_sinfamily(addrs[0]) == AF_INET6);
    CHECK(SOCaddr_sinfamily(addrs[1]) == AF_INET);
    SOCaddr_inetntoa(ip, sizeof(ip), addrs[1]);
    CHECK(strcmp(ip, "5.6.7.8") == 0);
    CHECK(mock_find("dual.test")->calls ==
          1); /* one backend hit for the list */

    /* single-address host: count 1 */
    CHECK(hts_dns_resolve_all(opt, "v4only.test", addrs, HTS_MAXADDRNUM,
                              &err) == 1);
    SOCaddr_inetntoa(ip, sizeof(ip), addrs[0]);
    CHECK(strcmp(ip, "1.2.3.4") == 0);

    /* does-not-resolve: count 0 (negative), no addresses */
    CHECK(hts_dns_resolve_all(opt, "nodns.test", addrs, HTS_MAXADDRNUM, &err) ==
          0);

    /* more than the cap: the kept list is clamped to HTS_MAXADDRNUM, keeping
       the FIRST addresses in resolver order (not some other window) */
    CHECK(hts_dns_resolve_all(opt, "many.test", addrs, HTS_MAXADDRNUM, &err) ==
          HTS_MAXADDRNUM);
    SOCaddr_inetntoa(ip, sizeof(ip), addrs[0]);
    CHECK(strcmp(ip, "10.0.0.1") == 0);
    SOCaddr_inetntoa(ip, sizeof(ip), addrs[HTS_MAXADDRNUM - 1]);
    CHECK(strcmp(ip, "10.0.0.4") == 0);

    /* family filter still applies through the list path */
    IPV6_resolver = 1;
    CHECK(hts_dns_resolve_all(opt, "dual4.test", addrs, HTS_MAXADDRNUM, &err) ==
          1);
    CHECK(SOCaddr_sinfamily(addrs[0]) == AF_INET);
    IPV6_resolver = 0;
  }

  /* newhttp_addr() must connect to the addr_index-th address, not always the
     first: this is what back_connect_next relies on to reach the fallback. */
  {
    htsblk r;
    int count = -1;
    T_SOC s;

    hts_init_htsblk(&r);
    s = newhttp_addr(opt, "dual.test", &r, 80, 0, 0, &count);
    CHECK(count == 2);
    CHECK(SOCaddr_sinfamily(r.address) == AF_INET6); /* index 0 = v6 */
    if (s != INVALID_SOCKET)
      deletesoc(s);

    hts_init_htsblk(&r);
    count = -1;
    s = newhttp_addr(opt, "dual.test", &r, 80, 0, 1, &count);
    CHECK(count == 2);
    CHECK(SOCaddr_sinfamily(r.address) == AF_INET); /* index 1 = v4 */
    if (s != INVALID_SOCKET)
      deletesoc(s);

    /* out-of-range index: no address selected (address stays unset) */
    hts_init_htsblk(&r);
    s = newhttp_addr(opt, "dual.test", &r, 80, 0, 2, NULL);
    CHECK(s == INVALID_SOCKET);
    if (s != INVALID_SOCKET)
      deletesoc(s);
  }

  /* Connect-fallback decision (consumer of the multi-address list): when a
     stuck connect should abandon the current address for the next one. */
  {
    /* no fallback for the last/only candidate, whatever the elapsed time */
    CHECK(back_connect_fallback_due(0, 1, 9999, 120) == 0);
    CHECK(back_connect_fallback_due(1, 2, 9999, 120) == 0);
    CHECK(back_connect_fallback_due(3, 4, 9999, 120) == 0);
    /* fallback available: wait the per-candidate deadline (cap 10s here) */
    CHECK(back_connect_fallback_due(0, 2, 9, 120) == 0);
    CHECK(back_connect_fallback_due(0, 2, 10, 120) == 1);
    CHECK(back_connect_fallback_due(2, 4, 10, 120) == 1);
    /* a shorter slot timeout shortens the deadline (min(timeout, cap)) */
    CHECK(back_connect_fallback_due(0, 2, 4, 5) == 0);
    CHECK(back_connect_fallback_due(0, 2, 5, 5) == 1);
    /* no timeout management: never force a fallback */
    CHECK(back_connect_fallback_due(0, 2, 9999, 0) == 0);
  }

  /* back to the default backend and to the caller's family filter */
  hts_dns_set_resolver_backend(NULL);
  IPV6_resolver = saved_family_filter;
  return failures;
}
|
||||
|
||||
#else
|
||||
|
||||
/* Build without HTS_INET6: there is no resolver backend to reroute, so the
   self-test has nothing to run and reports zero failures. */
int dns_selftests(httrackp *opt) {
  const int no_failures = 0;

  (void) opt;
  return no_failures; /* resolver seam only exists in the IPv6 build */
}
|
||||
|
||||
#endif
|
||||
51
src/htsdns_selftest.h
Normal file
51
src/htsdns_selftest.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/* ------------------------------------------------------------ */
|
||||
/*
|
||||
HTTrack Website Copier, Offline Browser for Windows and Unix
|
||||
Copyright (C) 2026 Xavier Roche and other contributors
|
||||
|
||||
SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Ethical use: we kindly ask that you NOT use this software to harvest email
|
||||
addresses or to collect any other private information about people. Doing so
|
||||
would dishonor our work and waste the many hours we have spent on it.
|
||||
|
||||
Please visit our Website: http://www.httrack.com
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
/* File: htsdns_selftest.h */
|
||||
/* Author: Xavier Roche */
|
||||
/* ------------------------------------------------------------ */
|
||||
|
||||
#ifndef HTSDNS_SELFTEST_DEFH
|
||||
#define HTSDNS_SELFTEST_DEFH
|
||||
|
||||
#ifdef HTS_INTERNAL_BYTECODE
|
||||
|
||||
#ifndef HTS_DEF_FWSTRUCT_httrackp
|
||||
#define HTS_DEF_FWSTRUCT_httrackp
|
||||
typedef struct httrackp httrackp;
|
||||
#endif
|
||||
|
||||
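/* Drive the DNS resolver and cache through a scripted (mock) getaddrinfo,
   asserting address family, single-address selection, negative caching, the
   IPv4/IPv6 family filter, that a cached host is resolved only once, the
   order and clamping of the multi-address list, address selection by index
   in newhttp_addr(), and the connect-fallback deadline decision.
   Returns the number of failed checks (0 == success); builds without
   HTS_INET6 run no checks and return 0. */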
|
||||
int dns_selftests(httrackp *opt);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -43,8 +43,8 @@ Please visit our Website: http://www.httrack.com
|
||||
configure.ac, decoupled from these). VERSION is the display form, VERSIONID
|
||||
the dotted numeric form, AFF_VERSION the short form shown in footers,
|
||||
LIB_VERSION the data/cache format generation. */
|
||||
#define HTTRACK_VERSION "3.49-8"
|
||||
#define HTTRACK_VERSIONID "3.49.8"
|
||||
#define HTTRACK_VERSION "3.49-9"
|
||||
#define HTTRACK_VERSIONID "3.49.9"
|
||||
#define HTTRACK_AFF_VERSION "3.x"
|
||||
#define HTTRACK_LIB_VERSION "2.0"
|
||||
|
||||
@@ -247,13 +247,23 @@ Please visit our Website: http://www.httrack.com
|
||||
#define HTS_NOPARAM "(none)"
|
||||
#define HTS_NOPARAM2 "\"(none)\""
|
||||
|
||||
/* Boolean flag for option fields and API yes/no returns. An enum (not C bool)
|
||||
so it stays int-sized: option fields keep the httrackp layout/ABI, and a
|
||||
return type stays compatible with the int it replaces. */
|
||||
/* Boolean flag for option fields and API yes/no returns. Int-backed, not an
|
||||
enum: an enum makes C++ reject `field = 1` / `f(0)` on the exported fields
|
||||
and params. Int-sized, so the httrackp layout and the ABI are unchanged. */
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_boolean
|
||||
#define HTS_DEF_DEFSTRUCT_hts_boolean
|
||||
|
||||
typedef enum hts_boolean { HTS_FALSE = 0, HTS_TRUE = 1 } hts_boolean;
|
||||
typedef int hts_boolean;
|
||||
#define HTS_FALSE 0
|
||||
#define HTS_TRUE 1
|
||||
#endif
|
||||
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_tristate
|
||||
#define HTS_DEF_DEFSTRUCT_hts_tristate
|
||||
/* Tri-state hts_boolean: HTS_DEFAULT (-1) = "unspecified" (copy_htsopt leaves
|
||||
the target untouched); HTS_FALSE/HTS_TRUE = off/on. */
|
||||
typedef int hts_tristate;
|
||||
#define HTS_DEFAULT (-1)
|
||||
#endif
|
||||
|
||||
/* Larger/smaller of two values. Macros: arguments are evaluated twice. */
|
||||
@@ -398,6 +408,10 @@ typedef int T_SOC;
|
||||
/* Buffer size for a printed network address (IPv4 or IPv6, NUL included). */
|
||||
#define HTS_MAXADDRLEN 64
|
||||
|
||||
/* Max resolved addresses kept per host for connect fallback (dead IPv6 etc.).
|
||||
*/
|
||||
#define HTS_MAXADDRNUM 4
|
||||
|
||||
#ifdef _WIN32
|
||||
#else
|
||||
#define __cdecl
|
||||
|
||||
446
src/htslib.c
446
src/htslib.c
@@ -1423,7 +1423,7 @@ void treatfirstline(htsblk * retour, const char *rcvd) {
|
||||
else
|
||||
infostatuscode(retour->msg, retour->statuscode);
|
||||
// type MIME par défaut2
|
||||
strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
|
||||
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
||||
} else { // pas de code!
|
||||
retour->statuscode = STATUSCODE_INVALID;
|
||||
strcpybuff(retour->msg, "Unknown response structure");
|
||||
@@ -1438,7 +1438,7 @@ void treatfirstline(htsblk * retour, const char *rcvd) {
|
||||
retour->statuscode = HTTP_OK;
|
||||
retour->keep_alive = 0;
|
||||
strcpybuff(retour->msg, "Unknown, assuming junky server");
|
||||
strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
|
||||
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
||||
} else if (strnotempty(a)) {
|
||||
retour->statuscode = STATUSCODE_INVALID;
|
||||
strcpybuff(retour->msg, "Unknown (not HTTP/xx) response structure");
|
||||
@@ -1447,7 +1447,7 @@ void treatfirstline(htsblk * retour, const char *rcvd) {
|
||||
retour->statuscode = HTTP_OK;
|
||||
retour->keep_alive = 0;
|
||||
strcpybuff(retour->msg, "Unknown, assuming junky server");
|
||||
strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
|
||||
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
||||
}
|
||||
}
|
||||
} else { // vide!
|
||||
@@ -1458,7 +1458,7 @@ void treatfirstline(htsblk * retour, const char *rcvd) {
|
||||
/* This is dirty .. */
|
||||
retour->statuscode = HTTP_OK;
|
||||
strcpybuff(retour->msg, "Unknown, assuming junky server");
|
||||
strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
|
||||
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1589,11 +1589,15 @@ void treathead(t_cookie * cookie, const char *adr, const char *fil, htsblk * ret
|
||||
}
|
||||
}
|
||||
}
|
||||
sscanf(rcvd + p, "%s", tempo);
|
||||
if (strlen(tempo) < sizeof(retour->contenttype) - 2) // pas trop long!!
|
||||
strcpybuff(retour->contenttype, tempo);
|
||||
else
|
||||
strcpybuff(retour->contenttype, "application/octet-stream-unknown"); // erreur
|
||||
// An empty/whitespace Content-Type value yields no token: keep the
|
||||
// sentinel default rather than reading an uninitialized tempo.
|
||||
if (sscanf(rcvd + p, "%s", tempo) == 1) {
|
||||
if (strlen(tempo) < sizeof(retour->contenttype) - 2) // pas trop long!!
|
||||
strcpybuff(retour->contenttype, tempo);
|
||||
else
|
||||
strcpybuff(retour->contenttype,
|
||||
"application/octet-stream-unknown"); // erreur
|
||||
}
|
||||
}
|
||||
} else if ((p = strfield(rcvd, "Content-Range:")) != 0) {
|
||||
// Content-Range: bytes 0-70870/70871
|
||||
@@ -2293,14 +2297,27 @@ htsblk http_test(httrackp * opt, const char *adr, const char *fil, char *loc) {
|
||||
// peut ouvrir avec des connect() non bloquants: waitconnect=0/1
|
||||
T_SOC newhttp(httrackp * opt, const char *_iadr, htsblk * retour, int port,
              int waitconnect) {
  /* Single-address entry point: ask for the first resolved candidate and do
     not request the candidate count. */
  const int first_candidate = 0;
  int *const no_count = NULL;

  return newhttp_addr(opt, _iadr, retour, port, waitconnect, first_candidate,
                      no_count);
}
|
||||
|
||||
T_SOC newhttp_addr(httrackp *opt, const char *_iadr, htsblk *retour, int port,
|
||||
int waitconnect, int addr_index, int *addr_count) {
|
||||
T_SOC soc; // descipteur de la socket
|
||||
|
||||
if (addr_count != NULL) {
|
||||
*addr_count = 0;
|
||||
}
|
||||
|
||||
if (strcmp(_iadr, "file://") != 0) { /* non fichier */
|
||||
SOCaddr server;
|
||||
SOCaddr addrs[HTS_MAXADDRNUM];
|
||||
int naddr;
|
||||
const char *error = "unknown error";
|
||||
|
||||
// tester un éventuel id:pass et virer id:pass@ si détecté
|
||||
const char *const iadr = jump_identification_const(_iadr);
|
||||
const char *resolve_host = iadr;
|
||||
char BIGSTK iadr2[HTS_URLMAXSIZE * 2];
|
||||
|
||||
SOCaddr_clear(server);
|
||||
|
||||
@@ -2322,7 +2339,6 @@ T_SOC newhttp(httrackp * opt, const char *_iadr, htsblk * retour, int port,
|
||||
#endif
|
||||
|
||||
if (a != NULL) {
|
||||
char BIGSTK iadr2[HTS_URLMAXSIZE * 2];
|
||||
int i = -1;
|
||||
|
||||
iadr2[0] = '\0';
|
||||
@@ -2333,18 +2349,19 @@ T_SOC newhttp(httrackp * opt, const char *_iadr, htsblk * retour, int port,
|
||||
|
||||
// adresse véritable (sans :xx)
|
||||
strncatbuff(iadr2, iadr, (int) (a - iadr));
|
||||
|
||||
// adresse sans le :xx
|
||||
hts_dns_resolve2(opt, iadr2, &server, &error);
|
||||
|
||||
} else {
|
||||
|
||||
// adresse normale (port par défaut par la suite)
|
||||
hts_dns_resolve2(opt, iadr, &server, &error);
|
||||
resolve_host = iadr2;
|
||||
}
|
||||
}
|
||||
|
||||
} else { // port défini
|
||||
hts_dns_resolve2(opt, iadr, &server, &error);
|
||||
// resolve the full address list and pick the requested candidate; the
|
||||
// scheduler retries the next index when a connect fails (dead IPv6 etc.)
|
||||
naddr =
|
||||
hts_dns_resolve_all(opt, resolve_host, addrs, HTS_MAXADDRNUM, &error);
|
||||
if (addr_count != NULL) {
|
||||
*addr_count = naddr;
|
||||
}
|
||||
if (addr_index >= 0 && addr_index < naddr) {
|
||||
SOCaddr_copy_SOCaddr(server, addrs[addr_index]);
|
||||
}
|
||||
|
||||
if (!SOCaddr_is_valid(server)) {
|
||||
@@ -4310,6 +4327,7 @@ int give_mimext(char *s, size_t ssize, const char *st) {
|
||||
int ok = 0;
|
||||
int j = 0;
|
||||
|
||||
st = hts_effective_mime(st); /* no declared type: derive an html ext */
|
||||
s[0] = '\0';
|
||||
while((!ok) && (strnotempty(hts_mime[j][1]))) {
|
||||
if (strfield2(hts_mime[j][0], st)) {
|
||||
@@ -4779,14 +4797,14 @@ void hts_cache_free(t_dnscache *const root) {
|
||||
// -1: status? 0: libérer 1:locker
|
||||
|
||||
// MUST BE LOCKED
|
||||
// routine pour le cache - retour optionnel à donner à chaque fois
|
||||
// NULL: nom non encore testé dans le cache
|
||||
// si h_length==0 alors le nom n'existe pas dans le dns
|
||||
static SOCaddr* hts_ghbn(const t_dnscache *cache, const char *const iadr, SOCaddr *const addr) {
|
||||
assertf(addr != NULL);
|
||||
// Look up iadr in the DNS cache, filling out[0..min(count,max)-1].
|
||||
// Returns: -1 not yet tested; 0 negative-cached (not in DNS); >0 address count.
|
||||
static int hts_ghbn_all(const t_dnscache *cache, const char *const iadr,
|
||||
SOCaddr *const out, const int max) {
|
||||
assertf(out != NULL);
|
||||
assertf(iadr != NULL);
|
||||
if (*iadr == '\0') {
|
||||
return NULL;
|
||||
return -1;
|
||||
}
|
||||
/* first entry is empty */
|
||||
if (cache->iadr == NULL) {
|
||||
@@ -4797,95 +4815,263 @@ static SOCaddr* hts_ghbn(const t_dnscache *cache, const char *const iadr, SOCadd
|
||||
assertf(cache->iadr != NULL);
|
||||
assertf(cache->iadr == (const char*) cache + sizeof(t_dnscache));
|
||||
if (strcmp(cache->iadr, iadr) == 0) { // ok trouvé
|
||||
if (cache->host_length != 0) { // entrée valide
|
||||
assertf(cache->host_length <= sizeof(cache->host_addr));
|
||||
SOCaddr_copyaddr2(*addr, cache->host_addr, cache->host_length);
|
||||
return addr;
|
||||
} else { // erreur dans le dns, déja vérifié
|
||||
SOCaddr_clear(*addr);
|
||||
return addr;
|
||||
int i;
|
||||
|
||||
assertf(cache->host_count <= HTS_MAXADDRNUM);
|
||||
for (i = 0; i < cache->host_count && i < max; i++) {
|
||||
assertf(cache->host_length[i] <= sizeof(cache->host_addr[i]));
|
||||
SOCaddr_copyaddr2(out[i], cache->host_addr[i], cache->host_length[i]);
|
||||
}
|
||||
return cache->host_count;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
static SOCaddr* hts_dns_resolve_nocache2_(const char *const hostname,
|
||||
SOCaddr *const addr,
|
||||
const char **error) {
|
||||
#if HTS_INET6 != 0
|
||||
/* Active resolver backend; defaults to the libc resolver. The self-test
|
||||
reroutes it to script DNS answers in-process (see
|
||||
hts_dns_set_resolver_backend). */
|
||||
static const hts_resolver_backend hts_resolver_libc = {getaddrinfo,
|
||||
freeaddrinfo};
|
||||
static const hts_resolver_backend *hts_resolver = &hts_resolver_libc;
|
||||
|
||||
void hts_dns_set_resolver_backend(const hts_resolver_backend *backend) {
|
||||
hts_resolver = (backend != NULL) ? backend : &hts_resolver_libc;
|
||||
}
|
||||
|
||||
/* Debug/test hook: HTTRACK_DEBUG_RESOLVE="host:ip[,ip...]" pins the resolution
|
||||
of `host` to the listed addresses (curl --resolve style), so the connect
|
||||
fallback can be exercised deterministically (a dead address first, a live one
|
||||
next). Any other host resolves normally. Below: an addrinfo backend that owns
|
||||
its chain (its own freeaddrinfo), so a synthesized and a delegated result
|
||||
free the same way. */
|
||||
|
||||
/* Deep-copy a libc addrinfo chain into our own allocations. */
|
||||
static struct addrinfo *resolver_dup_chain(const struct addrinfo *src) {
|
||||
struct addrinfo *head = NULL, *tail = NULL;
|
||||
|
||||
for (; src != NULL; src = src->ai_next) {
|
||||
struct addrinfo *const ai = calloct(1, sizeof(*ai));
|
||||
|
||||
ai->ai_family = src->ai_family;
|
||||
ai->ai_socktype = src->ai_socktype;
|
||||
ai->ai_protocol = src->ai_protocol;
|
||||
ai->ai_addrlen = src->ai_addrlen;
|
||||
ai->ai_addr = malloct(src->ai_addrlen);
|
||||
memcpy(ai->ai_addr, src->ai_addr, src->ai_addrlen);
|
||||
if (head == NULL)
|
||||
head = ai;
|
||||
else
|
||||
tail->ai_next = ai;
|
||||
tail = ai;
|
||||
}
|
||||
return head;
|
||||
}
|
||||
|
||||
/* Build one addrinfo node from an IPv4/IPv6 literal, or NULL if it does not
|
||||
parse or is filtered out by want_family (AF_INET/AF_INET6/PF_UNSPEC). */
|
||||
static struct addrinfo *resolver_make_ai(const char *ip, int want_family) {
|
||||
struct addrinfo *ai;
|
||||
|
||||
if (strchr(ip, ':') != NULL) { // IPv6 literal
|
||||
struct sockaddr_in6 sa6;
|
||||
|
||||
if (want_family != PF_UNSPEC && want_family != AF_INET6)
|
||||
return NULL;
|
||||
memset(&sa6, 0, sizeof(sa6));
|
||||
if (inet_pton(AF_INET6, ip, &sa6.sin6_addr) != 1)
|
||||
return NULL;
|
||||
sa6.sin6_family = AF_INET6;
|
||||
ai = calloct(1, sizeof(*ai));
|
||||
ai->ai_family = AF_INET6;
|
||||
ai->ai_addrlen = sizeof(sa6);
|
||||
ai->ai_addr = malloct(sizeof(sa6));
|
||||
memcpy(ai->ai_addr, &sa6, sizeof(sa6));
|
||||
} else { // IPv4 literal
|
||||
struct sockaddr_in sa;
|
||||
|
||||
if (want_family != PF_UNSPEC && want_family != AF_INET)
|
||||
return NULL;
|
||||
memset(&sa, 0, sizeof(sa));
|
||||
if (inet_pton(AF_INET, ip, &sa.sin_addr) != 1)
|
||||
return NULL;
|
||||
sa.sin_family = AF_INET;
|
||||
ai = calloct(1, sizeof(*ai));
|
||||
ai->ai_family = AF_INET;
|
||||
ai->ai_addrlen = sizeof(sa);
|
||||
ai->ai_addr = malloct(sizeof(sa));
|
||||
memcpy(ai->ai_addr, &sa, sizeof(sa));
|
||||
}
|
||||
return ai;
|
||||
}
|
||||
|
||||
static void override_freeaddrinfo(struct addrinfo *res) {
|
||||
while (res != NULL) {
|
||||
struct addrinfo *const next = res->ai_next;
|
||||
|
||||
freet(res->ai_addr);
|
||||
freet(res);
|
||||
res = next;
|
||||
}
|
||||
}
|
||||
|
||||
/* getaddrinfo() replacement honoring HTTRACK_DEBUG_RESOLVE="host:ip[,ip...]".

   When `node` equals the host part of the variable, the answer is synthesized
   from the listed literals, in order, skipping entries that do not parse or
   that hints->ai_family filters out; EAI_NONAME is returned if none remain.
   Any other node is resolved by libc and the result is deep-copied, so every
   chain returned here is released the same way (override_freeaddrinfo).

   The list is scanned in place, one literal at a time, rather than with
   strtok(): no hidden static state, and no cap on the total list length. */
static int override_getaddrinfo(const char *node, const char *service,
                                const struct addrinfo *hints,
                                struct addrinfo **res) {
  const char *const spec = getenv("HTTRACK_DEBUG_RESOLVE");
  const int want = (hints != NULL) ? hints->ai_family : PF_UNSPEC;
  const char *colon;

  *res = NULL;
  if (spec != NULL && node != NULL && (colon = strchr(spec, ':')) != NULL &&
      (size_t) (colon - spec) == strlen(node) &&
      strncmp(spec, node, colon - spec) == 0) {
    struct addrinfo *head = NULL, *tail = NULL;
    const char *tok = colon + 1;

    for (;;) {
      const char *const comma = strchr(tok, ',');
      const size_t len =
        (comma != NULL) ? (size_t) (comma - tok) : strlen(tok);
      char ip[64]; /* ample for any IPv4/IPv6 literal, NUL included */

      /* empty or oversized entries cannot be valid literals: skip them */
      if (len != 0 && len < sizeof(ip)) {
        struct addrinfo *ai;

        memcpy(ip, tok, len);
        ip[len] = '\0';
        ai = resolver_make_ai(ip, want);
        if (ai != NULL) {
          if (head == NULL)
            head = ai;
          else
            tail->ai_next = ai;
          tail = ai;
        }
      }
      if (comma == NULL)
        break;
      tok = comma + 1;
    }
    if (head == NULL)
      return EAI_NONAME;
    *res = head;
    return 0;
  }

  /* not overridden: delegate to libc, copying into our owned format */
  {
    struct addrinfo *sys = NULL;
    int gerr = getaddrinfo(node, service, hints, &sys);

    if (gerr != 0)
      return gerr;
    *res = resolver_dup_chain(sys);
    freeaddrinfo(sys);
    return 0;
  }
}
|
||||
|
||||
HTSEXT_API SOCaddr* hts_dns_resolve_nocache2(const char *const hostname,
|
||||
SOCaddr *const addr, const char **error) {
|
||||
/* Protection */
|
||||
if (!strnotempty(hostname)) {
|
||||
return NULL;
|
||||
}
|
||||
static const hts_resolver_backend hts_resolver_override = {
|
||||
override_getaddrinfo, override_freeaddrinfo};
|
||||
|
||||
/*
|
||||
Strip [] if any : [3ffe:b80:1234:1::1]
|
||||
The resolver doesn't seem to handle IP6 addresses in brackets
|
||||
*/
|
||||
/* Install the env override once, unless a backend was already set (self-test).
|
||||
*/
|
||||
static void hts_resolver_check_env(void) {
|
||||
static int checked = 0;
|
||||
|
||||
if (!checked) {
|
||||
checked = 1;
|
||||
if (hts_resolver == &hts_resolver_libc &&
|
||||
getenv("HTTRACK_DEBUG_RESOLVE") != NULL) {
|
||||
hts_resolver = &hts_resolver_override;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Resolve hostname into up to max addresses (resolver/RFC 6724 order), no
|
||||
// cache. Returns the count copied into out[0..count-1]; 0 = does not resolve.
|
||||
static int hts_dns_resolve_nocache_list_(const char *const hostname,
|
||||
SOCaddr *const out, const int max,
|
||||
const char **error) {
|
||||
int count = 0;
|
||||
|
||||
#if HTS_INET6==0
|
||||
/* IPv4 resolver */
|
||||
struct hostent *const hp = gethostbyname(hostname);
|
||||
|
||||
if (hp != NULL) {
|
||||
char **h;
|
||||
|
||||
for (h = hp->h_addr_list; count < max && h != NULL && *h != NULL; h++) {
|
||||
SOCaddr_clear(out[count]);
|
||||
SOCaddr_copyaddr2(out[count], *h, hp->h_length);
|
||||
if (SOCaddr_is_valid(out[count]))
|
||||
count++;
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* IPv6 resolver */
|
||||
struct addrinfo *res = NULL, *cur;
|
||||
struct addrinfo hints;
|
||||
int gerr;
|
||||
|
||||
hts_resolver_check_env();
|
||||
memset(&hints, 0, sizeof(hints));
|
||||
if (IPV6_resolver == 1) // V4 only (for bogus V6 entries)
|
||||
hints.ai_family = PF_INET;
|
||||
else if (IPV6_resolver == 2) // V6 only (for testing V6 only)
|
||||
hints.ai_family = PF_INET6;
|
||||
else // V4 + V6
|
||||
hints.ai_family = PF_UNSPEC;
|
||||
hints.ai_socktype = SOCK_STREAM;
|
||||
hints.ai_protocol = IPPROTO_TCP;
|
||||
if ((gerr = hts_resolver->getaddrinfo(hostname, NULL, &hints, &res)) == 0) {
|
||||
for (cur = res; cur != NULL && count < max; cur = cur->ai_next) {
|
||||
if (cur->ai_addr != NULL && cur->ai_addrlen != 0) {
|
||||
SOCaddr_clear(out[count]);
|
||||
SOCaddr_copyaddr2(out[count], cur->ai_addr, cur->ai_addrlen);
|
||||
if (SOCaddr_is_valid(out[count]))
|
||||
count++;
|
||||
}
|
||||
}
|
||||
} else if (error != NULL) {
|
||||
*error = gai_strerror(gerr);
|
||||
}
|
||||
if (res) {
|
||||
hts_resolver->freeaddrinfo(res);
|
||||
}
|
||||
#endif
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
// Strip [] around a literal IPv6 ([3ffe:b80:1234:1::1]) the resolver won't
|
||||
// take, then resolve into a list. Returns the count.
|
||||
static int hts_dns_resolve_nocache_list(const char *const hostname,
|
||||
SOCaddr *const out, const int max,
|
||||
const char **error) {
|
||||
if (!strnotempty(hostname) || max <= 0) {
|
||||
return 0;
|
||||
}
|
||||
if ((hostname[0] == '[') && (hostname[strlen(hostname) - 1] == ']')) {
|
||||
SOCaddr *ret;
|
||||
size_t size = strlen(hostname);
|
||||
char *copy = malloct(size + 1);
|
||||
int count;
|
||||
|
||||
assertf(copy != NULL);
|
||||
copy[0] = '\0';
|
||||
strncat(copy, hostname + 1, size - 2);
|
||||
ret = hts_dns_resolve_nocache2_(copy, addr, error);
|
||||
count = hts_dns_resolve_nocache_list_(copy, out, max, error);
|
||||
freet(copy);
|
||||
return ret;
|
||||
return count;
|
||||
} else {
|
||||
return hts_dns_resolve_nocache2_(hostname, addr, error);
|
||||
return hts_dns_resolve_nocache_list_(hostname, out, max, error);
|
||||
}
|
||||
}
|
||||
|
||||
HTSEXT_API SOCaddr *hts_dns_resolve_nocache2(const char *const hostname,
|
||||
SOCaddr *const addr,
|
||||
const char **error) {
|
||||
SOCaddr_clear(*addr);
|
||||
if (hts_dns_resolve_nocache_list(hostname, addr, 1, error) > 0) {
|
||||
return SOCaddr_is_valid(*addr) ? addr : NULL;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
HTSEXT_API SOCaddr* hts_dns_resolve_nocache(const char *const hostname, SOCaddr *const addr) {
|
||||
return hts_dns_resolve_nocache2(hostname, addr, NULL);
|
||||
}
|
||||
@@ -4896,16 +5082,18 @@ HTSEXT_API int check_hostname_dns(const char *const hostname) {
|
||||
}
|
||||
|
||||
// Needs locking
|
||||
// cache dns interne à HTS // ** FREE A FAIRE sur la chaine
|
||||
static SOCaddr* hts_dns_resolve_(httrackp * opt, const char *_iadr,
|
||||
SOCaddr *const addr, const char **error) {
|
||||
// Internal DNS cache. Fill out[0..count-1] with up to max addresses for _iadr,
|
||||
// resolving (and caching the full list) on a miss. Returns the count.
|
||||
static int hts_dns_resolve_list_(httrackp *opt, const char *_iadr,
|
||||
SOCaddr *const out, const int max,
|
||||
const char **error) {
|
||||
char BIGSTK iadr[HTS_URLMAXSIZE * 2];
|
||||
t_dnscache *cache = hts_cache(opt); // adresse du cache
|
||||
SOCaddr *sa;
|
||||
int count;
|
||||
|
||||
assertf(opt != NULL);
|
||||
assertf(_iadr != NULL);
|
||||
assertf(addr != NULL);
|
||||
assertf(out != NULL);
|
||||
|
||||
strcpybuff(iadr, jump_identification_const(_iadr));
|
||||
// couper éventuel :
|
||||
@@ -4917,11 +5105,13 @@ static SOCaddr* hts_dns_resolve_(httrackp * opt, const char *_iadr,
|
||||
}
|
||||
|
||||
/* get IP from the dns cache */
|
||||
sa = hts_ghbn(cache, iadr, addr);
|
||||
if (sa != NULL) {
|
||||
return SOCaddr_is_valid(*sa) ? sa : NULL;
|
||||
} else { // non présent dans le cache dns, tester
|
||||
count = hts_ghbn_all(cache, iadr, out, max);
|
||||
if (count >= 0) { // cache hit (0 == negative-cached)
|
||||
return count;
|
||||
} else { // non présent dans le cache dns, tester
|
||||
const size_t iadr_len = strlen(iadr) + 1;
|
||||
SOCaddr resolved[HTS_MAXADDRNUM];
|
||||
int i;
|
||||
|
||||
// find queue
|
||||
for(; cache->next != NULL; cache = cache->next) ;
|
||||
@@ -4930,7 +5120,7 @@ static SOCaddr* hts_dns_resolve_(httrackp * opt, const char *_iadr,
|
||||
printf("resolving (not cached) %s\n", iadr);
|
||||
#endif
|
||||
|
||||
sa = hts_dns_resolve_nocache2(iadr, addr, error); // calculer IP host
|
||||
count = hts_dns_resolve_nocache_list(iadr, resolved, HTS_MAXADDRNUM, error);
|
||||
|
||||
#if HTS_WIDE_DEBUG
|
||||
DEBUG_W("gethostbyname done\n");
|
||||
@@ -4944,28 +5134,45 @@ static SOCaddr* hts_dns_resolve_(httrackp * opt, const char *_iadr,
|
||||
char *const str = block + sizeof(t_dnscache);
|
||||
memcpy(str, iadr, iadr_len);
|
||||
next->iadr = str;
|
||||
if (sa != NULL) {
|
||||
next->host_length = SOCaddr_size(*sa);
|
||||
assertf(next->host_length <= sizeof(next->host_addr));
|
||||
memcpy(next->host_addr, &SOCaddr_sockaddr(*sa), next->host_length);
|
||||
} else {
|
||||
next->host_length = 0; // non existant dans le dns
|
||||
next->host_count = count;
|
||||
for (i = 0; i < count; i++) {
|
||||
next->host_length[i] = SOCaddr_size(resolved[i]);
|
||||
assertf(next->host_length[i] <= sizeof(next->host_addr[i]));
|
||||
memcpy(next->host_addr[i], &SOCaddr_sockaddr(resolved[i]),
|
||||
next->host_length[i]);
|
||||
}
|
||||
next->next = NULL;
|
||||
return sa;
|
||||
}
|
||||
|
||||
/* return result if any */
|
||||
return sa;
|
||||
} // retour hp du cache
|
||||
/* copy result to caller (cache store may have failed; result still valid)
|
||||
*/
|
||||
for (i = 0; i < count && i < max; i++) {
|
||||
SOCaddr_copy_SOCaddr(out[i], resolved[i]);
|
||||
}
|
||||
return count;
|
||||
} // retour hp du cache
|
||||
}
|
||||
|
||||
SOCaddr* hts_dns_resolve2(httrackp * opt, const char *_iadr, SOCaddr *const addr, const char **error) {
|
||||
SOCaddr *ret;
|
||||
int hts_dns_resolve_all(httrackp *opt, const char *iadr, SOCaddr *out, int max,
|
||||
const char **error) {
|
||||
int count;
|
||||
|
||||
if (!strnotempty(iadr) || max <= 0) {
|
||||
return 0;
|
||||
}
|
||||
hts_mutexlock(&opt->state.lock);
|
||||
ret = hts_dns_resolve_(opt, _iadr, addr, error);
|
||||
count = hts_dns_resolve_list_(opt, iadr, out, max, error);
|
||||
hts_mutexrelease(&opt->state.lock);
|
||||
return ret;
|
||||
return count;
|
||||
}
|
||||
|
||||
SOCaddr *hts_dns_resolve2(httrackp *opt, const char *_iadr, SOCaddr *const addr,
|
||||
const char **error) {
|
||||
SOCaddr_clear(*addr);
|
||||
if (hts_dns_resolve_all(opt, _iadr, addr, 1, error) > 0) {
|
||||
return SOCaddr_is_valid(*addr) ? addr : NULL;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SOCaddr* hts_dns_resolve(httrackp * opt, const char *_iadr, SOCaddr *const addr) {
|
||||
@@ -5300,6 +5507,11 @@ static int get_loglevel_from_coucal(coucal_loglevel level) {
|
||||
static void default_coucal_loghandler(void *arg, coucal_loglevel level,
|
||||
const char* format, va_list args) {
|
||||
|
||||
/* informational chatter (hashtable stats on delete, etc.) only when
|
||||
debugging; keep warnings and critical errors always visible. */
|
||||
if (level > coucal_log_warning && hts_dgb_init <= 0) {
|
||||
return;
|
||||
}
|
||||
if (level <= coucal_log_warning) {
|
||||
fprintf(stderr, "** warning: ");
|
||||
}
|
||||
|
||||
43
src/htslib.h
43
src/htslib.h
@@ -150,8 +150,11 @@ typedef struct t_dnscache t_dnscache;
|
||||
struct t_dnscache {
|
||||
struct t_dnscache *next;
|
||||
const char *iadr;
|
||||
size_t host_length; // length ; (4 or 16) ; 0 for error
|
||||
char host_addr[HTS_MAXADDRLEN];
|
||||
// resolved addresses, in resolver (RFC 6724) order; host_count==0 means the
|
||||
// name does not resolve (negative cache). host_count<=HTS_MAXADDRNUM.
|
||||
int host_count;
|
||||
size_t host_length[HTS_MAXADDRNUM]; // sockaddr length of each (16 or 28)
|
||||
char host_addr[HTS_MAXADDRNUM][HTS_MAXADDRLEN];
|
||||
};
|
||||
|
||||
/* Library internal definitions */
|
||||
@@ -191,6 +194,13 @@ int http_cookie_header_selftest(t_cookie *cookie, const char *domain,
|
||||
//int newhttp(char* iadr,char* err=NULL);
|
||||
T_SOC newhttp(httrackp * opt, const char *iadr, htsblk * retour, int port,
|
||||
int waitconnect);
|
||||
/* Like newhttp(), but connect to the addr_index-th resolved address of the host
|
||||
(0-based) instead of always the first; *addr_count, if non-NULL, is set to
|
||||
the total resolved addresses. newhttp() == newhttp_addr(...,0,NULL). Used by
|
||||
the slot scheduler to try the next address when a connect fails (dead IPv6
|
||||
etc.). */
|
||||
T_SOC newhttp_addr(httrackp *opt, const char *iadr, htsblk *retour, int port,
|
||||
int waitconnect, int addr_index, int *addr_count);
|
||||
HTS_INLINE void deletehttp(htsblk * r);
|
||||
HTS_INLINE int deleteaddr(htsblk * r);
|
||||
HTS_INLINE void deletesoc(T_SOC soc);
|
||||
@@ -215,9 +225,14 @@ void treatfirstline(htsblk * retour, const char *rcvd);
|
||||
|
||||
// sub-functions
|
||||
LLint http_xfread1(htsblk * r, int bufl);
|
||||
HTS_INLINE SOCaddr* hts_dns_resolve2(httrackp * opt, const char *iadr,
|
||||
SOCaddr *const addr,
|
||||
const char **error);
|
||||
/* Cached resolver: fill out[0..count-1] with up to max addresses for iadr (in
|
||||
resolver order), returning the count (0 = does not resolve, negative-cached).
|
||||
Resolves once per host; later calls read the DNS cache. Must hold no lock
|
||||
(brackets opt->state.lock itself). */
|
||||
int hts_dns_resolve_all(httrackp *opt, const char *iadr, SOCaddr *out, int max,
|
||||
const char **error);
|
||||
HTS_INLINE SOCaddr *hts_dns_resolve2(httrackp *opt, const char *iadr,
|
||||
SOCaddr *const addr, const char **error);
|
||||
HTS_INLINE SOCaddr* hts_dns_resolve(httrackp * opt, const char *iadr,
|
||||
SOCaddr *const addr);
|
||||
HTSEXT_API SOCaddr* hts_dns_resolve_nocache2(const char *const hostname,
|
||||
@@ -481,10 +496,22 @@ HTS_STATIC int strcmpnocase(const char *a, const char *b) {
|
||||
|
||||
// is this MIME a hypertext MIME (text/html), html/js-style or other script/text type?
|
||||
#define HTS_HYPERTEXT_DEFAULT_MIME "text/html"
|
||||
/* Sentinel stored when the server declared no Content-Type. It is html-ish
|
||||
for every type test (so a typeless response still parses/stores as today),
|
||||
but the naming code (wire_patches_ext) treats it as "no declared type" and
|
||||
keeps the URL extension. It rides the cache, so updates name consistently. */
|
||||
#define HTS_UNKNOWN_MIME "unknown/unknown"
|
||||
/* Map the no-declared-type sentinel back to a real type for any header or
|
||||
record we EMIT or PERSIST, so "unknown/unknown" never reaches a consumer
|
||||
(a served Content-Type, a ProxyTrack .arc record, ...). */
|
||||
#define hts_effective_mime(m) \
|
||||
(strfield2((m), HTS_UNKNOWN_MIME) ? HTS_HYPERTEXT_DEFAULT_MIME : (m))
|
||||
|
||||
#define is_html_mime_type(a) \
|
||||
( (strfield2((a),"text/html")!=0)\
|
||||
|| (strfield2((a),"application/xhtml+xml")!=0) \
|
||||
#define is_html_mime_type(a) \
|
||||
((strfield2((a), "text/html") != 0) || \
|
||||
(strfield2((a), "application/xhtml+xml") != 0) || \
|
||||
(strfield2((a), HTS_UNKNOWN_MIME) != \
|
||||
0) /* no declared type: treat as html */ \
|
||||
)
|
||||
#define is_hypertext_mime__(a) \
|
||||
( \
|
||||
|
||||
@@ -138,6 +138,35 @@ static void cleanEndingSpaceOrDot(char *s) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Should the wire Content-Type override the URL's own extension when naming the
|
||||
saved file? True when the type is patchable (may_unknown2) and either the URL
|
||||
extension implies no specific type or the server declared a disagreeing one.
|
||||
A URL extension mapping to a specific non-HTML type is kept only when the
|
||||
server declared NO type (the HTS_UNKNOWN_MIME sentinel; the #267 mangle
|
||||
guard): a typeless .png stays .png, but a .pdf explicitly served as text/html
|
||||
is named .html. The sentinel rides the cache, so updates stay consistent. */
|
||||
static int wire_patches_ext(httrackp *opt, const char *wiremime,
|
||||
const char *file) {
|
||||
char urlmime[256];
|
||||
|
||||
if (may_unknown2(opt, wiremime, file))
|
||||
return 0; /* type kept verbatim (keep-list / bogus-multiple) */
|
||||
urlmime[0] = '\0';
|
||||
/* type implied by the URL extension, only when confidently known (flag 0) */
|
||||
if (!get_httptype_sized(opt, urlmime, sizeof(urlmime), file, 0))
|
||||
return 1; /* URL ext implies no known type: trust the wire type */
|
||||
if (strfield2(wiremime, urlmime))
|
||||
return 0; /* wire agrees with the ext: keep it (no .htm->.html churn) */
|
||||
/* wire disagrees with a specific non-HTML URL ext. Keep the ext only when
|
||||
the server declared no type (the sentinel); an explicitly declared type,
|
||||
even text/html, is trusted, so a binary-looking URL that really serves
|
||||
HTML (login/error interstitial, soft-404) is named .html. */
|
||||
if (!is_hypertext_mime(opt, urlmime, file) &&
|
||||
strfield2(wiremime, HTS_UNKNOWN_MIME))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// build the name of the file to save (save) from fil and adr
|
||||
// smart scheme that renames when needed (example: both INDEX.HTML and index.html)
|
||||
int url_savename(lien_adrfilsave *const afs,
|
||||
@@ -325,7 +354,10 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
}
|
||||
|
||||
/* replace shtml to html.. */
|
||||
if (opt->savename_delayed == HTS_SAVENAME_DELAYED_HARD)
|
||||
/* HARD delays every type, except one the user pinned with --assume: honor it
|
||||
immediately (ishtml() consults the user type), no delayed name (#56) */
|
||||
if (opt->savename_delayed == HTS_SAVENAME_DELAYED_HARD &&
|
||||
!is_userknowntype(opt, fil))
|
||||
is_html = -1; /* ALWAYS delay type */
|
||||
else
|
||||
is_html = ishtml(opt, fil);
|
||||
@@ -380,7 +412,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (strnotempty(r.cdispo)) { /* filename given */
|
||||
ext_chg = 2; /* change filename */
|
||||
strcpybuff(ext, r.cdispo);
|
||||
} else if (!may_unknown2(opt, r.contenttype, fil)) { // on peut patcher à priori?
|
||||
} else if (wire_patches_ext(opt, r.contenttype, fil)) {
|
||||
if (give_mimext(s, sizeof(s),
|
||||
r.contenttype)) { // recognized extension
|
||||
ext_chg = 1;
|
||||
@@ -425,7 +457,8 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (strnotempty(headers->r.cdispo)) { /* filename given */
|
||||
ext_chg = 2; /* change filename */
|
||||
strcpybuff(ext, headers->r.cdispo);
|
||||
} else if (!may_unknown2(opt, headers->r.contenttype, headers->url_fil)) { // on peut patcher à priori? (pas interdit ou pas de type)
|
||||
} else if (wire_patches_ext(opt, headers->r.contenttype,
|
||||
headers->url_fil)) {
|
||||
char s[16];
|
||||
if (give_mimext(
|
||||
s, sizeof(s),
|
||||
@@ -641,7 +674,8 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (!has_been_moved) {
|
||||
if (back[b].r.statuscode != -10) { // erreur
|
||||
if (strnotempty(back[b].r.contenttype) == 0)
|
||||
strcpybuff(back[b].r.contenttype, "text/html"); // message d'erreur en html
|
||||
strcpybuff(back[b].r.contenttype,
|
||||
HTS_UNKNOWN_MIME); // no declared type
|
||||
// Finalement on, renvoie un erreur, pour ne toucher à rien dans le code
|
||||
// libérer emplacement backing
|
||||
}
|
||||
@@ -653,7 +687,8 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (strnotempty(back[b].r.cdispo)) { /* filename given */
|
||||
ext_chg = 2; /* change filename */
|
||||
strcpybuff(ext, back[b].r.cdispo);
|
||||
} else if (!may_unknown2(opt, back[b].r.contenttype, back[b].url_fil)) { // on peut patcher à priori? (pas interdit ou pas de type)
|
||||
} else if (wire_patches_ext(opt, back[b].r.contenttype,
|
||||
back[b].url_fil)) {
|
||||
if (give_mimext(
|
||||
s, sizeof(s),
|
||||
back[b].r.contenttype)) { // recognized extension
|
||||
|
||||
16
src/htsnet.h
16
src/htsnet.h
@@ -304,6 +304,22 @@ static HTS_UNUSED void SOCaddr_inetntoa_(char *namebuf, size_t namebuflen,
|
||||
/** Length type for socket APIs (getsockname, accept, ...). */
|
||||
typedef socklen_t SOClen;
|
||||
|
||||
#if HTS_INET6 != 0
|
||||
/** Resolver backend: getaddrinfo/freeaddrinfo as a swappable pair, so the
|
||||
self-test can script DNS answers (families, multiplicity, errors)
|
||||
in-process. The free function must match its getaddrinfo (a fake allocates
|
||||
its own chain), hence the pair. */
|
||||
typedef struct hts_resolver_backend {
|
||||
int (*getaddrinfo)(const char *node, const char *service,
|
||||
const struct addrinfo *hints, struct addrinfo **res);
|
||||
void (*freeaddrinfo)(struct addrinfo *res);
|
||||
} hts_resolver_backend;
|
||||
|
||||
/** Install a resolver backend for the process; NULL restores the libc default.
|
||||
Test-only seam, not thread-safe; callers must serialize against resolves. */
|
||||
void hts_dns_set_resolver_backend(const hts_resolver_backend *backend);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
10
src/htsopt.h
10
src/htsopt.h
@@ -428,11 +428,11 @@ struct httrackp {
|
||||
LLint maxfile_html; /**< max bytes per HTML file */
|
||||
int maxsoc; /**< max simultaneous sockets (-cN) */
|
||||
LLint fragment; /**< split site after this many bytes */
|
||||
hts_boolean
|
||||
hts_tristate
|
||||
nearlink; /**< also fetch images/data adjacent to a page but off-site */
|
||||
hts_boolean makeindex; /**< build a top-level index.html */
|
||||
hts_boolean kindex; /**< build a keyword index */
|
||||
hts_boolean delete_old; /**< delete locally obsolete files after update */
|
||||
hts_tristate delete_old; /**< delete locally obsolete files after update */
|
||||
int timeout; /**< connection timeout in seconds */
|
||||
int rateout; /**< minimum transfer rate (bytes/s) before abort */
|
||||
int maxtime; /**< max total mirror duration in seconds */
|
||||
@@ -465,13 +465,13 @@ struct httrackp {
|
||||
hts_boolean maketrack; /**< maintain an operations-statistics log */
|
||||
int parsejava; /**< Java/JS parsing mode; see htsparsejava_flags */
|
||||
int hostcontrol; /**< ban slow/timing-out hosts; see hts_hostcontrol bits */
|
||||
hts_boolean errpage; /**< generate an error page on 404 and similar */
|
||||
hts_tristate errpage; /**< generate an error page on 404 and similar */
|
||||
hts_boolean
|
||||
check_type; /**< probe unknown-type links (cgi/asp/dir) and follow moves
|
||||
*/
|
||||
hts_boolean all_in_cache; /**< keep all retrieved data in the cache */
|
||||
hts_robots robots; /**< robots.txt handling level */
|
||||
hts_boolean external; /**< render external links as error pages */
|
||||
hts_tristate external; /**< render external links as error pages */
|
||||
hts_boolean passprivacy; /**< strip passwords from external links */
|
||||
hts_boolean includequery; /**< include the query string in saved names */
|
||||
hts_boolean mirror_first_page; /**< only mirror the links of the first page */
|
||||
@@ -485,7 +485,7 @@ struct httrackp {
|
||||
hts_boolean sizehack; /**< treat same-size response as "updated" */
|
||||
hts_boolean urlhack; // force "url normalization" to avoid loops
|
||||
hts_boolean tolerant; /**< accept an incorrect Content-Length */
|
||||
hts_boolean
|
||||
hts_tristate
|
||||
parseall; /**< parse aggressively, including unknown tags with links */
|
||||
hts_boolean parsedebug; /**< parser debug mode */
|
||||
hts_boolean norecatch; /**< do not re-fetch files the user deleted locally */
|
||||
|
||||
@@ -1176,11 +1176,15 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
|
||||
if (element != NULL) {
|
||||
msgCode = element->statuscode;
|
||||
StringRoom(headers, 8192);
|
||||
sprintf(StringBuffRW(headers), "HTTP/1.1 %d %s\r\n"
|
||||
sprintf(StringBuffRW(headers),
|
||||
"HTTP/1.1 %d %s\r\n"
|
||||
#ifndef NO_WEBDAV
|
||||
"%s"
|
||||
#endif
|
||||
"Content-Type: %s%s%s%s\r\n" "%s%s%s" "%s%s%s" "%s%s%s",
|
||||
"Content-Type: %s%s%s%s\r\n"
|
||||
"%s%s%s"
|
||||
"%s%s%s"
|
||||
"%s%s%s",
|
||||
/* */
|
||||
msgCode, element->msg,
|
||||
#ifndef NO_WEBDAV
|
||||
@@ -1188,16 +1192,18 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
|
||||
StringBuff(davHeaders),
|
||||
#endif
|
||||
/* Content-type: foo; [ charset=bar ] */
|
||||
element->contenttype,
|
||||
hts_effective_mime(element->contenttype),
|
||||
((element->charset[0]) ? "; charset=\"" : ""),
|
||||
element->charset, ((element->charset[0]) ? "\"" : ""),
|
||||
/* location */
|
||||
((element->location != NULL
|
||||
&& element->location[0]) ? "Location: " : ""),
|
||||
((element->location != NULL
|
||||
&& element->location[0]) ? element->location : ""),
|
||||
((element->location != NULL
|
||||
&& element->location[0]) ? "\r\n" : ""),
|
||||
((element->location != NULL && element->location[0])
|
||||
? "Location: "
|
||||
: ""),
|
||||
((element->location != NULL && element->location[0])
|
||||
? element->location
|
||||
: ""),
|
||||
((element->location != NULL && element->location[0]) ? "\r\n"
|
||||
: ""),
|
||||
/* last-modified */
|
||||
((element->lastmodified[0]) ? "Last-Modified: " : ""),
|
||||
((element->lastmodified[0]) ? element->lastmodified : ""),
|
||||
@@ -1205,8 +1211,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
|
||||
/* etag */
|
||||
((element->etag[0]) ? "ETag: " : ""),
|
||||
((element->etag[0]) ? element->etag : ""),
|
||||
((element->etag[0]) ? "\r\n" : "")
|
||||
);
|
||||
((element->etag[0]) ? "\r\n" : ""));
|
||||
StringLength(headers) = (int) strlen(StringBuff(headers));
|
||||
} else {
|
||||
/* No query string, no ending / : check the <url>/ page */
|
||||
|
||||
@@ -52,6 +52,7 @@ Please visit our Website: http://www.httrack.com
|
||||
|
||||
#include "htscore.h"
|
||||
#include "htsback.h"
|
||||
#include "htslib.h" /* hts_effective_mime */
|
||||
|
||||
#include "store.h"
|
||||
#include "proxystrings.h"
|
||||
@@ -2289,10 +2290,17 @@ static int PT_SaveCache__Arc_Fun(void *arg, const char *url, PT_Element element)
|
||||
int size_headers;
|
||||
|
||||
sprintf(st->headers,
|
||||
"HTTP/1.0 %d %s" "\r\n" "X-Server: ProxyTrack " PROXYTRACK_VERSION
|
||||
"\r\n" "Content-type: %s%s%s%s" "\r\n" "Last-modified: %s" "\r\n"
|
||||
"Content-length: %d" "\r\n", element->statuscode, element->msg,
|
||||
/**/ element->contenttype,
|
||||
"HTTP/1.0 %d %s"
|
||||
"\r\n"
|
||||
"X-Server: ProxyTrack " PROXYTRACK_VERSION "\r\n"
|
||||
"Content-type: %s%s%s%s"
|
||||
"\r\n"
|
||||
"Last-modified: %s"
|
||||
"\r\n"
|
||||
"Content-length: %d"
|
||||
"\r\n",
|
||||
element->statuscode, element->msg,
|
||||
/**/ hts_effective_mime(element->contenttype),
|
||||
(element->charset[0] ? "; charset=\"" : ""),
|
||||
(element->charset[0] ? element->charset : ""),
|
||||
(element->charset[0] ? "\"" : ""), /**/ element->lastmodified,
|
||||
@@ -2328,10 +2336,10 @@ static int PT_SaveCache__Arc_Fun(void *arg, const char *url, PT_Element element)
|
||||
/* args */
|
||||
(link_has_authority(url) ? "" : "http://"), url, "0.0.0.0",
|
||||
tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour,
|
||||
tm->tm_min, tm->tm_sec, element->contenttype, element->statuscode,
|
||||
st->md5, (element->location ? element->location : "-"),
|
||||
(long int) ftell(fp), st->filename,
|
||||
(long int) (size_headers + element->size));
|
||||
tm->tm_min, tm->tm_sec, hts_effective_mime(element->contenttype),
|
||||
element->statuscode, st->md5,
|
||||
(element->location ? element->location : "-"), (long int) ftell(fp),
|
||||
st->filename, (long int) (size_headers + element->size));
|
||||
/* network_doc */
|
||||
if (fwrite(st->headers, 1, size_headers, fp) != size_headers
|
||||
|| (element->size > 0
|
||||
|
||||
15
tests/01_engine-dns.test
Normal file
15
tests/01_engine-dns.test
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# DNS resolver/cache self-test: a mock getaddrinfo (no network) checks address
|
||||
# family, single-address selection, the -@i4/-@i6 family filter, and cache reuse.
|
||||
# The trailing token is required, like the other -# selftests, so a bare command
|
||||
# line isn't treated as "no arguments" and routed to the usage screen.
|
||||
out=$(httrack -#D run)
|
||||
|
||||
test "$out" = "dns-selftest: OK" || {
|
||||
echo "expected 'dns-selftest: OK', got: $out" >&2
|
||||
exit 1
|
||||
}
|
||||
15
tests/13_local-cookies.test
Executable file
15
tests/13_local-cookies.test
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Cookie chain against the local test server (replaces the old online
|
||||
# ut/cookies/*.php fixtures). entrance.php sets cat/cake; second.php checks
|
||||
# them and sets badger; third.php checks all three. A missing or wrong cookie
|
||||
# returns 500, which would surface as an httrack error and a missing file, so a
|
||||
# clean 3-files/0-errors run proves the cookie jar is replayed across links.
|
||||
|
||||
: "${top_srcdir:=..}"
|
||||
|
||||
bash "$top_srcdir/tests/local-crawl.sh" --errors 0 --files 3 \
|
||||
--found 'cookies/entrance.html' \
|
||||
--found 'cookies/second.html' \
|
||||
--found 'cookies/third.html' \
|
||||
httrack 'BASEURL/cookies/entrance.php'
|
||||
18
tests/14_local-https.test
Executable file
18
tests/14_local-https.test
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# HTTPS crawl against the local test server, using the shipped self-signed
|
||||
# cert. httrack does not verify certs (htslib.c: SSL_CTX_new with no
|
||||
# SSL_CTX_set_verify), so the self-signed cert is accepted as-is and this
|
||||
# exercises the real TLS path offline. basic.html links to link.html with four
|
||||
# distinct query strings, each saved under a hashed name -> 5 files.
|
||||
|
||||
: "${top_srcdir:=..}"
|
||||
|
||||
if test "$HTTPS_SUPPORT" == "no"; then
|
||||
echo "no https support compiled, skipping"
|
||||
exit 77
|
||||
fi
|
||||
|
||||
bash "$top_srcdir/tests/local-crawl.sh" --tls --errors 0 --files 5 \
|
||||
--found 'simple/basic.html' \
|
||||
httrack 'BASEURL/simple/basic.html'
|
||||
25
tests/15_local-types.test
Normal file
25
tests/15_local-types.test
Normal file
@@ -0,0 +1,25 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Content-Type vs URL-extension naming (issue #267 family) under the default
|
||||
# delayed type check (-%N2). Policy: a MISSING Content-Type must not clobber a
|
||||
# URL extension that maps to a specific non-HTML type (.png/.pdf stay as-is);
|
||||
# an explicitly DECLARED type is trusted, so a binary-looking URL that really
|
||||
# serves HTML (text/html on .pdf/.jpg) is named .html. The "wrong" names are
|
||||
# asserted absent so a regression in either direction fails here.
|
||||
|
||||
: "${top_srcdir:=..}"
|
||||
|
||||
bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
|
||||
--found 'types/notype.png' --not-found 'types/notype.html' \
|
||||
--found 'types/notype.pdf' --not-found 'types/notype.html' \
|
||||
--found 'types/photo.png' \
|
||||
--found 'types/doc.pdf' \
|
||||
--found 'types/lie.html' --not-found 'types/lie.png' \
|
||||
--found 'types/report.html' --not-found 'types/report.pdf' \
|
||||
--found 'types/page.htm' --not-found 'types/page.html' \
|
||||
--found 'types/script.js' \
|
||||
--found 'types/style.css' \
|
||||
--found 'types/data.json' \
|
||||
--found 'types/control.html' --not-found 'types/control.php' \
|
||||
--found 'types/gend61c.png' --not-found 'types/gend61c.html' \
|
||||
httrack 'BASEURL/types/index.html'
|
||||
11
tests/16_local-assume.test
Normal file
11
tests/16_local-assume.test
Normal file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# --assume under the default delayed type check (-%N2), issue #56. A user type
|
||||
# pinned with --assume must be honored immediately, not lost to the delayed
|
||||
# name: photo.png served as image/png but assumed text/html is saved as .html.
|
||||
|
||||
: "${top_srcdir:=..}"
|
||||
|
||||
bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
|
||||
--found 'types/photo.html' --not-found 'types/photo.png' \
|
||||
httrack 'BASEURL/types/photo.png' --assume png=text/html
|
||||
12
tests/17_local-empty-ct.test
Normal file
12
tests/17_local-empty-ct.test
Normal file
@@ -0,0 +1,12 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# An empty "Content-Type:" header value must be treated as "no usable type"
|
||||
# (keep the URL extension), not parsed from an uninitialized buffer. The crawl
|
||||
# also runs under ASan/UBSan in CI, which catches the uninitialized read this
|
||||
# guards against.
|
||||
|
||||
: "${top_srcdir:=..}"
|
||||
|
||||
bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
|
||||
--found 'types/emptyct.png' --not-found 'types/emptyct.html' \
|
||||
httrack 'BASEURL/types/index.html'
|
||||
15
tests/18_local-update.test
Normal file
15
tests/18_local-update.test
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# A second (update) pass must keep the names the first crawl chose. The stored
|
||||
# Content-Type rides the cache, so the update reads back the same value -- the
|
||||
# unknown/unknown sentinel for a typeless response, the declared type otherwise
|
||||
# -- and names consistently: a declared-text/html .pdf stays .html and a
|
||||
# typeless .png stays .png across the update rather than reverting.
|
||||
|
||||
: "${top_srcdir:=..}"
|
||||
|
||||
bash "$top_srcdir/tests/local-crawl.sh" --errors 0 --rerun \
|
||||
--found 'types/report.html' --not-found 'types/report.pdf' \
|
||||
--found 'types/notype.png' --not-found 'types/notype.html' \
|
||||
--found 'types/lie.html' \
|
||||
httrack 'BASEURL/types/index.html'
|
||||
110
tests/19_local-connect-fallback.test
Normal file
110
tests/19_local-connect-fallback.test
Normal file
@@ -0,0 +1,110 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# A host that resolves to several addresses must fall back to the next one when
|
||||
# a connect fails, instead of giving up on the first (dead IPv6 on a dual-stack
|
||||
# host, ...). HTTRACK_DEBUG_RESOLVE pins "deadhost" to a refused address first
|
||||
# (127.0.0.2, nothing listening) then the live server (127.0.0.1): the crawl
|
||||
# only succeeds if httrack retries the second address. A second case pins every
|
||||
# address to a refused one, so the slot must exhaust the list and error out
|
||||
# (rather than hang or loop).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
: "${top_srcdir:=..}"
|
||||
|
||||
if test "${V6_SUPPORT:-}" == "no"; then
|
||||
echo "no IPv6 support (resolver list/override is IPv6-only), skipping"
|
||||
exit 77
|
||||
fi
|
||||
if ! command -v python3 >/dev/null 2>&1; then
|
||||
echo "python3 missing, skipping"
|
||||
exit 77
|
||||
fi
|
||||
|
||||
server="$top_srcdir/tests/local-server.py"
|
||||
root="$top_srcdir/tests/server-root"
|
||||
tmpdir=$(mktemp -d)
|
||||
serverpid=
|
||||
|
||||
# EXIT handler: stop and reap the background test server when one was started,
# then delete the scratch directory. Always returns 0.
cleanup() {
  if [[ -n $serverpid ]]; then
    # The server may already be gone; a failing kill/wait is not an error here.
    kill "$serverpid" 2>/dev/null || :
    wait "$serverpid" 2>/dev/null || :
  fi
  rm -rf "$tmpdir"
  return 0
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# bind the live server to 127.0.0.1 only, so 127.0.0.2 refuses the connect
|
||||
python3 "$server" --root "$root" --bind 127.0.0.1 >"$tmpdir/srv.out" 2>"$tmpdir/srv.err" &
|
||||
serverpid=$!
|
||||
port=
|
||||
for _ in $(seq 1 50); do
|
||||
line=$(head -n1 "$tmpdir/srv.out" 2>/dev/null || true)
|
||||
if test "${line%% *}" == "PORT"; then
|
||||
port="${line#PORT }"
|
||||
break
|
||||
fi
|
||||
kill -0 "$serverpid" 2>/dev/null || {
|
||||
echo "server exited early: $(cat "$tmpdir/srv.err")"
|
||||
exit 1
|
||||
}
|
||||
sleep 0.1
|
||||
done
|
||||
test -n "$port" || {
|
||||
echo "could not discover server port"
|
||||
exit 1
|
||||
}
|
||||
|
||||
out="$tmpdir/crawl"
|
||||
HTTRACK_DEBUG_RESOLVE="deadhost:127.0.0.2,127.0.0.1" \
|
||||
httrack "http://deadhost:$port/simple/basic.html" -O "$out" \
|
||||
-c1 --robots=0 --timeout=30 --quiet -Z >"$tmpdir/log" 2>&1
|
||||
|
||||
log="$out/hts-log.txt"
|
||||
|
||||
# the dead address was tried, then the next one (proves the fallback ran)
|
||||
if ! grep -q "trying next address" "$log"; then
|
||||
echo "FAIL: no connect fallback happened"
|
||||
cat "$log"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 0 errors and the file was actually fetched (over the live address)
|
||||
errs=$(grep -iEc "^[0-9:]*[[:space:]]Error:" "$log" || true)
|
||||
test "$errs" == "0" || {
|
||||
echo "FAIL: $errs error(s) reported"
|
||||
grep -iE "Error:" "$log"
|
||||
exit 1
|
||||
}
|
||||
test -f "$out/deadhost_$port/simple/basic.html" || {
|
||||
echo "FAIL: basic.html not downloaded via fallback"
|
||||
find "$out" -type f
|
||||
exit 1
|
||||
}
|
||||
|
||||
# every address refused: the slot exhausts the list, then errors out (the
|
||||
# harness timeout would catch a hang/loop; refused connects are instant)
|
||||
out2="$tmpdir/crawl2"
|
||||
HTTRACK_DEBUG_RESOLVE="alldead:127.0.0.2,127.0.0.3" \
|
||||
httrack "http://alldead:$port/simple/basic.html" -O "$out2" \
|
||||
-c1 --robots=0 --timeout=30 --quiet -Z >"$tmpdir/log2" 2>&1
|
||||
log2="$out2/hts-log.txt"
|
||||
|
||||
grep -q "trying next address" "$log2" || {
|
||||
echo "FAIL: exhaustion path never tried the fallback address"
|
||||
cat "$log2"
|
||||
exit 1
|
||||
}
|
||||
grep -iqE "^[0-9:]*[[:space:]]Error:" "$log2" || {
|
||||
echo "FAIL: all addresses failing did not report an error"
|
||||
cat "$log2"
|
||||
exit 1
|
||||
}
|
||||
test ! -f "$out2/alldead_$port/simple/basic.html" || {
|
||||
echo "FAIL: file downloaded despite every address failing"
|
||||
exit 1
|
||||
}
|
||||
|
||||
echo "OK: connect fallback succeeds, and exhausting all addresses errors out"
|
||||
@@ -3,6 +3,8 @@
|
||||
# silently drop it from the dist tarball and break "make distcheck".
|
||||
EXTRA_DIST = $(TESTS) crawl-test.sh run-all-tests.sh check-network.sh \
|
||||
proxy-https-server.py \
|
||||
local-crawl.sh local-server.py server.crt server.key \
|
||||
server-root/simple/basic.html server-root/simple/link.html \
|
||||
fixtures/cache-golden/hts-cache/new.zip
|
||||
|
||||
TESTS_ENVIRONMENT =
|
||||
@@ -11,6 +13,7 @@ TESTS_ENVIRONMENT += PATH=$(top_builddir)/src$(PATH_SEPARATOR)$$PATH
|
||||
### TESTS_ENVIRONMENT += $(SHLIBPATH_VAR)="$(top_builddir)/src/$(LT_CV_OBJDIR)$${$(SHLIBPATH_VAR):+$(PATH_SEPARATOR)}$$$(SHLIBPATH_VAR)"
|
||||
TESTS_ENVIRONMENT += ONLINE_UNIT_TESTS=$(ONLINE_UNIT_TESTS)
|
||||
TESTS_ENVIRONMENT += HTTPS_SUPPORT=$(HTTPS_SUPPORT)
|
||||
TESTS_ENVIRONMENT += V6_SUPPORT=$(V6_SUPPORT)
|
||||
TESTS_ENVIRONMENT += top_srcdir=$(top_srcdir)
|
||||
|
||||
TEST_EXTENSIONS = .test
|
||||
@@ -27,6 +30,7 @@ TESTS = \
|
||||
01_engine-cmdline.test \
|
||||
01_engine-cookies.test \
|
||||
01_engine-copyopt.test \
|
||||
01_engine-dns.test \
|
||||
01_engine-doitlog.test \
|
||||
01_engine-entities.test \
|
||||
01_engine-filter.test \
|
||||
@@ -47,6 +51,13 @@ TESTS = \
|
||||
11_crawl-longurl.test \
|
||||
11_crawl-parsing.test \
|
||||
12_crawl_https.test \
|
||||
13_crawl_proxy_https.test
|
||||
13_crawl_proxy_https.test \
|
||||
13_local-cookies.test \
|
||||
14_local-https.test \
|
||||
15_local-types.test \
|
||||
16_local-assume.test \
|
||||
17_local-empty-ct.test \
|
||||
18_local-update.test \
|
||||
19_local-connect-fallback.test
|
||||
|
||||
CLEANFILES = check-network_sh.cache
|
||||
|
||||
253
tests/local-crawl.sh
Executable file
253
tests/local-crawl.sh
Executable file
@@ -0,0 +1,253 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Launcher for httrack crawl tests against the local Python test server.
|
||||
#
|
||||
# Starts tests/local-server.py on an ephemeral port, discovers the port from
|
||||
# the server's stdout, then runs httrack against http(s)://127.0.0.1:$PORT and
|
||||
# audits the mirror. The server is always killed and the tmpdir removed on exit.
|
||||
#
|
||||
# The token BASEURL in any httrack argument is replaced with the discovered
|
||||
# http(s)://127.0.0.1:$PORT base. --found/--directory paths are relative to the
|
||||
# discovered host root (127.0.0.1_<port>/), since the random port leaks into
|
||||
# the mirror directory name.
|
||||
#
|
||||
# Usage:
|
||||
# bash local-crawl.sh [--tls] [--root DIR] \
|
||||
# --errors N --files N --found PATH ... --directory PATH ... \
|
||||
# httrack BASEURL/some/path [httrack-args...]
|
||||
|
||||
set -u
|
||||
|
||||
testdir=$(cd "$(dirname "$0")" && pwd)
|
||||
server="${testdir}/local-server.py"
|
||||
root="${LOCAL_SERVER_ROOT:-${testdir}/server-root}"
|
||||
cert="${testdir}/server.crt"
|
||||
key="${testdir}/server.key"
|
||||
|
||||
tls=
|
||||
verbose=
|
||||
rerun=
|
||||
tmpdir=
|
||||
serverpid=
|
||||
crawlpid=
|
||||
|
||||
# Print a "** "-prefixed message on stderr. Always returns 0.
warning() {
  echo "** $*" 1>&2
  return 0
}

# Print a warning on stderr, then terminate the script with status 1.
die() {
  warning "$*"
  exit 1
}

# Print a message on stderr only when $verbose is non-empty (set by --debug).
# Always returns 0, so it is safe to call as the last command of a list.
debug() {
  if [ -n "$verbose" ]; then
    echo "$*" 1>&2
  fi
  return 0
}

# Open a progress line on stderr: "[message] ..<TAB>" with no trailing newline,
# so a later result() call completes the same line.
info() {
  printf "[%s] ..\t" "$*" 1>&2
}

# Print the outcome text on stderr, followed by a newline.
result() {
  echo "$*" 1>&2
}
|
||||
|
||||
# Trap handler: tear down everything the launcher started.
# Reads and clears the globals $crawlpid and $serverpid, and removes $tmpdir
# unless $nopurge is set. Safe to run more than once: each pid is cleared after
# it has been handled.
cleanup() {
  # A crawl still in flight is killed outright (SIGKILL).
  if [ -n "$crawlpid" ]; then
    kill -KILL "$crawlpid" 2>/dev/null
    crawlpid=
  fi
  if [ -n "$serverpid" ]; then
    kill "$serverpid" 2>/dev/null
    # Reap it so the port is released before we rm the tmpdir/log.
    wait "$serverpid" 2>/dev/null
    serverpid=
  fi
  # --no-purge keeps the scratch directory around for inspection.
  if [ -n "$tmpdir" ] && [ -d "$tmpdir" ] && [ -z "$nopurge" ]; then
    rm -rf "$tmpdir"
  fi
}
|
||||
|
||||
# assert_equals LABEL EXPECTED ACTUAL
# Announce LABEL as a progress line, then compare the two strings. On a
# mismatch, report both values and exit the script with status 1; otherwise
# report "OK (EXPECTED)".
assert_equals() {
  local label=$1 expected=$2 actual=$3
  info "$label"
  [ "$expected" == "$actual" ] || {
    result "expected '$expected', got '$actual'"
    exit 1
  }
  result "OK ($expected)"
}
|
||||
|
||||
nopurge=
|
||||
trap cleanup EXIT HUP INT QUIT PIPE TERM
|
||||
|
||||
# python3 is required; mirror check-network.sh's skip-with-77 convention.
|
||||
command -v python3 >/dev/null || ! echo "python3 not found; skipping local crawl tests" || exit 77
|
||||
|
||||
tmptopdir=${TMPDIR:-/tmp}
|
||||
test -d "$tmptopdir" || mkdir -p "$tmptopdir" || die "no temporary directory; set TMPDIR"
|
||||
tmpdir=$(mktemp -d "${tmptopdir}/httrack_local.XXXXXX") || die "could not create tmpdir"
|
||||
|
||||
# --- parse leading control flags --------------------------------------------
# Walk the arguments up to the literal word "httrack". Launcher flags set the
# corresponding globals; audit checks are queued, flag and value together, in
# the audit array and evaluated after the crawl. On exit from the loop, pos is
# the index of the first httrack argument.
declare -a audit=()
scheme=http
pos=0
args=("$@")
nargs=$#
while test "$pos" -lt "$nargs"; do
  case "${args[$pos]}" in
  --debug) verbose=1 ;;
  --rerun) rerun=1 ;; # run httrack a second time (update pass) before auditing
  --no-purge)
    nopurge=1
    audit+=("--no-purge")
    ;;
  --tls)
    tls=1
    scheme=https
    ;;
  --root)
    # Without this check, a trailing flag reads past the end of args and
    # "set -u" aborts with an opaque "unbound variable" message.
    test $((pos + 1)) -lt "$nargs" || die "missing argument for ${args[$pos]}"
    pos=$((pos + 1))
    root="${args[$pos]}"
    ;;
  --errors | --files | --found | --not-found | --directory)
    test $((pos + 1)) -lt "$nargs" || die "missing argument for ${args[$pos]}"
    audit+=("${args[$pos]}" "${args[$((pos + 1))]}")
    pos=$((pos + 1))
    ;;
  httrack)
    # Everything after this word belongs to httrack itself.
    pos=$((pos + 1))
    break
    ;;
  *) die "unrecognized option ${args[$pos]}" ;;
  esac
  pos=$((pos + 1))
done
|
||||
|
||||
# --- start the server --------------------------------------------------------
|
||||
test -r "$server" || die "cannot read $server"
|
||||
serverlog="${tmpdir}/server.log"
|
||||
serverargs=(--root "$root")
|
||||
if test -n "$tls"; then
|
||||
serverargs+=(--tls --cert "$cert" --key "$key")
|
||||
fi
|
||||
debug "starting python3 $server ${serverargs[*]}"
|
||||
python3 "$server" "${serverargs[@]}" >"$serverlog" 2>&1 &
|
||||
serverpid=$!
|
||||
|
||||
# Wait for the "PORT <n>" line (server prints it once bound).
|
||||
port=
|
||||
for _ in $(seq 1 50); do
|
||||
if test -s "$serverlog"; then
|
||||
line=$(head -n1 "$serverlog")
|
||||
if test "${line%% *}" == "PORT"; then
|
||||
port="${line#PORT }"
|
||||
break
|
||||
fi
|
||||
fi
|
||||
kill -0 "$serverpid" 2>/dev/null || die "server exited early: $(cat "$serverlog")"
|
||||
sleep 0.1
|
||||
done
|
||||
test -n "$port" || die "could not discover server port: $(cat "$serverlog")"
|
||||
debug "server listening on ${scheme}://127.0.0.1:${port}"
|
||||
|
||||
baseurl="${scheme}://127.0.0.1:${port}"
|
||||
|
||||
# --- substitute BASEURL in the remaining (httrack) args ----------------------
|
||||
declare -a hts=()
|
||||
while test "$pos" -lt "$nargs"; do
|
||||
hts+=("${args[$pos]//BASEURL/$baseurl}")
|
||||
pos=$((pos + 1))
|
||||
done
|
||||
|
||||
# --- run httrack -------------------------------------------------------------
# Make sure httrack is on PATH and actually runs; its version string is reused
# in the User-Agent below. "command -v" is the shell builtin already used for
# the python3 probe above, whereas "which" is an external tool that is not
# guaranteed to be installed.
command -v httrack >/dev/null || die "could not find httrack"
ver=$(httrack -O /dev/null --version | sed -e 's/HTTrack version //')
test -n "$ver" || die "could not run httrack"
|
||||
|
||||
out="${tmpdir}/crawl"
|
||||
mkdir "$out" || die "could not create $out"
|
||||
# Localhost is fast; disable the rate/bandwidth safety limits but keep a
|
||||
# max-time backstop so a hang cannot wedge the suite.
|
||||
declare -a moreargs=(--quiet --max-time=120 --timeout=30 --disable-security-limits --robots=0)
|
||||
log="${tmpdir}/log"
|
||||
info "running httrack ${hts[*]}"
|
||||
httrack -O "$out" --user-agent="httrack $ver local ($(uname -omrs))" "${moreargs[@]}" "${hts[@]}" >"$log" 2>&1 &
|
||||
crawlpid=$!
|
||||
wait "$crawlpid"
|
||||
crawlres=$?
|
||||
crawlpid=
|
||||
# httrack exits 0 even on hard connect/DNS errors, so this is a backstop only;
|
||||
# the real guard is the audit below (--errors 0 plus the host-root existence check).
|
||||
test "$crawlres" -eq 0 || ! result "httrack exited $crawlres" || {
|
||||
cat "$log" >&2
|
||||
exit 1
|
||||
}
|
||||
result "OK"
|
||||
grep -iE "^[0-9:]*[[:space:]]Error:" "${out}/hts-log.txt" >&2
|
||||
|
||||
# --- optional second pass: re-mirror into the same dir (cache/update path) ----
|
||||
if test -n "$rerun"; then
|
||||
info "re-running httrack (update pass)"
|
||||
httrack -O "$out" --user-agent="httrack $ver local ($(uname -omrs))" \
|
||||
"${moreargs[@]}" "${hts[@]}" >"${log}.2" 2>&1 &
|
||||
crawlpid=$!
|
||||
wait "$crawlpid"
|
||||
crawlres=$?
|
||||
crawlpid=
|
||||
test "$crawlres" -eq 0 || ! result "update pass exited $crawlres" || {
|
||||
cat "${log}.2" >&2
|
||||
exit 1
|
||||
}
|
||||
result "OK (update)"
|
||||
fi
|
||||
|
||||
# --- discover the single host root (127.0.0.1_<port> or 127.0.0.1) -----------
|
||||
hostroot=
|
||||
for cand in "${out}/127.0.0.1_${port}" "${out}/127.0.0.1"; do
|
||||
if test -d "$cand"; then
|
||||
hostroot="$cand"
|
||||
break
|
||||
fi
|
||||
done
|
||||
test -n "$hostroot" || die "could not find host root under $out"
|
||||
debug "host root: $hostroot"
|
||||
|
||||
# --- audit -------------------------------------------------------------------
|
||||
i=0
|
||||
while test "$i" -lt "${#audit[@]}"; do
|
||||
case "${audit[$i]}" in
|
||||
--errors)
|
||||
i=$((i + 1))
|
||||
assert_equals "checking errors" "${audit[$i]}" \
|
||||
"$(grep -iEc "^[0-9:]*[[:space:]]Error:" "${out}/hts-log.txt")"
|
||||
;;
|
||||
--files)
|
||||
i=$((i + 1))
|
||||
nFiles=$(grep -E "^HTTrack Website Copier/[^ ]* mirror complete in " "${out}/hts-log.txt" |
|
||||
sed -e 's/.*[[:space:]]\([^ ]*\)[[:space:]]files written.*/\1/g')
|
||||
assert_equals "checking files" "${audit[$i]}" "$nFiles"
|
||||
;;
|
||||
--found)
|
||||
i=$((i + 1))
|
||||
info "checking for ${audit[$i]}"
|
||||
if test -f "${hostroot}/${audit[$i]}"; then result "OK"; else
|
||||
result "not found"
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
--not-found)
|
||||
i=$((i + 1))
|
||||
info "checking absence of ${audit[$i]}"
|
||||
if test ! -f "${hostroot}/${audit[$i]}"; then result "OK"; else
|
||||
result "present"
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
--directory)
|
||||
i=$((i + 1))
|
||||
info "checking for dir ${audit[$i]}"
|
||||
if test -d "${hostroot}/${audit[$i]}"; then result "OK"; else
|
||||
result "not found"
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
i=$((i + 1))
|
||||
done
|
||||
254
tests/local-server.py
Executable file
254
tests/local-server.py
Executable file
@@ -0,0 +1,254 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Self-contained local web server for httrack's crawl tests.
|
||||
|
||||
Serves static fixtures from a docroot plus a handful of dynamic endpoints
|
||||
(cookies, ...) so httrack can be exercised over loopback, deterministically and
|
||||
offline, instead of crawling the live ut.httrack.com.
|
||||
|
||||
Binds to an ephemeral port (port 0) and prints the chosen port to stdout as
|
||||
"PORT <n>\n" so a launcher can discover it. Pass --tls to wrap the socket with
|
||||
the shipped self-signed test cert; httrack does not verify certs, so no CA
|
||||
trust plumbing is needed.
|
||||
|
||||
stdlib only (http.server + ssl) -- no new build or runtime dependency.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer
|
||||
from urllib.parse import quote, unquote, urlsplit
|
||||
|
||||
# Cookie chain replicated from the old ut/cookies/*.php fixtures.
|
||||
COOKIE_PATH = "/cookies/"
|
||||
COOKIES = {
|
||||
"cat": "dog",
|
||||
"cake": "is a lie!",
|
||||
"badger": "mushroom, with 'ants'",
|
||||
}
|
||||
|
||||
PAGE = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
\t"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
|
||||
<head>
|
||||
\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
\t<title>Sample test</title>
|
||||
</head>
|
||||
<body>
|
||||
{body}
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
class Handler(SimpleHTTPRequestHandler):
    """Request handler for the local crawl-test server.

    Paths listed in ROUTES are answered by the matching route_* method; any
    other path falls through to SimpleHTTPRequestHandler, which serves static
    files. GET and HEAD share the same dispatch; route methods skip the body
    for HEAD.
    """

    # Quieter logging; the launcher captures httrack's own log anyway.
    def log_message(self, fmt, *args):
        if os.environ.get("LOCAL_SERVER_VERBOSE"):
            super().log_message(fmt, *args)

    # --- helpers -----------------------------------------------------------

    def request_cookies(self):
        """Parse the Cookie header into {name: decoded-value}.

        Mirrors PHP's $_COOKIE: values are url-decoded, matching the encoding
        applied when the cookie was set (see set_cookie). Pairs without "="
        are ignored."""
        jar = {}
        raw = self.headers.get("Cookie", "")
        for pair in raw.split(";"):
            pair = pair.strip()
            if "=" in pair:
                name, value = pair.split("=", 1)
                jar[name.strip()] = unquote(value.strip())
        return jar

    def set_cookie(self, name, value):
        """Queue a Set-Cookie header, url-encoding the value like PHP's
        setcookie() so spaces/quotes/commas stay a single token that httrack
        can store and replay verbatim."""
        self._set_cookies.append(f"{name}={quote(value)}; Path={COOKIE_PATH}")

    def send_html(self, body, status=200, extra_status=None):
        """Send `body` wrapped in the PAGE template as UTF-8 text/html.

        `extra_status` is passed to send_response() as the status-line
        message. Cookies queued with set_cookie() for this request are sent
        as Set-Cookie headers. The body is omitted for HEAD requests."""
        encoded = PAGE.format(body=body).encode("utf-8")
        self.send_response(status, extra_status)
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.send_header("Content-Length", str(len(encoded)))
        for cookie in self._set_cookies:
            self.send_header("Set-Cookie", cookie)
        self.end_headers()
        if self.command != "HEAD":
            self.wfile.write(encoded)

    def fail_cookie(self, what):
        # The old PHPs answered 500 with the reason in the status line.
        self.send_html("", status=500, extra_status=f"The {what} is missing or invalid")

    # --- dynamic routes ----------------------------------------------------

    def route_entrance(self):
        """First page of the cookie chain: sets "cat" and "cake"."""
        self.set_cookie("cat", COOKIES["cat"])
        self.set_cookie("cake", COOKIES["cake"])
        self.send_html('\tThis is a <a href="second.php">link</a>')

    def route_second(self):
        """Second page: requires "cat" and "cake", then sets "badger"."""
        jar = self.request_cookies()
        if jar.get("cat") != COOKIES["cat"]:
            return self.fail_cookie("cat")
        if jar.get("cake") != COOKIES["cake"]:
            return self.fail_cookie("cake")
        self.set_cookie("badger", COOKIES["badger"])
        self.send_html('\tThis is a <a href="third.php">link</a>')

    def route_third(self):
        """Last page: requires all three cookies to have come back intact."""
        jar = self.request_cookies()
        if jar.get("cat") != COOKIES["cat"]:
            return self.fail_cookie("cat")
        if jar.get("cake") != COOKIES["cake"]:
            return self.fail_cookie("cake")
        if jar.get("badger") != COOKIES["badger"]:
            return self.fail_cookie("badger")
        self.send_html("\tThis is a test.")

    def route_robots(self):
        """Serve a robots.txt with an empty Disallow rule."""
        body = b"User-agent: *\nDisallow:\n"
        self.send_response(200)
        self.send_header("Content-Type", "text/plain")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        if self.command != "HEAD":
            self.wfile.write(body)

    # --- type/extension matrix (issue #267 family) -------------------------

    def send_raw(self, body, content_type):
        """Send a raw body with an explicit Content-Type, or none at all when
        content_type is None (to observe httrack's typeless-file naming)."""
        self.send_response(200)
        if content_type is not None:
            self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        if self.command != "HEAD":
            self.wfile.write(body)

    # Fake-binary blobs for the image/pdf/typeless cases.
    FAKE_PNG = b"\x89PNG\r\n\x1a\n" + b"\x00" * 64
    FAKE_PDF = b"%PDF-1.4\n" + b"\x00" * 64

    # path -> (body, content_type); None sends no header, "" sends an empty
    # Content-Type value (no usable type, must be treated like None).
    TYPE_MATRIX = {
        "/types/control.php": (b"<html><body>control</body></html>", "text/html"),
        "/types/photo.png": (FAKE_PNG, "image/png"),
        "/types/doc.pdf": (FAKE_PDF, "application/pdf"),
        "/types/notype.png": (FAKE_PNG, None),
        "/types/notype.pdf": (FAKE_PDF, None),
        "/types/emptyct.png": (FAKE_PNG, ""),
        "/types/lie.png": (FAKE_PNG, "text/html"),
        "/types/report.pdf": (b"<html><body>real page</body></html>", "text/html"),
        "/types/page.htm": (b"<html><body>htm page</body></html>", "text/html"),
        "/types/script.js": (b"var x = 1;\n", "application/javascript"),
        "/types/style.css": (b"body { color: red; }\n", "text/css"),
        "/types/data.json": (b'{"k": "v"}\n', "application/json"),
        "/types/gen.php": (FAKE_PNG, "image/png"),
    }

    def route_types_index(self):
        """Serve the index page that links to every TYPE_MATRIX fixture."""
        body = (
            '\t<a href="control.php">control</a>\n'
            '\t<img src="photo.png" />\n'
            '\t<a href="doc.pdf">doc</a>\n'
            '\t<img src="notype.png" />\n'
            '\t<a href="notype.pdf">notypepdf</a>\n'
            '\t<img src="emptyct.png" />\n'
            '\t<img src="lie.png" />\n'
            '\t<a href="report.pdf">report</a>\n'
            '\t<a href="page.htm">htm</a>\n'
            '\t<script src="script.js"></script>\n'
            '\t<link rel="stylesheet" href="style.css" />\n'
            '\t<a href="data.json">json</a>\n'
            '\t<img src="gen.php?id=5" />\n'
        )
        self.send_html(body)

    def route_types(self):
        """Serve the TYPE_MATRIX entry for the request path (query ignored)."""
        path = urlsplit(self.path).path
        body, ctype = self.TYPE_MATRIX[path]
        self.send_raw(body, ctype)

    # path -> handler. The type fixtures are derived from TYPE_MATRIX so a new
    # matrix entry is routed automatically instead of silently falling through
    # to the static-file handler. dict.fromkeys() is used rather than a
    # comprehension because a comprehension body cannot see class-level names.
    ROUTES = {
        "/cookies/entrance.php": route_entrance,
        "/cookies/second.php": route_second,
        "/cookies/third.php": route_third,
        "/robots.txt": route_robots,
        "/types/index.html": route_types_index,
        **dict.fromkeys(TYPE_MATRIX, route_types),
    }

    # --- dispatch ----------------------------------------------------------

    def dispatch(self):
        """Run the dynamic route for the request path, if there is one.

        Resets the per-request Set-Cookie queue first. Returns True when a
        route handled the request, False when the caller should fall back to
        static file serving."""
        self._set_cookies = []
        path = urlsplit(self.path).path
        handler = self.ROUTES.get(path)
        if handler is not None:
            handler(self)
            return True
        return False

    def do_GET(self):
        if not self.dispatch():
            super().do_GET()

    def do_HEAD(self):
        if not self.dispatch():
            super().do_HEAD()
|
||||
|
||||
|
||||
def main():
    """Parse the command line, bind the server and serve until interrupted.

    Binds to port 0 on --bind so the OS picks a free port, optionally wraps
    the listening socket in TLS, prints "PORT <n>" on stdout for the launcher,
    then serves requests until KeyboardInterrupt.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--root", required=True, help="docroot for static files")
    parser.add_argument("--bind", default="127.0.0.1", help="bind address")
    parser.add_argument("--tls", action="store_true", help="serve HTTPS")
    parser.add_argument("--cert", help="TLS certificate (PEM)")
    parser.add_argument("--key", help="TLS private key (PEM)")
    args = parser.parse_args()

    # Fail with a usage error rather than a TypeError traceback from
    # load_cert_chain() when the certificate or key path is missing.
    if args.tls and not (args.cert and args.key):
        parser.error("--tls requires both --cert and --key")

    root = os.path.abspath(args.root)

    # Bind the docroot into every Handler the server instantiates.
    def factory(*a, **kw):
        return Handler(*a, directory=root, **kw)

    httpd = ThreadingHTTPServer((args.bind, 0), factory)

    if args.tls:
        import ssl

        ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        ctx.load_cert_chain(certfile=args.cert, keyfile=args.key)
        httpd.socket = ctx.wrap_socket(httpd.socket, server_side=True)

    port = httpd.socket.getsockname()[1]
    # The launcher reads this line to discover the ephemeral port.
    print(f"PORT {port}", flush=True)

    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        # Release the listening socket on every exit path.
        httpd.server_close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
18
tests/server-root/simple/basic.html
Normal file
18
tests/server-root/simple/basic.html
Normal file
@@ -0,0 +1,18 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="fr">
|
||||
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<title>Sample test</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
|
||||
This is a <a href="link.html?v=1">link</a>
|
||||
This is a <a href='link.html?v=2'>link</a>
|
||||
This is a <a href="./link.html?v=3">link</a>
|
||||
This is a <a href=link.html?v=4>link</a>
|
||||
|
||||
</body>
|
||||
3
tests/server-root/simple/link.html
Normal file
3
tests/server-root/simple/link.html
Normal file
@@ -0,0 +1,3 @@
|
||||
This is a link.
|
||||
|
||||
Go back to <a href="basic.html">home</a>.
|
||||
21
tests/server.crt
Normal file
21
tests/server.crt
Normal file
@@ -0,0 +1,21 @@
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIDbzCCAlegAwIBAgIUdWkDDomnY3WW95UqJ+UOASuR/i0wDQYJKoZIhvcNAQEL
|
||||
BQAwODESMBAGA1UEAwwJMTI3LjAuMC4xMSIwIAYDVQQKDBlIVFRyYWNrIGxvY2Fs
|
||||
IHRlc3Qgc2VydmVyMCAXDTI2MDYxNTE0NDQxMFoYDzIwNTYwNjA3MTQ0NDEwWjA4
|
||||
MRIwEAYDVQQDDAkxMjcuMC4wLjExIjAgBgNVBAoMGUhUVHJhY2sgbG9jYWwgdGVz
|
||||
dCBzZXJ2ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDx78mogNhT
|
||||
noWwRa51NeGtapQ1PfTYLlIMUzuloFXOsR1/ozRkFucqHNftF22wf0gg4VQJSBSf
|
||||
3rwj79vsnt3nyaD03bTAafpHXkd+IJxQowiG8TfOJF0R/Qg9g7DCE66R9agQpMJC
|
||||
SGxIin9p/4ld4Hn6869d4hNq4fHxNf/qkj2cnf8DYxrldz2FGsi6yMed4tzz2Am4
|
||||
ZbPgwep+fy843ZdYrVIms9vJluNa9E+6Vpw9FwdjzQ/IBBMLvGaC2pDkc95YelaE
|
||||
nQrAlTO/0l5vjc8XuTQFlo3DbUg+WEld/pxvCqsd/q1mqjL0WbxtXl2zCwGzAoJx
|
||||
rjVEPfA8QSbtAgMBAAGjbzBtMB0GA1UdDgQWBBTHE0KKW8REV4HxajzVsIBxz3iL
|
||||
9zAfBgNVHSMEGDAWgBTHE0KKW8REV4HxajzVsIBxz3iL9zAPBgNVHRMBAf8EBTAD
|
||||
AQH/MBoGA1UdEQQTMBGHBH8AAAGCCWxvY2FsaG9zdDANBgkqhkiG9w0BAQsFAAOC
|
||||
AQEAYlTEftrwGJBXuPmtxhmtw2HO/VTC4TGnq67hH5H+ptwgZJuuxCQ5KW6flTyp
|
||||
FTyMhha33WD4EBL3wqqJsWr9Y4BXqi4G0lRqXBcC1oIUa2VYIDMER7kaY1qTSqE8
|
||||
ARpwdB2BhvngAzDLc+4Jt4jQMRGr8fHAwxpDBoIZ1knbyzYNP73Bajse6/8YtxUu
|
||||
nB2BsldjZnLvyHvRxUpWp92OyQih4jYSrlN6olDFlKDg7++kMhkHtJQW9a1t54VN
|
||||
0ZXrB1ZRuHUUvGBq26x71riTWor7HNOSQaGeCMQjZNQkh5tfshNygUGSZVXTEwhG
|
||||
xSrOL7NqBt2+EkVwf7LjGzjmBw==
|
||||
-----END CERTIFICATE-----
|
||||
28
tests/server.key
Normal file
28
tests/server.key
Normal file
@@ -0,0 +1,28 @@
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDx78mogNhTnoWw
|
||||
Ra51NeGtapQ1PfTYLlIMUzuloFXOsR1/ozRkFucqHNftF22wf0gg4VQJSBSf3rwj
|
||||
79vsnt3nyaD03bTAafpHXkd+IJxQowiG8TfOJF0R/Qg9g7DCE66R9agQpMJCSGxI
|
||||
in9p/4ld4Hn6869d4hNq4fHxNf/qkj2cnf8DYxrldz2FGsi6yMed4tzz2Am4ZbPg
|
||||
wep+fy843ZdYrVIms9vJluNa9E+6Vpw9FwdjzQ/IBBMLvGaC2pDkc95YelaEnQrA
|
||||
lTO/0l5vjc8XuTQFlo3DbUg+WEld/pxvCqsd/q1mqjL0WbxtXl2zCwGzAoJxrjVE
|
||||
PfA8QSbtAgMBAAECggEACgNK4klq1T3IpKdNoBY5yoE7CbUQZBNkBpSPRxHgBezj
|
||||
SVFfgrZGnOySrIJSt4JHtuynG2Hl+0ku74HRep/ck+eOsh5W3mZvGvMLnGxhwR3u
|
||||
Or99osTIgU0VQTkpC0SLQ16FCnih0uJycNIikdLR7uuya1tt1OyIBzK7XlNGIywT
|
||||
p85zJc7/6TfTC9eM7lqh7JGR7KplBxSvgZL1pUr7y4rNpKms6uzOvPND79CcKnbU
|
||||
BBA9Tu4qdOkoOljsZKkvh3pihxyG9X6d8QTZ/uX3pkvliwSFBc+Sz9EootA3/4r5
|
||||
gVWpQ2t/AY7fY4hqzLIX/HivVaPj3cWk1G+SHm0XNQKBgQD5I9rijqFvV/p6FmUl
|
||||
FbnjJFFHHgZLivlGxAC5vOyJNQQaqdeDzg7yMotNmQTggVGjT6sjdosQb3n+ctuk
|
||||
EhQnZSU5VkNKv1+PTR35WrRkaECCaqz3Pv79pV9GVcX3it7UuYjNiOeSPqINWe+X
|
||||
49JwnJFz+qQ1BchAwOis4zkENwKBgQD4mShDaYLOO97VpgZj4cGxHHWyEK9CRQvp
|
||||
I7HxRmfaWS3JHwb88lOmALEU6pAj5cYJPAznv8BnUWcVHalZbkQ1JWYtUJRqj6OI
|
||||
Ym7rw/nm4Ay5ijbdEism173dSk3IjOe+PdAlxzsOuVzYdBTqElmeQWtBzhY9aHvX
|
||||
r+A02C2j+wKBgHHDo6Gsi57yR5gUPd9vSlCkNtEIrss0DJv5yHMIB+KnaNZcE+NF
|
||||
5qFF30Jxyz5RDtxJ9tXcvaeln8lG3XDQKI/MqfDCqTuqo5ImHrfMaW8oA70JxS2p
|
||||
gHqGVzkg1aMxsIrmpcdk6olnPExocvWivGdbtzeEjhMALu8Sp6y6nUCFAoGBAK5h
|
||||
KLgYw/OMVaQCIMthaa+l6f0s7PMMYe1453H6VBD6qz4/8HPwO7LfG1gzrUYxADgs
|
||||
ElVh0UHn/On383nS+i9Ze5Hfyyvwc+LQQURKJPrJQMPJavCptPE7NmiKnYNHK6vr
|
||||
yh0l4oxShAklbCJBGvICq4zuVfVfXDeQnDIVTfaPAoGBAMCrZqYdOUhUu+aUqxZq
|
||||
qO/TTQxrxftU63jGUg+o042TdgI4KWLn07wvHJ8/E2OqF35eXenvcuKbNLI1l72J
|
||||
4cp+3cUv8iAXThTRYEztr5CS/wta4o4CNN8zfjn5dV9AI4Hmt4V7EaGWpBcViGbj
|
||||
n0Mhag+dO8DHuenqi1yfMrAt
|
||||
-----END PRIVATE KEY-----
|
||||
152
tools/mk-sbuild-chroot.sh
Executable file
152
tools/mk-sbuild-chroot.sh
Executable file
@@ -0,0 +1,152 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Bootstrap an sbuild chroot for the clean-room build gate (mkdeb.sh --sbuild).
|
||||
#
|
||||
# Uses the rootless unshare backend: no root, no schroot daemon. It builds a
|
||||
# minimal buildd chroot tarball into ~/.cache/sbuild/<dist>-<arch>.tar.zst, where
|
||||
# sbuild --dist=<dist> finds it automatically in unshare mode.
|
||||
#
|
||||
# Usage:
|
||||
# tools/mk-sbuild-chroot.sh [options]
|
||||
#
|
||||
# Options:
|
||||
# -d, --dist DIST suite to bootstrap (default: unstable)
|
||||
# -a, --arch ARCH architecture (default: dpkg --print-architecture)
|
||||
# -m, --mirror URL apt mirror (default: http://deb.debian.org/debian)
|
||||
# --components LIST comma-separated components (default: main)
|
||||
# -f, --force rebuild even if the tarball already exists
|
||||
# --write-sbuildrc add "$chroot_mode = 'unshare';" to ~/.sbuildrc if absent
|
||||
# -h, --help show this help
|
||||
#
|
||||
# One-time setup; refresh later with sbuild-update or by rerunning with --force.
|
||||
# Requires mmdebstrap and the uidmap tools (newuidmap) for the unshare backend.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
readonly PROGNAME=${0##*/}
|
||||
|
||||
# Print "<program>: error: <message>" on stderr and exit with status 1.
die() {
  {
    printf '%s: error: %s\n' "$PROGNAME" "$*"
  } >&2
  exit 1
}

# Print a "==> "-prefixed progress message on stderr.
info() {
  {
    printf '==> %s\n' "$*"
  } >&2
}

# Print this script's own header comment as the help text: the lines from
# line 2 up to (not including) the "set -euo" line, with the leading "# " or
# "#" removed.
usage() {
  sed -n '2,/^set -euo/{/^set -euo/!p}' "$0" |
    sed 's/^# \{0,1\}//'
}

# need TOOL...
# Abort via die() on the first TOOL that is not found by "command -v".
need() {
  local tool
  for tool; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      die "required tool not found: $tool"
    fi
  done
}
|
||||
|
||||
# Entry point: parse options, check prerequisites, build the chroot tarball
# with mmdebstrap unless it already exists (or --force), then make sure
# ~/.sbuildrc selects the unshare backend or tell the user how to do so.
main() {
  local dist=unstable
  local arch=""
  local mirror=http://deb.debian.org/debian
  local components=main
  local force=0
  local write_sbuildrc=0

  while [[ $# -gt 0 ]]; do
    case $1 in
    -d | --dist)
      [[ $# -ge 2 ]] || die "missing argument for $1"
      dist=$2
      shift 2
      ;;
    -a | --arch)
      [[ $# -ge 2 ]] || die "missing argument for $1"
      arch=$2
      shift 2
      ;;
    -m | --mirror)
      [[ $# -ge 2 ]] || die "missing argument for $1"
      mirror=$2
      shift 2
      ;;
    --components)
      [[ $# -ge 2 ]] || die "missing argument for $1"
      components=$2
      shift 2
      ;;
    -f | --force)
      force=1
      shift
      ;;
    --write-sbuildrc)
      write_sbuildrc=1
      shift
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      die "unknown option: $1 (try --help)"
      ;;
    esac
  done

  need mmdebstrap dpkg
  # Unshare needs the setuid uid/gid mappers; mmdebstrap fails cryptically without.
  command -v newuidmap >/dev/null 2>&1 ||
    die "newuidmap not found; install the uidmap package for the unshare backend"

  # Unshare maps a whole UID range, not just the caller's: the base install
  # creates system users, and without an /etc/subuid+subgid range the install
  # crashes (dpkg SIGSEGV) instead of erroring cleanly. Root uses mode=root and
  # needs no range.
  if [[ $(id -u) -ne 0 ]]; then
    local me
    me=$(id -un)
    if ! grep -qs "^$me:" /etc/subuid || ! grep -qs "^$me:" /etc/subgid; then
      # Suggest a range starting past every allocation in either file.
      # Feed awk through cat so a missing file counts as "no allocations":
      # handing the paths to awk directly makes it exit non-zero when one is
      # absent, and under "set -e" that kills the script before the message
      # below is ever printed.
      local start
      start=$({ cat /etc/subuid /etc/subgid 2>/dev/null || true; } |
        awk -F: '{e = $2 + $3; if (e > m) m = e} END {print (m ? m : 100000)}')
      die "no /etc/subuid+subgid range for $me; the unshare backend needs one:
sudo usermod --add-subuids $start-$((start + 65535)) --add-subgids $start-$((start + 65535)) $me"
    fi
  fi

  : "${arch:=$(dpkg --print-architecture)}"
  local cache=$HOME/.cache/sbuild
  local tarball=$cache/${dist}-${arch}.tar.zst

  if [[ -e $tarball && $force -eq 0 ]]; then
    info "chroot already exists: $tarball (use --force to rebuild)"
  else
    info "bootstrapping $dist/$arch chroot into $tarball"
    mkdir -p "$cache"
    mmdebstrap --variant=buildd --arch="$arch" --components="$components" \
      "$dist" "$tarball" "$mirror"
    info "chroot ready: $tarball"
  fi

  local rc=$HOME/.sbuildrc
  local mode_line="\$chroot_mode = 'unshare';"
  # shellcheck disable=SC2016 # $chroot_mode is literal regex text, not a shell var.
  if grep -qsE '^[[:space:]]*\$chroot_mode[[:space:]]*=.*unshare' "$rc"; then
    : # already configured (active, non-commented line)
  elif [[ $write_sbuildrc -eq 1 ]]; then
    info "enabling the unshare backend in $rc"
    printf '%s\n' "$mode_line" >>"$rc"
  else
    cat >&2 <<EOF
==> To use this chroot without passing --chroot-mode each time, add to $rc:
$mode_line
(or rerun with --write-sbuildrc). Then verify with:
sbuild --dist=$dist path/to/package.dsc
and build the release gate with:
tools/mkdeb.sh --source-only --sbuild
EOF
  fi
}
|
||||
|
||||
main "$@"
|
||||
156
tools/mkdeb.sh
156
tools/mkdeb.sh
@@ -20,11 +20,27 @@
|
||||
# Options:
|
||||
# -k, --key KEYID GPG key for signing (default: $DEBSIGN_KEYID)
|
||||
# -o, --outdir DIR output directory (default: <repo>/dist)
|
||||
# --orig FILE reuse this upstream orig tarball instead of
|
||||
# regenerating it (required for a Debian revision
|
||||
# >= 2, whose orig is frozen in the archive)
|
||||
# -s, --source-only build only the source package
|
||||
# -u, --unsigned do not sign anything (implies no release sigs)
|
||||
# --no-release-artifacts skip the orig tarball .asc/.md5/.sha1
|
||||
# --sbuild additionally build the .dsc in a clean sbuild
|
||||
# chroot as a from-scratch verification gate
|
||||
# -h, --help show this help
|
||||
#
|
||||
# --sbuild reproduces the buildd environment: it builds the source package in a
|
||||
# minimal chroot holding only the declared Build-Depends, so an FTBFS or a
|
||||
# missing dependency fails here instead of on the archive's buildds (which, with
|
||||
# a source-only upload, are otherwise the first clean build). It needs an sbuild
|
||||
# chroot for the changelog's distribution; create one once with the companion
|
||||
# tools/mk-sbuild-chroot.sh (rootless unshare backend).
|
||||
#
|
||||
# The Debian revision in debian/changelog decides the orig: revision 1 builds a
|
||||
# fresh upstream tarball; revision >= 2 must reuse the orig frozen at revision 1
|
||||
# (the .dsc references it by checksum), so pass it with --orig.
|
||||
#
|
||||
# SOURCE_DATE_EPOCH is honored for reproducible output.
|
||||
|
||||
set -euo pipefail
|
||||
@@ -57,9 +73,11 @@ need() {
|
||||
main() {
|
||||
local key=${DEBSIGN_KEYID:-}
|
||||
local outdir=""
|
||||
local orig_in=""
|
||||
local source_only=0
|
||||
local unsigned=0
|
||||
local release_artifacts=1
|
||||
local sbuild=0
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
@@ -73,6 +91,11 @@ main() {
|
||||
outdir=$2
|
||||
shift 2
|
||||
;;
|
||||
--orig)
|
||||
[[ $# -ge 2 ]] || die "missing argument for $1"
|
||||
orig_in=$2
|
||||
shift 2
|
||||
;;
|
||||
-s | --source-only)
|
||||
source_only=1
|
||||
shift
|
||||
@@ -85,6 +108,10 @@ main() {
|
||||
release_artifacts=0
|
||||
shift
|
||||
;;
|
||||
--sbuild)
|
||||
sbuild=1
|
||||
shift
|
||||
;;
|
||||
-h | --help)
|
||||
usage
|
||||
exit 0
|
||||
@@ -95,7 +122,8 @@ main() {
|
||||
esac
|
||||
done
|
||||
|
||||
need git autoreconf debuild dcmd
|
||||
need git autoreconf debuild dcmd dpkg-parsechangelog
|
||||
[[ $sbuild -eq 1 ]] && need sbuild
|
||||
if [[ $unsigned -eq 0 ]]; then
|
||||
need gpg
|
||||
[[ -n $key ]] || die "no signing key (pass --key or set DEBSIGN_KEYID, or use --unsigned)"
|
||||
@@ -107,6 +135,11 @@ main() {
|
||||
mkdir -p "$outdir"
|
||||
outdir=$(cd "$outdir" && pwd)
|
||||
|
||||
if [[ -n $orig_in ]]; then
|
||||
[[ -r $orig_in ]] || die "--orig file not readable: $orig_in"
|
||||
orig_in=$(cd "$(dirname "$orig_in")" && pwd)/$(basename "$orig_in")
|
||||
fi
|
||||
|
||||
scratch=$(mktemp -d "${TMPDIR:-/tmp}/httrack-mkdeb.XXXXXX")
|
||||
trap 'rm -rf -- "$scratch"' EXIT
|
||||
|
||||
@@ -118,45 +151,65 @@ main() {
|
||||
git -C "$repo/src/coucal" archive --format=tar --prefix=src/coucal/ HEAD |
|
||||
tar -x -C "$export_dir"
|
||||
|
||||
# Refresh build system and man page, then build the tarball. We build here
|
||||
# only because regen-man needs the compiled binaries; the test suite is not
|
||||
# run in this pass. debuild (below) runs the full suite once, with the online
|
||||
# tests enabled, so a check here would just be a slower, offline-only repeat.
|
||||
info "regenerating build system and man page"
|
||||
(
|
||||
cd "$export_dir"
|
||||
autoreconf -fi
|
||||
./configure --quiet
|
||||
make -s -j"$(nproc)"
|
||||
make -s -C man regen-man
|
||||
# Build the tarball from a clean tree so no object files leak into it.
|
||||
make -s clean
|
||||
make -s dist
|
||||
)
|
||||
# Upstream version and Debian revision drive the orig: revision 1 builds a
|
||||
# fresh tarball, revision >= 2 reuses the one frozen at -1 (the .dsc pins it
|
||||
# by checksum, so a regenerated orig with new mtimes would be rejected).
|
||||
local fullver ver rev
|
||||
fullver=$(cd "$export_dir" && dpkg-parsechangelog -S Version)
|
||||
ver=${fullver%-*}
|
||||
rev=${fullver##*-}
|
||||
local orig=httrack_${ver}.orig.tar.gz
|
||||
info "version $ver (Debian revision $rev)"
|
||||
|
||||
local tarball ver
|
||||
local -a tarballs
|
||||
shopt -s nullglob
|
||||
tarballs=("$export_dir"/httrack-*.tar.gz)
|
||||
shopt -u nullglob
|
||||
[[ ${#tarballs[@]} -ge 1 ]] || die "make dist produced no tarball"
|
||||
tarball=${tarballs[0]##*/}
|
||||
ver=${tarball#httrack-}
|
||||
ver=${ver%.tar.gz}
|
||||
info "version $ver"
|
||||
# A signed build is upload-bound, so a revision >= 2 must reuse the frozen
|
||||
# orig (--orig); an unsigned build is a throwaway (CI, local) and may
|
||||
# regenerate it, since it can never reach the archive.
|
||||
if [[ -z $orig_in && $rev != 1 && $unsigned -eq 0 ]]; then
|
||||
die "Debian revision $rev needs --orig FILE (the orig is frozen from revision 1)"
|
||||
fi
|
||||
|
||||
if [[ -n $orig_in ]]; then
|
||||
info "reusing upstream tarball $orig_in"
|
||||
cp -- "$orig_in" "$scratch/$orig"
|
||||
else
|
||||
# Refresh build system and man page, then build the tarball. We build
|
||||
# here only because regen-man needs the compiled binaries; the test
|
||||
# suite is not run in this pass. debuild (below) runs the full suite
|
||||
# once, with the online tests enabled, so a check here would just repeat it.
|
||||
info "regenerating build system and man page"
|
||||
(
|
||||
cd "$export_dir"
|
||||
autoreconf -fi
|
||||
./configure --quiet
|
||||
make -s -j"$(nproc)"
|
||||
make -s -C man regen-man
|
||||
# Build the tarball from a clean tree so no object files leak in.
|
||||
make -s clean
|
||||
make -s dist
|
||||
)
|
||||
local -a tarballs
|
||||
shopt -s nullglob
|
||||
tarballs=("$export_dir"/httrack-*.tar.gz)
|
||||
shopt -u nullglob
|
||||
[[ ${#tarballs[@]} -ge 1 ]] || die "make dist produced no tarball"
|
||||
local tarball=${tarballs[0]##*/}
|
||||
[[ $tarball == "httrack-$ver.tar.gz" ]] ||
|
||||
die "changelog version $ver disagrees with built tarball $tarball (configure.ac mismatch?)"
|
||||
cp -- "$export_dir/$tarball" "$scratch/$orig"
|
||||
fi
|
||||
|
||||
# 3.0 (quilt): orig tarball is upstream-only; debian/ is overlaid on top.
|
||||
local orig=httrack_${ver}.orig.tar.gz
|
||||
cp -- "$export_dir/$tarball" "$scratch/$orig"
|
||||
(
|
||||
cd "$scratch"
|
||||
tar -xf "$orig"
|
||||
[[ -d httrack-$ver ]] || die "orig tarball does not unpack to httrack-$ver/"
|
||||
cp -a "$export_dir/debian" "httrack-$ver/debian"
|
||||
)
|
||||
|
||||
# Build (debuild also runs lintian and signs). --fail-on aborts on a lintian
|
||||
# error or warning, so neither a release nor CI produces an unclean package.
|
||||
local -a debuild_opts=(--lintian-opts -I -i "--fail-on=error,warning")
|
||||
# Build and sign. debuild runs lintian too but does NOT propagate its exit
|
||||
# status, so a broken package would pass unnoticed; disable it here and run
|
||||
# lintian ourselves below as the real gate.
|
||||
local -a debuild_opts=(--no-lintian)
|
||||
local -a build_opts=()
|
||||
[[ $source_only -eq 1 ]] && build_opts+=(-S)
|
||||
if [[ $unsigned -eq 1 ]]; then
|
||||
@@ -167,7 +220,8 @@ main() {
|
||||
info "building packages with debuild"
|
||||
(
|
||||
cd "$scratch/httrack-$ver"
|
||||
debuild "${build_opts[@]}" "${debuild_opts[@]}"
|
||||
# debuild options (--no-lintian) must precede the dpkg-buildpackage ones
|
||||
debuild "${debuild_opts[@]}" "${build_opts[@]}"
|
||||
)
|
||||
|
||||
# Collect every file the .changes references (orig, dsc, debs, ddebs, buildinfo).
|
||||
@@ -177,11 +231,49 @@ main() {
|
||||
changes=("$scratch"/*.changes)
|
||||
shopt -u nullglob
|
||||
[[ ${#changes[@]} -ge 1 ]] || die "debuild produced no .changes file"
|
||||
|
||||
# The real lintian gate (debuild only reports, it does not fail on tags).
|
||||
# --profile debian: CI runners are Ubuntu, whose vendor data would wrongly
|
||||
# reject the Debian "unstable" distribution. newer-standards-version only
|
||||
# means the local lintian is older than the buildds', not a package
|
||||
# defect, so suppress it. --fail-on makes lintian exit non-zero on any error/warning tag, and set -e then aborts the build.
|
||||
info "running lintian gate (--fail-on=error,warning)"
|
||||
lintian --profile debian -I -i --fail-on=error,warning \
|
||||
--suppress-tags newer-standards-version "${changes[@]}"
|
||||
|
||||
dcmd cp -- "${changes[@]}" "$outdir/"
|
||||
|
||||
# Clean-room build gate: rebuild the source package in a minimal chroot that
|
||||
# holds only the declared Build-Depends, the same way the buildds will. An
|
||||
# undeclared dependency or any FTBFS aborts the release here instead of
|
||||
# surfacing after a source-only upload. Logs and clean-built debs land in
|
||||
# $outdir/sbuild for inspection.
|
||||
if [[ $sbuild -eq 1 ]]; then
|
||||
local -a dscs
|
||||
shopt -s nullglob
|
||||
dscs=("$scratch"/*.dsc)
|
||||
shopt -u nullglob
|
||||
[[ ${#dscs[@]} -ge 1 ]] || die "no .dsc to sbuild"
|
||||
|
||||
local dist
|
||||
dist=$(cd "$scratch/httrack-$ver" && dpkg-parsechangelog -S Distribution)
|
||||
[[ $dist == UNRELEASED ]] && dist=unstable
|
||||
|
||||
info "clean-room build with sbuild (dist $dist)"
|
||||
local sbdir=$outdir/sbuild
|
||||
rm -rf -- "$sbdir"
|
||||
mkdir -p "$sbdir"
|
||||
(cd "$sbdir" && sbuild --dist="$dist" -- "${dscs[0]}")
|
||||
info "sbuild clean-room build passed; logs in $sbdir"
|
||||
fi
|
||||
|
||||
# Release artifacts for the upstream tarball (detached sig + checksums).
|
||||
# A Debian revision >= 2 .changes omits the orig (it is already in the
|
||||
# archive), so dcmd above won't have copied it; place it from the build tree
|
||||
# so the website artifacts are produced regardless of the revision.
|
||||
if [[ $release_artifacts -eq 1 && $unsigned -eq 0 ]]; then
|
||||
info "signing upstream tarball"
|
||||
cp -- "$scratch/$orig" "$outdir/$orig"
|
||||
(
|
||||
cd "$outdir"
|
||||
gpg --armor --detach-sign --yes -u "$key" -- "$orig"
|
||||
|
||||
Reference in New Issue
Block a user