mirror of
https://github.com/xroche/httrack.git
synced 2026-06-23 10:37:50 +03:00
Compare commits
7 Commits
fix/empty-
...
dns-resolv
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
71bece09fd | ||
|
|
54f5717057 | ||
|
|
40fc9de360 | ||
|
|
4614eefefe | ||
|
|
b0e8262db0 | ||
|
|
addbd3136b | ||
|
|
a64c4cd160 |
@@ -1,6 +1,6 @@
|
||||
AC_PREREQ([2.71])
|
||||
|
||||
AC_INIT([httrack], [3.49.8], [roche+packaging@httrack.com], [httrack], [http://www.httrack.com/])
|
||||
AC_INIT([httrack], [3.49.9], [roche+packaging@httrack.com], [httrack], [http://www.httrack.com/])
|
||||
AC_COPYRIGHT([
|
||||
HTTrack Website Copier, Offline Browser for Windows and Unix
|
||||
Copyright (C) 1998-2015 Xavier Roche and other contributors
|
||||
@@ -29,9 +29,10 @@ AC_CONFIG_SRCDIR(src/httrack.c)
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
AC_CONFIG_HEADERS(config.h)
|
||||
AM_INIT_AUTOMAKE([subdir-objects])
|
||||
# 4:0:0: htsblk gained the contenttype_given field, an incompatible ABI break,
|
||||
# so bump current and reset revision/age.
|
||||
VERSION_INFO="4:0:0"
|
||||
# 3:1:0: 3.49.9 changed code but not the exported interface vs 3.49.8 (same 164
|
||||
# symbols, no struct-layout change), so bump revision only. (3:0:0 was the htsblk
|
||||
# mime-buffer widening, an ABI break that moved the soname .so.2 -> .so.3.)
|
||||
VERSION_INFO="3:1:0"
|
||||
AM_MAINTAINER_MODE
|
||||
AC_USE_SYSTEM_EXTENSIONS
|
||||
|
||||
|
||||
15
debian/changelog
vendored
15
debian/changelog
vendored
@@ -1,12 +1,13 @@
|
||||
httrack (3.49.8-3) unstable; urgency=medium
|
||||
httrack (3.49.9-1) unstable; urgency=medium
|
||||
|
||||
* Rename libhttrack3 to libhttrack4 to follow the SONAME bump to
|
||||
libhttrack.so.4: htsblk gained a contenttype_given field, an
|
||||
incompatible ABI change (VERSION_INFO 3 -> 4). The .files wildcard
|
||||
now tracks .so.4* so the runtime libraries land in the right
|
||||
package. New binary package, via NEW.
|
||||
* New upstream release: Content-Type and file-type detection fixes (trust a
|
||||
declared Content-Type over a binary URL extension, honor --assume under the
|
||||
delayed type check, keep a known extension against a bogus or empty
|
||||
Content-Type, and avoid an uninitialised read on an empty Content-Type), and
|
||||
restored C++ source-compatibility of the installed headers so reverse
|
||||
dependencies (httraqt) build again.
|
||||
|
||||
-- Xavier Roche <xavier@debian.org> Sat, 20 Jun 2026 19:46:16 +0200
|
||||
-- Xavier Roche <xavier@debian.org> Sun, 21 Jun 2026 17:59:38 +0200
|
||||
|
||||
httrack (3.49.8-2) unstable; urgency=medium
|
||||
|
||||
|
||||
6
debian/control
vendored
6
debian/control
vendored
@@ -58,13 +58,13 @@ Description: webhttrack common files
|
||||
This package is the common files of webhttrack, website copier and
|
||||
mirroring utility
|
||||
|
||||
Package: libhttrack4
|
||||
Package: libhttrack3
|
||||
Architecture: any
|
||||
Multi-Arch: same
|
||||
Section: libs
|
||||
Depends: ${misc:Depends}, ${shlibs:Depends}
|
||||
Replaces: libhttrack3, httrack (<< 3.49.8-3~)
|
||||
Breaks: libhttrack3, httrack (<< 3.49.8-3~)
|
||||
Replaces: libhttrack2, httrack (<< 3.49.8-2~)
|
||||
Breaks: libhttrack2, httrack (<< 3.49.8-2~)
|
||||
Description: Httrack website copier library
|
||||
This package is the library part of httrack, website copier and mirroring
|
||||
utility
|
||||
|
||||
118
debian/copyright
vendored
118
debian/copyright
vendored
@@ -1,21 +1,109 @@
|
||||
This package was debianized by Xavier Roche <roche@httrack.com> on
|
||||
Fri, 27 Sep 2002 16:42:26 +0200
|
||||
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
||||
Upstream-Name: httrack
|
||||
Upstream-Contact: Xavier Roche <roche@httrack.com>
|
||||
Source: https://www.httrack.com/
|
||||
|
||||
The current Debian maintainer is Xavier Roche <xavier@debian.org>
|
||||
Files: *
|
||||
Copyright: 1998-2026 Xavier Roche and other contributors
|
||||
License: GPL-3+
|
||||
Comment:
|
||||
The engine includes contributions from Yann Philippot (src/htsjava.c,
|
||||
src/htsjava.h). htsbasenet.h links against the system OpenSSL library
|
||||
(originally by Eric Young); no OpenSSL/SSLeay code is bundled here.
|
||||
|
||||
Upstream author: Xavier Roche <roche@httrack.com>
|
||||
Files: src/minizip/*
|
||||
Copyright: 1998-2010 Gilles Vollant
|
||||
2007-2008 Even Rouault
|
||||
2009-2010 Mathias Svensson
|
||||
1990-2000 Info-ZIP
|
||||
License: Zlib
|
||||
Comment:
|
||||
The decryption code in src/minizip/crypt.h and src/minizip/unzip.c derives
|
||||
from the Info-ZIP distribution, distributed under the same terms.
|
||||
|
||||
Copyright: 1998-2014 Xavier Roche and other contributors
|
||||
Files: src/md5.c
|
||||
Copyright: 1993 Colin Plumb
|
||||
License: public-domain-md5
|
||||
This code implements the MD5 message-digest algorithm, due to Ron Rivest.
|
||||
It was written by Colin Plumb in 1993, no copyright is claimed. This code
|
||||
is in the public domain; do with it what you wish.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Files: src/coucal/*
|
||||
Copyright: 2013-2014 Xavier Roche
|
||||
License: BSD-3-clause
|
||||
|
||||
On Debian systems, the complete text of the GNU General Public
|
||||
License version 3 can be found in /usr/share/common-licenses/GPL-3 file.
|
||||
Files: src/coucal/murmurhash3.h*
|
||||
Copyright: Austin Appleby
|
||||
License: public-domain-murmurhash3
|
||||
MurmurHash3 was written by Austin Appleby, and is placed in the public
|
||||
domain. The author hereby disclaims copyright to this source code.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Files: html/server/div/com.httrack.WebHTTrack.metainfo.xml
|
||||
Copyright: 1998-2026 Xavier Roche and other contributors
|
||||
License: FSFAP
|
||||
Copying and distribution of this file, with or without modification, are
|
||||
permitted in any medium without royalty provided the copyright notice and
|
||||
this notice are preserved. This file is offered as-is, without any warranty.
|
||||
|
||||
Files: debian/*
|
||||
Copyright: 2002-2026 Xavier Roche <xavier@debian.org>
|
||||
License: GPL-3+
|
||||
|
||||
License: GPL-3+
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
.
|
||||
On Debian systems, the complete text of the GNU General Public License
|
||||
version 3 can be found in /usr/share/common-licenses/GPL-3.
|
||||
|
||||
License: Zlib
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the
|
||||
use of this software.
|
||||
.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
.
|
||||
1. The origin of this software must not be misrepresented; you must not claim
|
||||
that you wrote the original software. If you use this software in a product,
|
||||
an acknowledgment in the product documentation would be appreciated but is
|
||||
not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
License: BSD-3-clause
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
.
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
3. Neither the name of the copyright holder nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
3
debian/libhttrack3.files
vendored
Normal file
3
debian/libhttrack3.files
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
usr/lib/*/libhttrack.so.3*
|
||||
usr/lib/*/libhtsjava.so.3*
|
||||
usr/share/httrack/templates
|
||||
@@ -1,3 +1,3 @@
|
||||
# The shared libraries ship without a versioned symbols control file (ABI is
|
||||
# tracked via the SONAME plus a >= upstream-version dependency, see debian/rules).
|
||||
libhttrack4: no-symbols-control-file usr/lib/*
|
||||
libhttrack3: no-symbols-control-file usr/lib/*
|
||||
3
debian/libhttrack4.files
vendored
3
debian/libhttrack4.files
vendored
@@ -1,3 +0,0 @@
|
||||
usr/lib/*/libhttrack.so.4*
|
||||
usr/lib/*/libhtsjava.so.4*
|
||||
usr/share/httrack/templates
|
||||
2
debian/rules
vendored
2
debian/rules
vendored
@@ -135,7 +135,7 @@ binary-arch: build install
|
||||
dh_makeshlibs -a -X/usr/lib/$(DEB_HOST_MULTIARCH)/httrack/libtest --version-info
|
||||
dh_installdeb -a
|
||||
# we depend on the current version (ABI may change)
|
||||
dh_shlibdeps -a -ldebian/libhttrack4/usr/lib/$(DEB_HOST_MULTIARCH)
|
||||
dh_shlibdeps -a -ldebian/libhttrack3/usr/lib/$(DEB_HOST_MULTIARCH)
|
||||
dh_gencontrol -a
|
||||
dh_md5sums -a
|
||||
dh_builddeb -a
|
||||
|
||||
@@ -4,6 +4,12 @@ HTTrack Website Copier release history:
|
||||
|
||||
This file lists all changes and fixes that have been made for HTTrack
|
||||
|
||||
3.49-9
|
||||
+ Fixed: file-type detection from the Content-Type header: trust a declared type over a binary URL extension, honor --assume under the delayed type check, and keep a known extension against a bogus or empty Content-Type (#267, #29, #56)
|
||||
+ Fixed: an uninitialized-buffer read when the Content-Type is empty (#411)
|
||||
+ Fixed: restored C++ source-compatibility of the installed headers so reverse dependencies (httraqt) build again (#413)
|
||||
+ Changed: multiple internal build, packaging and test-harness improvements
|
||||
|
||||
3.49-8
|
||||
+ New: tunnel HTTPS downloads through the configured HTTP proxy via CONNECT (#85)
|
||||
+ New: parse every candidate URL in <img> and <source> srcset lists (#326)
|
||||
|
||||
@@ -56,7 +56,7 @@ whttrackrundir = $(bindir)
|
||||
whttrackrun_SCRIPTS = webhttrack
|
||||
|
||||
libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \
|
||||
htscache_selftest.c \
|
||||
htscache_selftest.c htsdns_selftest.c \
|
||||
htscatchurl.c htsfilters.c htsftp.c htshash.c coucal/coucal.c \
|
||||
htshelp.c htslib.c htscoremain.c \
|
||||
htsname.c htsrobots.c htstools.c htswizard.c \
|
||||
@@ -66,7 +66,7 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \
|
||||
md5.c \
|
||||
minizip/ioapi.c minizip/mztools.c minizip/unzip.c minizip/zip.c \
|
||||
hts-indextmpl.h htsalias.h htsback.h htsbase.h htssafe.h \
|
||||
htsbasenet.h htsbauth.h htscache.h htscache_selftest.h htscatchurl.h \
|
||||
htsbasenet.h htsbauth.h htscache.h htscache_selftest.h htsdns_selftest.h htscatchurl.h \
|
||||
htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \
|
||||
htsfilters.h htsftp.h htsglobal.h htshash.h coucal/coucal.h \
|
||||
htshelp.h htsindex.h htslib.h htsmd5.h \
|
||||
|
||||
@@ -3703,9 +3703,9 @@ HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to) {
|
||||
if (from->maxsoc > 0)
|
||||
to->maxsoc = from->maxsoc;
|
||||
|
||||
/* hts_boolean/enum fields are unsigned (GCC), so a bare `> -1` unset-guard
|
||||
is always false; cast to int to keep the -1 "unset" sentinel test. */
|
||||
if ((int) from->nearlink > -1)
|
||||
/* hts_tristate fields use HTS_DEFAULT (-1) for "unspecified": copy_htsopt
|
||||
skips them so the target keeps its value. */
|
||||
if (from->nearlink > -1)
|
||||
to->nearlink = from->nearlink;
|
||||
|
||||
if (from->timeout > -1)
|
||||
@@ -3732,10 +3732,10 @@ HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to) {
|
||||
if (from->hostcontrol > -1)
|
||||
to->hostcontrol = from->hostcontrol;
|
||||
|
||||
if ((int) from->errpage > -1)
|
||||
if (from->errpage > -1)
|
||||
to->errpage = from->errpage;
|
||||
|
||||
if ((int) from->parseall > -1)
|
||||
if (from->parseall > -1)
|
||||
to->parseall = from->parseall;
|
||||
|
||||
// test all: bit 8 de travel
|
||||
|
||||
@@ -47,6 +47,7 @@ Please visit our Website: http://www.httrack.com
|
||||
#include "htscharset.h"
|
||||
#include "htsencoding.h"
|
||||
#include "htscache_selftest.h"
|
||||
#include "htsdns_selftest.h"
|
||||
#include "htsmd5.h"
|
||||
|
||||
#include <ctype.h>
|
||||
@@ -2460,6 +2461,13 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case 'D': { // DNS resolver/cache self-test (mock getaddrinfo)
|
||||
const int err = dns_selftests(opt);
|
||||
|
||||
printf("dns-selftest: %s\n", err ? "FAIL" : "OK");
|
||||
htsmain_free();
|
||||
return err;
|
||||
} break;
|
||||
case 'C': // list cache files : httrack -#C '*spid*.gif' will attempt to find the matching file
|
||||
{
|
||||
int hasFilter = 0;
|
||||
@@ -2579,7 +2587,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
(r.size >= 0) ? r.size : (-r.size));
|
||||
if (r.contenttype >= 0) {
|
||||
fprintf(stdout, "Content-Type: %s\r\n",
|
||||
r.contenttype);
|
||||
hts_effective_mime(r.contenttype));
|
||||
}
|
||||
if (r.cdispo[0]) {
|
||||
fprintf(stdout, "Content-Disposition: %s\r\n",
|
||||
@@ -3166,6 +3174,16 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
if (to->parseall != HTS_FALSE)
|
||||
err = 1;
|
||||
|
||||
/* HTS_DEFAULT (-1) is "unspecified": copy_htsopt must skip it,
|
||||
leaving the target intact. Only a signed (int-backed) field
|
||||
can hold -1, so this also guards the type against regressing
|
||||
to an unsigned hts_boolean. */
|
||||
from->parseall = HTS_DEFAULT;
|
||||
to->parseall = HTS_TRUE;
|
||||
copy_htsopt(from, to);
|
||||
if (to->parseall != HTS_TRUE)
|
||||
err = 1;
|
||||
|
||||
hts_free_opt(from);
|
||||
hts_free_opt(to);
|
||||
printf("copy-htsopt: %s\n", err ? "FAIL" : "OK");
|
||||
|
||||
254
src/htsdns_selftest.c
Normal file
254
src/htsdns_selftest.c
Normal file
@@ -0,0 +1,254 @@
|
||||
/* ------------------------------------------------------------ */
|
||||
/*
|
||||
HTTrack Website Copier, Offline Browser for Windows and Unix
|
||||
Copyright (C) 2026 Xavier Roche and other contributors
|
||||
|
||||
SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Ethical use: we kindly ask that you NOT use this software to harvest email
|
||||
addresses or to collect any other private information about people. Doing so
|
||||
would dishonor our work and waste the many hours we have spent on it.
|
||||
|
||||
Please visit our Website: http://www.httrack.com
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
/* File: htsdns_selftest.c subroutines: */
|
||||
/* in-process self-test for the DNS resolver and cache */
|
||||
/* Author: Xavier Roche */
|
||||
/* ------------------------------------------------------------ */
|
||||
|
||||
/* Routes the resolver through a scripted getaddrinfo (hts_resolver_backend)
|
||||
instead of the network, so resolution and the DNS cache are testable for a
|
||||
fixed set of scenarios (IPv4/IPv6/dual-stack, errors, family filter,
|
||||
cache reuse) with no live DNS. */
|
||||
|
||||
#define HTS_INTERNAL_BYTECODE
|
||||
|
||||
#include "htsdns_selftest.h"
|
||||
|
||||
#include "htscore.h"
|
||||
#include "htslib.h"
|
||||
#include "htsnet.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#if HTS_INET6 != 0
|
||||
|
||||
/* IPV6_resolver: 0 = v4+v6, 1 = v4 only, 2 = v6 only (htscoremain -@i). */
|
||||
extern int IPV6_resolver;
|
||||
|
||||
/* Scripted DNS data model.
   A mock_addr is one answer record; a mock_host groups the records returned
   for a hostname, or carries the getaddrinfo error to report instead. */
typedef struct mock_addr {
  int family;             /* address family: AF_INET or AF_INET6 */
  unsigned char addr[16]; /* raw address bytes; 4 are used for AF_INET,
                             all 16 for AF_INET6 */
} mock_addr;

typedef struct mock_host {
  const char *name;  /* hostname, compared exactly with strcmp() */
  int gai_err;       /* when non-zero, the lookup fails with this code */
  int naddr;         /* number of valid entries in addr[] */
  mock_addr addr[3]; /* answers, in the order they are handed back */
  int calls;         /* how many times the backend was asked for this host */
} mock_host;
|
||||
|
||||
static mock_host mock_hosts[] = {
|
||||
{"v4only.test", 0, 1, {{AF_INET, {1, 2, 3, 4}}}, 0},
|
||||
{"v6only.test", 0, 1, {{AF_INET6, {0x20, 0x01, 0x0d, 0xb8, [15] = 1}}}, 0},
|
||||
/* dual stack, IPv6 first (RFC 6724 order) then IPv4 */
|
||||
{"dual.test",
|
||||
0,
|
||||
2,
|
||||
{{AF_INET6, {0x20, 0x01, 0x0d, 0xb8, [15] = 2}}, {AF_INET, {5, 6, 7, 8}}},
|
||||
0},
|
||||
/* dual stack, IPv4 first: distinguishes "keep the first address" from
|
||||
"prefer a family", so the selection contract is actually pinned. */
|
||||
{"dual4.test",
|
||||
0,
|
||||
2,
|
||||
{{AF_INET, {9, 10, 11, 12}},
|
||||
{AF_INET6, {0x20, 0x01, 0x0d, 0xb8, [15] = 3}}},
|
||||
0},
|
||||
{"nodns.test", EAI_NONAME, 0, {{0}}, 0},
|
||||
};
|
||||
|
||||
static mock_host *mock_find(const char *name) {
|
||||
for (size_t i = 0; i < sizeof(mock_hosts) / sizeof(mock_hosts[0]); i++) {
|
||||
if (strcmp(mock_hosts[i].name, name) == 0)
|
||||
return &mock_hosts[i];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void mock_reset_calls(void) {
|
||||
for (size_t i = 0; i < sizeof(mock_hosts) / sizeof(mock_hosts[0]); i++)
|
||||
mock_hosts[i].calls = 0;
|
||||
}
|
||||
|
||||
/* Build one addrinfo node owning its sockaddr (freed by mock_freeaddrinfo). */
|
||||
static struct addrinfo *mock_mkai(const mock_addr *a) {
|
||||
struct addrinfo *ai = calloct(1, sizeof(*ai));
|
||||
|
||||
ai->ai_family = a->family;
|
||||
if (a->family == AF_INET) {
|
||||
struct sockaddr_in *sin = calloct(1, sizeof(*sin));
|
||||
|
||||
sin->sin_family = AF_INET;
|
||||
memcpy(&sin->sin_addr, a->addr, 4);
|
||||
ai->ai_addr = (struct sockaddr *) sin;
|
||||
ai->ai_addrlen = sizeof(*sin);
|
||||
} else {
|
||||
struct sockaddr_in6 *sin6 = calloct(1, sizeof(*sin6));
|
||||
|
||||
sin6->sin6_family = AF_INET6;
|
||||
memcpy(&sin6->sin6_addr, a->addr, 16);
|
||||
ai->ai_addr = (struct sockaddr *) sin6;
|
||||
ai->ai_addrlen = sizeof(*sin6);
|
||||
}
|
||||
return ai;
|
||||
}
|
||||
|
||||
/* Scripted stand-in for getaddrinfo().
   node:    hostname looked up in mock_hosts[]; NULL is answered with
            EAI_NONAME instead of being passed to strcmp().
   service: ignored.
   hints:   only ai_family is read; NULL means PF_UNSPEC (no filtering).
   res:     receives the answer list, or NULL on any failure.
   Returns 0 on success, the host's scripted gai_err, EAI_NONAME for an
   unknown host or a family filter that leaves nothing, or EAI_MEMORY if a
   node could not be built. The host's call counter is bumped for every
   lookup that reaches a scripted entry, including scripted failures. */
static int mock_getaddrinfo(const char *node, const char *service,
                            const struct addrinfo *hints,
                            struct addrinfo **res) {
  const int want = (hints != NULL) ? hints->ai_family : PF_UNSPEC;
  struct addrinfo *head = NULL, *tail = NULL;
  mock_host *h;

  (void) service;
  *res = NULL;
  if (node == NULL)
    return EAI_NONAME; /* the mock scripts named hosts only */
  h = mock_find(node);
  if (h == NULL)
    return EAI_NONAME;
  h->calls++; /* a real backend hit; a cached host skips this */
  if (h->gai_err != 0)
    return h->gai_err;
  for (int i = 0; i < h->naddr; i++) {
    struct addrinfo *ai;

    if (want != PF_UNSPEC && want != h->addr[i].family)
      continue; /* honor the requested family (v4/v6 only) */
    ai = mock_mkai(&h->addr[i]);
    if (ai == NULL) {
      /* no node could be built: release the partial list and report it */
      while (head != NULL) {
        struct addrinfo *next = head->ai_next;

        freet(head->ai_addr);
        freet(head);
        head = next;
      }
      return EAI_MEMORY;
    }
    if (head == NULL)
      head = ai;
    else
      tail->ai_next = ai;
    tail = ai;
  }
  if (head == NULL)
    return EAI_NONAME; /* filtered to empty, as the libc resolver does */
  *res = head;
  return 0;
}
|
||||
|
||||
static void mock_freeaddrinfo(struct addrinfo *res) {
|
||||
while (res != NULL) {
|
||||
struct addrinfo *const next = res->ai_next;
|
||||
|
||||
freet(res->ai_addr);
|
||||
freet(res);
|
||||
res = next;
|
||||
}
|
||||
}
|
||||
|
||||
/* Backend pair installed for the duration of the self-test: the scripted
   lookup first, then its matching release routine. */
static const hts_resolver_backend mock_backend = {
  mock_getaddrinfo,
  mock_freeaddrinfo
};
|
||||
|
||||
/* Number of CHECK() conditions that evaluated false. */
static int failures = 0;

/* Evaluate cond once; when it is false, report the source location and the
   condition text on stderr and count one failure. Execution continues. */
#define CHECK(cond)                                                      \
  do {                                                                   \
    if (!(cond)) {                                                       \
      fprintf(stderr, "dns-selftest: FAIL at %s:%d: %s\n", __FILE__,     \
              __LINE__, #cond);                                          \
      failures++;                                                        \
    }                                                                    \
  } while (0)
|
||||
|
||||
/* Resolve via the uncached entry point; return the address family, or
|
||||
AF_UNSPEC if the host did not resolve. */
|
||||
static int resolve_family_nocache(const char *host) {
|
||||
SOCaddr addr;
|
||||
const char *err = NULL;
|
||||
|
||||
if (hts_dns_resolve_nocache2(host, &addr, &err) == NULL)
|
||||
return AF_UNSPEC;
|
||||
return SOCaddr_sinfamily(addr);
|
||||
}
|
||||
|
||||
/* Run the DNS resolver/cache self-test against the scripted backend.
   opt: passed unchanged to hts_dns_resolve2() for the cached lookups
        (presumably it carries the DNS cache -- confirm against htslib).
   Returns the number of failed checks (0 == success).
   The mock backend is installed on entry and removed on exit. The global
   IPV6_resolver family filter is changed while the test runs and put back to
   the value it had on entry, so a caller's -@i setting is not lost. */
int dns_selftests(httrackp *opt) {
  const int saved_resolver = IPV6_resolver; /* restored before returning */

  failures = 0;
  hts_dns_set_resolver_backend(&mock_backend);

  /* IPv4-only / IPv6-only hosts map to the right family. */
  IPV6_resolver = 0;
  CHECK(resolve_family_nocache("v4only.test") == AF_INET);
  CHECK(resolve_family_nocache("v6only.test") == AF_INET6);

  /* Dual-stack: the current resolver keeps only the *first* address. Both
     orderings pin that (not a family preference); PR2 (multi-address) widens
     it. */
  CHECK(resolve_family_nocache("dual.test") == AF_INET6); /* v6 listed first */
  CHECK(resolve_family_nocache("dual4.test") == AF_INET); /* v4 listed first */

  /* Unknown host does not resolve. */
  CHECK(resolve_family_nocache("nodns.test") == AF_UNSPEC);

  /* Family filter (-@i4 / -@i6) selects v4 / v6 out of the dual-stack host. */
  IPV6_resolver = 1;
  CHECK(resolve_family_nocache("dual.test") == AF_INET);
  IPV6_resolver = 2;
  CHECK(resolve_family_nocache("dual.test") == AF_INET6);
  IPV6_resolver = 0; /* back to v4+v6 for the cache scenarios below */

  /* Cached driver resolves a host once and reuses the *same* address. */
  mock_reset_calls();
  {
    SOCaddr a1, a2;
    char ip1[64], ip2[64];
    const char *err = NULL;

    CHECK(hts_dns_resolve2(opt, "v4only.test", &a1, &err) != NULL);
    CHECK(hts_dns_resolve2(opt, "v4only.test", &a2, &err) != NULL);
    CHECK(mock_find("v4only.test")->calls == 1);
    /* the cache returns the right address, not merely a hit for the key */
    SOCaddr_inetntoa(ip1, sizeof(ip1), a1);
    SOCaddr_inetntoa(ip2, sizeof(ip2), a2);
    CHECK(strcmp(ip1, "1.2.3.4") == 0);
    CHECK(strcmp(ip1, ip2) == 0);
  }

  /* A negative result is cached too: a second lookup does not re-resolve. */
  {
    SOCaddr a1, a2;
    const char *err = NULL;

    CHECK(hts_dns_resolve2(opt, "nodns.test", &a1, &err) == NULL);
    CHECK(hts_dns_resolve2(opt, "nodns.test", &a2, &err) == NULL);
    CHECK(mock_find("nodns.test")->calls == 1); /* resolved once, then cached */
  }

  hts_dns_set_resolver_backend(NULL);
  IPV6_resolver = saved_resolver;
  return failures;
}
|
||||
|
||||
#else
|
||||
|
||||
/* Build without IPv6 support: there is no resolver seam to script, so the
   self-test has nothing to run and always reports zero failures. */
int dns_selftests(httrackp *opt) {
  (void) opt; /* unused in this configuration */
  return 0;
}
|
||||
|
||||
#endif
|
||||
51
src/htsdns_selftest.h
Normal file
51
src/htsdns_selftest.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/* ------------------------------------------------------------ */
|
||||
/*
|
||||
HTTrack Website Copier, Offline Browser for Windows and Unix
|
||||
Copyright (C) 2026 Xavier Roche and other contributors
|
||||
|
||||
SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Ethical use: we kindly ask that you NOT use this software to harvest email
|
||||
addresses or to collect any other private information about people. Doing so
|
||||
would dishonor our work and waste the many hours we have spent on it.
|
||||
|
||||
Please visit our Website: http://www.httrack.com
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
/* File: htsdns_selftest.h */
|
||||
/* Author: Xavier Roche */
|
||||
/* ------------------------------------------------------------ */
|
||||
|
||||
#ifndef HTSDNS_SELFTEST_DEFH
|
||||
#define HTSDNS_SELFTEST_DEFH
|
||||
|
||||
#ifdef HTS_INTERNAL_BYTECODE
|
||||
|
||||
#ifndef HTS_DEF_FWSTRUCT_httrackp
|
||||
#define HTS_DEF_FWSTRUCT_httrackp
|
||||
typedef struct httrackp httrackp;
|
||||
#endif
|
||||
|
||||
/* Drive the DNS resolver and cache through a scripted (mock) getaddrinfo,
|
||||
asserting address family, single-address selection, negative caching, the
|
||||
IPv4/IPv6 family filter, and that a cached host is resolved only once.
|
||||
Returns the number of failed checks (0 == success). */
|
||||
int dns_selftests(httrackp *opt);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -43,8 +43,8 @@ Please visit our Website: http://www.httrack.com
|
||||
configure.ac, decoupled from these). VERSION is the display form, VERSIONID
|
||||
the dotted numeric form, AFF_VERSION the short form shown in footers,
|
||||
LIB_VERSION the data/cache format generation. */
|
||||
#define HTTRACK_VERSION "3.49-8"
|
||||
#define HTTRACK_VERSIONID "3.49.8"
|
||||
#define HTTRACK_VERSION "3.49-9"
|
||||
#define HTTRACK_VERSIONID "3.49.9"
|
||||
#define HTTRACK_AFF_VERSION "3.x"
|
||||
#define HTTRACK_LIB_VERSION "2.0"
|
||||
|
||||
@@ -247,13 +247,23 @@ Please visit our Website: http://www.httrack.com
|
||||
#define HTS_NOPARAM "(none)"
|
||||
#define HTS_NOPARAM2 "\"(none)\""
|
||||
|
||||
/* Boolean flag for option fields and API yes/no returns. An enum (not C bool)
|
||||
so it stays int-sized: option fields keep the httrackp layout/ABI, and a
|
||||
return type stays compatible with the int it replaces. */
|
||||
/* Boolean flag for option fields and API yes/no returns. Int-backed, not an
|
||||
enum: an enum makes C++ reject `field = 1` / `f(0)` on the exported fields
|
||||
and params. Int-sized, so the httrackp layout and the ABI are unchanged. */
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_boolean
|
||||
#define HTS_DEF_DEFSTRUCT_hts_boolean
|
||||
|
||||
typedef enum hts_boolean { HTS_FALSE = 0, HTS_TRUE = 1 } hts_boolean;
|
||||
typedef int hts_boolean;
|
||||
#define HTS_FALSE 0
|
||||
#define HTS_TRUE 1
|
||||
#endif
|
||||
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_tristate
|
||||
#define HTS_DEF_DEFSTRUCT_hts_tristate
|
||||
/* Tri-state hts_boolean: HTS_DEFAULT (-1) = "unspecified" (copy_htsopt leaves
|
||||
the target untouched); HTS_FALSE/HTS_TRUE = off/on. */
|
||||
typedef int hts_tristate;
|
||||
#define HTS_DEFAULT (-1)
|
||||
#endif
|
||||
|
||||
/* Larger/smaller of two values. Macros: arguments are evaluated twice. */
|
||||
|
||||
43
src/htslib.c
43
src/htslib.c
@@ -1396,8 +1396,6 @@ int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
|
||||
void treatfirstline(htsblk * retour, const char *rcvd) {
|
||||
const char *a = rcvd;
|
||||
|
||||
retour->contenttype_given = HTS_FALSE; /* set when a Content-Type is seen */
|
||||
|
||||
// exemple:
|
||||
// HTTP/1.0 200 OK
|
||||
if (*a) {
|
||||
@@ -1425,7 +1423,7 @@ void treatfirstline(htsblk * retour, const char *rcvd) {
|
||||
else
|
||||
infostatuscode(retour->msg, retour->statuscode);
|
||||
// type MIME par défaut2
|
||||
strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
|
||||
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
||||
} else { // pas de code!
|
||||
retour->statuscode = STATUSCODE_INVALID;
|
||||
strcpybuff(retour->msg, "Unknown response structure");
|
||||
@@ -1440,7 +1438,7 @@ void treatfirstline(htsblk * retour, const char *rcvd) {
|
||||
retour->statuscode = HTTP_OK;
|
||||
retour->keep_alive = 0;
|
||||
strcpybuff(retour->msg, "Unknown, assuming junky server");
|
||||
strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
|
||||
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
||||
} else if (strnotempty(a)) {
|
||||
retour->statuscode = STATUSCODE_INVALID;
|
||||
strcpybuff(retour->msg, "Unknown (not HTTP/xx) response structure");
|
||||
@@ -1449,7 +1447,7 @@ void treatfirstline(htsblk * retour, const char *rcvd) {
|
||||
retour->statuscode = HTTP_OK;
|
||||
retour->keep_alive = 0;
|
||||
strcpybuff(retour->msg, "Unknown, assuming junky server");
|
||||
strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
|
||||
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
||||
}
|
||||
}
|
||||
} else { // vide!
|
||||
@@ -1460,7 +1458,7 @@ void treatfirstline(htsblk * retour, const char *rcvd) {
|
||||
/* This is dirty .. */
|
||||
retour->statuscode = HTTP_OK;
|
||||
strcpybuff(retour->msg, "Unknown, assuming junky server");
|
||||
strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
|
||||
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1591,16 +1589,14 @@ void treathead(t_cookie * cookie, const char *adr, const char *fil, htsblk * ret
|
||||
}
|
||||
}
|
||||
}
|
||||
// An empty/whitespace Content-Type value yields no token; keep the
|
||||
// default type and the "not given" flag instead of reading uninit tempo.
|
||||
// An empty/whitespace Content-Type value yields no token: keep the
|
||||
// sentinel default rather than reading an uninitialized tempo.
|
||||
if (sscanf(rcvd + p, "%s", tempo) == 1) {
|
||||
if (strlen(tempo) < sizeof(retour->contenttype) - 2) // pas trop long!!
|
||||
strcpybuff(retour->contenttype, tempo);
|
||||
else
|
||||
strcpybuff(retour->contenttype,
|
||||
"application/octet-stream-unknown"); // erreur
|
||||
retour->contenttype_given =
|
||||
HTS_TRUE; /* server declared a usable type */
|
||||
}
|
||||
}
|
||||
} else if ((p = strfield(rcvd, "Content-Range:")) != 0) {
|
||||
@@ -4318,6 +4314,7 @@ int give_mimext(char *s, size_t ssize, const char *st) {
|
||||
int ok = 0;
|
||||
int j = 0;
|
||||
|
||||
st = hts_effective_mime(st); /* no declared type: derive an html ext */
|
||||
s[0] = '\0';
|
||||
while((!ok) && (strnotempty(hts_mime[j][1]))) {
|
||||
if (strfield2(hts_mime[j][0], st)) {
|
||||
@@ -4818,8 +4815,21 @@ static SOCaddr* hts_ghbn(const t_dnscache *cache, const char *const iadr, SOCadd
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static SOCaddr* hts_dns_resolve_nocache2_(const char *const hostname,
|
||||
SOCaddr *const addr,
|
||||
#if HTS_INET6 != 0
|
||||
/* Active resolver backend; defaults to the libc resolver. The self-test
|
||||
reroutes it to script DNS answers in-process (see
|
||||
hts_dns_set_resolver_backend). */
|
||||
static const hts_resolver_backend hts_resolver_libc = {getaddrinfo,
|
||||
freeaddrinfo};
|
||||
static const hts_resolver_backend *hts_resolver = &hts_resolver_libc;
|
||||
|
||||
void hts_dns_set_resolver_backend(const hts_resolver_backend *backend) {
|
||||
hts_resolver = (backend != NULL) ? backend : &hts_resolver_libc;
|
||||
}
|
||||
#endif
|
||||
|
||||
static SOCaddr *hts_dns_resolve_nocache2_(const char *const hostname,
|
||||
SOCaddr *const addr,
|
||||
const char **error) {
|
||||
{
|
||||
#if HTS_INET6==0
|
||||
@@ -4848,7 +4858,7 @@ static SOCaddr* hts_dns_resolve_nocache2_(const char *const hostname,
|
||||
hints.ai_family = PF_UNSPEC;
|
||||
hints.ai_socktype = SOCK_STREAM;
|
||||
hints.ai_protocol = IPPROTO_TCP;
|
||||
if ( ( gerr = getaddrinfo(hostname, NULL, &hints, &res) ) == 0) {
|
||||
if ((gerr = hts_resolver->getaddrinfo(hostname, NULL, &hints, &res)) == 0) {
|
||||
if (res != NULL) {
|
||||
if (res->ai_addr != NULL && res->ai_addrlen != 0) {
|
||||
SOCaddr_copyaddr2(*addr, res->ai_addr, res->ai_addrlen);
|
||||
@@ -4860,7 +4870,7 @@ static SOCaddr* hts_dns_resolve_nocache2_(const char *const hostname,
|
||||
}
|
||||
}
|
||||
if (res) {
|
||||
freeaddrinfo(res);
|
||||
hts_resolver->freeaddrinfo(res);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@@ -5308,6 +5318,11 @@ static int get_loglevel_from_coucal(coucal_loglevel level) {
|
||||
static void default_coucal_loghandler(void *arg, coucal_loglevel level,
|
||||
const char* format, va_list args) {
|
||||
|
||||
/* informational chatter (hashtable stats on delete, etc.) only when
|
||||
debugging; keep warnings and critical errors always visible. */
|
||||
if (level > coucal_log_warning && hts_dgb_init <= 0) {
|
||||
return;
|
||||
}
|
||||
if (level <= coucal_log_warning) {
|
||||
fprintf(stderr, "** warning: ");
|
||||
}
|
||||
|
||||
18
src/htslib.h
18
src/htslib.h
@@ -481,10 +481,22 @@ HTS_STATIC int strcmpnocase(const char *a, const char *b) {
|
||||
|
||||
// is this MIME a hypertext MIME (text/html), html/js-style or other script/text type?
|
||||
#define HTS_HYPERTEXT_DEFAULT_MIME "text/html"
|
||||
/* Sentinel stored when the server declared no Content-Type. It is html-ish
|
||||
for every type test (so a typeless response still parses/stores as today),
|
||||
but the naming code (wire_patches_ext) treats it as "no declared type" and
|
||||
keeps the URL extension. It rides the cache, so updates name consistently. */
|
||||
#define HTS_UNKNOWN_MIME "unknown/unknown"
|
||||
/* Map the no-declared-type sentinel back to a real type for any header or
|
||||
record we EMIT or PERSIST, so "unknown/unknown" never reaches a consumer
|
||||
(a served Content-Type, a ProxyTrack .arc record, ...). */
|
||||
#define hts_effective_mime(m) \
|
||||
(strfield2((m), HTS_UNKNOWN_MIME) ? HTS_HYPERTEXT_DEFAULT_MIME : (m))
|
||||
|
||||
#define is_html_mime_type(a) \
|
||||
( (strfield2((a),"text/html")!=0)\
|
||||
|| (strfield2((a),"application/xhtml+xml")!=0) \
|
||||
#define is_html_mime_type(a) \
|
||||
((strfield2((a), "text/html") != 0) || \
|
||||
(strfield2((a), "application/xhtml+xml") != 0) || \
|
||||
(strfield2((a), HTS_UNKNOWN_MIME) != \
|
||||
0) /* no declared type: treat as html */ \
|
||||
)
|
||||
#define is_hypertext_mime__(a) \
|
||||
( \
|
||||
|
||||
@@ -142,10 +142,11 @@ static void cleanEndingSpaceOrDot(char *s) {
|
||||
saved file? True when the type is patchable (may_unknown2) and either the URL
|
||||
extension implies no specific type or the server declared a disagreeing one.
|
||||
A URL extension mapping to a specific non-HTML type is kept only when the
|
||||
server sent NO Content-Type (the #267 mangle guard): a typeless .png stays
|
||||
.png, but a .pdf explicitly served as text/html is named .html. */
|
||||
server declared NO type (the HTS_UNKNOWN_MIME sentinel; the #267 mangle
|
||||
guard): a typeless .png stays .png, but a .pdf explicitly served as text/html
|
||||
is named .html. The sentinel rides the cache, so updates stay consistent. */
|
||||
static int wire_patches_ext(httrackp *opt, const char *wiremime,
|
||||
const char *file, int contenttype_given) {
|
||||
const char *file) {
|
||||
char urlmime[256];
|
||||
|
||||
if (may_unknown2(opt, wiremime, file))
|
||||
@@ -157,11 +158,11 @@ static int wire_patches_ext(httrackp *opt, const char *wiremime,
|
||||
if (strfield2(wiremime, urlmime))
|
||||
return 0; /* wire agrees with the ext: keep it (no .htm->.html churn) */
|
||||
/* wire disagrees with a specific non-HTML URL ext. Keep the ext only when
|
||||
the server sent NO Content-Type: a missing type is defaulted to text/html
|
||||
upstream and must not clobber e.g. a .png. An explicitly declared type is
|
||||
trusted, so a binary-looking URL that really serves HTML (login/error
|
||||
interstitial, soft-404) is named .html instead of kept as .pdf/.jpg. */
|
||||
if (!is_hypertext_mime(opt, urlmime, file) && !contenttype_given)
|
||||
the server declared no type (the sentinel); an explicitly declared type,
|
||||
even text/html, is trusted, so a binary-looking URL that really serves
|
||||
HTML (login/error interstitial, soft-404) is named .html. */
|
||||
if (!is_hypertext_mime(opt, urlmime, file) &&
|
||||
strfield2(wiremime, HTS_UNKNOWN_MIME))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
@@ -411,8 +412,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (strnotempty(r.cdispo)) { /* filename given */
|
||||
ext_chg = 2; /* change filename */
|
||||
strcpybuff(ext, r.cdispo);
|
||||
} else if (wire_patches_ext(opt, r.contenttype, fil,
|
||||
r.contenttype_given)) {
|
||||
} else if (wire_patches_ext(opt, r.contenttype, fil)) {
|
||||
if (give_mimext(s, sizeof(s),
|
||||
r.contenttype)) { // recognized extension
|
||||
ext_chg = 1;
|
||||
@@ -458,8 +458,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
ext_chg = 2; /* change filename */
|
||||
strcpybuff(ext, headers->r.cdispo);
|
||||
} else if (wire_patches_ext(opt, headers->r.contenttype,
|
||||
headers->url_fil,
|
||||
headers->r.contenttype_given)) {
|
||||
headers->url_fil)) {
|
||||
char s[16];
|
||||
if (give_mimext(
|
||||
s, sizeof(s),
|
||||
@@ -675,7 +674,8 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (!has_been_moved) {
|
||||
if (back[b].r.statuscode != -10) { // erreur
|
||||
if (strnotempty(back[b].r.contenttype) == 0)
|
||||
strcpybuff(back[b].r.contenttype, "text/html"); // message d'erreur en html
|
||||
strcpybuff(back[b].r.contenttype,
|
||||
HTS_UNKNOWN_MIME); // no declared type
|
||||
// Finalement on, renvoie un erreur, pour ne toucher à rien dans le code
|
||||
// libérer emplacement backing
|
||||
}
|
||||
@@ -688,8 +688,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
ext_chg = 2; /* change filename */
|
||||
strcpybuff(ext, back[b].r.cdispo);
|
||||
} else if (wire_patches_ext(opt, back[b].r.contenttype,
|
||||
back[b].url_fil,
|
||||
back[b].r.contenttype_given)) {
|
||||
back[b].url_fil)) {
|
||||
if (give_mimext(
|
||||
s, sizeof(s),
|
||||
back[b].r.contenttype)) { // recognized extension
|
||||
|
||||
16
src/htsnet.h
16
src/htsnet.h
@@ -304,6 +304,22 @@ static HTS_UNUSED void SOCaddr_inetntoa_(char *namebuf, size_t namebuflen,
|
||||
/** Length type for socket APIs (getsockname, accept, ...). */
|
||||
typedef socklen_t SOClen;
|
||||
|
||||
#if HTS_INET6 != 0
|
||||
/** Resolver backend: getaddrinfo/freeaddrinfo as a swappable pair, so the
|
||||
self-test can script DNS answers (families, multiplicity, errors)
|
||||
in-process. The free function must match its getaddrinfo (a fake allocates
|
||||
its own chain), hence the pair. */
|
||||
typedef struct hts_resolver_backend {
|
||||
int (*getaddrinfo)(const char *node, const char *service,
|
||||
const struct addrinfo *hints, struct addrinfo **res);
|
||||
void (*freeaddrinfo)(struct addrinfo *res);
|
||||
} hts_resolver_backend;
|
||||
|
||||
/** Install a resolver backend for the process; NULL restores the libc default.
|
||||
Test-only seam, not thread-safe; callers must serialize against resolves. */
|
||||
void hts_dns_set_resolver_backend(const hts_resolver_backend *backend);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
12
src/htsopt.h
12
src/htsopt.h
@@ -428,11 +428,11 @@ struct httrackp {
|
||||
LLint maxfile_html; /**< max bytes per HTML file */
|
||||
int maxsoc; /**< max simultaneous sockets (-cN) */
|
||||
LLint fragment; /**< split site after this many bytes */
|
||||
hts_boolean
|
||||
hts_tristate
|
||||
nearlink; /**< also fetch images/data adjacent to a page but off-site */
|
||||
hts_boolean makeindex; /**< build a top-level index.html */
|
||||
hts_boolean kindex; /**< build a keyword index */
|
||||
hts_boolean delete_old; /**< delete locally obsolete files after update */
|
||||
hts_tristate delete_old; /**< delete locally obsolete files after update */
|
||||
int timeout; /**< connection timeout in seconds */
|
||||
int rateout; /**< minimum transfer rate (bytes/s) before abort */
|
||||
int maxtime; /**< max total mirror duration in seconds */
|
||||
@@ -465,13 +465,13 @@ struct httrackp {
|
||||
hts_boolean maketrack; /**< maintain an operations-statistics log */
|
||||
int parsejava; /**< Java/JS parsing mode; see htsparsejava_flags */
|
||||
int hostcontrol; /**< ban slow/timing-out hosts; see hts_hostcontrol bits */
|
||||
hts_boolean errpage; /**< generate an error page on 404 and similar */
|
||||
hts_tristate errpage; /**< generate an error page on 404 and similar */
|
||||
hts_boolean
|
||||
check_type; /**< probe unknown-type links (cgi/asp/dir) and follow moves
|
||||
*/
|
||||
hts_boolean all_in_cache; /**< keep all retrieved data in the cache */
|
||||
hts_robots robots; /**< robots.txt handling level */
|
||||
hts_boolean external; /**< render external links as error pages */
|
||||
hts_tristate external; /**< render external links as error pages */
|
||||
hts_boolean passprivacy; /**< strip passwords from external links */
|
||||
hts_boolean includequery; /**< include the query string in saved names */
|
||||
hts_boolean mirror_first_page; /**< only mirror the links of the first page */
|
||||
@@ -485,7 +485,7 @@ struct httrackp {
|
||||
hts_boolean sizehack; /**< treat same-size response as "updated" */
|
||||
hts_boolean urlhack; // force "url normalization" to avoid loops
|
||||
hts_boolean tolerant; /**< accept an incorrect Content-Length */
|
||||
hts_boolean
|
||||
hts_tristate
|
||||
parseall; /**< parse aggressively, including unknown tags with links */
|
||||
hts_boolean parsedebug; /**< parser debug mode */
|
||||
hts_boolean norecatch; /**< do not re-fetch files the user deleted locally */
|
||||
@@ -651,8 +651,6 @@ struct htsblk {
|
||||
int debugid; /**< connection debug id */
|
||||
/* */
|
||||
htsrequest req; /**< parameters used for the request */
|
||||
/* a Content-Type header was received (else contenttype holds a default) */
|
||||
hts_boolean contenttype_given;
|
||||
/*char digest[32+2]; // md5 digest generated by the engine ("" if none) */
|
||||
};
|
||||
|
||||
|
||||
@@ -1176,11 +1176,15 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
|
||||
if (element != NULL) {
|
||||
msgCode = element->statuscode;
|
||||
StringRoom(headers, 8192);
|
||||
sprintf(StringBuffRW(headers), "HTTP/1.1 %d %s\r\n"
|
||||
sprintf(StringBuffRW(headers),
|
||||
"HTTP/1.1 %d %s\r\n"
|
||||
#ifndef NO_WEBDAV
|
||||
"%s"
|
||||
#endif
|
||||
"Content-Type: %s%s%s%s\r\n" "%s%s%s" "%s%s%s" "%s%s%s",
|
||||
"Content-Type: %s%s%s%s\r\n"
|
||||
"%s%s%s"
|
||||
"%s%s%s"
|
||||
"%s%s%s",
|
||||
/* */
|
||||
msgCode, element->msg,
|
||||
#ifndef NO_WEBDAV
|
||||
@@ -1188,16 +1192,18 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
|
||||
StringBuff(davHeaders),
|
||||
#endif
|
||||
/* Content-type: foo; [ charset=bar ] */
|
||||
element->contenttype,
|
||||
hts_effective_mime(element->contenttype),
|
||||
((element->charset[0]) ? "; charset=\"" : ""),
|
||||
element->charset, ((element->charset[0]) ? "\"" : ""),
|
||||
/* location */
|
||||
((element->location != NULL
|
||||
&& element->location[0]) ? "Location: " : ""),
|
||||
((element->location != NULL
|
||||
&& element->location[0]) ? element->location : ""),
|
||||
((element->location != NULL
|
||||
&& element->location[0]) ? "\r\n" : ""),
|
||||
((element->location != NULL && element->location[0])
|
||||
? "Location: "
|
||||
: ""),
|
||||
((element->location != NULL && element->location[0])
|
||||
? element->location
|
||||
: ""),
|
||||
((element->location != NULL && element->location[0]) ? "\r\n"
|
||||
: ""),
|
||||
/* last-modified */
|
||||
((element->lastmodified[0]) ? "Last-Modified: " : ""),
|
||||
((element->lastmodified[0]) ? element->lastmodified : ""),
|
||||
@@ -1205,8 +1211,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
|
||||
/* etag */
|
||||
((element->etag[0]) ? "ETag: " : ""),
|
||||
((element->etag[0]) ? element->etag : ""),
|
||||
((element->etag[0]) ? "\r\n" : "")
|
||||
);
|
||||
((element->etag[0]) ? "\r\n" : ""));
|
||||
StringLength(headers) = (int) strlen(StringBuff(headers));
|
||||
} else {
|
||||
/* No query string, no ending / : check the <url>/ page */
|
||||
|
||||
@@ -52,6 +52,7 @@ Please visit our Website: http://www.httrack.com
|
||||
|
||||
#include "htscore.h"
|
||||
#include "htsback.h"
|
||||
#include "htslib.h" /* hts_effective_mime */
|
||||
|
||||
#include "store.h"
|
||||
#include "proxystrings.h"
|
||||
@@ -2289,10 +2290,17 @@ static int PT_SaveCache__Arc_Fun(void *arg, const char *url, PT_Element element)
|
||||
int size_headers;
|
||||
|
||||
sprintf(st->headers,
|
||||
"HTTP/1.0 %d %s" "\r\n" "X-Server: ProxyTrack " PROXYTRACK_VERSION
|
||||
"\r\n" "Content-type: %s%s%s%s" "\r\n" "Last-modified: %s" "\r\n"
|
||||
"Content-length: %d" "\r\n", element->statuscode, element->msg,
|
||||
/**/ element->contenttype,
|
||||
"HTTP/1.0 %d %s"
|
||||
"\r\n"
|
||||
"X-Server: ProxyTrack " PROXYTRACK_VERSION "\r\n"
|
||||
"Content-type: %s%s%s%s"
|
||||
"\r\n"
|
||||
"Last-modified: %s"
|
||||
"\r\n"
|
||||
"Content-length: %d"
|
||||
"\r\n",
|
||||
element->statuscode, element->msg,
|
||||
/**/ hts_effective_mime(element->contenttype),
|
||||
(element->charset[0] ? "; charset=\"" : ""),
|
||||
(element->charset[0] ? element->charset : ""),
|
||||
(element->charset[0] ? "\"" : ""), /**/ element->lastmodified,
|
||||
@@ -2328,10 +2336,10 @@ static int PT_SaveCache__Arc_Fun(void *arg, const char *url, PT_Element element)
|
||||
/* args */
|
||||
(link_has_authority(url) ? "" : "http://"), url, "0.0.0.0",
|
||||
tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour,
|
||||
tm->tm_min, tm->tm_sec, element->contenttype, element->statuscode,
|
||||
st->md5, (element->location ? element->location : "-"),
|
||||
(long int) ftell(fp), st->filename,
|
||||
(long int) (size_headers + element->size));
|
||||
tm->tm_min, tm->tm_sec, hts_effective_mime(element->contenttype),
|
||||
element->statuscode, st->md5,
|
||||
(element->location ? element->location : "-"), (long int) ftell(fp),
|
||||
st->filename, (long int) (size_headers + element->size));
|
||||
/* network_doc */
|
||||
if (fwrite(st->headers, 1, size_headers, fp) != size_headers
|
||||
|| (element->size > 0
|
||||
|
||||
15
tests/01_engine-dns.test
Normal file
15
tests/01_engine-dns.test
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# DNS resolver/cache self-test: a mock getaddrinfo (no network) checks address
|
||||
# family, single-address selection, the -@i4/-@i6 family filter, and cache reuse.
|
||||
# The trailing token is required, like the other -# selftests, so a bare command
|
||||
# line isn't treated as "no arguments" and routed to the usage screen.
|
||||
out=$(httrack -#D run)
|
||||
|
||||
test "$out" = "dns-selftest: OK" || {
|
||||
echo "expected 'dns-selftest: OK', got: $out" >&2
|
||||
exit 1
|
||||
}
|
||||
@@ -10,7 +10,7 @@
|
||||
: "${top_srcdir:=..}"
|
||||
|
||||
bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
|
||||
--found 'types/notype.png' \
|
||||
--found 'types/notype.png' --not-found 'types/notype.html' \
|
||||
--found 'types/notype.pdf' --not-found 'types/notype.html' \
|
||||
--found 'types/photo.png' \
|
||||
--found 'types/doc.pdf' \
|
||||
|
||||
15
tests/18_local-update.test
Normal file
15
tests/18_local-update.test
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# A second (update) pass must keep the names the first crawl chose. The stored
|
||||
# Content-Type rides the cache, so the update reads back the same value -- the
|
||||
# unknown/unknown sentinel for a typeless response, the declared type otherwise
|
||||
# -- and names consistently: a declared-text/html .pdf stays .html and a
|
||||
# typeless .png stays .png across the update rather than reverting.
|
||||
|
||||
: "${top_srcdir:=..}"
|
||||
|
||||
bash "$top_srcdir/tests/local-crawl.sh" --errors 0 --rerun \
|
||||
--found 'types/report.html' --not-found 'types/report.pdf' \
|
||||
--found 'types/notype.png' --not-found 'types/notype.html' \
|
||||
--found 'types/lie.html' \
|
||||
httrack 'BASEURL/types/index.html'
|
||||
@@ -29,6 +29,7 @@ TESTS = \
|
||||
01_engine-cmdline.test \
|
||||
01_engine-cookies.test \
|
||||
01_engine-copyopt.test \
|
||||
01_engine-dns.test \
|
||||
01_engine-doitlog.test \
|
||||
01_engine-entities.test \
|
||||
01_engine-filter.test \
|
||||
@@ -54,6 +55,7 @@ TESTS = \
|
||||
14_local-https.test \
|
||||
15_local-types.test \
|
||||
16_local-assume.test \
|
||||
17_local-empty-ct.test
|
||||
17_local-empty-ct.test \
|
||||
18_local-update.test
|
||||
|
||||
CLEANFILES = check-network_sh.cache
|
||||
|
||||
@@ -26,6 +26,7 @@ key="${testdir}/server.key"
|
||||
|
||||
tls=
|
||||
verbose=
|
||||
rerun=
|
||||
tmpdir=
|
||||
serverpid=
|
||||
crawlpid=
|
||||
@@ -89,6 +90,7 @@ nargs=$#
|
||||
while test "$pos" -lt "$nargs"; do
|
||||
case "${args[$pos]}" in
|
||||
--debug) verbose=1 ;;
|
||||
--rerun) rerun=1 ;; # run httrack a second time (update pass) before auditing
|
||||
--no-purge)
|
||||
nopurge=1
|
||||
audit+=("--no-purge")
|
||||
@@ -180,6 +182,22 @@ test "$crawlres" -eq 0 || ! result "httrack exited $crawlres" || {
|
||||
result "OK"
|
||||
grep -iE "^[0-9:]*[[:space:]]Error:" "${out}/hts-log.txt" >&2
|
||||
|
||||
# --- optional second pass: re-mirror into the same dir (cache/update path) ----
|
||||
if test -n "$rerun"; then
|
||||
info "re-running httrack (update pass)"
|
||||
httrack -O "$out" --user-agent="httrack $ver local ($(uname -omrs))" \
|
||||
"${moreargs[@]}" "${hts[@]}" >"${log}.2" 2>&1 &
|
||||
crawlpid=$!
|
||||
wait "$crawlpid"
|
||||
crawlres=$?
|
||||
crawlpid=
|
||||
test "$crawlres" -eq 0 || ! result "update pass exited $crawlres" || {
|
||||
cat "${log}.2" >&2
|
||||
exit 1
|
||||
}
|
||||
result "OK (update)"
|
||||
fi
|
||||
|
||||
# --- discover the single host root (127.0.0.1_<port> or 127.0.0.1) -----------
|
||||
hostroot=
|
||||
for cand in "${out}/127.0.0.1_${port}" "${out}/127.0.0.1"; do
|
||||
|
||||
Reference in New Issue
Block a user