mirror of
https://github.com/xroche/httrack.git
synced 2026-06-22 10:07:11 +03:00
Compare commits
25 Commits
3.49.8
...
debian-cop
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6fcd0a16e9 | ||
|
|
4614eefefe | ||
|
|
b0e8262db0 | ||
|
|
addbd3136b | ||
|
|
a64c4cd160 | ||
|
|
1611dbcabf | ||
|
|
099501ee50 | ||
|
|
1b9eefa3b4 | ||
|
|
9c8d3a41eb | ||
|
|
ae77cd9d6d | ||
|
|
51b8dcd81c | ||
|
|
bcce664143 | ||
|
|
7a24add87c | ||
|
|
2308e7bafd | ||
|
|
ef5691fc47 | ||
|
|
0a6eb73903 | ||
|
|
fdb243e5a2 | ||
|
|
f8546e146d | ||
|
|
b7f602f2eb | ||
|
|
550100b56a | ||
|
|
33ddb27243 | ||
|
|
4606dfbf66 | ||
|
|
a6f1b9a3dd | ||
|
|
fb35d6a0f1 | ||
|
|
8a270fec03 |
5
.flake8
Normal file
5
.flake8
Normal file
@@ -0,0 +1,5 @@
|
||||
[flake8]
|
||||
# Match black's formatting so the two tools don't fight.
|
||||
max-line-length = 88
|
||||
# E203/W503 conflict with black's slice and line-break style.
|
||||
extend-ignore = E203, W503
|
||||
3
.github/workflows/ci.yml
vendored
3
.github/workflows/ci.yml
vendored
@@ -227,7 +227,8 @@ jobs:
|
||||
# Validate the Debian packaging via the same script maintainers release with.
|
||||
# One amd64/gcc run is enough: packaging (control/rules/manifest/lintian/quilt
|
||||
# source build) is arch- and compiler-independent, and the build matrix above
|
||||
# already covers compile portability. lintian runs with --fail-on=error.
|
||||
# already covers compile portability. mkdeb.sh runs lintian as an explicit gate
|
||||
# (debuild does not propagate lintian's exit) with --fail-on=error,warning.
|
||||
deb:
|
||||
name: deb package (lintian)
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
AC_PREREQ([2.71])
|
||||
|
||||
AC_INIT([httrack], [3.49.8], [roche+packaging@httrack.com], [httrack], [http://www.httrack.com/])
|
||||
AC_INIT([httrack], [3.49.9], [roche+packaging@httrack.com], [httrack], [http://www.httrack.com/])
|
||||
AC_COPYRIGHT([
|
||||
HTTrack Website Copier, Offline Browser for Windows and Unix
|
||||
Copyright (C) 1998-2015 Xavier Roche and other contributors
|
||||
@@ -29,9 +29,10 @@ AC_CONFIG_SRCDIR(src/httrack.c)
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
AC_CONFIG_HEADERS(config.h)
|
||||
AM_INIT_AUTOMAKE([subdir-objects])
|
||||
# 3:0:0: htsblk layout changed (contenttype/charset/contentencoding widened to
|
||||
# 128), an incompatible ABI break, so bump current and reset revision/age.
|
||||
VERSION_INFO="3:0:0"
|
||||
# 3:1:0: 3.49.9 changed code but not the exported interface vs 3.49.8 (same 164
|
||||
# symbols, no struct-layout change), so bump revision only. (3:0:0 was the htsblk
|
||||
# mime-buffer widening, an ABI break that moved the soname .so.2 -> .so.3.)
|
||||
VERSION_INFO="3:1:0"
|
||||
AM_MAINTAINER_MODE
|
||||
AC_USE_SYSTEM_EXTENSIONS
|
||||
|
||||
|
||||
24
debian/changelog
vendored
24
debian/changelog
vendored
@@ -1,3 +1,27 @@
|
||||
httrack (3.49.9-1) unstable; urgency=medium
|
||||
|
||||
* New upstream release: Content-Type and file-type detection fixes (trust a
|
||||
declared Content-Type over a binary URL extension, honor --assume under the
|
||||
delayed type check, keep a known extension against a bogus or empty
|
||||
Content-Type, and avoid an uninitialised read on an empty Content-Type), and
|
||||
restored C++ source-compatibility of the installed headers so reverse
|
||||
dependencies (httraqt) build again.
|
||||
|
||||
-- Xavier Roche <xavier@debian.org> Sun, 21 Jun 2026 17:59:38 +0200
|
||||
|
||||
httrack (3.49.8-2) unstable; urgency=medium
|
||||
|
||||
* Rename libhttrack2 to libhttrack3 to follow the SONAME, which the 3.49.8
|
||||
ABI bump moved to libhttrack.so.3 (package-name-doesnt-match-sonames). In
|
||||
3.49.8-1 the libhttrack2.files glob still matched .so.2, so the runtime
|
||||
libraries fell through into the httrack package and libhttrack2 shipped no
|
||||
library. The new .files uses a .so.3* wildcard so a future SONAME bump no
|
||||
longer silently misplaces the libraries. New binary package, via NEW.
|
||||
* Drop the stale debian/libhttrack-swf1.files: the swf module is no longer
|
||||
built and no libhttrack-swf1 package exists.
|
||||
|
||||
-- Xavier Roche <xavier@debian.org> Sat, 20 Jun 2026 14:42:13 +0200
|
||||
|
||||
httrack (3.49.8-1) unstable; urgency=medium
|
||||
|
||||
* New upstream release: HTTPS-proxy CONNECT tunnelling and wider srcset
|
||||
|
||||
6
debian/control
vendored
6
debian/control
vendored
@@ -58,13 +58,13 @@ Description: webhttrack common files
|
||||
This package contains the common files of webhttrack, a website copier and
|
||||
mirroring utility
|
||||
|
||||
Package: libhttrack2
|
||||
Package: libhttrack3
|
||||
Architecture: any
|
||||
Multi-Arch: same
|
||||
Section: libs
|
||||
Replaces: libhttrack1
|
||||
Conflicts: libhttrack1
|
||||
Depends: ${misc:Depends}, ${shlibs:Depends}
|
||||
Replaces: libhttrack2, httrack (<< 3.49.8-2~)
|
||||
Breaks: libhttrack2, httrack (<< 3.49.8-2~)
|
||||
Description: Httrack website copier library
|
||||
This package is the library part of httrack, a website copier and mirroring
|
||||
utility
|
||||
|
||||
118
debian/copyright
vendored
118
debian/copyright
vendored
@@ -1,21 +1,109 @@
|
||||
This package was debianized by Xavier Roche <roche@httrack.com> on
|
||||
Fri, 27 Sep 2002 16:42:26 +0200
|
||||
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
||||
Upstream-Name: httrack
|
||||
Upstream-Contact: Xavier Roche <roche@httrack.com>
|
||||
Source: https://www.httrack.com/
|
||||
|
||||
The current Debian maintainer is Xavier Roche <xavier@debian.org>
|
||||
Files: *
|
||||
Copyright: 1998-2026 Xavier Roche and other contributors
|
||||
License: GPL-3+
|
||||
Comment:
|
||||
The engine includes contributions from Yann Philippot (src/htsjava.c,
|
||||
src/htsjava.h). htsbasenet.h links against the system OpenSSL library
|
||||
(originally by Eric Young); no OpenSSL/SSLeay code is bundled here.
|
||||
|
||||
Upstream author: Xavier Roche <roche@httrack.com>
|
||||
Files: src/minizip/*
|
||||
Copyright: 1998-2010 Gilles Vollant
|
||||
2007-2008 Even Rouault
|
||||
2009-2010 Mathias Svensson
|
||||
1990-2000 Info-ZIP
|
||||
License: Zlib
|
||||
Comment:
|
||||
The decryption code in src/minizip/crypt.h and src/minizip/unzip.c derives
|
||||
from the Info-ZIP distribution, distributed under the same terms.
|
||||
|
||||
Copyright: 1998-2014 Xavier Roche and other contributors
|
||||
Files: src/md5.c
|
||||
Copyright: 1993 Colin Plumb
|
||||
License: public-domain-md5
|
||||
This code implements the MD5 message-digest algorithm, due to Ron Rivest.
|
||||
It was written by Colin Plumb in 1993, no copyright is claimed. This code
|
||||
is in the public domain; do with it what you wish.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Files: src/coucal/*
|
||||
Copyright: 2013-2014 Xavier Roche
|
||||
License: BSD-3-clause
|
||||
|
||||
On Debian systems, the complete text of the GNU General Public
|
||||
License version 3 can be found in /usr/share/common-licenses/GPL-3 file.
|
||||
Files: src/coucal/murmurhash3.h*
|
||||
Copyright: Austin Appleby
|
||||
License: public-domain-murmurhash3
|
||||
MurmurHash3 was written by Austin Appleby, and is placed in the public
|
||||
domain. The author hereby disclaims copyright to this source code.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Files: html/server/div/com.httrack.WebHTTrack.metainfo.xml
|
||||
Copyright: 1998-2026 Xavier Roche and other contributors
|
||||
License: FSFAP
|
||||
Copying and distribution of this file, with or without modification, are
|
||||
permitted in any medium without royalty provided the copyright notice and
|
||||
this notice are preserved. This file is offered as-is, without any warranty.
|
||||
|
||||
Files: debian/*
|
||||
Copyright: 2002-2026 Xavier Roche <xavier@debian.org>
|
||||
License: GPL-3+
|
||||
|
||||
License: GPL-3+
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
.
|
||||
On Debian systems, the complete text of the GNU General Public License
|
||||
version 3 can be found in /usr/share/common-licenses/GPL-3.
|
||||
|
||||
License: Zlib
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the
|
||||
use of this software.
|
||||
.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
.
|
||||
1. The origin of this software must not be misrepresented; you must not claim
|
||||
that you wrote the original software. If you use this software in a product,
|
||||
an acknowledgment in the product documentation would be appreciated but is
|
||||
not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
License: BSD-3-clause
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
.
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
3. Neither the name of the copyright holder nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
3
debian/httrack-doc.lintian-overrides
vendored
3
debian/httrack-doc.lintian-overrides
vendored
@@ -4,3 +4,6 @@
|
||||
# so the path lives in the display pointer, not the override -- match with '*'.
|
||||
httrack-doc: extra-license-file *
|
||||
httrack-doc: package-contains-documentation-outside-usr-share-doc *
|
||||
# search.sh is a sample CGI shipped alongside the HTML manual, not meant to be
|
||||
# run from the package tree; it stays non-executable by design.
|
||||
httrack-doc: script-not-executable *
|
||||
|
||||
2
debian/libhttrack-swf1.files
vendored
2
debian/libhttrack-swf1.files
vendored
@@ -1,2 +0,0 @@
|
||||
usr/lib/*/libhtsswf.so.1.0.0
|
||||
usr/lib/*/libhtsswf.so.1
|
||||
5
debian/libhttrack2.files
vendored
5
debian/libhttrack2.files
vendored
@@ -1,5 +0,0 @@
|
||||
usr/lib/*/libhttrack.so.2.0.49
|
||||
usr/lib/*/libhttrack.so.2
|
||||
usr/lib/*/libhtsjava.so.2.0.49
|
||||
usr/lib/*/libhtsjava.so.2
|
||||
usr/share/httrack/templates
|
||||
3
debian/libhttrack2.lintian-overrides
vendored
3
debian/libhttrack2.lintian-overrides
vendored
@@ -1,3 +0,0 @@
|
||||
# The shared libraries ship without a versioned symbols control file (ABI is
|
||||
# tracked via the SONAME and a strict =version dependency, see debian/rules).
|
||||
libhttrack2: no-symbols-control-file usr/lib/*
|
||||
3
debian/libhttrack3.files
vendored
Normal file
3
debian/libhttrack3.files
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
usr/lib/*/libhttrack.so.3*
|
||||
usr/lib/*/libhtsjava.so.3*
|
||||
usr/share/httrack/templates
|
||||
3
debian/libhttrack3.lintian-overrides
vendored
Normal file
3
debian/libhttrack3.lintian-overrides
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
# The shared libraries ship without a versioned symbols control file (ABI is
|
||||
# tracked via the SONAME plus a >= upstream-version dependency, see debian/rules).
|
||||
libhttrack3: no-symbols-control-file usr/lib/*
|
||||
2
debian/rules
vendored
2
debian/rules
vendored
@@ -135,7 +135,7 @@ binary-arch: build install
|
||||
dh_makeshlibs -a -X/usr/lib/$(DEB_HOST_MULTIARCH)/httrack/libtest --version-info
|
||||
dh_installdeb -a
|
||||
# we depend on the current version (ABI may change)
|
||||
dh_shlibdeps -a -ldebian/libhttrack2/usr/lib/$(DEB_HOST_MULTIARCH)
|
||||
dh_shlibdeps -a -ldebian/libhttrack3/usr/lib/$(DEB_HOST_MULTIARCH)
|
||||
dh_gencontrol -a
|
||||
dh_md5sums -a
|
||||
dh_builddeb -a
|
||||
|
||||
@@ -4,6 +4,12 @@ HTTrack Website Copier release history:
|
||||
|
||||
This file lists all changes and fixes that have been made for HTTrack
|
||||
|
||||
3.49-9
|
||||
+ Fixed: file-type detection from the Content-Type header: trust a declared type over a binary URL extension, honor --assume under the delayed type check, and keep a known extension against a bogus or empty Content-Type (#267, #29, #56)
|
||||
+ Fixed: an uninitialized-buffer read when the Content-Type is empty (#411)
|
||||
+ Fixed: restored C++ source-compatibility of the installed headers so reverse dependencies (httraqt) build again (#413)
|
||||
+ Changed: multiple internal build, packaging and test-harness improvements
|
||||
|
||||
3.49-8
|
||||
+ New: tunnel HTTPS downloads through the configured HTTP proxy via CONNECT (#85)
|
||||
+ New: parse every candidate URL in <img> and <source> srcset lists (#326)
|
||||
|
||||
@@ -3703,9 +3703,9 @@ HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to) {
|
||||
if (from->maxsoc > 0)
|
||||
to->maxsoc = from->maxsoc;
|
||||
|
||||
/* hts_boolean/enum fields are unsigned (GCC), so a bare `> -1` unset-guard
|
||||
is always false; cast to int to keep the -1 "unset" sentinel test. */
|
||||
if ((int) from->nearlink > -1)
|
||||
/* hts_tristate fields use HTS_DEFAULT (-1) for "unspecified": copy_htsopt
|
||||
skips them so the target keeps its value. */
|
||||
if (from->nearlink > -1)
|
||||
to->nearlink = from->nearlink;
|
||||
|
||||
if (from->timeout > -1)
|
||||
@@ -3732,10 +3732,10 @@ HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to) {
|
||||
if (from->hostcontrol > -1)
|
||||
to->hostcontrol = from->hostcontrol;
|
||||
|
||||
if ((int) from->errpage > -1)
|
||||
if (from->errpage > -1)
|
||||
to->errpage = from->errpage;
|
||||
|
||||
if ((int) from->parseall > -1)
|
||||
if (from->parseall > -1)
|
||||
to->parseall = from->parseall;
|
||||
|
||||
// test all: bit 8 de travel
|
||||
|
||||
@@ -2579,7 +2579,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
(r.size >= 0) ? r.size : (-r.size));
|
||||
if (r.contenttype >= 0) {
|
||||
fprintf(stdout, "Content-Type: %s\r\n",
|
||||
r.contenttype);
|
||||
hts_effective_mime(r.contenttype));
|
||||
}
|
||||
if (r.cdispo[0]) {
|
||||
fprintf(stdout, "Content-Disposition: %s\r\n",
|
||||
@@ -3166,6 +3166,16 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
if (to->parseall != HTS_FALSE)
|
||||
err = 1;
|
||||
|
||||
/* HTS_DEFAULT (-1) is "unspecified": copy_htsopt must skip it,
|
||||
leaving the target intact. Only a signed (int-backed) field
|
||||
can hold -1, so this also guards the type against regressing
|
||||
to an unsigned hts_boolean. */
|
||||
from->parseall = HTS_DEFAULT;
|
||||
to->parseall = HTS_TRUE;
|
||||
copy_htsopt(from, to);
|
||||
if (to->parseall != HTS_TRUE)
|
||||
err = 1;
|
||||
|
||||
hts_free_opt(from);
|
||||
hts_free_opt(to);
|
||||
printf("copy-htsopt: %s\n", err ? "FAIL" : "OK");
|
||||
|
||||
@@ -43,8 +43,8 @@ Please visit our Website: http://www.httrack.com
|
||||
configure.ac, decoupled from these). VERSION is the display form, VERSIONID
|
||||
the dotted numeric form, AFF_VERSION the short form shown in footers,
|
||||
LIB_VERSION the data/cache format generation. */
|
||||
#define HTTRACK_VERSION "3.49-8"
|
||||
#define HTTRACK_VERSIONID "3.49.8"
|
||||
#define HTTRACK_VERSION "3.49-9"
|
||||
#define HTTRACK_VERSIONID "3.49.9"
|
||||
#define HTTRACK_AFF_VERSION "3.x"
|
||||
#define HTTRACK_LIB_VERSION "2.0"
|
||||
|
||||
@@ -247,13 +247,23 @@ Please visit our Website: http://www.httrack.com
|
||||
#define HTS_NOPARAM "(none)"
|
||||
#define HTS_NOPARAM2 "\"(none)\""
|
||||
|
||||
/* Boolean flag for option fields and API yes/no returns. An enum (not C bool)
|
||||
so it stays int-sized: option fields keep the httrackp layout/ABI, and a
|
||||
return type stays compatible with the int it replaces. */
|
||||
/* Boolean flag for option fields and API yes/no returns. Int-backed, not an
|
||||
enum: an enum makes C++ reject `field = 1` / `f(0)` on the exported fields
|
||||
and params. Int-sized, so the httrackp layout and the ABI are unchanged. */
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_boolean
|
||||
#define HTS_DEF_DEFSTRUCT_hts_boolean
|
||||
|
||||
typedef enum hts_boolean { HTS_FALSE = 0, HTS_TRUE = 1 } hts_boolean;
|
||||
typedef int hts_boolean;
|
||||
#define HTS_FALSE 0
|
||||
#define HTS_TRUE 1
|
||||
#endif
|
||||
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_tristate
|
||||
#define HTS_DEF_DEFSTRUCT_hts_tristate
|
||||
/* Tri-state hts_boolean: HTS_DEFAULT (-1) = "unspecified" (copy_htsopt leaves
|
||||
the target untouched); HTS_FALSE/HTS_TRUE = off/on. */
|
||||
typedef int hts_tristate;
|
||||
#define HTS_DEFAULT (-1)
|
||||
#endif
|
||||
|
||||
/* Larger/smaller of two values. Macros: arguments are evaluated twice. */
|
||||
|
||||
23
src/htslib.c
23
src/htslib.c
@@ -1423,7 +1423,7 @@ void treatfirstline(htsblk * retour, const char *rcvd) {
|
||||
else
|
||||
infostatuscode(retour->msg, retour->statuscode);
|
||||
// type MIME par défaut2
|
||||
strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
|
||||
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
||||
} else { // pas de code!
|
||||
retour->statuscode = STATUSCODE_INVALID;
|
||||
strcpybuff(retour->msg, "Unknown response structure");
|
||||
@@ -1438,7 +1438,7 @@ void treatfirstline(htsblk * retour, const char *rcvd) {
|
||||
retour->statuscode = HTTP_OK;
|
||||
retour->keep_alive = 0;
|
||||
strcpybuff(retour->msg, "Unknown, assuming junky server");
|
||||
strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
|
||||
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
||||
} else if (strnotempty(a)) {
|
||||
retour->statuscode = STATUSCODE_INVALID;
|
||||
strcpybuff(retour->msg, "Unknown (not HTTP/xx) response structure");
|
||||
@@ -1447,7 +1447,7 @@ void treatfirstline(htsblk * retour, const char *rcvd) {
|
||||
retour->statuscode = HTTP_OK;
|
||||
retour->keep_alive = 0;
|
||||
strcpybuff(retour->msg, "Unknown, assuming junky server");
|
||||
strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
|
||||
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
||||
}
|
||||
}
|
||||
} else { // vide!
|
||||
@@ -1458,7 +1458,7 @@ void treatfirstline(htsblk * retour, const char *rcvd) {
|
||||
/* This is dirty .. */
|
||||
retour->statuscode = HTTP_OK;
|
||||
strcpybuff(retour->msg, "Unknown, assuming junky server");
|
||||
strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
|
||||
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1589,11 +1589,15 @@ void treathead(t_cookie * cookie, const char *adr, const char *fil, htsblk * ret
|
||||
}
|
||||
}
|
||||
}
|
||||
sscanf(rcvd + p, "%s", tempo);
|
||||
if (strlen(tempo) < sizeof(retour->contenttype) - 2) // pas trop long!!
|
||||
strcpybuff(retour->contenttype, tempo);
|
||||
else
|
||||
strcpybuff(retour->contenttype, "application/octet-stream-unknown"); // erreur
|
||||
// An empty/whitespace Content-Type value yields no token: keep the
|
||||
// sentinel default rather than reading an uninitialized tempo.
|
||||
if (sscanf(rcvd + p, "%s", tempo) == 1) {
|
||||
if (strlen(tempo) < sizeof(retour->contenttype) - 2) // pas trop long!!
|
||||
strcpybuff(retour->contenttype, tempo);
|
||||
else
|
||||
strcpybuff(retour->contenttype,
|
||||
"application/octet-stream-unknown"); // erreur
|
||||
}
|
||||
}
|
||||
} else if ((p = strfield(rcvd, "Content-Range:")) != 0) {
|
||||
// Content-Range: bytes 0-70870/70871
|
||||
@@ -4310,6 +4314,7 @@ int give_mimext(char *s, size_t ssize, const char *st) {
|
||||
int ok = 0;
|
||||
int j = 0;
|
||||
|
||||
st = hts_effective_mime(st); /* no declared type: derive an html ext */
|
||||
s[0] = '\0';
|
||||
while((!ok) && (strnotempty(hts_mime[j][1]))) {
|
||||
if (strfield2(hts_mime[j][0], st)) {
|
||||
|
||||
18
src/htslib.h
18
src/htslib.h
@@ -481,10 +481,22 @@ HTS_STATIC int strcmpnocase(const char *a, const char *b) {
|
||||
|
||||
// is this MIME a hypertext MIME (text/html), html/js-style or other script/text type?
|
||||
#define HTS_HYPERTEXT_DEFAULT_MIME "text/html"
|
||||
/* Sentinel stored when the server declared no Content-Type. It is html-ish
|
||||
for every type test (so a typeless response still parses/stores as today),
|
||||
but the naming code (wire_patches_ext) treats it as "no declared type" and
|
||||
keeps the URL extension. It rides the cache, so updates name consistently. */
|
||||
#define HTS_UNKNOWN_MIME "unknown/unknown"
|
||||
/* Map the no-declared-type sentinel back to a real type for any header or
|
||||
record we EMIT or PERSIST, so "unknown/unknown" never reaches a consumer
|
||||
(a served Content-Type, a ProxyTrack .arc record, ...). */
|
||||
#define hts_effective_mime(m) \
|
||||
(strfield2((m), HTS_UNKNOWN_MIME) ? HTS_HYPERTEXT_DEFAULT_MIME : (m))
|
||||
|
||||
#define is_html_mime_type(a) \
|
||||
( (strfield2((a),"text/html")!=0)\
|
||||
|| (strfield2((a),"application/xhtml+xml")!=0) \
|
||||
#define is_html_mime_type(a) \
|
||||
((strfield2((a), "text/html") != 0) || \
|
||||
(strfield2((a), "application/xhtml+xml") != 0) || \
|
||||
(strfield2((a), HTS_UNKNOWN_MIME) != \
|
||||
0) /* no declared type: treat as html */ \
|
||||
)
|
||||
#define is_hypertext_mime__(a) \
|
||||
( \
|
||||
|
||||
@@ -138,6 +138,35 @@ static void cleanEndingSpaceOrDot(char *s) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Should the wire Content-Type override the URL's own extension when naming the
|
||||
saved file? True when the type is patchable (may_unknown2) and either the URL
|
||||
extension implies no specific type or the server declared a disagreeing one.
|
||||
A URL extension mapping to a specific non-HTML type is kept only when the
|
||||
server declared NO type (the HTS_UNKNOWN_MIME sentinel; the #267 mangle
|
||||
guard): a typeless .png stays .png, but a .pdf explicitly served as text/html
|
||||
is named .html. The sentinel rides the cache, so updates stay consistent. */
|
||||
static int wire_patches_ext(httrackp *opt, const char *wiremime,
|
||||
const char *file) {
|
||||
char urlmime[256];
|
||||
|
||||
if (may_unknown2(opt, wiremime, file))
|
||||
return 0; /* type kept verbatim (keep-list / bogus-multiple) */
|
||||
urlmime[0] = '\0';
|
||||
/* type implied by the URL extension, only when confidently known (flag 0) */
|
||||
if (!get_httptype_sized(opt, urlmime, sizeof(urlmime), file, 0))
|
||||
return 1; /* URL ext implies no known type: trust the wire type */
|
||||
if (strfield2(wiremime, urlmime))
|
||||
return 0; /* wire agrees with the ext: keep it (no .htm->.html churn) */
|
||||
/* wire disagrees with a specific non-HTML URL ext. Keep the ext only when
|
||||
the server declared no type (the sentinel); an explicitly declared type,
|
||||
even text/html, is trusted, so a binary-looking URL that really serves
|
||||
HTML (login/error interstitial, soft-404) is named .html. */
|
||||
if (!is_hypertext_mime(opt, urlmime, file) &&
|
||||
strfield2(wiremime, HTS_UNKNOWN_MIME))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// forme le nom du fichier à sauver (save) à partir de fil et adr
|
||||
// système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html)
|
||||
int url_savename(lien_adrfilsave *const afs,
|
||||
@@ -325,7 +354,10 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
}
|
||||
|
||||
/* replace shtml to html.. */
|
||||
if (opt->savename_delayed == HTS_SAVENAME_DELAYED_HARD)
|
||||
/* HARD delays every type, except one the user pinned with --assume: honor it
|
||||
immediately (ishtml() consults the user type), no delayed name (#56) */
|
||||
if (opt->savename_delayed == HTS_SAVENAME_DELAYED_HARD &&
|
||||
!is_userknowntype(opt, fil))
|
||||
is_html = -1; /* ALWAYS delay type */
|
||||
else
|
||||
is_html = ishtml(opt, fil);
|
||||
@@ -380,7 +412,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (strnotempty(r.cdispo)) { /* filename given */
|
||||
ext_chg = 2; /* change filename */
|
||||
strcpybuff(ext, r.cdispo);
|
||||
} else if (!may_unknown2(opt, r.contenttype, fil)) { // on peut patcher à priori?
|
||||
} else if (wire_patches_ext(opt, r.contenttype, fil)) {
|
||||
if (give_mimext(s, sizeof(s),
|
||||
r.contenttype)) { // recognized extension
|
||||
ext_chg = 1;
|
||||
@@ -425,7 +457,8 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (strnotempty(headers->r.cdispo)) { /* filename given */
|
||||
ext_chg = 2; /* change filename */
|
||||
strcpybuff(ext, headers->r.cdispo);
|
||||
} else if (!may_unknown2(opt, headers->r.contenttype, headers->url_fil)) { // on peut patcher à priori? (pas interdit ou pas de type)
|
||||
} else if (wire_patches_ext(opt, headers->r.contenttype,
|
||||
headers->url_fil)) {
|
||||
char s[16];
|
||||
if (give_mimext(
|
||||
s, sizeof(s),
|
||||
@@ -641,7 +674,8 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (!has_been_moved) {
|
||||
if (back[b].r.statuscode != -10) { // erreur
|
||||
if (strnotempty(back[b].r.contenttype) == 0)
|
||||
strcpybuff(back[b].r.contenttype, "text/html"); // message d'erreur en html
|
||||
strcpybuff(back[b].r.contenttype,
|
||||
HTS_UNKNOWN_MIME); // no declared type
|
||||
// Finalement on, renvoie un erreur, pour ne toucher à rien dans le code
|
||||
// libérer emplacement backing
|
||||
}
|
||||
@@ -653,7 +687,8 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (strnotempty(back[b].r.cdispo)) { /* filename given */
|
||||
ext_chg = 2; /* change filename */
|
||||
strcpybuff(ext, back[b].r.cdispo);
|
||||
} else if (!may_unknown2(opt, back[b].r.contenttype, back[b].url_fil)) { // on peut patcher à priori? (pas interdit ou pas de type)
|
||||
} else if (wire_patches_ext(opt, back[b].r.contenttype,
|
||||
back[b].url_fil)) {
|
||||
if (give_mimext(
|
||||
s, sizeof(s),
|
||||
back[b].r.contenttype)) { // recognized extension
|
||||
|
||||
10
src/htsopt.h
10
src/htsopt.h
@@ -428,11 +428,11 @@ struct httrackp {
|
||||
LLint maxfile_html; /**< max bytes per HTML file */
|
||||
int maxsoc; /**< max simultaneous sockets (-cN) */
|
||||
LLint fragment; /**< split site after this many bytes */
|
||||
hts_boolean
|
||||
hts_tristate
|
||||
nearlink; /**< also fetch images/data adjacent to a page but off-site */
|
||||
hts_boolean makeindex; /**< build a top-level index.html */
|
||||
hts_boolean kindex; /**< build a keyword index */
|
||||
hts_boolean delete_old; /**< delete locally obsolete files after update */
|
||||
hts_tristate delete_old; /**< delete locally obsolete files after update */
|
||||
int timeout; /**< connection timeout in seconds */
|
||||
int rateout; /**< minimum transfer rate (bytes/s) before abort */
|
||||
int maxtime; /**< max total mirror duration in seconds */
|
||||
@@ -465,13 +465,13 @@ struct httrackp {
|
||||
hts_boolean maketrack; /**< maintain an operations-statistics log */
|
||||
int parsejava; /**< Java/JS parsing mode; see htsparsejava_flags */
|
||||
int hostcontrol; /**< ban slow/timing-out hosts; see hts_hostcontrol bits */
|
||||
hts_boolean errpage; /**< generate an error page on 404 and similar */
|
||||
hts_tristate errpage; /**< generate an error page on 404 and similar */
|
||||
hts_boolean
|
||||
check_type; /**< probe unknown-type links (cgi/asp/dir) and follow moves
|
||||
*/
|
||||
hts_boolean all_in_cache; /**< keep all retrieved data in the cache */
|
||||
hts_robots robots; /**< robots.txt handling level */
|
||||
hts_boolean external; /**< render external links as error pages */
|
||||
hts_tristate external; /**< render external links as error pages */
|
||||
hts_boolean passprivacy; /**< strip passwords from external links */
|
||||
hts_boolean includequery; /**< include the query string in saved names */
|
||||
hts_boolean mirror_first_page; /**< only mirror the links of the first page */
|
||||
@@ -485,7 +485,7 @@ struct httrackp {
|
||||
hts_boolean sizehack; /**< treat same-size response as "updated" */
|
||||
hts_boolean urlhack; // force "url normalization" to avoid loops
|
||||
hts_boolean tolerant; /**< accept an incorrect Content-Length */
|
||||
hts_boolean
|
||||
hts_tristate
|
||||
parseall; /**< parse aggressively, including unknown tags with links */
|
||||
hts_boolean parsedebug; /**< parser debug mode */
|
||||
hts_boolean norecatch; /**< do not re-fetch files the user deleted locally */
|
||||
|
||||
@@ -1176,11 +1176,15 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
|
||||
if (element != NULL) {
|
||||
msgCode = element->statuscode;
|
||||
StringRoom(headers, 8192);
|
||||
sprintf(StringBuffRW(headers), "HTTP/1.1 %d %s\r\n"
|
||||
sprintf(StringBuffRW(headers),
|
||||
"HTTP/1.1 %d %s\r\n"
|
||||
#ifndef NO_WEBDAV
|
||||
"%s"
|
||||
#endif
|
||||
"Content-Type: %s%s%s%s\r\n" "%s%s%s" "%s%s%s" "%s%s%s",
|
||||
"Content-Type: %s%s%s%s\r\n"
|
||||
"%s%s%s"
|
||||
"%s%s%s"
|
||||
"%s%s%s",
|
||||
/* */
|
||||
msgCode, element->msg,
|
||||
#ifndef NO_WEBDAV
|
||||
@@ -1188,16 +1192,18 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
|
||||
StringBuff(davHeaders),
|
||||
#endif
|
||||
/* Content-type: foo; [ charset=bar ] */
|
||||
element->contenttype,
|
||||
hts_effective_mime(element->contenttype),
|
||||
((element->charset[0]) ? "; charset=\"" : ""),
|
||||
element->charset, ((element->charset[0]) ? "\"" : ""),
|
||||
/* location */
|
||||
((element->location != NULL
|
||||
&& element->location[0]) ? "Location: " : ""),
|
||||
((element->location != NULL
|
||||
&& element->location[0]) ? element->location : ""),
|
||||
((element->location != NULL
|
||||
&& element->location[0]) ? "\r\n" : ""),
|
||||
((element->location != NULL && element->location[0])
|
||||
? "Location: "
|
||||
: ""),
|
||||
((element->location != NULL && element->location[0])
|
||||
? element->location
|
||||
: ""),
|
||||
((element->location != NULL && element->location[0]) ? "\r\n"
|
||||
: ""),
|
||||
/* last-modified */
|
||||
((element->lastmodified[0]) ? "Last-Modified: " : ""),
|
||||
((element->lastmodified[0]) ? element->lastmodified : ""),
|
||||
@@ -1205,8 +1211,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
|
||||
/* etag */
|
||||
((element->etag[0]) ? "ETag: " : ""),
|
||||
((element->etag[0]) ? element->etag : ""),
|
||||
((element->etag[0]) ? "\r\n" : "")
|
||||
);
|
||||
((element->etag[0]) ? "\r\n" : ""));
|
||||
StringLength(headers) = (int) strlen(StringBuff(headers));
|
||||
} else {
|
||||
/* No query string, no ending / : check the <url>/ page */
|
||||
|
||||
@@ -52,6 +52,7 @@ Please visit our Website: http://www.httrack.com
|
||||
|
||||
#include "htscore.h"
|
||||
#include "htsback.h"
|
||||
#include "htslib.h" /* hts_effective_mime */
|
||||
|
||||
#include "store.h"
|
||||
#include "proxystrings.h"
|
||||
@@ -2289,10 +2290,17 @@ static int PT_SaveCache__Arc_Fun(void *arg, const char *url, PT_Element element)
|
||||
int size_headers;
|
||||
|
||||
sprintf(st->headers,
|
||||
"HTTP/1.0 %d %s" "\r\n" "X-Server: ProxyTrack " PROXYTRACK_VERSION
|
||||
"\r\n" "Content-type: %s%s%s%s" "\r\n" "Last-modified: %s" "\r\n"
|
||||
"Content-length: %d" "\r\n", element->statuscode, element->msg,
|
||||
/**/ element->contenttype,
|
||||
"HTTP/1.0 %d %s"
|
||||
"\r\n"
|
||||
"X-Server: ProxyTrack " PROXYTRACK_VERSION "\r\n"
|
||||
"Content-type: %s%s%s%s"
|
||||
"\r\n"
|
||||
"Last-modified: %s"
|
||||
"\r\n"
|
||||
"Content-length: %d"
|
||||
"\r\n",
|
||||
element->statuscode, element->msg,
|
||||
/**/ hts_effective_mime(element->contenttype),
|
||||
(element->charset[0] ? "; charset=\"" : ""),
|
||||
(element->charset[0] ? element->charset : ""),
|
||||
(element->charset[0] ? "\"" : ""), /**/ element->lastmodified,
|
||||
@@ -2328,10 +2336,10 @@ static int PT_SaveCache__Arc_Fun(void *arg, const char *url, PT_Element element)
|
||||
/* args */
|
||||
(link_has_authority(url) ? "" : "http://"), url, "0.0.0.0",
|
||||
tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour,
|
||||
tm->tm_min, tm->tm_sec, element->contenttype, element->statuscode,
|
||||
st->md5, (element->location ? element->location : "-"),
|
||||
(long int) ftell(fp), st->filename,
|
||||
(long int) (size_headers + element->size));
|
||||
tm->tm_min, tm->tm_sec, hts_effective_mime(element->contenttype),
|
||||
element->statuscode, st->md5,
|
||||
(element->location ? element->location : "-"), (long int) ftell(fp),
|
||||
st->filename, (long int) (size_headers + element->size));
|
||||
/* network_doc */
|
||||
if (fwrite(st->headers, 1, size_headers, fp) != size_headers
|
||||
|| (element->size > 0
|
||||
|
||||
15
tests/13_local-cookies.test
Executable file
15
tests/13_local-cookies.test
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
#
# Cookie chain against the local test server (replaces the old online
# ut/cookies/*.php fixtures). entrance.php sets cat/cake; second.php checks
# them and sets badger; third.php checks all three. A missing or wrong cookie
# returns 500, which would surface as an httrack error and a missing file, so a
# clean 3-files/0-errors run proves the cookie jar is replayed across links.

# Default to the parent directory when not run through "make check".
: "${top_srcdir:=..}"

bash "$top_srcdir/tests/local-crawl.sh" --errors 0 --files 3 \
  --found 'cookies/entrance.html' \
  --found 'cookies/second.html' \
  --found 'cookies/third.html' \
  httrack 'BASEURL/cookies/entrance.php'
|
||||
18
tests/14_local-https.test
Executable file
18
tests/14_local-https.test
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/bin/bash
#
# HTTPS crawl against the local test server, using the shipped self-signed
# cert. httrack does not verify certs (htslib.c: SSL_CTX_new with no
# SSL_CTX_set_verify), so the self-signed cert is accepted as-is and this
# exercises the real TLS path offline. basic.html links to link.html with four
# distinct query strings, each saved under a hashed name -> 5 files.

# Default to the parent directory when not run through "make check".
: "${top_srcdir:=..}"

# Exit status 77 marks the test as skipped. An unset HTTPS_SUPPORT means
# "not known to be disabled", so the test still runs in that case.
if test "${HTTPS_SUPPORT:-}" = "no"; then
  echo "no https support compiled, skipping"
  exit 77
fi

bash "$top_srcdir/tests/local-crawl.sh" --tls --errors 0 --files 5 \
  --found 'simple/basic.html' \
  httrack 'BASEURL/simple/basic.html'
|
||||
25
tests/15_local-types.test
Normal file
25
tests/15_local-types.test
Normal file
@@ -0,0 +1,25 @@
|
||||
#!/bin/bash
#
# Content-Type vs URL-extension naming (issue #267 family) under the default
# delayed type check (-%N2). Policy: a MISSING Content-Type must not clobber a
# URL extension that maps to a specific non-HTML type (.png/.pdf stay as-is);
# an explicitly DECLARED type is trusted, so a binary-looking URL that really
# serves HTML (text/html on .pdf/.jpg) is named .html. The "wrong" names are
# asserted absent so a regression in either direction fails here.

# Default to the parent directory when not run through "make check".
: "${top_srcdir:=..}"

bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
  --found 'types/notype.png' --not-found 'types/notype.html' \
  --found 'types/notype.pdf' --not-found 'types/notype.html' \
  --found 'types/photo.png' \
  --found 'types/doc.pdf' \
  --found 'types/lie.html' --not-found 'types/lie.png' \
  --found 'types/report.html' --not-found 'types/report.pdf' \
  --found 'types/page.htm' --not-found 'types/page.html' \
  --found 'types/script.js' \
  --found 'types/style.css' \
  --found 'types/data.json' \
  --found 'types/control.html' --not-found 'types/control.php' \
  --found 'types/gend61c.png' --not-found 'types/gend61c.html' \
  httrack 'BASEURL/types/index.html'
|
||||
11
tests/16_local-assume.test
Normal file
11
tests/16_local-assume.test
Normal file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
#
# --assume under the default delayed type check (-%N2), issue #56. A user type
# pinned with --assume must be honored immediately, not lost to the delayed
# name: photo.png served as image/png but assumed text/html is saved as .html.

# Default to the parent directory when not run through "make check".
: "${top_srcdir:=..}"

bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
  --found 'types/photo.html' --not-found 'types/photo.png' \
  httrack 'BASEURL/types/photo.png' --assume png=text/html
|
||||
12
tests/17_local-empty-ct.test
Normal file
12
tests/17_local-empty-ct.test
Normal file
@@ -0,0 +1,12 @@
|
||||
#!/bin/bash
#
# An empty "Content-Type:" header value must be treated as "no usable type"
# (keep the URL extension), not parsed from an uninitialized buffer. The crawl
# also runs under ASan/UBSan in CI, which catches the uninitialized read this
# guards against.

# Default to the parent directory when not run through "make check".
: "${top_srcdir:=..}"

bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
  --found 'types/emptyct.png' --not-found 'types/emptyct.html' \
  httrack 'BASEURL/types/index.html'
|
||||
15
tests/18_local-update.test
Normal file
15
tests/18_local-update.test
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
#
# A second (update) pass must keep the names the first crawl chose. The stored
# Content-Type rides the cache, so the update reads back the same value -- the
# unknown/unknown sentinel for a typeless response, the declared type otherwise
# -- and names consistently: a declared-text/html .pdf stays .html and a
# typeless .png stays .png across the update rather than reverting.

# Default to the parent directory when not run through "make check".
: "${top_srcdir:=..}"

# --rerun makes the launcher run httrack twice into the same output directory
# before auditing.
bash "$top_srcdir/tests/local-crawl.sh" --errors 0 --rerun \
  --found 'types/report.html' --not-found 'types/report.pdf' \
  --found 'types/notype.png' --not-found 'types/notype.html' \
  --found 'types/lie.html' \
  httrack 'BASEURL/types/index.html'
|
||||
@@ -3,6 +3,8 @@
|
||||
# silently drop it from the dist tarball and break "make distcheck".
|
||||
EXTRA_DIST = $(TESTS) crawl-test.sh run-all-tests.sh check-network.sh \
|
||||
proxy-https-server.py \
|
||||
local-crawl.sh local-server.py server.crt server.key \
|
||||
server-root/simple/basic.html server-root/simple/link.html \
|
||||
fixtures/cache-golden/hts-cache/new.zip
|
||||
|
||||
TESTS_ENVIRONMENT =
|
||||
@@ -47,6 +49,12 @@ TESTS = \
|
||||
11_crawl-longurl.test \
|
||||
11_crawl-parsing.test \
|
||||
12_crawl_https.test \
|
||||
13_crawl_proxy_https.test
|
||||
13_crawl_proxy_https.test \
|
||||
13_local-cookies.test \
|
||||
14_local-https.test \
|
||||
15_local-types.test \
|
||||
16_local-assume.test \
|
||||
17_local-empty-ct.test \
|
||||
18_local-update.test
|
||||
|
||||
CLEANFILES = check-network_sh.cache
|
||||
|
||||
253
tests/local-crawl.sh
Executable file
253
tests/local-crawl.sh
Executable file
@@ -0,0 +1,253 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Launcher for httrack crawl tests against the local Python test server.
|
||||
#
|
||||
# Starts tests/local-server.py on an ephemeral port, discovers the port from
|
||||
# the server's stdout, then runs httrack against http(s)://127.0.0.1:$PORT and
|
||||
# audits the mirror. The server is always killed and the tmpdir removed on exit.
|
||||
#
|
||||
# The token BASEURL in any httrack argument is replaced with the discovered
|
||||
# http(s)://127.0.0.1:$PORT base. --found/--directory paths are relative to the
|
||||
# discovered host root (127.0.0.1_<port>/), since the random port leaks into
|
||||
# the mirror directory name.
|
||||
#
|
||||
# Usage:
#   bash local-crawl.sh [--debug] [--no-purge] [--rerun] [--tls] [--root DIR] \
#     --errors N --files N --found PATH ... --not-found PATH ... \
#     --directory PATH ... \
#     httrack BASEURL/some/path [httrack-args...]
|
||||
|
||||
# Treat any reference to an unset variable as an error.
set -u

# Everything the launcher needs lives next to this script.
testdir=$(cd "$(dirname "$0")" && pwd)
server="${testdir}/local-server.py"
# The docroot may be overridden from the environment (or later with --root).
root="${LOCAL_SERVER_ROOT:-${testdir}/server-root}"
cert="${testdir}/server.crt"
key="${testdir}/server.key"

# Mutable state; initialised up front so "set -u" never trips on it.
tls=
verbose=
rerun=
tmpdir=
serverpid=
crawlpid=
|
||||
|
||||
# Print a "** "-prefixed message on stderr. Always returns 0.
function warning {
  echo "** $*" >&2
  return 0
}
|
||||
# Report a fatal problem through warning() and terminate with status 1.
function die {
  warning "$*"
  exit 1
}
|
||||
# Print a message on stderr only when $verbose is non-empty (--debug).
# Always returns 0 so a quiet run never looks like a failure to the caller.
function debug {
  test -n "$verbose" && echo "$*" >&2
  return 0
}
|
||||
# Start a progress line on stderr: "[message] .." followed by a tab and no
# newline, so that a later result() call completes the same line.
function info {
  printf "[%s] ..\t" "$*" >&2
}
|
||||
# Print the outcome text on stderr, ending the current line.
function result {
  echo "$*" >&2
}
|
||||
|
||||
# Tear down everything the launcher started: a still-running crawl, the test
# server, and (unless --no-purge was given) the temporary directory.
# Each pid variable is cleared once handled, so calling this twice is harmless.
function cleanup {
  if test -n "$crawlpid"; then
    kill -9 "$crawlpid" 2>/dev/null
    # Reap the killed crawl so it does not linger as a zombie.
    wait "$crawlpid" 2>/dev/null
    crawlpid=
  fi
  if test -n "$serverpid"; then
    kill "$serverpid" 2>/dev/null
    # Reap it so the port is released before we rm the tmpdir/log.
    wait "$serverpid" 2>/dev/null
    serverpid=
  fi
  if test -n "$tmpdir" && test -d "$tmpdir"; then
    # ${nopurge:-} keeps "set -u" happy even if the flag was never initialised.
    test -n "${nopurge:-}" || rm -rf "$tmpdir"
  fi
}
|
||||
|
||||
# assert_equals LABEL EXPECTED ACTUAL
# Announce LABEL, then compare the two strings. A mismatch is reported and
# terminates the script with status 1; a match prints "OK (EXPECTED)".
function assert_equals {
  info "$1"
  if test "$2" != "$3"; then
    result "expected '$2', got '$3'"
    exit 1
  fi
  result "OK ($2)"
}
|
||||
|
||||
nopurge=

# cleanup runs exactly once, on exit. A fatal signal is turned into an exit
# (which fires the EXIT trap) instead of merely running cleanup and letting
# the script carry on with its tmpdir and server already gone.
trap cleanup EXIT
trap 'exit 1' HUP INT QUIT PIPE TERM

# python3 is required; mirror check-network.sh's skip-with-77 convention.
if ! command -v python3 >/dev/null; then
  echo "python3 not found; skipping local crawl tests"
  exit 77
fi

tmptopdir=${TMPDIR:-/tmp}
test -d "$tmptopdir" || mkdir -p "$tmptopdir" || die "no temporary directory; set TMPDIR"
tmpdir=$(mktemp -d "${tmptopdir}/httrack_local.XXXXXX") || die "could not create tmpdir"
|
||||
|
||||
# --- parse leading control flags --------------------------------------------
# Flags before the literal word "httrack" configure the launcher and the
# audit; everything after it is passed to httrack. $pos is left pointing at
# the first httrack argument.
declare -a audit=()
scheme=http
pos=0
args=("$@")
nargs=$#
while test "$pos" -lt "$nargs"; do
  opt="${args[$pos]}"
  case "$opt" in
  --debug) verbose=1 ;;
  --rerun) rerun=1 ;; # run httrack a second time (update pass) before auditing
  --no-purge)
    nopurge=1
    audit+=("--no-purge")
    ;;
  --tls)
    tls=1
    scheme=https
    ;;
  --root | --errors | --files | --found | --not-found | --directory)
    # These all consume the next word; fail clearly instead of tripping
    # "set -u" with an out-of-range array read.
    test "$((pos + 1))" -lt "$nargs" || die "missing argument for $opt"
    pos=$((pos + 1))
    if test "$opt" == "--root"; then
      root="${args[$pos]}"
    else
      # Audit checks are replayed, in order, once the crawl has finished.
      audit+=("$opt" "${args[$pos]}")
    fi
    ;;
  httrack)
    pos=$((pos + 1))
    break
    ;;
  *) die "unrecognized option $opt" ;;
  esac
  pos=$((pos + 1))
done
|
||||
|
||||
# --- start the server --------------------------------------------------------
test -r "$server" || die "cannot read $server"
serverlog="${tmpdir}/server.log"
serverargs=(--root "$root")
if test -n "$tls"; then
  serverargs+=(--tls --cert "$cert" --key "$key")
fi
debug "starting python3 $server ${serverargs[*]}"
python3 "$server" "${serverargs[@]}" >"$serverlog" 2>&1 &
serverpid=$!

# Wait for the "PORT <n>" line (server prints it once bound): up to 50 polls,
# 0.1 s apart.
port=
for ((try = 0; try < 50; try++)); do
  # read only succeeds once the first line is newline-terminated, so a
  # partially written announcement is never mistaken for the real port.
  if test -s "$serverlog" && IFS= read -r line <"$serverlog"; then
    if test "${line%% *}" == "PORT"; then
      port="${line#PORT }"
      break
    fi
  fi
  kill -0 "$serverpid" 2>/dev/null || die "server exited early: $(cat "$serverlog")"
  sleep 0.1
done
test -n "$port" || die "could not discover server port: $(cat "$serverlog")"
# The port ends up in URLs and directory names; insist on plain digits.
case "$port" in
*[!0-9]*) die "unexpected port announcement from server: $line" ;;
esac
debug "server listening on ${scheme}://127.0.0.1:${port}"

baseurl="${scheme}://127.0.0.1:${port}"
|
||||
|
||||
# --- substitute BASEURL in the remaining (httrack) args ----------------------
# $pos points just past the "httrack" separator; every occurrence of the
# token BASEURL in the remaining arguments becomes the discovered base URL.
declare -a hts=()
while test "$pos" -lt "$nargs"; do
  hts+=("${args[$pos]//BASEURL/$baseurl}")
  pos=$((pos + 1))
done
|
||||
|
||||
# --- run httrack -------------------------------------------------------------
# command -v is the same lookup used for python3 above and, unlike "which",
# is a shell builtin that is always available.
command -v httrack >/dev/null || die "could not find httrack"
ver=$(httrack -O /dev/null --version | sed -e 's/HTTrack version //')
test -n "$ver" || die "could not run httrack"

out="${tmpdir}/crawl"
mkdir "$out" || die "could not create $out"
# Localhost is fast; disable the rate/bandwidth safety limits but keep a
# max-time backstop so a hang cannot wedge the suite.
declare -a moreargs=(--quiet --max-time=120 --timeout=30 --disable-security-limits --robots=0)
log="${tmpdir}/log"
info "running httrack ${hts[*]}"
# Run in the background and wait, so that cleanup can kill the crawl through
# $crawlpid if the launcher is interrupted.
httrack -O "$out" --user-agent="httrack $ver local ($(uname -omrs))" "${moreargs[@]}" "${hts[@]}" >"$log" 2>&1 &
crawlpid=$!
wait "$crawlpid"
crawlres=$?
crawlpid=
# httrack exits 0 even on hard connect/DNS errors, so this is a backstop only;
# the real guard is the audit below (--errors 0 plus the host-root existence check).
if test "$crawlres" -ne 0; then
  result "httrack exited $crawlres"
  cat "$log" >&2
  exit 1
fi
result "OK"
# Surface any logged errors for the reader; the audit decides pass/fail.
grep -iE "^[0-9:]*[[:space:]]Error:" "${out}/hts-log.txt" >&2
|
||||
|
||||
# --- optional second pass: re-mirror into the same dir (cache/update path) ----
if test -n "$rerun"; then
  info "re-running httrack (update pass)"
  # Same command line as the first pass; only the log file differs.
  httrack -O "$out" --user-agent="httrack $ver local ($(uname -omrs))" \
    "${moreargs[@]}" "${hts[@]}" >"${log}.2" 2>&1 &
  crawlpid=$!
  wait "$crawlpid"
  crawlres=$?
  crawlpid=
  if test "$crawlres" -ne 0; then
    result "update pass exited $crawlres"
    cat "${log}.2" >&2
    exit 1
  fi
  result "OK (update)"
fi
|
||||
|
||||
# --- discover the single host root (127.0.0.1_<port> or 127.0.0.1) -----------
# The first candidate directory that exists wins; a crawl that mirrored
# nothing leaves neither behind and is reported as a failure here.
hostroot=
for cand in "${out}/127.0.0.1_${port}" "${out}/127.0.0.1"; do
  if test -d "$cand"; then
    hostroot="$cand"
    break
  fi
done
test -n "$hostroot" || die "could not find host root under $out"
debug "host root: $hostroot"
|
||||
|
||||
# --- audit -------------------------------------------------------------------
# Replay the checks collected while parsing the flags. Each check is a
# "flag value" pair; a lone entry such as --no-purge matches no branch and is
# simply stepped over. The first failing check exits with status 1.
i=0
while test "$i" -lt "${#audit[@]}"; do
  case "${audit[$i]}" in
  --errors)
    i=$((i + 1))
    # Count the "Error:" lines in httrack's own log.
    assert_equals "checking errors" "${audit[$i]}" \
      "$(grep -iEc "^[0-9:]*[[:space:]]Error:" "${out}/hts-log.txt")"
    ;;
  --files)
    i=$((i + 1))
    # Pull the "<n> files written" figure out of the completion summary line.
    nFiles=$(grep -E "^HTTrack Website Copier/[^ ]* mirror complete in " "${out}/hts-log.txt" |
      sed -e 's/.*[[:space:]]\([^ ]*\)[[:space:]]files written.*/\1/g')
    assert_equals "checking files" "${audit[$i]}" "$nFiles"
    ;;
  --found)
    i=$((i + 1))
    info "checking for ${audit[$i]}"
    if test -f "${hostroot}/${audit[$i]}"; then
      result "OK"
    else
      result "not found"
      exit 1
    fi
    ;;
  --not-found)
    i=$((i + 1))
    info "checking absence of ${audit[$i]}"
    if test ! -f "${hostroot}/${audit[$i]}"; then
      result "OK"
    else
      result "present"
      exit 1
    fi
    ;;
  --directory)
    i=$((i + 1))
    info "checking for dir ${audit[$i]}"
    if test -d "${hostroot}/${audit[$i]}"; then
      result "OK"
    else
      result "not found"
      exit 1
    fi
    ;;
  esac
  i=$((i + 1))
done
|
||||
254
tests/local-server.py
Executable file
254
tests/local-server.py
Executable file
@@ -0,0 +1,254 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Self-contained local web server for httrack's crawl tests.

Serves static fixtures from a docroot plus a handful of dynamic endpoints
(cookies, ...) so httrack can be exercised over loopback, deterministically and
offline, instead of crawling the live ut.httrack.com.

Binds to an ephemeral port (port 0) and prints the chosen port to stdout as
"PORT <n>\n" so a launcher can discover it. Pass --tls to wrap the socket with
the shipped self-signed test cert; httrack does not verify certs, so no CA
trust plumbing is needed.

stdlib only (http.server + ssl) -- no new build or runtime dependency.
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer
|
||||
from urllib.parse import quote, unquote, urlsplit
|
||||
|
||||
# Cookie chain replicated from the old ut/cookies/*.php fixtures.
# Every cookie is scoped to COOKIE_PATH; COOKIES maps each cookie name to the
# exact value the dynamic routes set and later expect to get back.
COOKIE_PATH = "/cookies/"
COOKIES = {
    "cat": "dog",
    "cake": "is a lie!",
    "badger": "mushroom, with 'ants'",
}

# XHTML page skeleton; the {body} placeholder is filled in via str.format().
PAGE = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
\t"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
\t<title>Sample test</title>
</head>
<body>
{body}
</body>
</html>
"""
|
||||
|
||||
|
||||
class Handler(SimpleHTTPRequestHandler):
    """Serve the dynamic ROUTES first and fall back to static docroot files."""

    # Quieter logging; the launcher captures httrack's own log anyway.
    def log_message(self, fmt, *args):
        if os.environ.get("LOCAL_SERVER_VERBOSE"):
            super().log_message(fmt, *args)

    # --- helpers -----------------------------------------------------------

    def request_cookies(self):
        """Parse the Cookie header into {name: decoded-value}.

        Mirrors PHP's $_COOKIE: values are url-decoded, matching the encoding
        applied when the cookie was set (see set_cookie)."""
        jar = {}
        raw = self.headers.get("Cookie", "")
        for pair in raw.split(";"):
            pair = pair.strip()
            # Fragments without "=" carry no name/value and are ignored.
            if "=" in pair:
                name, value = pair.split("=", 1)
                jar[name.strip()] = unquote(value.strip())
        return jar

    def set_cookie(self, name, value):
        """Queue a Set-Cookie header, url-encoding the value like PHP's
        setcookie() so spaces/quotes/commas stay a single token that httrack
        can store and replay verbatim."""
        self._set_cookies.append(f"{name}={quote(value)}; Path={COOKIE_PATH}")

    def send_html(self, body, status=200, extra_status=None):
        """Send `body` wrapped in PAGE as a UTF-8 HTML response.

        Cookies queued with set_cookie() are emitted as Set-Cookie headers.
        `extra_status`, when given, replaces the reason phrase of the status
        line. The body is omitted for HEAD requests."""
        encoded = PAGE.format(body=body).encode("utf-8")
        self.send_response(status, extra_status)
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.send_header("Content-Length", str(len(encoded)))
        for cookie in self._set_cookies:
            self.send_header("Set-Cookie", cookie)
        self.end_headers()
        if self.command != "HEAD":
            self.wfile.write(encoded)

    def fail_cookie(self, what):
        # The old PHPs answered 500 with the reason in the status line.
        self.send_html("", status=500, extra_status=f"The {what} is missing or invalid")

    # --- dynamic routes ----------------------------------------------------

    def route_entrance(self):
        """First hop of the cookie chain: set cat and cake, link to second.php."""
        self.set_cookie("cat", COOKIES["cat"])
        self.set_cookie("cake", COOKIES["cake"])
        self.send_html('\tThis is a <a href="second.php">link</a>')

    def route_second(self):
        """Second hop: require cat and cake, set badger, link to third.php."""
        jar = self.request_cookies()
        if jar.get("cat") != COOKIES["cat"]:
            return self.fail_cookie("cat")
        if jar.get("cake") != COOKIES["cake"]:
            return self.fail_cookie("cake")
        self.set_cookie("badger", COOKIES["badger"])
        self.send_html('\tThis is a <a href="third.php">link</a>')

    def route_third(self):
        """Last hop: require all three cookies, then serve the final page."""
        jar = self.request_cookies()
        if jar.get("cat") != COOKIES["cat"]:
            return self.fail_cookie("cat")
        if jar.get("cake") != COOKIES["cake"]:
            return self.fail_cookie("cake")
        if jar.get("badger") != COOKIES["badger"]:
            return self.fail_cookie("badger")
        self.send_html("\tThis is a test.")

    def route_robots(self):
        """Serve a robots.txt with an empty Disallow (nothing is excluded)."""
        body = b"User-agent: *\nDisallow:\n"
        self.send_response(200)
        self.send_header("Content-Type", "text/plain")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        if self.command != "HEAD":
            self.wfile.write(body)

    # --- type/extension matrix (issue #267 family) -------------------------

    def send_raw(self, body, content_type):
        """Send a raw body with an explicit Content-Type, or none at all when
        content_type is None (to observe httrack's typeless-file naming)."""
        self.send_response(200)
        if content_type is not None:
            self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        if self.command != "HEAD":
            self.wfile.write(body)

    # Fake-binary blobs for the image/pdf/typeless cases.
    FAKE_PNG = b"\x89PNG\r\n\x1a\n" + b"\x00" * 64
    FAKE_PDF = b"%PDF-1.4\n" + b"\x00" * 64

    # path -> (body, content_type); None sends no header, "" sends an empty
    # Content-Type value (no usable type, must be treated like None).
    TYPE_MATRIX = {
        "/types/control.php": (b"<html><body>control</body></html>", "text/html"),
        "/types/photo.png": (FAKE_PNG, "image/png"),
        "/types/doc.pdf": (FAKE_PDF, "application/pdf"),
        "/types/notype.png": (FAKE_PNG, None),
        "/types/notype.pdf": (FAKE_PDF, None),
        "/types/emptyct.png": (FAKE_PNG, ""),
        "/types/lie.png": (FAKE_PNG, "text/html"),
        "/types/report.pdf": (b"<html><body>real page</body></html>", "text/html"),
        "/types/page.htm": (b"<html><body>htm page</body></html>", "text/html"),
        "/types/script.js": (b"var x = 1;\n", "application/javascript"),
        "/types/style.css": (b"body { color: red; }\n", "text/css"),
        "/types/data.json": (b'{"k": "v"}\n', "application/json"),
        "/types/gen.php": (FAKE_PNG, "image/png"),
    }

    def route_types_index(self):
        """Serve the index page that links to every TYPE_MATRIX entry."""
        body = (
            '\t<a href="control.php">control</a>\n'
            '\t<img src="photo.png" />\n'
            '\t<a href="doc.pdf">doc</a>\n'
            '\t<img src="notype.png" />\n'
            '\t<a href="notype.pdf">notypepdf</a>\n'
            '\t<img src="emptyct.png" />\n'
            '\t<img src="lie.png" />\n'
            '\t<a href="report.pdf">report</a>\n'
            '\t<a href="page.htm">htm</a>\n'
            '\t<script src="script.js"></script>\n'
            '\t<link rel="stylesheet" href="style.css" />\n'
            '\t<a href="data.json">json</a>\n'
            '\t<img src="gen.php?id=5" />\n'
        )
        self.send_html(body)

    def route_types(self):
        """Serve the TYPE_MATRIX entry for the request path (query ignored)."""
        path = urlsplit(self.path).path
        body, ctype = self.TYPE_MATRIX[path]
        self.send_raw(body, ctype)

    # Exact request path -> handler. The values are the plain functions
    # defined above; dispatch() calls them with the handler instance.
    ROUTES = {
        "/cookies/entrance.php": route_entrance,
        "/cookies/second.php": route_second,
        "/cookies/third.php": route_third,
        "/robots.txt": route_robots,
        "/types/index.html": route_types_index,
        "/types/control.php": route_types,
        "/types/photo.png": route_types,
        "/types/doc.pdf": route_types,
        "/types/notype.png": route_types,
        "/types/notype.pdf": route_types,
        "/types/emptyct.png": route_types,
        "/types/lie.png": route_types,
        "/types/report.pdf": route_types,
        "/types/page.htm": route_types,
        "/types/script.js": route_types,
        "/types/style.css": route_types,
        "/types/data.json": route_types,
        "/types/gen.php": route_types,
    }

    # --- dispatch ----------------------------------------------------------

    def dispatch(self):
        """Run the dynamic route matching the request path, if there is one.

        Resets the per-request cookie queue first. Returns True when a route
        handled the request and False when the caller should fall back to
        static file serving."""
        self._set_cookies = []
        path = urlsplit(self.path).path
        handler = self.ROUTES.get(path)
        if handler is not None:
            handler(self)
            return True
        return False

    def do_GET(self):
        if not self.dispatch():
            super().do_GET()

    def do_HEAD(self):
        if not self.dispatch():
            super().do_HEAD()
|
||||
|
||||
|
||||
def main():
    """Parse the command line, bind an ephemeral port, announce it, and serve.

    The port is printed to stdout as "PORT <n>" before the server starts
    handling requests. Ctrl-C (KeyboardInterrupt) stops the server quietly."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--root", required=True, help="docroot for static files")
    parser.add_argument("--bind", default="127.0.0.1", help="bind address")
    parser.add_argument("--tls", action="store_true", help="serve HTTPS")
    parser.add_argument("--cert", help="TLS certificate (PEM)")
    parser.add_argument("--key", help="TLS private key (PEM)")
    args = parser.parse_args()

    # Fail with a usage error rather than a traceback from load_cert_chain().
    # --key stays optional: a PEM file may hold both certificate and key.
    if args.tls and not args.cert:
        parser.error("--tls requires --cert")

    root = os.path.abspath(args.root)

    def factory(*a, **kw):
        # Pin the static docroot for every handler instance.
        return Handler(*a, directory=root, **kw)

    # Port 0 asks the OS for any free port.
    httpd = ThreadingHTTPServer((args.bind, 0), factory)

    if args.tls:
        import ssl

        ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        ctx.load_cert_chain(certfile=args.cert, keyfile=args.key)
        httpd.socket = ctx.wrap_socket(httpd.socket, server_side=True)

    port = httpd.socket.getsockname()[1]
    # The launcher reads this line to discover the ephemeral port.
    print(f"PORT {port}", flush=True)

    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        # Release the listening socket on the way out.
        httpd.server_close()
|
||||
|
||||
|
||||
# Run the server only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
18
tests/server-root/simple/basic.html
Normal file
18
tests/server-root/simple/basic.html
Normal file
@@ -0,0 +1,18 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="fr">
|
||||
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<title>Sample test</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
|
||||
This is a <a href="link.html?v=1">link</a>
|
||||
This is a <a href='link.html?v=2'>link</a>
|
||||
This is a <a href="./link.html?v=3">link</a>
|
||||
This is a <a href=link.html?v=4>link</a>
|
||||
|
||||
</body>
|
||||
3
tests/server-root/simple/link.html
Normal file
3
tests/server-root/simple/link.html
Normal file
@@ -0,0 +1,3 @@
|
||||
This is a link.
|
||||
|
||||
Go back to <a href="basic.html">home</a>.
|
||||
21
tests/server.crt
Normal file
21
tests/server.crt
Normal file
@@ -0,0 +1,21 @@
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIDbzCCAlegAwIBAgIUdWkDDomnY3WW95UqJ+UOASuR/i0wDQYJKoZIhvcNAQEL
|
||||
BQAwODESMBAGA1UEAwwJMTI3LjAuMC4xMSIwIAYDVQQKDBlIVFRyYWNrIGxvY2Fs
|
||||
IHRlc3Qgc2VydmVyMCAXDTI2MDYxNTE0NDQxMFoYDzIwNTYwNjA3MTQ0NDEwWjA4
|
||||
MRIwEAYDVQQDDAkxMjcuMC4wLjExIjAgBgNVBAoMGUhUVHJhY2sgbG9jYWwgdGVz
|
||||
dCBzZXJ2ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDx78mogNhT
|
||||
noWwRa51NeGtapQ1PfTYLlIMUzuloFXOsR1/ozRkFucqHNftF22wf0gg4VQJSBSf
|
||||
3rwj79vsnt3nyaD03bTAafpHXkd+IJxQowiG8TfOJF0R/Qg9g7DCE66R9agQpMJC
|
||||
SGxIin9p/4ld4Hn6869d4hNq4fHxNf/qkj2cnf8DYxrldz2FGsi6yMed4tzz2Am4
|
||||
ZbPgwep+fy843ZdYrVIms9vJluNa9E+6Vpw9FwdjzQ/IBBMLvGaC2pDkc95YelaE
|
||||
nQrAlTO/0l5vjc8XuTQFlo3DbUg+WEld/pxvCqsd/q1mqjL0WbxtXl2zCwGzAoJx
|
||||
rjVEPfA8QSbtAgMBAAGjbzBtMB0GA1UdDgQWBBTHE0KKW8REV4HxajzVsIBxz3iL
|
||||
9zAfBgNVHSMEGDAWgBTHE0KKW8REV4HxajzVsIBxz3iL9zAPBgNVHRMBAf8EBTAD
|
||||
AQH/MBoGA1UdEQQTMBGHBH8AAAGCCWxvY2FsaG9zdDANBgkqhkiG9w0BAQsFAAOC
|
||||
AQEAYlTEftrwGJBXuPmtxhmtw2HO/VTC4TGnq67hH5H+ptwgZJuuxCQ5KW6flTyp
|
||||
FTyMhha33WD4EBL3wqqJsWr9Y4BXqi4G0lRqXBcC1oIUa2VYIDMER7kaY1qTSqE8
|
||||
ARpwdB2BhvngAzDLc+4Jt4jQMRGr8fHAwxpDBoIZ1knbyzYNP73Bajse6/8YtxUu
|
||||
nB2BsldjZnLvyHvRxUpWp92OyQih4jYSrlN6olDFlKDg7++kMhkHtJQW9a1t54VN
|
||||
0ZXrB1ZRuHUUvGBq26x71riTWor7HNOSQaGeCMQjZNQkh5tfshNygUGSZVXTEwhG
|
||||
xSrOL7NqBt2+EkVwf7LjGzjmBw==
|
||||
-----END CERTIFICATE-----
|
||||
28
tests/server.key
Normal file
28
tests/server.key
Normal file
@@ -0,0 +1,28 @@
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDx78mogNhTnoWw
|
||||
Ra51NeGtapQ1PfTYLlIMUzuloFXOsR1/ozRkFucqHNftF22wf0gg4VQJSBSf3rwj
|
||||
79vsnt3nyaD03bTAafpHXkd+IJxQowiG8TfOJF0R/Qg9g7DCE66R9agQpMJCSGxI
|
||||
in9p/4ld4Hn6869d4hNq4fHxNf/qkj2cnf8DYxrldz2FGsi6yMed4tzz2Am4ZbPg
|
||||
wep+fy843ZdYrVIms9vJluNa9E+6Vpw9FwdjzQ/IBBMLvGaC2pDkc95YelaEnQrA
|
||||
lTO/0l5vjc8XuTQFlo3DbUg+WEld/pxvCqsd/q1mqjL0WbxtXl2zCwGzAoJxrjVE
|
||||
PfA8QSbtAgMBAAECggEACgNK4klq1T3IpKdNoBY5yoE7CbUQZBNkBpSPRxHgBezj
|
||||
SVFfgrZGnOySrIJSt4JHtuynG2Hl+0ku74HRep/ck+eOsh5W3mZvGvMLnGxhwR3u
|
||||
Or99osTIgU0VQTkpC0SLQ16FCnih0uJycNIikdLR7uuya1tt1OyIBzK7XlNGIywT
|
||||
p85zJc7/6TfTC9eM7lqh7JGR7KplBxSvgZL1pUr7y4rNpKms6uzOvPND79CcKnbU
|
||||
BBA9Tu4qdOkoOljsZKkvh3pihxyG9X6d8QTZ/uX3pkvliwSFBc+Sz9EootA3/4r5
|
||||
gVWpQ2t/AY7fY4hqzLIX/HivVaPj3cWk1G+SHm0XNQKBgQD5I9rijqFvV/p6FmUl
|
||||
FbnjJFFHHgZLivlGxAC5vOyJNQQaqdeDzg7yMotNmQTggVGjT6sjdosQb3n+ctuk
|
||||
EhQnZSU5VkNKv1+PTR35WrRkaECCaqz3Pv79pV9GVcX3it7UuYjNiOeSPqINWe+X
|
||||
49JwnJFz+qQ1BchAwOis4zkENwKBgQD4mShDaYLOO97VpgZj4cGxHHWyEK9CRQvp
|
||||
I7HxRmfaWS3JHwb88lOmALEU6pAj5cYJPAznv8BnUWcVHalZbkQ1JWYtUJRqj6OI
|
||||
Ym7rw/nm4Ay5ijbdEism173dSk3IjOe+PdAlxzsOuVzYdBTqElmeQWtBzhY9aHvX
|
||||
r+A02C2j+wKBgHHDo6Gsi57yR5gUPd9vSlCkNtEIrss0DJv5yHMIB+KnaNZcE+NF
|
||||
5qFF30Jxyz5RDtxJ9tXcvaeln8lG3XDQKI/MqfDCqTuqo5ImHrfMaW8oA70JxS2p
|
||||
gHqGVzkg1aMxsIrmpcdk6olnPExocvWivGdbtzeEjhMALu8Sp6y6nUCFAoGBAK5h
|
||||
KLgYw/OMVaQCIMthaa+l6f0s7PMMYe1453H6VBD6qz4/8HPwO7LfG1gzrUYxADgs
|
||||
ElVh0UHn/On383nS+i9Ze5Hfyyvwc+LQQURKJPrJQMPJavCptPE7NmiKnYNHK6vr
|
||||
yh0l4oxShAklbCJBGvICq4zuVfVfXDeQnDIVTfaPAoGBAMCrZqYdOUhUu+aUqxZq
|
||||
qO/TTQxrxftU63jGUg+o042TdgI4KWLn07wvHJ8/E2OqF35eXenvcuKbNLI1l72J
|
||||
4cp+3cUv8iAXThTRYEztr5CS/wta4o4CNN8zfjn5dV9AI4Hmt4V7EaGWpBcViGbj
|
||||
n0Mhag+dO8DHuenqi1yfMrAt
|
||||
-----END PRIVATE KEY-----
|
||||
152
tools/mk-sbuild-chroot.sh
Executable file
152
tools/mk-sbuild-chroot.sh
Executable file
@@ -0,0 +1,152 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Bootstrap an sbuild chroot for the clean-room build gate (mkdeb.sh --sbuild).
|
||||
#
|
||||
# Uses the rootless unshare backend: no root, no schroot daemon. It builds a
|
||||
# minimal buildd chroot tarball into ~/.cache/sbuild/<dist>-<arch>.tar.zst, where
|
||||
# sbuild --dist=<dist> finds it automatically in unshare mode.
|
||||
#
|
||||
# Usage:
|
||||
# tools/mk-sbuild-chroot.sh [options]
|
||||
#
|
||||
# Options:
|
||||
# -d, --dist DIST suite to bootstrap (default: unstable)
|
||||
# -a, --arch ARCH architecture (default: dpkg --print-architecture)
|
||||
# -m, --mirror URL apt mirror (default: http://deb.debian.org/debian)
|
||||
# --components LIST comma-separated components (default: main)
|
||||
# -f, --force rebuild even if the tarball already exists
|
||||
# --write-sbuildrc add "$chroot_mode = 'unshare';" to ~/.sbuildrc if absent
|
||||
# -h, --help show this help
|
||||
#
|
||||
# One-time setup; refresh later with sbuild-update or by rerunning with --force.
|
||||
# Requires mmdebstrap and the uidmap tools (newuidmap) for the unshare backend.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
readonly PROGNAME=${0##*/}
|
||||
|
||||
# Report a fatal error on stderr, prefixed with the program name, and exit 1.
# All arguments are joined with spaces into a single message.
die() {
  {
    printf '%s: error: %s\n' "$PROGNAME" "$*"
  } >&2
  exit 1
}
|
||||
|
||||
# Print a progress message on stderr, prefixed with "==> ".
# stdout is left untouched so it stays usable for data.
info() {
  local msg=$*
  printf '==> %s\n' "$msg" >&2
}
|
||||
|
||||
# Print the help text: the script's own header comment, from line 2 up to
# (but not including) the "set -euo" line, with the leading "# " or "#"
# comment marker removed from each line.
usage() {
  sed -n '2,/^set -euo/{/^set -euo/d;s/^# \{0,1\}//;p}' "$0"
}
|
||||
|
||||
# Verify that every named command is available on PATH.
# Aborts through die() at the first one that is missing.
need() {
  local cmd
  for cmd; do
    if ! command -v "$cmd" >/dev/null 2>&1; then
      die "required tool not found: $cmd"
    fi
  done
}
|
||||
|
||||
# Entry point: parse options, check prerequisites for the rootless unshare
# backend, bootstrap the chroot tarball with mmdebstrap (unless it already
# exists and --force was not given), then enable or explain the
# "$chroot_mode = 'unshare';" setting in ~/.sbuildrc.
#
# Arguments: the script's command-line options (see the header comment).
# Exits non-zero through die() on a bad option, a missing tool, or a missing
# subuid/subgid range; set -e aborts on any failing command such as mmdebstrap.
main() {
  local dist=unstable
  local arch=""
  local mirror=http://deb.debian.org/debian
  local components=main
  local force=0
  local write_sbuildrc=0

  while [[ $# -gt 0 ]]; do
    case $1 in
      -d | --dist)
        [[ $# -ge 2 ]] || die "missing argument for $1"
        dist=$2
        shift 2
        ;;
      -a | --arch)
        [[ $# -ge 2 ]] || die "missing argument for $1"
        arch=$2
        shift 2
        ;;
      -m | --mirror)
        [[ $# -ge 2 ]] || die "missing argument for $1"
        mirror=$2
        shift 2
        ;;
      --components)
        [[ $# -ge 2 ]] || die "missing argument for $1"
        components=$2
        shift 2
        ;;
      -f | --force)
        force=1
        shift
        ;;
      --write-sbuildrc)
        write_sbuildrc=1
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        die "unknown option: $1 (try --help)"
        ;;
    esac
  done

  need mmdebstrap dpkg
  # Unshare needs the setuid uid/gid mappers; mmdebstrap fails cryptically without.
  command -v newuidmap >/dev/null 2>&1 ||
    die "newuidmap not found; install the uidmap package for the unshare backend"

  # Unshare maps a whole UID range, not just the caller's: the base install
  # creates system users, and without an /etc/subuid+subgid range the install
  # crashes (dpkg SIGSEGV) instead of erroring cleanly. Root uses mode=root and
  # needs no range.
  if [[ $(id -u) -ne 0 ]]; then
    local me
    me=$(id -un)
    if ! grep -qs "^$me:" /etc/subuid || ! grep -qs "^$me:" /etc/subgid; then
      # Suggest a range starting past every allocation in either file.
      # Either file may be missing here (that is one way to reach this
      # branch), and awk exits non-zero on a missing input file; under
      # set -e that would abort silently before the hint below is printed.
      # So feed awk only what exists, and fall back to the default start if
      # the computation fails for any reason.
      local start
      start=$({ cat /etc/subuid /etc/subgid 2>/dev/null || true; } |
        awk -F: '{e = $2 + $3; if (e > m) m = e} END {print (m ? m : 100000)}') ||
        start=100000
      [[ $start =~ ^[0-9]+$ ]] || start=100000
      die "no /etc/subuid+subgid range for $me; the unshare backend needs one:
sudo usermod --add-subuids $start-$((start + 65535)) --add-subgids $start-$((start + 65535)) $me"
    fi
  fi

  # Default the architecture to the host's only when -a/--arch was not given.
  : "${arch:=$(dpkg --print-architecture)}"
  local cache=$HOME/.cache/sbuild
  local tarball=$cache/${dist}-${arch}.tar.zst

  if [[ -e $tarball && $force -eq 0 ]]; then
    info "chroot already exists: $tarball (use --force to rebuild)"
  else
    info "bootstrapping $dist/$arch chroot into $tarball"
    mkdir -p "$cache"
    mmdebstrap --variant=buildd --arch="$arch" --components="$components" \
      "$dist" "$tarball" "$mirror"
    info "chroot ready: $tarball"
  fi

  local rc=$HOME/.sbuildrc
  local mode_line="\$chroot_mode = 'unshare';"
  # shellcheck disable=SC2016 # $chroot_mode is literal regex text, not a shell var.
  if grep -qsE '^[[:space:]]*\$chroot_mode[[:space:]]*=.*unshare' "$rc"; then
    : # already configured (active, non-commented line)
  elif [[ $write_sbuildrc -eq 1 ]]; then
    info "enabling the unshare backend in $rc"
    printf '%s\n' "$mode_line" >>"$rc"
  else
    # Not configured and not asked to write: print the manual instructions.
    cat >&2 <<EOF
==> To use this chroot without passing --chroot-mode each time, add to $rc:
$mode_line
(or rerun with --write-sbuildrc). Then verify with:
sbuild --dist=$dist path/to/package.dsc
and build the release gate with:
tools/mkdeb.sh --source-only --sbuild
EOF
  fi
}
|
||||
|
||||
main "$@"
|
||||
156
tools/mkdeb.sh
156
tools/mkdeb.sh
@@ -20,11 +20,27 @@
|
||||
# Options:
|
||||
# -k, --key KEYID GPG key for signing (default: $DEBSIGN_KEYID)
|
||||
# -o, --outdir DIR output directory (default: <repo>/dist)
|
||||
# --orig FILE reuse this upstream orig tarball instead of
|
||||
# regenerating it (required for a Debian revision
|
||||
# >= 2, whose orig is frozen in the archive)
|
||||
# -s, --source-only build only the source package
|
||||
# -u, --unsigned do not sign anything (implies no release sigs)
|
||||
# --no-release-artifacts skip the orig tarball .asc/.md5/.sha1
|
||||
# --sbuild additionally build the .dsc in a clean sbuild
|
||||
# chroot as a from-scratch verification gate
|
||||
# -h, --help show this help
|
||||
#
|
||||
# --sbuild reproduces the buildd environment: it builds the source package in a
|
||||
# minimal chroot holding only the declared Build-Depends, so an FTBFS or a
|
||||
# missing dependency fails here instead of on the archive's buildds (which, with
|
||||
# a source-only upload, are otherwise the first clean build). It needs an sbuild
|
||||
# chroot for the changelog's distribution; create one once with the companion
|
||||
# tools/mk-sbuild-chroot.sh (rootless unshare backend).
|
||||
#
|
||||
# The Debian revision in debian/changelog decides the orig: revision 1 builds a
|
||||
# fresh upstream tarball; revision >= 2 must reuse the orig frozen at revision 1
|
||||
# (the .dsc references it by checksum), so pass it with --orig.
|
||||
#
|
||||
# SOURCE_DATE_EPOCH is honored for reproducible output.
|
||||
|
||||
set -euo pipefail
|
||||
@@ -57,9 +73,11 @@ need() {
|
||||
main() {
|
||||
local key=${DEBSIGN_KEYID:-}
|
||||
local outdir=""
|
||||
local orig_in=""
|
||||
local source_only=0
|
||||
local unsigned=0
|
||||
local release_artifacts=1
|
||||
local sbuild=0
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
@@ -73,6 +91,11 @@ main() {
|
||||
outdir=$2
|
||||
shift 2
|
||||
;;
|
||||
--orig)
|
||||
[[ $# -ge 2 ]] || die "missing argument for $1"
|
||||
orig_in=$2
|
||||
shift 2
|
||||
;;
|
||||
-s | --source-only)
|
||||
source_only=1
|
||||
shift
|
||||
@@ -85,6 +108,10 @@ main() {
|
||||
release_artifacts=0
|
||||
shift
|
||||
;;
|
||||
--sbuild)
|
||||
sbuild=1
|
||||
shift
|
||||
;;
|
||||
-h | --help)
|
||||
usage
|
||||
exit 0
|
||||
@@ -95,7 +122,8 @@ main() {
|
||||
esac
|
||||
done
|
||||
|
||||
need git autoreconf debuild dcmd
|
||||
need git autoreconf debuild dcmd dpkg-parsechangelog
|
||||
[[ $sbuild -eq 1 ]] && need sbuild
|
||||
if [[ $unsigned -eq 0 ]]; then
|
||||
need gpg
|
||||
[[ -n $key ]] || die "no signing key (pass --key or set DEBSIGN_KEYID, or use --unsigned)"
|
||||
@@ -107,6 +135,11 @@ main() {
|
||||
mkdir -p "$outdir"
|
||||
outdir=$(cd "$outdir" && pwd)
|
||||
|
||||
if [[ -n $orig_in ]]; then
|
||||
[[ -r $orig_in ]] || die "--orig file not readable: $orig_in"
|
||||
orig_in=$(cd "$(dirname "$orig_in")" && pwd)/$(basename "$orig_in")
|
||||
fi
|
||||
|
||||
scratch=$(mktemp -d "${TMPDIR:-/tmp}/httrack-mkdeb.XXXXXX")
|
||||
trap 'rm -rf -- "$scratch"' EXIT
|
||||
|
||||
@@ -118,45 +151,65 @@ main() {
|
||||
git -C "$repo/src/coucal" archive --format=tar --prefix=src/coucal/ HEAD |
|
||||
tar -x -C "$export_dir"
|
||||
|
||||
# Refresh build system and man page, then build the tarball. We build here
|
||||
# only because regen-man needs the compiled binaries; the test suite is not
|
||||
# run in this pass. debuild (below) runs the full suite once, with the online
|
||||
# tests enabled, so a check here would just be a slower, offline-only repeat.
|
||||
info "regenerating build system and man page"
|
||||
(
|
||||
cd "$export_dir"
|
||||
autoreconf -fi
|
||||
./configure --quiet
|
||||
make -s -j"$(nproc)"
|
||||
make -s -C man regen-man
|
||||
# Build the tarball from a clean tree so no object files leak into it.
|
||||
make -s clean
|
||||
make -s dist
|
||||
)
|
||||
# Upstream version and Debian revision drive the orig: revision 1 builds a
|
||||
# fresh tarball, revision >= 2 reuses the one frozen at -1 (the .dsc pins it
|
||||
# by checksum, so a regenerated orig with new mtimes would be rejected).
|
||||
local fullver ver rev
|
||||
fullver=$(cd "$export_dir" && dpkg-parsechangelog -S Version)
|
||||
ver=${fullver%-*}
|
||||
rev=${fullver##*-}
|
||||
local orig=httrack_${ver}.orig.tar.gz
|
||||
info "version $ver (Debian revision $rev)"
|
||||
|
||||
local tarball ver
|
||||
local -a tarballs
|
||||
shopt -s nullglob
|
||||
tarballs=("$export_dir"/httrack-*.tar.gz)
|
||||
shopt -u nullglob
|
||||
[[ ${#tarballs[@]} -ge 1 ]] || die "make dist produced no tarball"
|
||||
tarball=${tarballs[0]##*/}
|
||||
ver=${tarball#httrack-}
|
||||
ver=${ver%.tar.gz}
|
||||
info "version $ver"
|
||||
# A signed build is upload-bound, so a revision >= 2 must reuse the frozen
|
||||
# orig (--orig); an unsigned build is a throwaway (CI, local) and may
|
||||
# regenerate it, since it can never reach the archive.
|
||||
if [[ -z $orig_in && $rev != 1 && $unsigned -eq 0 ]]; then
|
||||
die "Debian revision $rev needs --orig FILE (the orig is frozen from revision 1)"
|
||||
fi
|
||||
|
||||
if [[ -n $orig_in ]]; then
|
||||
info "reusing upstream tarball $orig_in"
|
||||
cp -- "$orig_in" "$scratch/$orig"
|
||||
else
|
||||
# Refresh build system and man page, then build the tarball. We build
|
||||
# here only because regen-man needs the compiled binaries; the test
|
||||
# suite is not run in this pass. debuild (below) runs the full suite
|
||||
# once, online tests enabled, so a check here would just repeat it.
|
||||
info "regenerating build system and man page"
|
||||
(
|
||||
cd "$export_dir"
|
||||
autoreconf -fi
|
||||
./configure --quiet
|
||||
make -s -j"$(nproc)"
|
||||
make -s -C man regen-man
|
||||
# Build the tarball from a clean tree so no object files leak in.
|
||||
make -s clean
|
||||
make -s dist
|
||||
)
|
||||
local -a tarballs
|
||||
shopt -s nullglob
|
||||
tarballs=("$export_dir"/httrack-*.tar.gz)
|
||||
shopt -u nullglob
|
||||
[[ ${#tarballs[@]} -ge 1 ]] || die "make dist produced no tarball"
|
||||
local tarball=${tarballs[0]##*/}
|
||||
[[ $tarball == "httrack-$ver.tar.gz" ]] ||
|
||||
die "changelog version $ver disagrees with built tarball $tarball (configure.ac mismatch?)"
|
||||
cp -- "$export_dir/$tarball" "$scratch/$orig"
|
||||
fi
|
||||
|
||||
# 3.0 (quilt): orig tarball is upstream-only; debian/ is overlaid on top.
|
||||
local orig=httrack_${ver}.orig.tar.gz
|
||||
cp -- "$export_dir/$tarball" "$scratch/$orig"
|
||||
(
|
||||
cd "$scratch"
|
||||
tar -xf "$orig"
|
||||
[[ -d httrack-$ver ]] || die "orig tarball does not unpack to httrack-$ver/"
|
||||
cp -a "$export_dir/debian" "httrack-$ver/debian"
|
||||
)
|
||||
|
||||
# Build (debuild also runs lintian and signs). --fail-on aborts on a lintian
|
||||
# error or warning, so neither a release nor CI produces an unclean package.
|
||||
local -a debuild_opts=(--lintian-opts -I -i "--fail-on=error,warning")
|
||||
# Build and sign. debuild runs lintian too but does NOT propagate its exit
|
||||
# status, so a broken package would pass unnoticed; disable it here and run
|
||||
# lintian ourselves below as the real gate.
|
||||
local -a debuild_opts=(--no-lintian)
|
||||
local -a build_opts=()
|
||||
[[ $source_only -eq 1 ]] && build_opts+=(-S)
|
||||
if [[ $unsigned -eq 1 ]]; then
|
||||
@@ -167,7 +220,8 @@ main() {
|
||||
info "building packages with debuild"
|
||||
(
|
||||
cd "$scratch/httrack-$ver"
|
||||
debuild "${build_opts[@]}" "${debuild_opts[@]}"
|
||||
# debuild options (--no-lintian) must precede the dpkg-buildpackage ones
|
||||
debuild "${debuild_opts[@]}" "${build_opts[@]}"
|
||||
)
|
||||
|
||||
# Collect every file the .changes references (orig, dsc, debs, ddebs, buildinfo).
|
||||
@@ -177,11 +231,49 @@ main() {
|
||||
changes=("$scratch"/*.changes)
|
||||
shopt -u nullglob
|
||||
[[ ${#changes[@]} -ge 1 ]] || die "debuild produced no .changes file"
|
||||
|
||||
# The real lintian gate (debuild only reports, it does not fail on tags).
|
||||
# --profile debian: CI runners are Ubuntu, whose vendor data would wrongly
|
||||
# reject the Debian "unstable" distribution. newer-standards-version only
|
||||
# means the local lintian is older than the buildds', not a package
|
||||
# defect, so suppress it. set -e turns any error/warning tag into a failure.
|
||||
info "running lintian gate (--fail-on=error,warning)"
|
||||
lintian --profile debian -I -i --fail-on=error,warning \
|
||||
--suppress-tags newer-standards-version "${changes[@]}"
|
||||
|
||||
dcmd cp -- "${changes[@]}" "$outdir/"
|
||||
|
||||
# Clean-room build gate: rebuild the source package in a minimal chroot that
|
||||
# holds only the declared Build-Depends, the same way the buildds will. An
|
||||
# undeclared dependency or any FTBFS aborts the release here instead of
|
||||
# surfacing after a source-only upload. Logs and clean-built debs land in
|
||||
# $outdir/sbuild for inspection.
|
||||
if [[ $sbuild -eq 1 ]]; then
|
||||
local -a dscs
|
||||
shopt -s nullglob
|
||||
dscs=("$scratch"/*.dsc)
|
||||
shopt -u nullglob
|
||||
[[ ${#dscs[@]} -ge 1 ]] || die "no .dsc to sbuild"
|
||||
|
||||
local dist
|
||||
dist=$(cd "$scratch/httrack-$ver" && dpkg-parsechangelog -S Distribution)
|
||||
[[ $dist == UNRELEASED ]] && dist=unstable
|
||||
|
||||
info "clean-room build with sbuild (dist $dist)"
|
||||
local sbdir=$outdir/sbuild
|
||||
rm -rf -- "$sbdir"
|
||||
mkdir -p "$sbdir"
|
||||
(cd "$sbdir" && sbuild --dist="$dist" -- "${dscs[0]}")
|
||||
info "sbuild clean-room build passed; logs in $sbdir"
|
||||
fi
|
||||
|
||||
# Release artifacts for the upstream tarball (detached sig + checksums).
|
||||
# A Debian revision >= 2 .changes omits the orig (it is already in the
|
||||
# archive), so dcmd above won't have copied it; place it from the build tree
|
||||
# so the website artifacts are produced regardless of the revision.
|
||||
if [[ $release_artifacts -eq 1 && $unsigned -eq 0 ]]; then
|
||||
info "signing upstream tarball"
|
||||
cp -- "$scratch/$orig" "$outdir/$orig"
|
||||
(
|
||||
cd "$outdir"
|
||||
gpg --armor --detach-sign --yes -u "$key" -- "$orig"
|
||||
|
||||
Reference in New Issue
Block a user