mirror of
https://github.com/xroche/httrack.git
synced 2026-06-21 09:38:24 +03:00
Compare commits
12 Commits
3.49.8-2
...
fix/empty-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
02180549f6 | ||
|
|
1611dbcabf | ||
|
|
099501ee50 | ||
|
|
1b9eefa3b4 | ||
|
|
9c8d3a41eb | ||
|
|
ae77cd9d6d | ||
|
|
51b8dcd81c | ||
|
|
bcce664143 | ||
|
|
7a24add87c | ||
|
|
2308e7bafd | ||
|
|
ef5691fc47 | ||
|
|
0a6eb73903 |
5
.flake8
Normal file
5
.flake8
Normal file
@@ -0,0 +1,5 @@
|
||||
[flake8]
|
||||
# Match black's formatting so the two tools don't fight.
|
||||
max-line-length = 88
|
||||
# E203/W503 conflict with black's slice and line-break style.
|
||||
extend-ignore = E203, W503
|
||||
3
.github/workflows/ci.yml
vendored
3
.github/workflows/ci.yml
vendored
@@ -227,7 +227,8 @@ jobs:
|
||||
# Validate the Debian packaging via the same script maintainers release with.
|
||||
# One amd64/gcc run is enough: packaging (control/rules/manifest/lintian/quilt
|
||||
# source build) is arch- and compiler-independent, and the build matrix above
|
||||
# already covers compile portability. lintian runs with --fail-on=error.
|
||||
# already covers compile portability. mkdeb.sh runs lintian as an explicit gate
|
||||
# (debuild does not propagate lintian's exit) with --fail-on=error,warning.
|
||||
deb:
|
||||
name: deb package (lintian)
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
@@ -29,9 +29,9 @@ AC_CONFIG_SRCDIR(src/httrack.c)
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
AC_CONFIG_HEADERS(config.h)
|
||||
AM_INIT_AUTOMAKE([subdir-objects])
|
||||
# 3:0:0: htsblk layout changed (contenttype/charset/contentencoding widened to
|
||||
# 128), an incompatible ABI break, so bump current and reset revision/age.
|
||||
VERSION_INFO="3:0:0"
|
||||
# 4:0:0: htsblk gained the contenttype_given field, an incompatible ABI break,
|
||||
# so bump current and reset revision/age.
|
||||
VERSION_INFO="4:0:0"
|
||||
AM_MAINTAINER_MODE
|
||||
AC_USE_SYSTEM_EXTENSIONS
|
||||
|
||||
|
||||
10
debian/changelog
vendored
10
debian/changelog
vendored
@@ -1,3 +1,13 @@
|
||||
httrack (3.49.8-3) unstable; urgency=medium
|
||||
|
||||
* Rename libhttrack3 to libhttrack4 to follow the SONAME bump to
|
||||
libhttrack.so.4: htsblk gained a contenttype_given field, an
|
||||
incompatible ABI change (VERSION_INFO 3 -> 4). The .files wildcard
|
||||
now tracks .so.4* so the runtime libraries land in the right
|
||||
package. New binary package, via NEW.
|
||||
|
||||
-- Xavier Roche <xavier@debian.org> Sat, 20 Jun 2026 19:46:16 +0200
|
||||
|
||||
httrack (3.49.8-2) unstable; urgency=medium
|
||||
|
||||
* Rename libhttrack2 to libhttrack3 to follow the SONAME, which the 3.49.8
|
||||
|
||||
6
debian/control
vendored
6
debian/control
vendored
@@ -58,13 +58,13 @@ Description: webhttrack common files
|
||||
This package is the common files of webhttrack, website copier and
|
||||
mirroring utility
|
||||
|
||||
Package: libhttrack3
|
||||
Package: libhttrack4
|
||||
Architecture: any
|
||||
Multi-Arch: same
|
||||
Section: libs
|
||||
Depends: ${misc:Depends}, ${shlibs:Depends}
|
||||
Replaces: libhttrack2, httrack (<< 3.49.8-2~)
|
||||
Breaks: libhttrack2, httrack (<< 3.49.8-2~)
|
||||
Replaces: libhttrack3, httrack (<< 3.49.8-3~)
|
||||
Breaks: libhttrack3, httrack (<< 3.49.8-3~)
|
||||
Description: Httrack website copier library
|
||||
This package is the library part of httrack, website copier and mirroring
|
||||
utility
|
||||
|
||||
3
debian/httrack-doc.lintian-overrides
vendored
3
debian/httrack-doc.lintian-overrides
vendored
@@ -4,3 +4,6 @@
|
||||
# so the path lives in the display pointer, not the override -- match with '*'.
|
||||
httrack-doc: extra-license-file *
|
||||
httrack-doc: package-contains-documentation-outside-usr-share-doc *
|
||||
# search.sh is a sample CGI shipped alongside the HTML manual, not meant to be
|
||||
# run from the package tree; it stays non-executable by design.
|
||||
httrack-doc: script-not-executable *
|
||||
|
||||
3
debian/libhttrack3.files
vendored
3
debian/libhttrack3.files
vendored
@@ -1,3 +0,0 @@
|
||||
usr/lib/*/libhttrack.so.3*
|
||||
usr/lib/*/libhtsjava.so.3*
|
||||
usr/share/httrack/templates
|
||||
3
debian/libhttrack4.files
vendored
Normal file
3
debian/libhttrack4.files
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
usr/lib/*/libhttrack.so.4*
|
||||
usr/lib/*/libhtsjava.so.4*
|
||||
usr/share/httrack/templates
|
||||
@@ -1,3 +1,3 @@
|
||||
# The shared libraries ship without a versioned symbols control file (ABI is
|
||||
# tracked via the SONAME plus a >= upstream-version dependency, see debian/rules).
|
||||
libhttrack3: no-symbols-control-file usr/lib/*
|
||||
libhttrack4: no-symbols-control-file usr/lib/*
|
||||
2
debian/rules
vendored
2
debian/rules
vendored
@@ -135,7 +135,7 @@ binary-arch: build install
|
||||
dh_makeshlibs -a -X/usr/lib/$(DEB_HOST_MULTIARCH)/httrack/libtest --version-info
|
||||
dh_installdeb -a
|
||||
# we depend on the current version (ABI may change)
|
||||
dh_shlibdeps -a -ldebian/libhttrack3/usr/lib/$(DEB_HOST_MULTIARCH)
|
||||
dh_shlibdeps -a -ldebian/libhttrack4/usr/lib/$(DEB_HOST_MULTIARCH)
|
||||
dh_gencontrol -a
|
||||
dh_md5sums -a
|
||||
dh_builddeb -a
|
||||
|
||||
18
src/htslib.c
18
src/htslib.c
@@ -1396,6 +1396,8 @@ int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
|
||||
void treatfirstline(htsblk * retour, const char *rcvd) {
|
||||
const char *a = rcvd;
|
||||
|
||||
retour->contenttype_given = HTS_FALSE; /* set when a Content-Type is seen */
|
||||
|
||||
// exemple:
|
||||
// HTTP/1.0 200 OK
|
||||
if (*a) {
|
||||
@@ -1589,11 +1591,17 @@ void treathead(t_cookie * cookie, const char *adr, const char *fil, htsblk * ret
|
||||
}
|
||||
}
|
||||
}
|
||||
sscanf(rcvd + p, "%s", tempo);
|
||||
if (strlen(tempo) < sizeof(retour->contenttype) - 2) // pas trop long!!
|
||||
strcpybuff(retour->contenttype, tempo);
|
||||
else
|
||||
strcpybuff(retour->contenttype, "application/octet-stream-unknown"); // erreur
|
||||
// An empty/whitespace Content-Type value yields no token; keep the
|
||||
// default type and the "not given" flag instead of reading uninit tempo.
|
||||
if (sscanf(rcvd + p, "%s", tempo) == 1) {
|
||||
if (strlen(tempo) < sizeof(retour->contenttype) - 2) // pas trop long!!
|
||||
strcpybuff(retour->contenttype, tempo);
|
||||
else
|
||||
strcpybuff(retour->contenttype,
|
||||
"application/octet-stream-unknown"); // erreur
|
||||
retour->contenttype_given =
|
||||
HTS_TRUE; /* server declared a usable type */
|
||||
}
|
||||
}
|
||||
} else if ((p = strfield(rcvd, "Content-Range:")) != 0) {
|
||||
// Content-Range: bytes 0-70870/70871
|
||||
|
||||
@@ -138,6 +138,34 @@ static void cleanEndingSpaceOrDot(char *s) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Should the wire Content-Type override the URL's own extension when naming the
|
||||
saved file? True when the type is patchable (may_unknown2) and either the URL
|
||||
extension implies no specific type or the server declared a disagreeing one.
|
||||
A URL extension mapping to a specific non-HTML type is kept only when the
|
||||
server sent NO Content-Type (the #267 mangle guard): a typeless .png stays
|
||||
.png, but a .pdf explicitly served as text/html is named .html. */
|
||||
static int wire_patches_ext(httrackp *opt, const char *wiremime,
|
||||
const char *file, int contenttype_given) {
|
||||
char urlmime[256];
|
||||
|
||||
if (may_unknown2(opt, wiremime, file))
|
||||
return 0; /* type kept verbatim (keep-list / bogus-multiple) */
|
||||
urlmime[0] = '\0';
|
||||
/* type implied by the URL extension, only when confidently known (flag 0) */
|
||||
if (!get_httptype_sized(opt, urlmime, sizeof(urlmime), file, 0))
|
||||
return 1; /* URL ext implies no known type: trust the wire type */
|
||||
if (strfield2(wiremime, urlmime))
|
||||
return 0; /* wire agrees with the ext: keep it (no .htm->.html churn) */
|
||||
/* wire disagrees with a specific non-HTML URL ext. Keep the ext only when
|
||||
the server sent NO Content-Type: a missing type is defaulted to text/html
|
||||
upstream and must not clobber e.g. a .png. An explicitly declared type is
|
||||
trusted, so a binary-looking URL that really serves HTML (login/error
|
||||
interstitial, soft-404) is named .html instead of kept as .pdf/.jpg. */
|
||||
if (!is_hypertext_mime(opt, urlmime, file) && !contenttype_given)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// forme le nom du fichier à sauver (save) à partir de fil et adr
|
||||
// système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html)
|
||||
int url_savename(lien_adrfilsave *const afs,
|
||||
@@ -325,7 +353,10 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
}
|
||||
|
||||
/* replace shtml to html.. */
|
||||
if (opt->savename_delayed == HTS_SAVENAME_DELAYED_HARD)
|
||||
/* HARD delays every type, except one the user pinned with --assume: honor it
|
||||
immediately (ishtml() consults the user type), no delayed name (#56) */
|
||||
if (opt->savename_delayed == HTS_SAVENAME_DELAYED_HARD &&
|
||||
!is_userknowntype(opt, fil))
|
||||
is_html = -1; /* ALWAYS delay type */
|
||||
else
|
||||
is_html = ishtml(opt, fil);
|
||||
@@ -380,7 +411,8 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (strnotempty(r.cdispo)) { /* filename given */
|
||||
ext_chg = 2; /* change filename */
|
||||
strcpybuff(ext, r.cdispo);
|
||||
} else if (!may_unknown2(opt, r.contenttype, fil)) { // on peut patcher à priori?
|
||||
} else if (wire_patches_ext(opt, r.contenttype, fil,
|
||||
r.contenttype_given)) {
|
||||
if (give_mimext(s, sizeof(s),
|
||||
r.contenttype)) { // recognized extension
|
||||
ext_chg = 1;
|
||||
@@ -425,7 +457,9 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (strnotempty(headers->r.cdispo)) { /* filename given */
|
||||
ext_chg = 2; /* change filename */
|
||||
strcpybuff(ext, headers->r.cdispo);
|
||||
} else if (!may_unknown2(opt, headers->r.contenttype, headers->url_fil)) { // on peut patcher à priori? (pas interdit ou pas de type)
|
||||
} else if (wire_patches_ext(opt, headers->r.contenttype,
|
||||
headers->url_fil,
|
||||
headers->r.contenttype_given)) {
|
||||
char s[16];
|
||||
if (give_mimext(
|
||||
s, sizeof(s),
|
||||
@@ -653,7 +687,9 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (strnotempty(back[b].r.cdispo)) { /* filename given */
|
||||
ext_chg = 2; /* change filename */
|
||||
strcpybuff(ext, back[b].r.cdispo);
|
||||
} else if (!may_unknown2(opt, back[b].r.contenttype, back[b].url_fil)) { // on peut patcher à priori? (pas interdit ou pas de type)
|
||||
} else if (wire_patches_ext(opt, back[b].r.contenttype,
|
||||
back[b].url_fil,
|
||||
back[b].r.contenttype_given)) {
|
||||
if (give_mimext(
|
||||
s, sizeof(s),
|
||||
back[b].r.contenttype)) { // recognized extension
|
||||
|
||||
@@ -651,6 +651,8 @@ struct htsblk {
|
||||
int debugid; /**< connection debug id */
|
||||
/* */
|
||||
htsrequest req; /**< parameters used for the request */
|
||||
/* a Content-Type header was received (else contenttype holds a default) */
|
||||
hts_boolean contenttype_given;
|
||||
/*char digest[32+2]; // md5 digest generated by the engine ("" if none) */
|
||||
};
|
||||
|
||||
|
||||
15
tests/13_local-cookies.test
Executable file
15
tests/13_local-cookies.test
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
#
# Cookie chain against the local test server (replaces the old online
# ut/cookies/*.php fixtures). entrance.php sets cat/cake; second.php checks
# them and sets badger; third.php checks all three. A missing or wrong cookie
# returns 500, which would surface as an httrack error and a missing file, so a
# clean 3-files/0-errors run proves the cookie jar is replayed across links.

# Default to the parent directory when not driven by the automake harness.
: "${top_srcdir:=..}"

bash "$top_srcdir/tests/local-crawl.sh" --errors 0 --files 3 \
    --found 'cookies/entrance.html' \
    --found 'cookies/second.html' \
    --found 'cookies/third.html' \
    httrack 'BASEURL/cookies/entrance.php'
|
||||
18
tests/14_local-https.test
Executable file
18
tests/14_local-https.test
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/bin/bash
#
# HTTPS crawl against the local test server, using the shipped self-signed
# cert. httrack does not verify certs (htslib.c: SSL_CTX_new with no
# SSL_CTX_set_verify), so the self-signed cert is accepted as-is and this
# exercises the real TLS path offline. basic.html links to link.html with four
# distinct query strings, each saved under a hashed name -> 5 files.

# Default to the parent directory when not driven by the automake harness.
: "${top_srcdir:=..}"

# Exit status 77 is the automake convention for a skipped test.
if test "${HTTPS_SUPPORT:-}" = "no"; then
    echo "no https support compiled, skipping"
    exit 77
fi

bash "$top_srcdir/tests/local-crawl.sh" --tls --errors 0 --files 5 \
    --found 'simple/basic.html' \
    httrack 'BASEURL/simple/basic.html'
|
||||
25
tests/15_local-types.test
Normal file
25
tests/15_local-types.test
Normal file
@@ -0,0 +1,25 @@
|
||||
#!/bin/bash
#
# Content-Type vs URL-extension naming (issue #267 family) under the default
# delayed type check (-%N2). Policy: a MISSING Content-Type must not clobber a
# URL extension that maps to a specific non-HTML type (.png/.pdf stay as-is);
# an explicitly DECLARED type is trusted, so a binary-looking URL that really
# serves HTML (text/html on .pdf/.jpg) is named .html. The "wrong" names are
# asserted absent so a regression in either direction fails here.

# Default to the parent directory when not driven by the automake harness.
: "${top_srcdir:=..}"

bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
    --found 'types/notype.png' \
    --found 'types/notype.pdf' --not-found 'types/notype.html' \
    --found 'types/photo.png' \
    --found 'types/doc.pdf' \
    --found 'types/lie.html' --not-found 'types/lie.png' \
    --found 'types/report.html' --not-found 'types/report.pdf' \
    --found 'types/page.htm' --not-found 'types/page.html' \
    --found 'types/script.js' \
    --found 'types/style.css' \
    --found 'types/data.json' \
    --found 'types/control.html' --not-found 'types/control.php' \
    --found 'types/gend61c.png' --not-found 'types/gend61c.html' \
    httrack 'BASEURL/types/index.html'
|
||||
11
tests/16_local-assume.test
Normal file
11
tests/16_local-assume.test
Normal file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
#
# --assume under the default delayed type check (-%N2), issue #56. A user type
# pinned with --assume must be honored immediately, not lost to the delayed
# name: photo.png served as image/png but assumed text/html is saved as .html.

# Default to the parent directory when not driven by the automake harness.
: "${top_srcdir:=..}"

bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
    --found 'types/photo.html' --not-found 'types/photo.png' \
    httrack 'BASEURL/types/photo.png' --assume png=text/html
|
||||
12
tests/17_local-empty-ct.test
Normal file
12
tests/17_local-empty-ct.test
Normal file
@@ -0,0 +1,12 @@
|
||||
#!/bin/bash
#
# An empty "Content-Type:" header value must be treated as "no usable type"
# (keep the URL extension), not parsed from an uninitialized buffer. The crawl
# also runs under ASan/UBSan in CI, which catches the uninitialized read this
# guards against.

# Default to the parent directory when not driven by the automake harness.
: "${top_srcdir:=..}"

bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
    --found 'types/emptyct.png' --not-found 'types/emptyct.html' \
    httrack 'BASEURL/types/index.html'
|
||||
@@ -3,6 +3,8 @@
|
||||
# silently drop it from the dist tarball and break "make distcheck".
|
||||
EXTRA_DIST = $(TESTS) crawl-test.sh run-all-tests.sh check-network.sh \
|
||||
proxy-https-server.py \
|
||||
local-crawl.sh local-server.py server.crt server.key \
|
||||
server-root/simple/basic.html server-root/simple/link.html \
|
||||
fixtures/cache-golden/hts-cache/new.zip
|
||||
|
||||
TESTS_ENVIRONMENT =
|
||||
@@ -47,6 +49,11 @@ TESTS = \
|
||||
11_crawl-longurl.test \
|
||||
11_crawl-parsing.test \
|
||||
12_crawl_https.test \
|
||||
13_crawl_proxy_https.test
|
||||
13_crawl_proxy_https.test \
|
||||
13_local-cookies.test \
|
||||
14_local-https.test \
|
||||
15_local-types.test \
|
||||
16_local-assume.test \
|
||||
17_local-empty-ct.test
|
||||
|
||||
CLEANFILES = check-network_sh.cache
|
||||
|
||||
235
tests/local-crawl.sh
Executable file
235
tests/local-crawl.sh
Executable file
@@ -0,0 +1,235 @@
|
||||
#!/bin/bash
#
# Launcher for httrack crawl tests against the local Python test server.
#
# Starts tests/local-server.py on an ephemeral port, discovers the port from
# the server's stdout, then runs httrack against http(s)://127.0.0.1:$PORT and
# audits the mirror. The server is always killed and the tmpdir removed on exit
# (unless --no-purge is given).
#
# The token BASEURL in any httrack argument is replaced with the discovered
# http(s)://127.0.0.1:$PORT base. --found/--not-found/--directory paths are
# relative to the discovered host root (127.0.0.1_<port>/), since the random
# port leaks into the mirror directory name.
#
# Usage:
#   bash local-crawl.sh [--debug] [--no-purge] [--tls] [--root DIR] \
#       --errors N --files N --found PATH ... --not-found PATH ... \
#       --directory PATH ... \
#       httrack BASEURL/some/path [httrack-args...]

# Treat any use of an unset variable as an error.
set -u

# Fixture locations, resolved relative to this script.
testdir=$(cd "$(dirname "$0")" && pwd)
server="${testdir}/local-server.py"
root="${LOCAL_SERVER_ROOT:-${testdir}/server-root}"
cert="${testdir}/server.crt"
key="${testdir}/server.key"

# Run state; empty means "not set / not started yet" (see cleanup).
tls=
verbose=
tmpdir=
serverpid=
crawlpid=
|
||||
|
||||
# Print the arguments on stderr, prefixed with "** ". Always returns 0.
function warning {
    echo "** $*" >&2
    return 0
}
|
||||
# Report the arguments through warning, then terminate the script with status 1.
function die {
    warning "$*"
    exit 1
}
|
||||
# Print the arguments on stderr only when $verbose is non-empty (--debug).
# Always returns 0 so it is safe as the last command of a list.
function debug {
    test -n "$verbose" && echo "$*" >&2
    return 0
}
|
||||
# Start a progress line on stderr: "[<message>] ..<TAB>", with no newline so
# that a following result call completes the line.
function info {
    printf "[%s] ..\t" "$*" >&2
}
|
||||
# Print the arguments followed by a newline on stderr (outcome of an info step).
function result {
    echo "$*" >&2
}
|
||||
|
||||
# Tear down everything the run created: kill a still-running crawl, stop and
# reap the server, and remove the tmpdir unless --no-purge was requested.
# Each pid variable is cleared once handled, so calling this twice is harmless.
function cleanup {
    if test -n "$crawlpid"; then
        kill -9 "$crawlpid" 2>/dev/null
        crawlpid=
    fi
    if test -n "$serverpid"; then
        kill "$serverpid" 2>/dev/null
        # Reap it so the port is released before we rm the tmpdir/log.
        wait "$serverpid" 2>/dev/null
        serverpid=
    fi
    if test -n "$tmpdir" && test -d "$tmpdir"; then
        # ${nopurge:-} keeps this safe under "set -u" even if nopurge was
        # never assigned.
        test -n "${nopurge:-}" || rm -rf "$tmpdir"
    fi
}
|
||||
|
||||
# assert_equals LABEL EXPECTED ACTUAL
# Announce LABEL, then compare the two strings. On mismatch, report both
# values and exit the script with status 1; otherwise print "OK (EXPECTED)".
function assert_equals {
    info "$1"
    if test "$2" != "$3"; then
        result "expected '$2', got '$3'"
        exit 1
    fi
    result "OK ($2)"
}
|
||||
|
||||
nopurge=

# cleanup runs exactly once, from the EXIT trap. Signals are turned into a
# plain exit so the script stops instead of continuing against a tmpdir and
# server that cleanup has already removed.
trap cleanup EXIT
trap 'exit 1' HUP INT QUIT PIPE TERM

# python3 is required; mirror check-network.sh's skip-with-77 convention.
if ! command -v python3 >/dev/null; then
    echo "python3 not found; skipping local crawl tests"
    exit 77
fi

# Private scratch directory holding the server log, httrack log and mirror.
tmptopdir=${TMPDIR:-/tmp}
test -d "$tmptopdir" || mkdir -p "$tmptopdir" || die "no temporary directory; set TMPDIR"
tmpdir=$(mktemp -d "${tmptopdir}/httrack_local.XXXXXX") || die "could not create tmpdir"
|
||||
|
||||
# --- parse leading control flags --------------------------------------------
# Everything up to the literal word "httrack" configures this launcher; audit
# options are queued (in order) in the audit array and replayed after the
# crawl. On leaving the loop, pos indexes the first httrack argument.
declare -a audit=()
scheme=http
pos=0
args=("$@")
nargs=$#
while test "$pos" -lt "$nargs"; do
    case "${args[$pos]}" in
    --debug) verbose=1 ;;
    --no-purge)
        nopurge=1
        audit+=("--no-purge")
        ;;
    --tls)
        tls=1
        scheme=https
        ;;
    --root)
        pos=$((pos + 1))
        test "$pos" -lt "$nargs" || die "--root requires a directory"
        root="${args[$pos]}"
        ;;
    --errors | --files | --found | --not-found | --directory)
        # Option plus its single value are stored as two consecutive entries.
        test "$((pos + 1))" -lt "$nargs" || die "${args[$pos]} requires a value"
        audit+=("${args[$pos]}" "${args[$((pos + 1))]}")
        pos=$((pos + 1))
        ;;
    httrack)
        pos=$((pos + 1))
        break
        ;;
    *) die "unrecognized option ${args[$pos]}" ;;
    esac
    pos=$((pos + 1))
done
|
||||
|
||||
# --- start the server --------------------------------------------------------
test -r "$server" || die "cannot read $server"
serverlog="${tmpdir}/server.log"
serverargs=(--root "$root")
if test -n "$tls"; then
    serverargs+=(--tls --cert "$cert" --key "$key")
fi
debug "starting python3 $server ${serverargs[*]}"
# stdout and stderr both go to the log; the port is read back from it below.
python3 "$server" "${serverargs[@]}" >"$serverlog" 2>&1 &
serverpid=$!

# Wait for the "PORT <n>" line (server prints it once bound): up to 50 polls
# of 0.1s, bailing out at once if the server process has already died.
port=
for _ in $(seq 1 50); do
    if test -s "$serverlog"; then
        line=$(head -n1 "$serverlog")
        if test "${line%% *}" == "PORT"; then
            port="${line#PORT }"
            break
        fi
    fi
    kill -0 "$serverpid" 2>/dev/null || die "server exited early: $(cat "$serverlog")"
    sleep 0.1
done
test -n "$port" || die "could not discover server port: $(cat "$serverlog")"
debug "server listening on ${scheme}://127.0.0.1:${port}"

baseurl="${scheme}://127.0.0.1:${port}"
|
||||
|
||||
# --- substitute BASEURL in the remaining (httrack) args ----------------------
# pos was left on the first argument after the "httrack" keyword; every
# occurrence of the token BASEURL in each argument becomes the real base URL.
declare -a hts=()
while test "$pos" -lt "$nargs"; do
    hts+=("${args[$pos]//BASEURL/$baseurl}")
    pos=$((pos + 1))
done
|
||||
|
||||
# --- run httrack -------------------------------------------------------------
command -v httrack >/dev/null || die "could not find httrack"
ver=$(httrack -O /dev/null --version | sed -e 's/HTTrack version //')
test -n "$ver" || die "could not run httrack"

out="${tmpdir}/crawl"
mkdir "$out" || die "could not create $out"
# Localhost is fast; disable the rate/bandwidth safety limits but keep a
# max-time backstop so a hang cannot wedge the suite.
declare -a moreargs=(--quiet --max-time=120 --timeout=30 --disable-security-limits --robots=0)
log="${tmpdir}/log"
info "running httrack ${hts[*]}"
# Run in the background and wait, so cleanup can kill the crawl via crawlpid.
httrack -O "$out" --user-agent="httrack $ver local ($(uname -omrs))" "${moreargs[@]}" "${hts[@]}" >"$log" 2>&1 &
crawlpid=$!
wait "$crawlpid"
crawlres=$?
crawlpid=
# httrack exits 0 even on hard connect/DNS errors, so this is a backstop only;
# the real guard is the audit below (--errors 0 plus the host-root existence check).
if test "$crawlres" -ne 0; then
    result "httrack exited $crawlres"
    cat "$log" >&2
    exit 1
fi
result "OK"
# Echo any error lines from httrack's log to ease diagnosis.
grep -iE "^[0-9:]*[[:space:]]Error:" "${out}/hts-log.txt" >&2
|
||||
|
||||
# --- discover the single host root (127.0.0.1_<port> or 127.0.0.1) -----------
# The first candidate directory that exists wins; having none means nothing
# was mirrored for the host, which is a failure.
hostroot=
for cand in "${out}/127.0.0.1_${port}" "${out}/127.0.0.1"; do
    if test -d "$cand"; then
        hostroot="$cand"
        break
    fi
done
test -n "$hostroot" || die "could not find host root under $out"
debug "host root: $hostroot"
|
||||
|
||||
# --- audit -------------------------------------------------------------------
# Replay the queued checks in command-line order. Each option consumes the
# entry that follows it as its value; the first failing check exits 1.
# Entries with no arm here (e.g. --no-purge) are skipped.
i=0
while test "$i" -lt "${#audit[@]}"; do
    case "${audit[$i]}" in
    --errors)
        # Expected number of "Error:" lines in httrack's log.
        i=$((i + 1))
        assert_equals "checking errors" "${audit[$i]}" \
            "$(grep -iEc "^[0-9:]*[[:space:]]Error:" "${out}/hts-log.txt")"
        ;;
    --files)
        # Expected "N files written" figure from the completion line.
        i=$((i + 1))
        nFiles=$(grep -E "^HTTrack Website Copier/[^ ]* mirror complete in " "${out}/hts-log.txt" |
            sed -e 's/.*[[:space:]]\([^ ]*\)[[:space:]]files written.*/\1/g')
        assert_equals "checking files" "${audit[$i]}" "$nFiles"
        ;;
    --found)
        i=$((i + 1))
        info "checking for ${audit[$i]}"
        if test -f "${hostroot}/${audit[$i]}"; then
            result "OK"
        else
            result "not found"
            exit 1
        fi
        ;;
    --not-found)
        i=$((i + 1))
        info "checking absence of ${audit[$i]}"
        if test ! -f "${hostroot}/${audit[$i]}"; then
            result "OK"
        else
            result "present"
            exit 1
        fi
        ;;
    --directory)
        i=$((i + 1))
        info "checking for dir ${audit[$i]}"
        if test -d "${hostroot}/${audit[$i]}"; then
            result "OK"
        else
            result "not found"
            exit 1
        fi
        ;;
    esac
    i=$((i + 1))
done
|
||||
254
tests/local-server.py
Executable file
254
tests/local-server.py
Executable file
@@ -0,0 +1,254 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Self-contained local web server for httrack's crawl tests.
|
||||
|
||||
Serves static fixtures from a docroot plus a handful of dynamic endpoints
|
||||
(cookies, ...) so httrack can be exercised over loopback, deterministically and
|
||||
offline, instead of crawling the live ut.httrack.com.
|
||||
|
||||
Binds to an ephemeral port (port 0) and prints the chosen port to stdout as
|
||||
"PORT <n>\n" so a launcher can discover it. Pass --tls to wrap the socket with
|
||||
the shipped self-signed test cert; httrack does not verify certs, so no CA
|
||||
trust plumbing is needed.
|
||||
|
||||
stdlib only (http.server + ssl) -- no new build or runtime dependency.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer
|
||||
from urllib.parse import quote, unquote, urlsplit
|
||||
|
||||
# Cookie chain replicated from the old ut/cookies/*.php fixtures.
|
||||
COOKIE_PATH = "/cookies/"
|
||||
COOKIES = {
|
||||
"cat": "dog",
|
||||
"cake": "is a lie!",
|
||||
"badger": "mushroom, with 'ants'",
|
||||
}
|
||||
|
||||
PAGE = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
\t"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
|
||||
<head>
|
||||
\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
\t<title>Sample test</title>
|
||||
</head>
|
||||
<body>
|
||||
{body}
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
class Handler(SimpleHTTPRequestHandler):
|
||||
# Quieter logging; the launcher captures httrack's own log anyway.
|
||||
def log_message(self, fmt, *args):
|
||||
if os.environ.get("LOCAL_SERVER_VERBOSE"):
|
||||
super().log_message(fmt, *args)
|
||||
|
||||
# --- helpers -----------------------------------------------------------
|
||||
|
||||
def request_cookies(self):
|
||||
"""Parse the Cookie header into {name: decoded-value}.
|
||||
|
||||
Mirrors PHP's $_COOKIE: values are url-decoded, matching the encoding
|
||||
applied when the cookie was set (see set_cookie)."""
|
||||
jar = {}
|
||||
raw = self.headers.get("Cookie", "")
|
||||
for pair in raw.split(";"):
|
||||
pair = pair.strip()
|
||||
if "=" in pair:
|
||||
name, value = pair.split("=", 1)
|
||||
jar[name.strip()] = unquote(value.strip())
|
||||
return jar
|
||||
|
||||
def set_cookie(self, name, value):
|
||||
"""Queue a Set-Cookie header, url-encoding the value like PHP's
|
||||
setcookie() so spaces/quotes/commas stay a single token that httrack
|
||||
can store and replay verbatim."""
|
||||
self._set_cookies.append(f"{name}={quote(value)}; Path={COOKIE_PATH}")
|
||||
|
||||
def send_html(self, body, status=200, extra_status=None):
|
||||
encoded = PAGE.format(body=body).encode("utf-8")
|
||||
self.send_response(status, extra_status)
|
||||
self.send_header("Content-Type", "text/html; charset=utf-8")
|
||||
self.send_header("Content-Length", str(len(encoded)))
|
||||
for cookie in self._set_cookies:
|
||||
self.send_header("Set-Cookie", cookie)
|
||||
self.end_headers()
|
||||
if self.command != "HEAD":
|
||||
self.wfile.write(encoded)
|
||||
|
||||
def fail_cookie(self, what):
|
||||
# The old PHPs answered 500 with the reason in the status line.
|
||||
self.send_html("", status=500, extra_status=f"The {what} is missing or invalid")
|
||||
|
||||
# --- dynamic routes ----------------------------------------------------
|
||||
|
||||
def route_entrance(self):
|
||||
self.set_cookie("cat", COOKIES["cat"])
|
||||
self.set_cookie("cake", COOKIES["cake"])
|
||||
self.send_html('\tThis is a <a href="second.php">link</a>')
|
||||
|
||||
def route_second(self):
|
||||
jar = self.request_cookies()
|
||||
if jar.get("cat") != COOKIES["cat"]:
|
||||
return self.fail_cookie("cat")
|
||||
if jar.get("cake") != COOKIES["cake"]:
|
||||
return self.fail_cookie("cake")
|
||||
self.set_cookie("badger", COOKIES["badger"])
|
||||
self.send_html('\tThis is a <a href="third.php">link</a>')
|
||||
|
||||
def route_third(self):
|
||||
jar = self.request_cookies()
|
||||
if jar.get("cat") != COOKIES["cat"]:
|
||||
return self.fail_cookie("cat")
|
||||
if jar.get("cake") != COOKIES["cake"]:
|
||||
return self.fail_cookie("cake")
|
||||
if jar.get("badger") != COOKIES["badger"]:
|
||||
return self.fail_cookie("badger")
|
||||
self.send_html("\tThis is a test.")
|
||||
|
||||
def route_robots(self):
|
||||
body = b"User-agent: *\nDisallow:\n"
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", "text/plain")
|
||||
self.send_header("Content-Length", str(len(body)))
|
||||
self.end_headers()
|
||||
if self.command != "HEAD":
|
||||
self.wfile.write(body)
|
||||
|
||||
# --- type/extension matrix (issue #267 family) -------------------------
|
||||
|
||||
def send_raw(self, body, content_type):
|
||||
"""Send a raw body with an explicit Content-Type, or none at all when
|
||||
content_type is None (to observe httrack's typeless-file naming)."""
|
||||
self.send_response(200)
|
||||
if content_type is not None:
|
||||
self.send_header("Content-Type", content_type)
|
||||
self.send_header("Content-Length", str(len(body)))
|
||||
self.end_headers()
|
||||
if self.command != "HEAD":
|
||||
self.wfile.write(body)
|
||||
|
||||
# Fake-binary blobs for the image/pdf/typeless cases.
|
||||
FAKE_PNG = b"\x89PNG\r\n\x1a\n" + b"\x00" * 64
|
||||
FAKE_PDF = b"%PDF-1.4\n" + b"\x00" * 64
|
||||
|
||||
# path -> (body, content_type); None sends no header, "" sends an empty
|
||||
# Content-Type value (no usable type, must be treated like None).
|
||||
TYPE_MATRIX = {
|
||||
"/types/control.php": (b"<html><body>control</body></html>", "text/html"),
|
||||
"/types/photo.png": (FAKE_PNG, "image/png"),
|
||||
"/types/doc.pdf": (FAKE_PDF, "application/pdf"),
|
||||
"/types/notype.png": (FAKE_PNG, None),
|
||||
"/types/notype.pdf": (FAKE_PDF, None),
|
||||
"/types/emptyct.png": (FAKE_PNG, ""),
|
||||
"/types/lie.png": (FAKE_PNG, "text/html"),
|
||||
"/types/report.pdf": (b"<html><body>real page</body></html>", "text/html"),
|
||||
"/types/page.htm": (b"<html><body>htm page</body></html>", "text/html"),
|
||||
"/types/script.js": (b"var x = 1;\n", "application/javascript"),
|
||||
"/types/style.css": (b"body { color: red; }\n", "text/css"),
|
||||
"/types/data.json": (b'{"k": "v"}\n', "application/json"),
|
||||
"/types/gen.php": (FAKE_PNG, "image/png"),
|
||||
}
|
||||
|
||||
def route_types_index(self):
|
||||
body = (
|
||||
'\t<a href="control.php">control</a>\n'
|
||||
'\t<img src="photo.png" />\n'
|
||||
'\t<a href="doc.pdf">doc</a>\n'
|
||||
'\t<img src="notype.png" />\n'
|
||||
'\t<a href="notype.pdf">notypepdf</a>\n'
|
||||
'\t<img src="emptyct.png" />\n'
|
||||
'\t<img src="lie.png" />\n'
|
||||
'\t<a href="report.pdf">report</a>\n'
|
||||
'\t<a href="page.htm">htm</a>\n'
|
||||
'\t<script src="script.js"></script>\n'
|
||||
'\t<link rel="stylesheet" href="style.css" />\n'
|
||||
'\t<a href="data.json">json</a>\n'
|
||||
'\t<img src="gen.php?id=5" />\n'
|
||||
)
|
||||
self.send_html(body)
|
||||
|
||||
def route_types(self):
    """Serve the TYPE_MATRIX entry for the request path.

    The query string is ignored: only the path component of ``self.path`` is
    used as the lookup key. The stored body and content type are passed
    straight to ``self.send_raw``.
    """
    payload, content_type = self.TYPE_MATRIX[urlsplit(self.path).path]
    self.send_raw(payload, content_type)
|
||||
|
||||
# Request path -> handler function. The scripted cookie/robots/index routes
# are listed individually; every /types/ resource shares route_types.
ROUTES = {
    "/cookies/entrance.php": route_entrance,
    "/cookies/second.php": route_second,
    "/cookies/third.php": route_third,
    "/robots.txt": route_robots,
    "/types/index.html": route_types_index,
    **dict.fromkeys(
        (
            "/types/control.php",
            "/types/photo.png",
            "/types/doc.pdf",
            "/types/notype.png",
            "/types/notype.pdf",
            "/types/emptyct.png",
            "/types/lie.png",
            "/types/report.pdf",
            "/types/page.htm",
            "/types/script.js",
            "/types/style.css",
            "/types/data.json",
            "/types/gen.php",
        ),
        route_types,
    ),
}
|
||||
|
||||
# --- dispatch ----------------------------------------------------------
|
||||
|
||||
def dispatch(self):
    """Run the scripted route for this request, if there is one.

    Resets ``self._set_cookies`` to an empty list, then looks the request
    path (query string stripped) up in ``self.ROUTES``.

    Returns True when a route handler was found and called, False otherwise.
    """
    self._set_cookies = []
    route = self.ROUTES.get(urlsplit(self.path).path)
    if route is None:
        return False
    route(self)
    return True
|
||||
|
||||
def do_GET(self):
    """Handle GET: try the scripted routes first, else defer to the parent class."""
    if self.dispatch():
        return
    super().do_GET()
|
||||
|
||||
def do_HEAD(self):
    """Handle HEAD: try the scripted routes first, else defer to the parent class."""
    if self.dispatch():
        return
    super().do_HEAD()
|
||||
|
||||
|
||||
def main():
    """Parse the command line, start the test server and serve until interrupted.

    The server binds ``--bind`` on port 0 (an ephemeral port) and prints
    ``PORT <n>`` on stdout so the launcher can discover it. With ``--tls`` the
    listening socket is wrapped in a server-side TLS context loaded from
    ``--cert`` / ``--key``.

    Exits through ``argparse`` (status 2) when ``--tls`` is given without
    ``--cert``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--root", required=True, help="docroot for static files")
    parser.add_argument("--bind", default="127.0.0.1", help="bind address")
    parser.add_argument("--tls", action="store_true", help="serve HTTPS")
    parser.add_argument("--cert", help="TLS certificate (PEM)")
    parser.add_argument("--key", help="TLS private key (PEM)")
    args = parser.parse_args()

    # Fail fast with a usage error: load_cert_chain(certfile=None) would
    # otherwise raise a bare TypeError after the socket is already bound.
    # --key stays optional because the key may live in the certificate file.
    if args.tls and not args.cert:
        parser.error("--tls requires --cert")

    root = os.path.abspath(args.root)

    def factory(*a, **kw):
        return Handler(*a, directory=root, **kw)

    httpd = ThreadingHTTPServer((args.bind, 0), factory)
    try:
        if args.tls:
            import ssl

            ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            ctx.load_cert_chain(certfile=args.cert, keyfile=args.key)
            httpd.socket = ctx.wrap_socket(httpd.socket, server_side=True)

        port = httpd.socket.getsockname()[1]
        # The launcher reads this line to discover the ephemeral port.
        print(f"PORT {port}", flush=True)

        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            pass
    finally:
        # Release the listening socket on every exit path, including a
        # failed TLS setup.
        httpd.server_close()


if __name__ == "__main__":
    main()
|
||||
18
tests/server-root/simple/basic.html
Normal file
18
tests/server-root/simple/basic.html
Normal file
@@ -0,0 +1,18 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="fr">
|
||||
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<title>Sample test</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
|
||||
This is a <a href="link.html?v=1">link</a>
|
||||
This is a <a href='link.html?v=2'>link</a>
|
||||
This is a <a href="./link.html?v=3">link</a>
|
||||
This is a <a href=link.html?v=4>link</a>
|
||||
|
||||
</body>
|
||||
3
tests/server-root/simple/link.html
Normal file
3
tests/server-root/simple/link.html
Normal file
@@ -0,0 +1,3 @@
|
||||
This is a link.
|
||||
|
||||
Go back to <a href="basic.html">home</a>.
|
||||
21
tests/server.crt
Normal file
21
tests/server.crt
Normal file
@@ -0,0 +1,21 @@
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIDbzCCAlegAwIBAgIUdWkDDomnY3WW95UqJ+UOASuR/i0wDQYJKoZIhvcNAQEL
|
||||
BQAwODESMBAGA1UEAwwJMTI3LjAuMC4xMSIwIAYDVQQKDBlIVFRyYWNrIGxvY2Fs
|
||||
IHRlc3Qgc2VydmVyMCAXDTI2MDYxNTE0NDQxMFoYDzIwNTYwNjA3MTQ0NDEwWjA4
|
||||
MRIwEAYDVQQDDAkxMjcuMC4wLjExIjAgBgNVBAoMGUhUVHJhY2sgbG9jYWwgdGVz
|
||||
dCBzZXJ2ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDx78mogNhT
|
||||
noWwRa51NeGtapQ1PfTYLlIMUzuloFXOsR1/ozRkFucqHNftF22wf0gg4VQJSBSf
|
||||
3rwj79vsnt3nyaD03bTAafpHXkd+IJxQowiG8TfOJF0R/Qg9g7DCE66R9agQpMJC
|
||||
SGxIin9p/4ld4Hn6869d4hNq4fHxNf/qkj2cnf8DYxrldz2FGsi6yMed4tzz2Am4
|
||||
ZbPgwep+fy843ZdYrVIms9vJluNa9E+6Vpw9FwdjzQ/IBBMLvGaC2pDkc95YelaE
|
||||
nQrAlTO/0l5vjc8XuTQFlo3DbUg+WEld/pxvCqsd/q1mqjL0WbxtXl2zCwGzAoJx
|
||||
rjVEPfA8QSbtAgMBAAGjbzBtMB0GA1UdDgQWBBTHE0KKW8REV4HxajzVsIBxz3iL
|
||||
9zAfBgNVHSMEGDAWgBTHE0KKW8REV4HxajzVsIBxz3iL9zAPBgNVHRMBAf8EBTAD
|
||||
AQH/MBoGA1UdEQQTMBGHBH8AAAGCCWxvY2FsaG9zdDANBgkqhkiG9w0BAQsFAAOC
|
||||
AQEAYlTEftrwGJBXuPmtxhmtw2HO/VTC4TGnq67hH5H+ptwgZJuuxCQ5KW6flTyp
|
||||
FTyMhha33WD4EBL3wqqJsWr9Y4BXqi4G0lRqXBcC1oIUa2VYIDMER7kaY1qTSqE8
|
||||
ARpwdB2BhvngAzDLc+4Jt4jQMRGr8fHAwxpDBoIZ1knbyzYNP73Bajse6/8YtxUu
|
||||
nB2BsldjZnLvyHvRxUpWp92OyQih4jYSrlN6olDFlKDg7++kMhkHtJQW9a1t54VN
|
||||
0ZXrB1ZRuHUUvGBq26x71riTWor7HNOSQaGeCMQjZNQkh5tfshNygUGSZVXTEwhG
|
||||
xSrOL7NqBt2+EkVwf7LjGzjmBw==
|
||||
-----END CERTIFICATE-----
|
||||
28
tests/server.key
Normal file
28
tests/server.key
Normal file
@@ -0,0 +1,28 @@
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDx78mogNhTnoWw
|
||||
Ra51NeGtapQ1PfTYLlIMUzuloFXOsR1/ozRkFucqHNftF22wf0gg4VQJSBSf3rwj
|
||||
79vsnt3nyaD03bTAafpHXkd+IJxQowiG8TfOJF0R/Qg9g7DCE66R9agQpMJCSGxI
|
||||
in9p/4ld4Hn6869d4hNq4fHxNf/qkj2cnf8DYxrldz2FGsi6yMed4tzz2Am4ZbPg
|
||||
wep+fy843ZdYrVIms9vJluNa9E+6Vpw9FwdjzQ/IBBMLvGaC2pDkc95YelaEnQrA
|
||||
lTO/0l5vjc8XuTQFlo3DbUg+WEld/pxvCqsd/q1mqjL0WbxtXl2zCwGzAoJxrjVE
|
||||
PfA8QSbtAgMBAAECggEACgNK4klq1T3IpKdNoBY5yoE7CbUQZBNkBpSPRxHgBezj
|
||||
SVFfgrZGnOySrIJSt4JHtuynG2Hl+0ku74HRep/ck+eOsh5W3mZvGvMLnGxhwR3u
|
||||
Or99osTIgU0VQTkpC0SLQ16FCnih0uJycNIikdLR7uuya1tt1OyIBzK7XlNGIywT
|
||||
p85zJc7/6TfTC9eM7lqh7JGR7KplBxSvgZL1pUr7y4rNpKms6uzOvPND79CcKnbU
|
||||
BBA9Tu4qdOkoOljsZKkvh3pihxyG9X6d8QTZ/uX3pkvliwSFBc+Sz9EootA3/4r5
|
||||
gVWpQ2t/AY7fY4hqzLIX/HivVaPj3cWk1G+SHm0XNQKBgQD5I9rijqFvV/p6FmUl
|
||||
FbnjJFFHHgZLivlGxAC5vOyJNQQaqdeDzg7yMotNmQTggVGjT6sjdosQb3n+ctuk
|
||||
EhQnZSU5VkNKv1+PTR35WrRkaECCaqz3Pv79pV9GVcX3it7UuYjNiOeSPqINWe+X
|
||||
49JwnJFz+qQ1BchAwOis4zkENwKBgQD4mShDaYLOO97VpgZj4cGxHHWyEK9CRQvp
|
||||
I7HxRmfaWS3JHwb88lOmALEU6pAj5cYJPAznv8BnUWcVHalZbkQ1JWYtUJRqj6OI
|
||||
Ym7rw/nm4Ay5ijbdEism173dSk3IjOe+PdAlxzsOuVzYdBTqElmeQWtBzhY9aHvX
|
||||
r+A02C2j+wKBgHHDo6Gsi57yR5gUPd9vSlCkNtEIrss0DJv5yHMIB+KnaNZcE+NF
|
||||
5qFF30Jxyz5RDtxJ9tXcvaeln8lG3XDQKI/MqfDCqTuqo5ImHrfMaW8oA70JxS2p
|
||||
gHqGVzkg1aMxsIrmpcdk6olnPExocvWivGdbtzeEjhMALu8Sp6y6nUCFAoGBAK5h
|
||||
KLgYw/OMVaQCIMthaa+l6f0s7PMMYe1453H6VBD6qz4/8HPwO7LfG1gzrUYxADgs
|
||||
ElVh0UHn/On383nS+i9Ze5Hfyyvwc+LQQURKJPrJQMPJavCptPE7NmiKnYNHK6vr
|
||||
yh0l4oxShAklbCJBGvICq4zuVfVfXDeQnDIVTfaPAoGBAMCrZqYdOUhUu+aUqxZq
|
||||
qO/TTQxrxftU63jGUg+o042TdgI4KWLn07wvHJ8/E2OqF35eXenvcuKbNLI1l72J
|
||||
4cp+3cUv8iAXThTRYEztr5CS/wta4o4CNN8zfjn5dV9AI4Hmt4V7EaGWpBcViGbj
|
||||
n0Mhag+dO8DHuenqi1yfMrAt
|
||||
-----END PRIVATE KEY-----
|
||||
119
tools/mkdeb.sh
119
tools/mkdeb.sh
@@ -20,6 +20,9 @@
|
||||
# Options:
|
||||
# -k, --key KEYID GPG key for signing (default: $DEBSIGN_KEYID)
|
||||
# -o, --outdir DIR output directory (default: <repo>/dist)
|
||||
# --orig FILE reuse this upstream orig tarball instead of
|
||||
# regenerating it (required for a Debian revision
|
||||
# >= 2, whose orig is frozen in the archive)
|
||||
# -s, --source-only build only the source package
|
||||
# -u, --unsigned do not sign anything (implies no release sigs)
|
||||
# --no-release-artifacts skip the orig tarball .asc/.md5/.sha1
|
||||
@@ -34,6 +37,10 @@
|
||||
# chroot for the changelog's distribution; create one once with the companion
|
||||
# tools/mk-sbuild-chroot.sh (rootless unshare backend).
|
||||
#
|
||||
# The Debian revision in debian/changelog decides the orig: revision 1 builds a
|
||||
# fresh upstream tarball; revision >= 2 must reuse the orig frozen at revision 1
|
||||
# (the .dsc references it by checksum), so pass it with --orig.
|
||||
#
|
||||
# SOURCE_DATE_EPOCH is honored for reproducible output.
|
||||
|
||||
set -euo pipefail
|
||||
@@ -66,6 +73,7 @@ need() {
|
||||
main() {
|
||||
local key=${DEBSIGN_KEYID:-}
|
||||
local outdir=""
|
||||
local orig_in=""
|
||||
local source_only=0
|
||||
local unsigned=0
|
||||
local release_artifacts=1
|
||||
@@ -83,6 +91,11 @@ main() {
|
||||
outdir=$2
|
||||
shift 2
|
||||
;;
|
||||
--orig)
|
||||
[[ $# -ge 2 ]] || die "missing argument for $1"
|
||||
orig_in=$2
|
||||
shift 2
|
||||
;;
|
||||
-s | --source-only)
|
||||
source_only=1
|
||||
shift
|
||||
@@ -109,8 +122,8 @@ main() {
|
||||
esac
|
||||
done
|
||||
|
||||
need git autoreconf debuild dcmd
|
||||
[[ $sbuild -eq 1 ]] && need sbuild dpkg-parsechangelog
|
||||
need git autoreconf debuild dcmd dpkg-parsechangelog
|
||||
[[ $sbuild -eq 1 ]] && need sbuild
|
||||
if [[ $unsigned -eq 0 ]]; then
|
||||
need gpg
|
||||
[[ -n $key ]] || die "no signing key (pass --key or set DEBSIGN_KEYID, or use --unsigned)"
|
||||
@@ -122,6 +135,11 @@ main() {
|
||||
mkdir -p "$outdir"
|
||||
outdir=$(cd "$outdir" && pwd)
|
||||
|
||||
if [[ -n $orig_in ]]; then
|
||||
[[ -r $orig_in ]] || die "--orig file not readable: $orig_in"
|
||||
orig_in=$(cd "$(dirname "$orig_in")" && pwd)/$(basename "$orig_in")
|
||||
fi
|
||||
|
||||
scratch=$(mktemp -d "${TMPDIR:-/tmp}/httrack-mkdeb.XXXXXX")
|
||||
trap 'rm -rf -- "$scratch"' EXIT
|
||||
|
||||
@@ -133,45 +151,65 @@ main() {
|
||||
git -C "$repo/src/coucal" archive --format=tar --prefix=src/coucal/ HEAD |
|
||||
tar -x -C "$export_dir"
|
||||
|
||||
# Refresh build system and man page, then build the tarball. We build here
|
||||
# only because regen-man needs the compiled binaries; the test suite is not
|
||||
# run in this pass. debuild (below) runs the full suite once, with the online
|
||||
# tests enabled, so a check here would just be a slower, offline-only repeat.
|
||||
info "regenerating build system and man page"
|
||||
(
|
||||
cd "$export_dir"
|
||||
autoreconf -fi
|
||||
./configure --quiet
|
||||
make -s -j"$(nproc)"
|
||||
make -s -C man regen-man
|
||||
# Build the tarball from a clean tree so no object files leak into it.
|
||||
make -s clean
|
||||
make -s dist
|
||||
)
|
||||
# Upstream version and Debian revision drive the orig: revision 1 builds a
|
||||
# fresh tarball, revision >= 2 reuses the one frozen at -1 (the .dsc pins it
|
||||
# by checksum, so a regenerated orig with new mtimes would be rejected).
|
||||
local fullver ver rev
|
||||
fullver=$(cd "$export_dir" && dpkg-parsechangelog -S Version)
|
||||
ver=${fullver%-*}
|
||||
rev=${fullver##*-}
|
||||
local orig=httrack_${ver}.orig.tar.gz
|
||||
info "version $ver (Debian revision $rev)"
|
||||
|
||||
local tarball ver
|
||||
local -a tarballs
|
||||
shopt -s nullglob
|
||||
tarballs=("$export_dir"/httrack-*.tar.gz)
|
||||
shopt -u nullglob
|
||||
[[ ${#tarballs[@]} -ge 1 ]] || die "make dist produced no tarball"
|
||||
tarball=${tarballs[0]##*/}
|
||||
ver=${tarball#httrack-}
|
||||
ver=${ver%.tar.gz}
|
||||
info "version $ver"
|
||||
# A signed build is upload-bound, so a revision >= 2 must reuse the frozen
|
||||
# orig (--orig); an unsigned build is a throwaway (CI, local) and may
|
||||
# regenerate it, since it can never reach the archive.
|
||||
if [[ -z $orig_in && $rev != 1 && $unsigned -eq 0 ]]; then
|
||||
die "Debian revision $rev needs --orig FILE (the orig is frozen from revision 1)"
|
||||
fi
|
||||
|
||||
if [[ -n $orig_in ]]; then
|
||||
info "reusing upstream tarball $orig_in"
|
||||
cp -- "$orig_in" "$scratch/$orig"
|
||||
else
|
||||
# Refresh build system and man page, then build the tarball. We build
|
||||
# here only because regen-man needs the compiled binaries; the test
|
||||
# suite is not run in this pass. debuild (below) runs the full suite
|
||||
# once, online tests enabled, so a check here would just repeat it.
|
||||
info "regenerating build system and man page"
|
||||
(
|
||||
cd "$export_dir"
|
||||
autoreconf -fi
|
||||
./configure --quiet
|
||||
make -s -j"$(nproc)"
|
||||
make -s -C man regen-man
|
||||
# Build the tarball from a clean tree so no object files leak in.
|
||||
make -s clean
|
||||
make -s dist
|
||||
)
|
||||
local -a tarballs
|
||||
shopt -s nullglob
|
||||
tarballs=("$export_dir"/httrack-*.tar.gz)
|
||||
shopt -u nullglob
|
||||
[[ ${#tarballs[@]} -ge 1 ]] || die "make dist produced no tarball"
|
||||
local tarball=${tarballs[0]##*/}
|
||||
[[ $tarball == "httrack-$ver.tar.gz" ]] ||
|
||||
die "changelog version $ver disagrees with built tarball $tarball (configure.ac mismatch?)"
|
||||
cp -- "$export_dir/$tarball" "$scratch/$orig"
|
||||
fi
|
||||
|
||||
# 3.0 (quilt): orig tarball is upstream-only; debian/ is overlaid on top.
|
||||
local orig=httrack_${ver}.orig.tar.gz
|
||||
cp -- "$export_dir/$tarball" "$scratch/$orig"
|
||||
(
|
||||
cd "$scratch"
|
||||
tar -xf "$orig"
|
||||
[[ -d httrack-$ver ]] || die "orig tarball does not unpack to httrack-$ver/"
|
||||
cp -a "$export_dir/debian" "httrack-$ver/debian"
|
||||
)
|
||||
|
||||
# Build (debuild also runs lintian and signs). --fail-on aborts on a lintian
|
||||
# error or warning, so neither a release nor CI produces an unclean package.
|
||||
local -a debuild_opts=(--lintian-opts -I -i "--fail-on=error,warning")
|
||||
# Build and sign. debuild runs lintian too but does NOT propagate its exit
|
||||
# status, so a broken package would pass unnoticed; disable it here and run
|
||||
# lintian ourselves below as the real gate.
|
||||
local -a debuild_opts=(--no-lintian)
|
||||
local -a build_opts=()
|
||||
[[ $source_only -eq 1 ]] && build_opts+=(-S)
|
||||
if [[ $unsigned -eq 1 ]]; then
|
||||
@@ -182,7 +220,8 @@ main() {
|
||||
info "building packages with debuild"
|
||||
(
|
||||
cd "$scratch/httrack-$ver"
|
||||
debuild "${build_opts[@]}" "${debuild_opts[@]}"
|
||||
# debuild options (--no-lintian) must precede the dpkg-buildpackage ones
|
||||
debuild "${debuild_opts[@]}" "${build_opts[@]}"
|
||||
)
|
||||
|
||||
# Collect every file the .changes references (orig, dsc, debs, ddebs, buildinfo).
|
||||
@@ -192,6 +231,16 @@ main() {
|
||||
changes=("$scratch"/*.changes)
|
||||
shopt -u nullglob
|
||||
[[ ${#changes[@]} -ge 1 ]] || die "debuild produced no .changes file"
|
||||
|
||||
# The real lintian gate (debuild only reports, it does not fail on tags).
|
||||
# --profile debian: CI runners are Ubuntu, whose vendor data would wrongly
|
||||
# reject the Debian "unstable" distribution. newer-standards-version only
|
||||
# means the local lintian is older than the buildds', not a package
|
||||
# defect, so suppress it. set -e turns any error/warning tag into a failure.
|
||||
info "running lintian gate (--fail-on=error,warning)"
|
||||
lintian --profile debian -I -i --fail-on=error,warning \
|
||||
--suppress-tags newer-standards-version "${changes[@]}"
|
||||
|
||||
dcmd cp -- "${changes[@]}" "$outdir/"
|
||||
|
||||
# Clean-room build gate: rebuild the source package in a minimal chroot that
|
||||
@@ -219,8 +268,12 @@ main() {
|
||||
fi
|
||||
|
||||
# Release artifacts for the upstream tarball (detached sig + checksums).
|
||||
# A Debian revision >= 2 .changes omits the orig (it is already in the
|
||||
# archive), so dcmd above won't have copied it; place it from the build tree
|
||||
# so the website artifacts are produced regardless of the revision.
|
||||
if [[ $release_artifacts -eq 1 && $unsigned -eq 0 ]]; then
|
||||
info "signing upstream tarball"
|
||||
cp -- "$scratch/$orig" "$outdir/$orig"
|
||||
(
|
||||
cd "$outdir"
|
||||
gpg --armor --detach-sign --yes -u "$key" -- "$orig"
|
||||
|
||||
Reference in New Issue
Block a user