9 Commits

Author SHA1 Message Date
Xavier Roche
074965d38a debian: bump Standards-Version to 4.7.0
No packaging changes required. The 4.7.0 normative items do not apply to
httrack: it ships no maintainer scripts (so the systemd config
diversion/alternatives rule is moot), no services or init scripts (so the
systemd-unit requirement is moot), and it is in main (so the contrib/non-free
rule forbidding network access from debian/rules targets is moot).
2026-06-07 13:06:23 +02:00
Xavier Roche
87452f3b89 Merge pull request #310 from xroche/feat/in-tree-mkdeb
Build Debian packages in-tree via tools/mkdeb.sh
2026-06-07 11:47:18 +02:00
Xavier Roche
3220085797 tools: collect artifacts with dcmd, tidy tool checks
dcmd expands the .changes to its full file set (orig, dsc, debs, dbgsym
ddebs, buildinfo), replacing the hand-rolled copy loop that silently
dropped the dbgsym packages. need() now takes several tools at once;
drop the unused dpkg-parsechangelog check and require dcmd.
2026-06-07 11:45:40 +02:00
Xavier Roche
ba41415c15 scripts: shellcheck and shfmt -i 4 cleanup
makeman.sh: use CDPATH='' (SC1007) and 4-space indent; the manpage regen test
reformatted with shfmt -i 4. No behavior change.
2026-06-07 11:31:25 +02:00
Xavier Roche
49a6698ca5 debian: fix bogus maintainer email in the 3.49.6-1 changelog entry
roche@proliant.localnet was a local hostname that leaked into a released entry;
lintian flags it as bogus-mail-host. Use xavier@debian.org like the other
entries.
2026-06-07 11:31:25 +02:00
Xavier Roche
0de1b405e4 tools: add mkdeb.sh and a make deb target for signed packages
Replaces an external workstation script. mkdeb.sh exports committed HEAD plus
the coucal submodule to a scratch dir, refreshes the build system and man page
(reusing make -C man regen-man), builds a clean upstream tarball, overlays
debian/, and runs debuild (build + lintian + signing). It takes the GPG key and
options as arguments and writes nothing in the working tree. 'make deb
DEB_FLAGS=...' is a thin wrapper. Honors SOURCE_DATE_EPOCH.
2026-06-07 11:31:25 +02:00
Xavier Roche
dc50f25420 Merge pull request #309 from xroche/feat/manpage-generator
Generate the man page in-tree from --help (indent-aware)
2026-06-07 10:56:55 +02:00
Xavier Roche
7bab8263a1 man: regenerate httrack.1 with an in-tree, indent-aware script
The external makeman.sh turned the first token of every indented --help line
into an option, so prose like the -%! warning rendered as bogus -IMPORTANT and
-USE options (Debian #1061053). man/makeman.sh classifies lines by indentation,
reads README from the source tree, and honors SOURCE_DATE_EPOCH.
'make -C man regen-man' refreshes the page; tests/02_manpage-regen.test fails
if the committed page drifts from --help.
2026-06-07 10:53:03 +02:00
Xavier Roche
000017bce7 Merge pull request #308 from xroche/fix/openssl4-tls-init
Modernize OpenSSL TLS init for the 3.x->4.x transition
2026-06-07 08:55:44 +02:00
12 changed files with 672 additions and 303 deletions

View File

@@ -5,4 +5,13 @@ ACLOCAL_AMFLAGS = -I m4
EXTRA_DIST = INSTALL.Linux \
gpl-fr.txt license.txt greetings.txt history.txt \
httrack-doc.html lang.def README.md
httrack-doc.html lang.def README.md tools/mkdeb.sh
# "make deb" hands the work to tools/mkdeb.sh, which builds the signed Debian
# packages from a clean source export. Everything in DEB_FLAGS is passed to
# the script unchanged; use it for the signing key and any other option, e.g.:
#   make deb DEB_FLAGS="--key BB71C7E6CB1AD8FAF53FE42A60C3AA7180598EFB"
# Run tools/mkdeb.sh --help for the full option list.
DEB_FLAGS =

.PHONY: deb
deb:
	$(SHELL) $(top_srcdir)/tools/mkdeb.sh $(DEB_FLAGS)

View File

@@ -160,7 +160,7 @@ am__define_uniq_tagged_files = \
DIST_SUBDIRS = $(SUBDIRS)
am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/config.h.in AUTHORS \
COPYING ChangeLog INSTALL NEWS README compile config.guess \
config.sub depcomp install-sh ltmain.sh missing
config.sub install-sh ltmain.sh missing
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
distdir = $(PACKAGE)-$(VERSION)
top_distdir = $(distdir)
@@ -349,8 +349,14 @@ SUBDIRS = src man m4 libtest templates lang html tests
ACLOCAL_AMFLAGS = -I m4
EXTRA_DIST = INSTALL.Linux \
gpl-fr.txt license.txt greetings.txt history.txt \
httrack-doc.html lang.def README.md
httrack-doc.html lang.def README.md tools/mkdeb.sh
# Build the signed Debian packages from a clean source export. Pass the signing
# key and other options through DEB_FLAGS, e.g.:
# make deb DEB_FLAGS="--key BB71C7E6CB1AD8FAF53FE42A60C3AA7180598EFB"
# See tools/mkdeb.sh --help for all options.
DEB_FLAGS =
all: config.h
$(MAKE) $(AM_MAKEFLAGS) all-recursive
@@ -841,6 +847,9 @@ uninstall-am:
.PRECIOUS: Makefile
deb:
$(SHELL) $(top_srcdir)/tools/mkdeb.sh $(DEB_FLAGS)
.PHONY: deb
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.

8
debian/changelog vendored
View File

@@ -1,3 +1,9 @@
httrack (3.49.7-2) unstable; urgency=medium
* Bump Standards-Version to 4.7.0 (no changes needed).
-- Xavier Roche <xavier@debian.org> Sun, 07 Jun 2026 13:05:53 +0200
httrack (3.49.7-1) unstable; urgency=medium
* New upstream release.
@@ -11,7 +17,7 @@ httrack (3.49.6-1) unstable; urgency=medium
* Updated to 3.49.6 (3.49-6)
Fixed CVE-2017-14062
-- Xavier Roche <roche@proliant.localnet> Tue, 11 Mar 2025 19:43:39 +0100
-- Xavier Roche <xavier@debian.org> Tue, 11 Mar 2025 19:43:39 +0100
httrack (3.49.5-1) unstable; urgency=medium

2
debian/control vendored
View File

@@ -2,7 +2,7 @@ Source: httrack
Section: web
Priority: optional
Maintainer: Xavier Roche <roche@httrack.com>
Standards-Version: 4.6.2
Standards-Version: 4.7.0
Build-Depends: debhelper (>= 12.0.0), dh-autoreconf, autotools-dev, autoconf, autoconf-archive, automake, libtool, zlib1g-dev, libssl-dev
Homepage: http://www.httrack.com
Vcs-Git: https://github.com/xroche/httrack.git

View File

@@ -2,4 +2,14 @@
# man_MANS = httrack.1
man_MANS = httrack.1 webhttrack.1 htsserver.1 proxytrack.1
EXTRA_DIST = $(man_MANS)
EXTRA_DIST = $(man_MANS) makeman.sh
# Regenerate httrack.1 from the "httrack --help" output and the top-level
# README. Run by hand after changing options or help text:
#   make -C man regen-man
# The generated page is committed; this target only refreshes it. Honors
# SOURCE_DATE_EPOCH for a reproducible date.
#
# The script's output goes to a temporary file that replaces httrack.1 only
# when makeman.sh exits successfully. Redirecting straight into httrack.1
# would truncate the committed page before the script even starts, so any
# failure (for example a binary that cannot run) would leave it empty.
regen-man: makeman.sh $(top_builddir)/src/httrack$(EXEEXT)
	README='$(top_srcdir)/README' $(SHELL) $(srcdir)/makeman.sh \
	  '$(top_builddir)/src/httrack$(EXEEXT)' > $(srcdir)/httrack.1.tmp \
	  || { rm -f $(srcdir)/httrack.1.tmp; exit 1; }
	mv -f $(srcdir)/httrack.1.tmp $(srcdir)/httrack.1

.PHONY: regen-man

View File

@@ -297,7 +297,7 @@ top_srcdir = @top_srcdir@
# man_MANS = httrack.1
man_MANS = httrack.1 webhttrack.1 htsserver.1 proxytrack.1
EXTRA_DIST = $(man_MANS)
EXTRA_DIST = $(man_MANS) makeman.sh
all: all-am
.SUFFIXES:
@@ -541,6 +541,16 @@ uninstall-man: uninstall-man1
.PRECIOUS: Makefile
# Regenerate httrack.1 from the "httrack --help" output and the top-level
# README. Run by hand after changing options or help text:
# make -C man regen-man
# The generated page is committed; this target only refreshes it. Honors
# SOURCE_DATE_EPOCH for a reproducible date.
regen-man: makeman.sh $(top_builddir)/src/httrack$(EXEEXT)
README='$(top_srcdir)/README' $(SHELL) $(srcdir)/makeman.sh \
'$(top_builddir)/src/httrack$(EXEEXT)' > $(srcdir)/httrack.1
.PHONY: regen-man
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

View File

@@ -1,218 +1,122 @@
.\" Process this file with
.\" groff -man -Tascii httrack.1
.\"
.TH httrack 1 "27 January 2024" "httrack website copier"
.\" This file is generated by man/makeman.sh; do not edit by hand.
.TH httrack 1 "07 June 2026" "httrack website copier"
.SH NAME
httrack \- offline browser : copy websites to a local directory
.SH SYNOPSIS
.B httrack [ url ]... [ \-filter ]... [ +filter ]... [
.B \-O, \-\-path
] [
.B \-w, \-\-mirror
] [
.B \-W, \-\-mirror\-wizard
] [
.B \-g, \-\-get\-files
] [
.B \-i, \-\-continue
] [
.B \-Y, \-\-mirrorlinks
] [
.B \-P, \-\-proxy
] [
.B \-%f, \-\-httpproxy\-ftp[=N]
] [
.B \-%b, \-\-bind
] [
.B \-rN, \-\-depth[=N]
] [
.B \-%eN, \-\-ext\-depth[=N]
] [
.B \-mN, \-\-max\-files[=N]
] [
.B \-MN, \-\-max\-size[=N]
] [
.B \-EN, \-\-max\-time[=N]
] [
.B \-AN, \-\-max\-rate[=N]
] [
.B \-%cN, \-\-connection\-per\-second[=N]
] [
.B \-GN, \-\-max\-pause[=N]
] [
.B \-cN, \-\-sockets[=N]
] [
.B \-TN, \-\-timeout[=N]
] [
.B \-RN, \-\-retries[=N]
] [
.B \-JN, \-\-min\-rate[=N]
] [
.B \-HN, \-\-host\-control[=N]
] [
.B \-%P, \-\-extended\-parsing[=N]
] [
.B \-n, \-\-near
] [
.B \-t, \-\-test
] [
.B \-%L, \-\-list
] [
.B \-%S, \-\-urllist
] [
.B \-NN, \-\-structure[=N]
] [
.B \-%D, \-\-cached\-delayed\-type\-check
] [
.B \-%M, \-\-mime\-html
] [
.B \-LN, \-\-long\-names[=N]
] [
.B \-KN, \-\-keep\-links[=N]
] [
.B \-x, \-\-replace\-external
] [
.B \-%x, \-\-disable\-passwords
] [
.B \-%q, \-\-include\-query\-string
] [
.B \-o, \-\-generate\-errors
] [
.B \-X, \-\-purge\-old[=N]
] [
.B \-%p, \-\-preserve
] [
.B \-%T, \-\-utf8\-conversion
] [
.B \-bN, \-\-cookies[=N]
] [
.B \-u, \-\-check\-type[=N]
] [
.B \-j, \-\-parse\-java[=N]
] [
.B \-sN, \-\-robots[=N]
] [
.B \-%h, \-\-http\-10
] [
.B \-%k, \-\-keep\-alive
] [
.B \-%B, \-\-tolerant
] [
.B \-%s, \-\-updatehack
] [
.B \-%u, \-\-urlhack
] [
.B \-%A, \-\-assume
] [
.B \-@iN, \-\-protocol[=N]
] [
.B \-%w, \-\-disable\-module
] [
.B \-F, \-\-user\-agent
] [
.B \-%R, \-\-referer
] [
.B \-%E, \-\-from
] [
.B \-%F, \-\-footer
] [
.B \-%l, \-\-language
] [
.B \-%a, \-\-accept
] [
.B \-%X, \-\-headers
] [
.B \-C, \-\-cache[=N]
] [
.B \-k, \-\-store\-all\-in\-cache
] [
.B \-%n, \-\-do\-not\-recatch
] [
.B \-%v, \-\-display
] [
.B \-Q, \-\-do\-not\-log
] [
.B \-q, \-\-quiet
] [
.B \-z, \-\-extra\-log
] [
.B \-Z, \-\-debug\-log
] [
.B \-v, \-\-verbose
] [
.B \-f, \-\-file\-log
] [
.B \-f2, \-\-single\-log
] [
.B \-I, \-\-index
] [
.B \-%i, \-\-build\-top\-index
] [
.B \-%I, \-\-search\-index
] [
.B \-pN, \-\-priority[=N]
] [
.B \-S, \-\-stay\-on\-same\-dir
] [
.B \-D, \-\-can\-go\-down
] [
.B \-U, \-\-can\-go\-up
] [
.B \-B, \-\-can\-go\-up\-and\-down
] [
.B \-a, \-\-stay\-on\-same\-address
] [
.B \-d, \-\-stay\-on\-same\-domain
] [
.B \-l, \-\-stay\-on\-same\-tld
] [
.B \-e, \-\-go\-everywhere
] [
.B \-%H, \-\-debug\-headers
] [
.B \-%!, \-\-disable\-security\-limits
] [
.B \-V, \-\-userdef\-cmd
] [
.B \-%W, \-\-callback
] [
.B \-K, \-\-keep\-links[=N]
] [
.B
.B httrack [ url ]... [ \-filter ]... [ +filter ]...
[ \fB\-O, \-\-path\fR ]
[ \fB\-w, \-\-mirror\fR ]
[ \fB\-W, \-\-mirror\-wizard\fR ]
[ \fB\-g, \-\-get\-files\fR ]
[ \fB\-i, \-\-continue\fR ]
[ \fB\-Y, \-\-mirrorlinks\fR ]
[ \fB\-P, \-\-proxy\fR ]
[ \fB\-%f, \-\-httpproxy\-ftp[=N]\fR ]
[ \fB\-%b, \-\-bind\fR ]
[ \fB\-rN, \-\-depth[=N]\fR ]
[ \fB\-%eN, \-\-ext\-depth[=N]\fR ]
[ \fB\-mN, \-\-max\-files[=N]\fR ]
[ \fB\-MN, \-\-max\-size[=N]\fR ]
[ \fB\-EN, \-\-max\-time[=N]\fR ]
[ \fB\-AN, \-\-max\-rate[=N]\fR ]
[ \fB\-%cN, \-\-connection\-per\-second[=N]\fR ]
[ \fB\-GN, \-\-max\-pause[=N]\fR ]
[ \fB\-cN, \-\-sockets[=N]\fR ]
[ \fB\-TN, \-\-timeout[=N]\fR ]
[ \fB\-RN, \-\-retries[=N]\fR ]
[ \fB\-JN, \-\-min\-rate[=N]\fR ]
[ \fB\-HN, \-\-host\-control[=N]\fR ]
[ \fB\-%P, \-\-extended\-parsing[=N]\fR ]
[ \fB\-n, \-\-near\fR ]
[ \fB\-t, \-\-test\fR ]
[ \fB\-%L, \-\-list\fR ]
[ \fB\-%S, \-\-urllist\fR ]
[ \fB\-NN, \-\-structure[=N]\fR ]
[ \fB\-%D, \-\-cached\-delayed\-type\-check\fR ]
[ \fB\-%M, \-\-mime\-html\fR ]
[ \fB\-LN, \-\-long\-names[=N]\fR ]
[ \fB\-KN, \-\-keep\-links[=N]\fR ]
[ \fB\-x, \-\-replace\-external\fR ]
[ \fB\-%x, \-\-disable\-passwords\fR ]
[ \fB\-%q, \-\-include\-query\-string\fR ]
[ \fB\-o, \-\-generate\-errors\fR ]
[ \fB\-X, \-\-purge\-old[=N]\fR ]
[ \fB\-%p, \-\-preserve\fR ]
[ \fB\-%T, \-\-utf8\-conversion\fR ]
[ \fB\-bN, \-\-cookies[=N]\fR ]
[ \fB\-u, \-\-check\-type[=N]\fR ]
[ \fB\-j, \-\-parse\-java[=N]\fR ]
[ \fB\-sN, \-\-robots[=N]\fR ]
[ \fB\-%h, \-\-http\-10\fR ]
[ \fB\-%k, \-\-keep\-alive\fR ]
[ \fB\-%B, \-\-tolerant\fR ]
[ \fB\-%s, \-\-updatehack\fR ]
[ \fB\-%u, \-\-urlhack\fR ]
[ \fB\-%A, \-\-assume\fR ]
[ \fB\-@iN, \-\-protocol[=N]\fR ]
[ \fB\-%w, \-\-disable\-module\fR ]
[ \fB\-F, \-\-user\-agent\fR ]
[ \fB\-%R, \-\-referer\fR ]
[ \fB\-%E, \-\-from\fR ]
[ \fB\-%F, \-\-footer\fR ]
[ \fB\-%l, \-\-language\fR ]
[ \fB\-%a, \-\-accept\fR ]
[ \fB\-%X, \-\-headers\fR ]
[ \fB\-C, \-\-cache[=N]\fR ]
[ \fB\-k, \-\-store\-all\-in\-cache\fR ]
[ \fB\-%n, \-\-do\-not\-recatch\fR ]
[ \fB\-%v, \-\-display\fR ]
[ \fB\-Q, \-\-do\-not\-log\fR ]
[ \fB\-q, \-\-quiet\fR ]
[ \fB\-z, \-\-extra\-log\fR ]
[ \fB\-Z, \-\-debug\-log\fR ]
[ \fB\-v, \-\-verbose\fR ]
[ \fB\-f, \-\-file\-log\fR ]
[ \fB\-f2, \-\-single\-log\fR ]
[ \fB\-I, \-\-index\fR ]
[ \fB\-%i, \-\-build\-top\-index\fR ]
[ \fB\-%I, \-\-search\-index\fR ]
[ \fB\-pN, \-\-priority[=N]\fR ]
[ \fB\-S, \-\-stay\-on\-same\-dir\fR ]
[ \fB\-D, \-\-can\-go\-down\fR ]
[ \fB\-U, \-\-can\-go\-up\fR ]
[ \fB\-B, \-\-can\-go\-up\-and\-down\fR ]
[ \fB\-a, \-\-stay\-on\-same\-address\fR ]
[ \fB\-d, \-\-stay\-on\-same\-domain\fR ]
[ \fB\-l, \-\-stay\-on\-same\-tld\fR ]
[ \fB\-e, \-\-go\-everywhere\fR ]
[ \fB\-%H, \-\-debug\-headers\fR ]
[ \fB\-%!, \-\-disable\-security\-limits\fR ]
[ \fB\-V, \-\-userdef\-cmd\fR ]
[ \fB\-%W, \-\-callback\fR ]
[ \fB\-K, \-\-keep\-links[=N]\fR ]
.SH DESCRIPTION
.B httrack
allows you to download a World Wide Web site from the Internet to a local directory, building recursively all directories, getting HTML, images, and other files from the server to your computer. HTTrack arranges the original site's relative link-structure. Simply open a page of the "mirrored" website in your browser, and you can browse the site from link to link, as if you were viewing it online. HTTrack can also update an existing mirrored site, and resume interrupted downloads.
.SH EXAMPLES
.TP
.B httrack www.someweb.com/bob/
mirror site www.someweb.com/bob/ and only this site
mirror site www.someweb.com/bob/ and only this site
.TP
.B httrack www.someweb.com/bob/ www.anothertest.com/mike/ +*.com/*.jpg \-mime:application/*
mirror the two sites together (with shared links) and accept any .jpg files on .com sites
mirror the two sites together (with shared links) and accept any .jpg files on .com sites
.TP
.B httrack www.someweb.com/bob/bobby.html +* \-r6
means get all files starting from bobby.html, with 6 link\-depth, and possibility of going everywhere on the web
.TP
.B httrack www.someweb.com/bob/bobby.html \-\-spider \-P proxy.myhost.com:8080
runs the spider on www.someweb.com/bob/bobby.html using a proxy
.TP
.B httrack \-\-update
updates a mirror in the current folder
.TP
.B httrack
will bring you to the interactive mode
.TP
.B httrack \-\-continue
continues a mirror in the current folder
.SH OPTIONS
.SS General options:
.IP \-O
path for mirror/logfiles+cache (\-O path
mirror[,path
cache
and
logfiles]) (\-\-path <param>)
path for mirror/logfiles+cache (\-O path_mirror[,path_cache_and_logfiles]) (\-\-path <param>)
.SS Action options:
.IP \-w
*mirror web sites (\-\-mirror)
@@ -224,15 +128,13 @@ just get files (saved in the current directory) (\-\-get\-files)
continue an interrupted mirror using the cache (\-\-continue)
.IP \-Y
mirror ALL links located in the first level pages (mirror links) (\-\-mirrorlinks)
.SS Proxy options:
.IP \-P
proxy use (\-P proxy:port or \-P user:pass@proxy:port) (\-\-proxy <param>)
.IP \-%f
*use proxy for ftp (f0 don t use) (\-\-httpproxy\-ftp[=N])
*use proxy for ftp (f0 don't use) (\-\-httpproxy\-ftp[=N])
.IP \-%b
use this local hostname to make/send requests (\-%b hostname) (\-\-bind <param>)
.SS Limits options:
.IP \-rN
set the mirror depth to N (* r9999) (\-\-depth[=N])
@@ -252,7 +154,6 @@ maximum transfer rate in bytes/seconds (1000=1KB/s max) (\-\-max\-rate[=N])
maximum number of connections/seconds (*%c10) (\-\-connection\-per\-second[=N])
.IP \-GN
pause transfer if N bytes reached, and wait until lock file is deleted (\-\-max\-pause[=N])
.SS Flow control:
.IP \-cN
number of multiple connections (*c8) (\-\-sockets[=N])
@@ -264,28 +165,26 @@ number of retries, in case of timeout or non\-fatal errors (*R1) (\-\-retries[=N
traffic jam control, minimum transfert rate (bytes/seconds) tolerated for a link (\-\-min\-rate[=N])
.IP \-HN
host is abandoned if: 0=never, 1=timeout, 2=slow, 3=timeout or slow (\-\-host\-control[=N])
.SS Links options:
.IP \-%P
*extended parsing, attempt to parse all links, even in unknown tags or Javascript (%P0 don t use) (\-\-extended\-parsing[=N])
*extended parsing, attempt to parse all links, even in unknown tags or Javascript (%P0 don't use) (\-\-extended\-parsing[=N])
.IP \-n
get non\-html files near an html file (ex: an image located outside) (\-\-near)
get non\-html files 'near' an html file (ex: an image located outside) (\-\-near)
.IP \-t
test all URLs (even forbidden ones) (\-\-test)
.IP \-%L
<file> add all URL located in this text file (one URL per line) (\-\-list <param>)
.IP \-%S
<file> add all scan rules located in this text file (one scan rule per line) (\-\-urllist <param>)
.SS Build options:
.IP \-NN
structure type (0 *original structure, 1+: see below) (\-\-structure[=N])
.IP \-or
user defined structure (\-N "%h%p/%n%q.%t")
.br
or user defined structure (\-N "%h%p/%n%q.%t")
.IP \-%N
delayed type check, don t make any link test but wait for files download to start instead (experimental) (%N0 don t use, %N1 use for unknown extensions, * %N2 always use)
delayed type check, don't make any link test but wait for files download to start instead (experimental) (%N0 don't use, %N1 use for unknown extensions, * %N2 always use)
.IP \-%D
cached delayed type check, don t wait for remote type during updates, to speedup them (%D0 wait, * %D1 don t wait) (\-\-cached\-delayed\-type\-check)
cached delayed type check, don't wait for remote type during updates, to speedup them (%D0 wait, * %D1 don't wait) (\-\-cached\-delayed\-type\-check)
.IP \-%M
generate a RFC MIME\-encapsulated full\-archive (.mht) (\-\-mime\-html)
.IP \-LN
@@ -297,29 +196,28 @@ replace external html links by error pages (\-\-replace\-external)
.IP \-%x
do not include any password for external password protected websites (%x0 include) (\-\-disable\-passwords)
.IP \-%q
*include query string for local files (useless, for information purpose only) (%q0 don t include) (\-\-include\-query\-string)
*include query string for local files (useless, for information purpose only) (%q0 don't include) (\-\-include\-query\-string)
.IP \-o
*generate output html file in case of error (404..) (o0 don t generate) (\-\-generate\-errors)
*generate output html file in case of error (404..) (o0 don't generate) (\-\-generate\-errors)
.IP \-X
*purge old files after update (X0 keep delete) (\-\-purge\-old[=N])
.IP \-%p
preserve html files as is (identical to \-K4 \-%F "" ) (\-\-preserve)
preserve html files 'as is' (identical to '\-K4 \-%F ""') (\-\-preserve)
.IP \-%T
links conversion to UTF\-8 (\-\-utf8\-conversion)
.SS Spider options:
.IP \-bN
accept cookies in cookies.txt (0=do not accept,* 1=accept) (\-\-cookies[=N])
.IP \-u
check document type if unknown (cgi,asp..) (u0 don t check, * u1 check but /, u2 check always) (\-\-check\-type[=N])
check document type if unknown (cgi,asp..) (u0 don't check, * u1 check but /, u2 check always) (\-\-check\-type[=N])
.IP \-j
*parse Java Classes (j0 don t parse, bitmask: |1 parse default, |2 don t parse .class |4 don t parse .js |8 don t be aggressive) (\-\-parse\-java[=N])
*parse Java Classes (j0 don't parse, bitmask: |1 parse default, |2 don't parse .class |4 don't parse .js |8 don't be aggressive) (\-\-parse\-java[=N])
.IP \-sN
follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always, 3=always (even strict rules)) (\-\-robots[=N])
.IP \-%h
force HTTP/1.0 requests (reduce update features, only for old servers or proxies) (\-\-http\-10)
.IP \-%k
use keep\-alive if possible, greately reducing latency for small files and test requests (%k0 don t use) (\-\-keep\-alive)
use keep\-alive if possible, greately reducing latency for small files and test requests (%k0 don't use) (\-\-keep\-alive)
.IP \-%B
tolerant requests (accept bogus responses on some servers, but not standard!) (\-\-tolerant)
.IP \-%s
@@ -328,13 +226,14 @@ update hacks: various hacks to limit re\-transfers when updating (identical size
url hacks: various hacks to limit duplicate URLs (strip //, www.foo.com==foo.com..) (\-\-urlhack)
.IP \-%A
assume that a type (cgi,asp..) is always linked with a mime type (\-%A php3,cgi=text/html;dat,bin=application/x\-zip) (\-\-assume <param>)
.IP \-can
also be used to force a specific file type: \-\-assume foo.cgi=text/html
.br
shortcut: '\-\-assume standard' is equivalent to \-%A php2 php3 php4 php cgi asp jsp pl cfm nsf=text/html
.br
can also be used to force a specific file type: \-\-assume foo.cgi=text/html
.IP \-@iN
internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6 only) (\-\-protocol[=N])
.IP \-%w
disable a specific external mime module (\-%w htsswf \-%w htsjava) (\-\-disable\-module <param>)
.SS Browser ID:
.IP \-F
user\-agent field sent in HTTP headers (\-F "user\-agent name") (\-\-user\-agent <param>)
@@ -350,7 +249,6 @@ preffered language (\-%l "fr, en, jp, *" (\-\-language <param>)
accepted formats (\-%a "text/html,image/png;q=0.9,*/*;q=0.1" (\-\-accept <param>)
.IP \-%X
additional HTTP header line (\-%X "X\-Magic: 42" (\-\-headers <param>)
.SS Log, index, cache
.IP \-C
create/use a cache for updates and retries (C0 no cache,C1 cache is prioritary,* C2 test update before) (\-\-cache[=N])
@@ -375,25 +273,24 @@ log on screen (\-\-verbose)
.IP \-f2
one single log file (\-\-single\-log)
.IP \-I
*make an index (I0 don t make) (\-\-index)
*make an index (I0 don't make) (\-\-index)
.IP \-%i
make a top index for a project folder (* %i0 don t make) (\-\-build\-top\-index)
make a top index for a project folder (* %i0 don't make) (\-\-build\-top\-index)
.IP \-%I
make an searchable index for this mirror (* %I0 don t make) (\-\-search\-index)
make an searchable index for this mirror (* %I0 don't make) (\-\-search\-index)
.SS Expert options:
.IP \-pN
priority mode: (* p3) (\-\-priority[=N])
.IP \-p0
just scan, don t save anything (for checking links)
.IP \-p1
save only html files
.IP \-p2
save only non html files
.IP \-*p3
save all files
.IP \-p7
get html files before, then treat other files
.br
p0 just scan, don't save anything (for checking links)
.br
p1 save only html files
.br
p2 save only non html files
.br
*p3 save all files
.br
p7 get html files before, then treat other files
.IP \-S
stay on the same directory (\-\-stay\-on\-same\-dir)
.IP \-D
@@ -412,18 +309,17 @@ stay on the same TLD (eg: .com) (\-\-stay\-on\-same\-tld)
go everywhere on the web (\-\-go\-everywhere)
.IP \-%H
debug HTTP headers in logfile (\-\-debug\-headers)
.SS Guru options: (do NOT use if possible)
.IP \-#X
*use optimized engine (limited memory boundary checks) (\-\-fast\-engine)
.IP \-#0
filter test (\-#0 *.gif www.bar.com/foo.gif ) (\-\-debug\-testfilters <param>)
filter test (\-#0 '*.gif' 'www.bar.com/foo.gif') (\-\-debug\-testfilters <param>)
.IP \-#1
simplify test (\-#1 ./foo/bar/../foobar)
.IP \-#2
type test (\-#2 /foo/bar.php)
.IP \-#C
cache list (\-#C *.com/spider*.gif (\-\-debug\-cache <param>)
cache list (\-#C '*.com/spider*.gif' (\-\-debug\-cache <param>)
.IP \-#R
cache repair (damaged cache) (\-\-repair\-cache)
.IP \-#d
@@ -452,7 +348,6 @@ generate transfer ops. log every minutes (\-\-debug\-xfrstats)
wait time (\-\-advanced\-wait)
.IP \-#Z
generate transfer rate statistics every minutes (\-\-debug\-ratestats)
.SS Dangerous options: (do NOT use unless you exactly know what you are doing)
.IP \-%!
bypass built\-in security limits aimed to avoid bandwidth abuses (bandwidth, simultaneous connections) (\-\-disable\-security\-limits)
@@ -460,13 +355,11 @@ bypass built\-in security limits aimed to avoid bandwidth abuses (bandwidth, sim
IMPORTANT NOTE: DANGEROUS OPTION, ONLY SUITABLE FOR EXPERTS
.br
USE IT WITH EXTREME CARE
.SS Command\-line specific options:
.IP \-V
execute system command after each files ($0 is the filename: \-V "rm \\$0") (\-\-userdef\-cmd <param>)
.IP \-%W
use an external library function as a wrapper (\-%W myfoo.so[,myparameters]) (\-\-callback <param>)
.SS Details: Option N
.IP \-N0
Site\-structure (default)
@@ -485,17 +378,17 @@ All files in web/, with random names (gadget !)
.IP \-N100
Site\-structure, without www.domain.xxx/
.IP \-N101
Identical to N1 except that "web" is replaced by the site s name
Identical to N1 except that "web" is replaced by the site's name
.IP \-N102
Identical to N2 except that "web" is replaced by the site s name
Identical to N2 except that "web" is replaced by the site's name
.IP \-N103
Identical to N3 except that "web" is replaced by the site s name
Identical to N3 except that "web" is replaced by the site's name
.IP \-N104
Identical to N4 except that "web" is replaced by the site s name
Identical to N4 except that "web" is replaced by the site's name
.IP \-N105
Identical to N5 except that "web" is replaced by the site s name
Identical to N5 except that "web" is replaced by the site's name
.IP \-N199
Identical to N99 except that "web" is replaced by the site s name
Identical to N99 except that "web" is replaced by the site's name
.IP \-N1001
Identical to N1 except that there is no "web" directory
.IP \-N1002
@@ -509,34 +402,47 @@ Identical to N5 except that there is no "web" directory
.IP \-N1099
Identical to N99 except that there is no "web" directory
.SS Details: User\-defined option N
%n Name of file without file type (ex: image)
%N Name of file, including file type (ex: image.gif)
%t File type (ex: gif)
%p Path [without ending /] (ex: /someimages)
%h Host name (ex: www.someweb.com)
%M URL MD5 (128 bits, 32 ascii bytes)
%Q query string MD5 (128 bits, 32 ascii bytes)
%k full query string
%r protocol name (ex: http)
%q small query string MD5 (16 bits, 4 ascii bytes)
%s? Short name version (ex: %sN)
%[param] param variable in query string
%[param:before:after:empty:notfound] advanced variable extraction
.IP \-%n
Name of file without file type (ex: image)
.IP \-%N
Name of file, including file type (ex: image.gif)
.IP \-%t
File type (ex: gif)
.IP \-%p
Path [without ending /] (ex: /someimages)
.IP \-%h
Host name (ex: www.someweb.com)
.IP \-%M
URL MD5 (128 bits, 32 ascii bytes)
.IP \-%Q
query string MD5 (128 bits, 32 ascii bytes)
.IP \-%k
full query string
.IP \-%r
protocol name (ex: http)
.IP \-%q
small query string MD5 (16 bits, 4 ascii bytes)
.br
\&'%s?' Short name version (ex: %sN)
.IP \-%[param]
param variable in query string
.IP \-%[param:before:after:empty:notfound]
advanced variable extraction
.SS Details: User\-defined option N and advanced variable extraction
%[param:before:after:empty:notfound]
.IP \-param
: parameter name
.IP \-before
: string to prepend if the parameter was found
.IP \-after
: string to append if the parameter was found
.IP \-notfound
: string replacement if the parameter could not be found
.IP \-empty
: string replacement if the parameter was empty
.IP \-all
fields, except the first one (the parameter name), can be empty
.br
%[param:before:after:empty:notfound]
.br
param : parameter name
.br
before : string to prepend if the parameter was found
.br
after : string to append if the parameter was found
.br
notfound : string replacement if the parameter could not be found
.br
empty : string replacement if the parameter was empty
.br
all fields, except the first one (the parameter name), can be empty
.SS Details: Option K
.IP \-K0
foo.cgi?q=45 \-> foo4B54.html?q=45 (relative URI, default)
@@ -548,37 +454,33 @@ foo.cgi?q=45 \-> foo4B54.html?q=45 (relative URI, default)
\-> foo.cgi?q=45 (original URL)
.IP \-K5
\-> http://www.foobar.com/folder/foo4B54.html?q=45 (transparent proxy URL)
.SS Shortcuts:
.IP \-\-mirror
<URLs> *make a mirror of site(s) (default)
<URLs> *make a mirror of site(s) (default)
.IP \-\-get
<URLs> get the files indicated, do not seek other URLs (\-qg)
<URLs> get the files indicated, do not seek other URLs (\-qg)
.IP \-\-list
<text file> add all URL located in this text file (\-%L)
<text file> add all URL located in this text file (\-%L)
.IP \-\-mirrorlinks
<URLs> mirror all links in 1st level pages (\-Y)
.IP \-\-testlinks
<URLs> test links in pages (\-r1p0C0I0t)
<URLs> test links in pages (\-r1p0C0I0t)
.IP \-\-spider
<URLs> spider site(s), to test links: reports Errors & Warnings (\-p0C0I0t)
<URLs> spider site(s), to test links: reports Errors & Warnings (\-p0C0I0t)
.IP \-\-testsite
<URLs> identical to \-\-spider
<URLs> identical to \-\-spider
.IP \-\-skeleton
<URLs> make a mirror, but gets only html files (\-p1)
<URLs> make a mirror, but gets only html files (\-p1)
.IP \-\-update
update a mirror, without confirmation (\-iC2)
update a mirror, without confirmation (\-iC2)
.IP \-\-continue
continue a mirror, without confirmation (\-iC1)
continue a mirror, without confirmation (\-iC1)
.IP \-\-catchurl
create a temporary proxy to capture an URL or a form post URL
create a temporary proxy to capture an URL or a form post URL
.IP \-\-clean
erase cache & log files
erase cache & log files
.IP \-\-http10
force http/1.0 requests (\-%h)
force http/1.0 requests (\-%h)
.SS Details: Option %W: External callbacks prototypes
.SS see htsdefines.h
.SH FILES
@@ -588,29 +490,28 @@ The system wide configuration file.
.SH ENVIRONMENT
.IP HOME
Is being used if you defined in /etc/httrack.conf the line
.I path ~/websites/#
.I path ~/websites/#
.SH DIAGNOSTICS
Errors/Warnings are reported to
Errors/Warnings are reported to
.I hts\-log.txt
by default, or to stderr if the
.I -v
.I \-v
option was specified.
.SH LIMITS
These are the principals limits of HTTrack for that moment. Note that we did not heard about any other utility
that would have solved them.
.SM - Several scripts generating complex filenames may not find them (ex: img.src='image'+a+Mobj.dst+'.gif')
.SM - Some java classes may not find some files on them (class included)
.SM - Cgi-bin links may not work properly in some cases (parameters needed). To avoid them: use filters like -*cgi-bin*
.SM
\- Several scripts generating complex filenames may not find them (ex: img.src='image'+a+Mobj.dst+'.gif')
.SM
\- Some java classes may not find some files on them (class included)
.SM
\- Cgi\-bin links may not work properly in some cases (parameters needed). To avoid them: use filters like \-*cgi\-bin*
.SH BUGS
Please reports bugs to
.B <bugs@httrack.com>.
Include a complete, self-contained example that will allow the bug to be reproduced, and say which version of httrack you are using. Do not forget to detail options used, OS version, and any other information you deem necessary.
.SH COPYRIGHT
Copyright (C) 1998-2024 Xavier Roche and other contributors
Copyright (C) 1998-2026 Xavier Roche and other contributors
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -631,8 +532,8 @@ The most recent released version of httrack can be found at:
.SH AUTHOR
Xavier Roche <roche@httrack.com>
.SH "SEE ALSO"
The
.B HTML
The
.B HTML
documentation (available online at
.B http://www.httrack.com/html/
) contains more detailed information. Please also refer to the

189
man/makeman.sh Executable file
View File

@@ -0,0 +1,189 @@
#!/bin/sh
#
# Regenerate man/httrack.1 from "httrack --help" and the top-level README.
#
# Usage:
# man/makeman.sh [HTTRACK_BINARY] > man/httrack.1
#
# HTTRACK_BINARY defaults to "httrack" (looked up in $PATH). Set SOURCE_DATE_EPOCH
# for a reproducible page date.
#
# The OPTIONS section is derived from --help by indentation, which is what makes
# it robust (no more prose turning into bogus options, see Debian #1061053):
# column 0 starting with "--" -> long option (.IP)
# column 0 otherwise -> section header (.SS)
# 1-2 leading spaces -> option (.IP)
# 3+ leading spaces -> continuation / sub-value (description text)
#
# This replaces the previous out-of-tree script that grepped the first token of
# every indented line and mislabelled continuations as options.
set -eu

# Binary whose --help output is parsed: first argument, else "httrack" from $PATH.
httrack=${1:-httrack}
script_dir=$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)
# TOPDIR and README can be overridden from the environment.
topdir=${TOPDIR:-$(CDPATH='' cd -- "$script_dir/.." && pwd)}
readme=${README:-$topdir/README}

# Reproducible date when SOURCE_DATE_EPOCH is set, otherwise today.
# "date -d @N" is tried first; "date -r N" is the fallback when that fails.
if [ -n "${SOURCE_DATE_EPOCH:-}" ]; then
    date_str=$(LC_ALL=C date -u -d "@${SOURCE_DATE_EPOCH}" '+%d %B %Y' 2>/dev/null ||
        LC_ALL=C date -u -r "${SOURCE_DATE_EPOCH}" '+%d %B %Y')
else
    date_str=$(LC_ALL=C date '+%d %B %Y')
fi
# Last space-separated word of the date string, i.e. the year.
year=${date_str##* }

# Capture the help text once. stderr of the binary is discarded, so report the
# failure explicitly instead of letting "set -e" abort without any message.
help=$("$httrack" --quiet --help 2>/dev/null) || {
    printf '%s\n' "makeman.sh: cannot run '$httrack --quiet --help'" >&2
    exit 1
}

# Line numbers of the three markers that delimit the OPTIONS and EXAMPLES
# ranges: first "General options", first line starting with "example", and
# last line starting with "HTTrack version".
st=$(printf '%s\n' "$help" | grep -n 'General options' | head -1 | cut -d: -f1)
en=$(printf '%s\n' "$help" | grep -nE '^example' | head -1 | cut -d: -f1)
en2=$(printf '%s\n' "$help" | grep -nE '^HTTrack version' | tail -1 | cut -d: -f1)

# A missing marker would otherwise yield an invalid sed range further down and
# a page with silently empty sections; fail loudly instead.
if [ -z "$st" ] || [ -z "$en" ] || [ -z "$en2" ]; then
    printf '%s\n' "makeman.sh: unexpected --help layout ('General options', 'example' or 'HTTrack version' line not found)" >&2
    exit 1
fi
# SYNOPSIS: each help line that carries a "(--long" name and no " #" marker
# (guru options) yields one "[ -x, --long ]" entry. The first word of the line
# is the short name; hyphens in both names are written as "\-" for roff.
synopsis=$(printf '%s\n' "$help" | awk '
    / #/ { next }
    match($0, /\(--[^ )]+/) {
        long_name = substr($0, RSTART + 3, RLENGTH - 3)
        short_name = $1
        gsub(/-/, "\\-", long_name)
        gsub(/-/, "\\-", short_name)
        printf "[ \\fB\\-%s, \\-\\-%s\\fR ]\n", short_name, long_name
    }')
# OPTIONS: indentation-driven classifier (see header comment).
#
# Input is the slice of the help text from line $st up to line $en - 2, fed to
# awk. Inside the awk program:
#   esc(s)   applies the backslash substitution and rewrites every "-" as "\-".
#   emit(s)  escapes s, prefixes "\&" when the result starts with "." or an
#            apostrophe (\x27), then prints it.
# Blank lines are skipped. For every other line the number of leading spaces
# (ind) selects the branch:
#   ind == 0 and line starts with "--"  -> ".IP <first word>", then the rest
#   ind == 0 otherwise                  -> ".SS <line>"
#   ind <= 2                            -> ".IP \-<first word>" with surrounding
#                                          apostrophes removed from the word,
#                                          then the rest when non-empty
#   anything deeper                     -> ".br" followed by the trimmed line
options=$(printf '%s\n' "$help" | sed -n "${st},$((en - 2))p" | awk '
function esc(s) {
gsub(/\\/, "\\\\", s)
gsub(/-/, "\\-", s)
return s
}
function emit(s) { # body text: escape + guard ./%apostrophe leaders
s = esc(s)
if (substr(s, 1, 1) == "." || substr(s, 1, 1) == "\x27") s = "\\&" s
print s
}
/^[ \t]*$/ { next }
{
match($0, /^ */); ind = RLENGTH
if (ind == 0 && substr($0, 1, 2) == "--") { # long option
opt = $1
rest = $0; sub(/^[^ \t]+[ \t]+/, "", rest)
printf ".IP %s\n", esc(opt)
emit(rest)
} else if (ind == 0) { # section header
printf ".SS %s\n", esc($0)
} else if (ind <= 2) { # option
opt = $1
gsub(/^\x27|\x27$/, "", opt) # drop quotes around tokens like %t
rest = $0; sub(/^[ \t]+[^ \t]+[ \t]*/, "", rest)
printf ".IP \\-%s\n", esc(opt)
if (rest != "") emit(rest)
} else { # continuation / sub-value
line = $0; sub(/^[ \t]+/, "", line)
print ".br"
emit(line)
}
}')
# EXAMPLES: the help lines from $en up to $en2 - 1. An "example: <cmd>" line
# becomes a ".TP" entry with the command in bold; a "means: <text>" line
# contributes its text when that text is not empty. Other lines are dropped.
examples=$(printf '%s\n' "$help" | sed -n "${en},$((en2 - 1))p" | awk '
    function esc(s) {
        gsub(/\\/, "\\\\", s)
        gsub(/-/, "\\-", s)
        return s
    }
    /^example:/ {
        cmd = $0
        sub(/^example:[ \t]*/, "", cmd)
        printf ".TP\n.B %s\n", esc(cmd)
        next
    }
    /^means:/ {
        text = $0
        sub(/^means:[ \t]*/, "", text)
        if (text != "")
            print esc(text)
    }
')
# LIMITS: README lines after the "Engine limits" heading, up to (not including)
# the "Advanced options" heading. Blank lines are dropped; a line starting
# with "-" is preceded by ".SM"; every kept line is escaped for roff.
limits=$(awk '
    function esc(s) {
        gsub(/\\/, "\\\\", s)
        gsub(/-/, "\\-", s)
        return s
    }
    /^Engine limits/ { in_limits = 1; next }
    /^Advanced options/ { in_limits = 0 }
    !in_limits || /^[ \t]*$/ { next }
    /^-/ { print ".SM" }
    { print esc($0) }
' "$readme")
# --- assemble the page: static prose in quoted heredocs, dynamic parts printf'd ---
# Every heredoc delimiter is quoted ('EOF'), so the roff text is written to
# stdout verbatim, with no shell expansion. The page goes to stdout; the
# caller redirects it (see the usage line in the header).
cat <<'EOF'
.\" Process this file with
.\" groff -man -Tascii httrack.1
.\"
.\" This file is generated by man/makeman.sh; do not edit by hand.
EOF
# Title line: carries the page date ($date_str).
printf '.TH httrack 1 "%s" "httrack website copier"\n' "$date_str"
cat <<'EOF'
.SH NAME
httrack \- offline browser : copy websites to a local directory
.SH SYNOPSIS
.B httrack [ url ]... [ \-filter ]... [ +filter ]...
EOF
# Generated "[ -x, --long ]" entries.
printf '%s\n' "$synopsis"
cat <<'EOF'
.SH DESCRIPTION
.B httrack
allows you to download a World Wide Web site from the Internet to a local directory, building recursively all directories, getting HTML, images, and other files from the server to your computer. HTTrack arranges the original site's relative link-structure. Simply open a page of the "mirrored" website in your browser, and you can browse the site from link to link, as if you were viewing it online. HTTrack can also update an existing mirrored site, and resume interrupted downloads.
.SH EXAMPLES
EOF
# Generated example/means pairs.
printf '%s\n' "$examples"
cat <<'EOF'
.SH OPTIONS
EOF
# Generated option list.
printf '%s\n' "$options"
cat <<'EOF'
.SH FILES
.I /etc/httrack.conf
.RS
The system wide configuration file.
.SH ENVIRONMENT
.IP HOME
Is being used if you defined in /etc/httrack.conf the line
.I path ~/websites/#
.SH DIAGNOSTICS
Errors/Warnings are reported to
.I hts\-log.txt
by default, or to stderr if the
.I \-v
option was specified.
.SH LIMITS
EOF
# Limits text extracted from the README.
printf '%s\n' "$limits"
cat <<'EOF'
.SH BUGS
Please reports bugs to
.B <bugs@httrack.com>.
Include a complete, self-contained example that will allow the bug to be reproduced, and say which version of httrack you are using. Do not forget to detail options used, OS version, and any other information you deem necessary.
.SH COPYRIGHT
EOF
# Copyright line: the end year is the year of the page date ($year).
printf 'Copyright (C) 1998-%s Xavier Roche and other contributors\n' "$year"
cat <<'EOF'
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
.SH AVAILABILITY
The most recent released version of httrack can be found at:
.B http://www.httrack.com
.SH AUTHOR
Xavier Roche <roche@httrack.com>
.SH "SEE ALSO"
The
.B HTML
documentation (available online at
.B http://www.httrack.com/html/
) contains more detailed information. Please also refer to the
.B httrack FAQ
(available online at
.B http://www.httrack.com/html/faq.html
)
EOF

39
tests/02_manpage-regen.test Executable file
View File

@@ -0,0 +1,39 @@
#!/bin/bash
#
# The committed man/httrack.1 must match what man/makeman.sh produces from the
# current "httrack --help" output. This catches a --help change that was not
# followed by "make -C man regen-man".
#
# Exit status: 0 when the pages match, 77 (skip) when the generator or the
# httrack binary is unavailable, 1 on any mismatch or failure.
: "${top_srcdir:=..}"
gen="$top_srcdir/man/makeman.sh"
committed="$top_srcdir/man/httrack.1"

# Need the generator and a runnable httrack.
test -f "$gen" || {
    echo "makeman.sh not found; skipping" >&2
    exit 77
}
command -v httrack >/dev/null 2>&1 || {
    echo "httrack not in PATH; skipping" >&2
    exit 77
}
# A missing committed page is a failure in its own right; report it as such
# rather than as an "out of date" diff against nothing.
test -r "$committed" || {
    echo "man/httrack.1 not found or unreadable: $committed" >&2
    exit 1
}

tmp=$(mktemp) || exit 1
err=$(mktemp) || {
    rm -f "$tmp"
    exit 1
}
trap 'rm -f "$tmp" "$err"' EXIT

# Keep the generator's stderr so a failure can actually be diagnosed.
README="$top_srcdir/README" bash "$gen" httrack >"$tmp" 2>"$err" || {
    echo "makeman.sh failed" >&2
    cat "$err" >&2
    exit 1
}

# Ignore the two intentionally date-dependent lines (page date, copyright year).
strip_volatile() { grep -vE '^\.TH httrack |^Copyright \(C\) 1998-'; }

# Run the comparison once and keep its output for the report.
delta=$(diff <(strip_volatile <"$committed") <(strip_volatile <"$tmp")) && exit 0

echo "man/httrack.1 is out of date. Regenerate with: make -C man regen-man" >&2
printf '%s\n' "$delta" | head -40 >&2
exit 1

View File

@@ -6,8 +6,9 @@ TESTS_ENVIRONMENT += PATH=$(top_builddir)/src$(PATH_SEPARATOR)$$PATH
### TESTS_ENVIRONMENT += $(SHLIBPATH_VAR)="$(top_builddir)/src/$(LT_CV_OBJDIR)$${$(SHLIBPATH_VAR):+$(PATH_SEPARATOR)}$$$(SHLIBPATH_VAR)"
TESTS_ENVIRONMENT += ONLINE_UNIT_TESTS=$(ONLINE_UNIT_TESTS)
TESTS_ENVIRONMENT += HTTPS_SUPPORT=$(HTTPS_SUPPORT)
TESTS_ENVIRONMENT += top_srcdir=$(top_srcdir)
TEST_EXTENSIONS = .test
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-entities.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-simplify.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-entities.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-simplify.test 02_manpage-regen.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
CLEANFILES = check-network_sh.cache

View File

@@ -470,9 +470,9 @@ EXTRA_DIST = $(TESTS) crawl-test.sh run-all-tests.sh check-network.sh
### TESTS_ENVIRONMENT += $(SHLIBPATH_VAR)="$(top_builddir)/src/$(LT_CV_OBJDIR)$${$(SHLIBPATH_VAR):+$(PATH_SEPARATOR)}$$$(SHLIBPATH_VAR)"
TESTS_ENVIRONMENT = PATH=$(top_builddir)/src$(PATH_SEPARATOR)$$PATH \
ONLINE_UNIT_TESTS=$(ONLINE_UNIT_TESTS) \
HTTPS_SUPPORT=$(HTTPS_SUPPORT)
HTTPS_SUPPORT=$(HTTPS_SUPPORT) top_srcdir=$(top_srcdir)
TEST_EXTENSIONS = .test
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-entities.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-simplify.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-entities.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-simplify.test 02_manpage-regen.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
CLEANFILES = check-network_sh.cache
all: all-am

195
tools/mkdeb.sh Executable file
View File

@@ -0,0 +1,195 @@
#!/usr/bin/env bash
#
# Build the httrack Debian packages from a clean, committed source export.
#
# It exports HEAD (plus the coucal submodule) into a scratch directory, refreshes
# the build system and man page, builds the upstream tarball, overlays debian/,
# and runs debuild (which builds, runs lintian, and signs). Nothing is built in
# the working tree, and no hard-coded paths are used.
#
# Output (in --outdir, default <repo>/dist):
# httrack_<ver>.orig.tar.gz upstream tarball (Debian orig name)
# httrack_<ver>-*.dsc / .debian.tar.* source package
# *.deb binary packages
# *.changes / *.buildinfo build metadata
# httrack_<ver>.orig.tar.gz.{asc,md5,sha1} release artifacts (unless disabled)
#
# Usage:
# tools/mkdeb.sh [options]
#
# Options:
# -k, --key KEYID GPG key for signing (default: $DEBSIGN_KEYID)
# -o, --outdir DIR output directory (default: <repo>/dist)
# -s, --source-only build only the source package
# -u, --unsigned do not sign anything (implies no release sigs)
# --no-release-artifacts skip the orig tarball .asc/.md5/.sha1
# -h, --help show this help
#
# SOURCE_DATE_EPOCH is honored for reproducible output.
# Strict mode: stop on a failing command (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Script name with the directory part of $0 removed.
readonly PROGNAME=${0##*/}
# Scratch dir, global so the EXIT trap can see it.
scratch=""
# Print "<program>: error: <message>" on stderr and terminate with status 1.
# All arguments are joined into the message.
die() {
    {
        printf '%s: error: ' "$PROGNAME"
        printf '%s\n' "$*"
    } >&2
    exit 1
}
# Print a "==> <message>" progress line on stderr; arguments are joined into
# the message.
info() {
    local message="$*"
    printf >&2 '==> %s\n' "$message"
}
# Print the header comment of this script as help text: lines 2 up to (not
# including) the "set -euo" line, with the leading "#" and one optional
# following space removed.
usage() {
    sed -n \
        -e '2,/^set -euo/{' \
        -e '/^set -euo/d' \
        -e 's/^# \{0,1\}//' \
        -e 'p' \
        -e '}' \
        "$0"
}
# Check that every named tool can be found by "command -v"; abort through
# die() on the first one that cannot.
need() {
    local candidate
    for candidate; do
        if ! command -v "$candidate" >/dev/null 2>&1; then
            die "required tool not found: $candidate"
        fi
    done
}
# Parse the command line, export committed HEAD into a scratch directory,
# build the upstream tarball there, run debuild, and collect the results in
# the output directory.
#
# Arguments: the script's command-line options (see the header usage text).
# Reads:     DEBSIGN_KEYID (default signing key), TMPDIR (scratch location).
# Exits:     through die() on an unknown option, a missing option argument, a
#            missing tool or signing key, or a missing build product; "set -e"
#            aborts on any failing build step. The scratch directory is
#            removed by the EXIT trap.
main() {
    local key=${DEBSIGN_KEYID:-}
    local outdir=""
    local source_only=0
    local unsigned=0
    local release_artifacts=1

    while [[ $# -gt 0 ]]; do
        case $1 in
            -k | --key)
                [[ $# -ge 2 ]] || die "missing argument for $1"
                key=$2
                shift 2
                ;;
            -o | --outdir)
                [[ $# -ge 2 ]] || die "missing argument for $1"
                outdir=$2
                shift 2
                ;;
            -s | --source-only)
                source_only=1
                shift
                ;;
            -u | --unsigned)
                unsigned=1
                shift
                ;;
            --no-release-artifacts)
                release_artifacts=0
                shift
                ;;
            -h | --help)
                usage
                exit 0
                ;;
            *)
                die "unknown option: $1 (try --help)"
                ;;
        esac
    done

    need git autoreconf debuild dcmd
    # gpg and a key are only required when something is going to be signed.
    if [[ $unsigned -eq 0 ]]; then
        need gpg
        [[ -n $key ]] || die "no signing key (pass --key or set DEBSIGN_KEYID, or use --unsigned)"
    fi

    local repo
    repo=$(git rev-parse --show-toplevel) || die "not inside a git repository"
    : "${outdir:=$repo/dist}"
    mkdir -p "$outdir"
    # Make the path absolute: later steps run from other directories.
    outdir=$(cd "$outdir" && pwd)

    scratch=$(mktemp -d "${TMPDIR:-/tmp}/httrack-mkdeb.XXXXXX")
    trap 'rm -rf -- "$scratch"' EXIT

    # Pristine export of committed HEAD plus the coucal submodule.
    info "exporting committed sources"
    local export_dir=$scratch/src
    mkdir -p "$export_dir"
    git -C "$repo" archive --format=tar HEAD | tar -x -C "$export_dir"
    git -C "$repo/src/coucal" archive --format=tar --prefix=src/coucal/ HEAD |
        tar -x -C "$export_dir"

    # Refresh build system and man page, then build and validate the tarball.
    # The subshell keeps the directory change local to this step.
    info "regenerating build system and man page"
    (
        cd "$export_dir"
        autoreconf -fi
        ./configure --quiet
        make -s -j"$(nproc)"
        make -s -C man regen-man
        info "running test suite"
        make -s check
        # Build the tarball from a clean tree so no object files leak into it.
        make -s clean
        make -s dist
    )

    # The version is taken from the name of the tarball "make dist" produced.
    local tarball ver
    local -a tarballs
    shopt -s nullglob
    tarballs=("$export_dir"/httrack-*.tar.gz)
    shopt -u nullglob
    [[ ${#tarballs[@]} -ge 1 ]] || die "make dist produced no tarball"
    tarball=${tarballs[0]##*/}
    ver=${tarball#httrack-}
    ver=${ver%.tar.gz}
    info "version $ver"

    # 3.0 (quilt): orig tarball is upstream-only; debian/ is overlaid on top.
    local orig=httrack_${ver}.orig.tar.gz
    cp -- "$export_dir/$tarball" "$scratch/$orig"
    (
        cd "$scratch"
        tar -xf "$orig"
        cp -a "$export_dir/debian" "httrack-$ver/debian"
    )

    # Build (debuild also runs lintian and signs).
    local -a debuild_opts=(--lintian-opts -I -i)
    local -a build_opts=()
    [[ $source_only -eq 1 ]] && build_opts+=(-S)
    if [[ $unsigned -eq 1 ]]; then
        build_opts+=(-us -uc)
    else
        build_opts+=("-k$key")
    fi
    info "building packages with debuild"
    (
        cd "$scratch/httrack-$ver"
        debuild "${build_opts[@]}" "${debuild_opts[@]}"
    )

    # Collect every file the .changes references (orig, dsc, debs, ddebs, buildinfo).
    info "collecting artifacts into $outdir"
    local -a changes
    shopt -s nullglob
    changes=("$scratch"/*.changes)
    shopt -u nullglob
    [[ ${#changes[@]} -ge 1 ]] || die "debuild produced no .changes file"
    dcmd cp -- "${changes[@]}" "$outdir/"
    # The .changes lists the orig tarball only when dpkg-genchanges chooses to
    # include it (by default: when the upstream version differs from the one in
    # the previous changelog entry). Copy it explicitly so it is always part of
    # the output and is present for the signing step below.
    cp -- "$scratch/$orig" "$outdir/"

    # Release artifacts for the upstream tarball (detached sig + checksums).
    if [[ $release_artifacts -eq 1 && $unsigned -eq 0 ]]; then
        info "signing upstream tarball"
        (
            cd "$outdir"
            gpg --armor --detach-sign --yes -u "$key" -- "$orig"
            md5sum -- "$orig" >"$orig.md5"
            sha1sum -- "$orig" >"$orig.sha1"
        )
    fi

    info "done. artifacts in $outdir:"
    ls -1 "$outdir" >&2
}
main "$@"