mirror of
https://github.com/xroche/httrack.git
synced 2026-06-10 04:13:38 +03:00
Compare commits
7 Commits
fix/openss
...
feat/in-tr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3220085797 | ||
|
|
ba41415c15 | ||
|
|
49a6698ca5 | ||
|
|
0de1b405e4 | ||
|
|
dc50f25420 | ||
|
|
7bab8263a1 | ||
|
|
000017bce7 |
11
Makefile.am
11
Makefile.am
@@ -5,4 +5,13 @@ ACLOCAL_AMFLAGS = -I m4
|
||||
|
||||
EXTRA_DIST = INSTALL.Linux \
|
||||
gpl-fr.txt license.txt greetings.txt history.txt \
|
||||
httrack-doc.html lang.def README.md
|
||||
httrack-doc.html lang.def README.md tools/mkdeb.sh
|
||||
|
||||
# Build the signed Debian packages from a clean source export. Pass the signing
|
||||
# key and other options through DEB_FLAGS, e.g.:
|
||||
# make deb DEB_FLAGS="--key BB71C7E6CB1AD8FAF53FE42A60C3AA7180598EFB"
|
||||
# See tools/mkdeb.sh --help for all options.
|
||||
DEB_FLAGS =
|
||||
deb:
|
||||
$(SHELL) $(top_srcdir)/tools/mkdeb.sh $(DEB_FLAGS)
|
||||
.PHONY: deb
|
||||
|
||||
13
Makefile.in
13
Makefile.in
@@ -160,7 +160,7 @@ am__define_uniq_tagged_files = \
|
||||
DIST_SUBDIRS = $(SUBDIRS)
|
||||
am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/config.h.in AUTHORS \
|
||||
COPYING ChangeLog INSTALL NEWS README compile config.guess \
|
||||
config.sub depcomp install-sh ltmain.sh missing
|
||||
config.sub install-sh ltmain.sh missing
|
||||
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
||||
distdir = $(PACKAGE)-$(VERSION)
|
||||
top_distdir = $(distdir)
|
||||
@@ -349,8 +349,14 @@ SUBDIRS = src man m4 libtest templates lang html tests
|
||||
ACLOCAL_AMFLAGS = -I m4
|
||||
EXTRA_DIST = INSTALL.Linux \
|
||||
gpl-fr.txt license.txt greetings.txt history.txt \
|
||||
httrack-doc.html lang.def README.md
|
||||
httrack-doc.html lang.def README.md tools/mkdeb.sh
|
||||
|
||||
|
||||
# Build the signed Debian packages from a clean source export. Pass the signing
|
||||
# key and other options through DEB_FLAGS, e.g.:
|
||||
# make deb DEB_FLAGS="--key BB71C7E6CB1AD8FAF53FE42A60C3AA7180598EFB"
|
||||
# See tools/mkdeb.sh --help for all options.
|
||||
DEB_FLAGS =
|
||||
all: config.h
|
||||
$(MAKE) $(AM_MAKEFLAGS) all-recursive
|
||||
|
||||
@@ -841,6 +847,9 @@ uninstall-am:
|
||||
|
||||
.PRECIOUS: Makefile
|
||||
|
||||
deb:
|
||||
$(SHELL) $(top_srcdir)/tools/mkdeb.sh $(DEB_FLAGS)
|
||||
.PHONY: deb
|
||||
|
||||
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||
|
||||
2
debian/changelog
vendored
2
debian/changelog
vendored
@@ -11,7 +11,7 @@ httrack (3.49.6-1) unstable; urgency=medium
|
||||
* Updated to 3.49.6 (3.49-6)
|
||||
Fixed CVE-2017-14062
|
||||
|
||||
-- Xavier Roche <roche@proliant.localnet> Tue, 11 Mar 2025 19:43:39 +0100
|
||||
-- Xavier Roche <xavier@debian.org> Tue, 11 Mar 2025 19:43:39 +0100
|
||||
|
||||
httrack (3.49.5-1) unstable; urgency=medium
|
||||
|
||||
|
||||
@@ -2,4 +2,14 @@
|
||||
# man_MANS = httrack.1
|
||||
man_MANS = httrack.1 webhttrack.1 htsserver.1 proxytrack.1
|
||||
|
||||
EXTRA_DIST = $(man_MANS)
|
||||
EXTRA_DIST = $(man_MANS) makeman.sh
|
||||
|
||||
# Regenerate httrack.1 from the "httrack --help" output and the top-level
|
||||
# README. Run by hand after changing options or help text:
|
||||
# make -C man regen-man
|
||||
# The generated page is committed; this target only refreshes it. Honors
|
||||
# SOURCE_DATE_EPOCH for a reproducible date.
|
||||
regen-man: makeman.sh $(top_builddir)/src/httrack$(EXEEXT)
|
||||
README='$(top_srcdir)/README' $(SHELL) $(srcdir)/makeman.sh \
|
||||
'$(top_builddir)/src/httrack$(EXEEXT)' > $(srcdir)/httrack.1
|
||||
.PHONY: regen-man
|
||||
|
||||
@@ -297,7 +297,7 @@ top_srcdir = @top_srcdir@
|
||||
|
||||
# man_MANS = httrack.1
|
||||
man_MANS = httrack.1 webhttrack.1 htsserver.1 proxytrack.1
|
||||
EXTRA_DIST = $(man_MANS)
|
||||
EXTRA_DIST = $(man_MANS) makeman.sh
|
||||
all: all-am
|
||||
|
||||
.SUFFIXES:
|
||||
@@ -541,6 +541,16 @@ uninstall-man: uninstall-man1
|
||||
.PRECIOUS: Makefile
|
||||
|
||||
|
||||
# Regenerate httrack.1 from the "httrack --help" output and the top-level
|
||||
# README. Run by hand after changing options or help text:
|
||||
# make -C man regen-man
|
||||
# The generated page is committed; this target only refreshes it. Honors
|
||||
# SOURCE_DATE_EPOCH for a reproducible date.
|
||||
regen-man: makeman.sh $(top_builddir)/src/httrack$(EXEEXT)
|
||||
README='$(top_srcdir)/README' $(SHELL) $(srcdir)/makeman.sh \
|
||||
'$(top_builddir)/src/httrack$(EXEEXT)' > $(srcdir)/httrack.1
|
||||
.PHONY: regen-man
|
||||
|
||||
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||
.NOEXPORT:
|
||||
|
||||
487
man/httrack.1
487
man/httrack.1
@@ -1,218 +1,122 @@
|
||||
.\" Process this file with
|
||||
.\" groff -man -Tascii httrack.1
|
||||
.\"
|
||||
.TH httrack 1 "27 January 2024" "httrack website copier"
|
||||
.\" This file is generated by man/makeman.sh; do not edit by hand.
|
||||
.TH httrack 1 "07 June 2026" "httrack website copier"
|
||||
.SH NAME
|
||||
httrack \- offline browser : copy websites to a local directory
|
||||
.SH SYNOPSIS
|
||||
.B httrack [ url ]... [ \-filter ]... [ +filter ]... [
|
||||
.B \-O, \-\-path
|
||||
] [
|
||||
.B \-w, \-\-mirror
|
||||
] [
|
||||
.B \-W, \-\-mirror\-wizard
|
||||
] [
|
||||
.B \-g, \-\-get\-files
|
||||
] [
|
||||
.B \-i, \-\-continue
|
||||
] [
|
||||
.B \-Y, \-\-mirrorlinks
|
||||
] [
|
||||
.B \-P, \-\-proxy
|
||||
] [
|
||||
.B \-%f, \-\-httpproxy\-ftp[=N]
|
||||
] [
|
||||
.B \-%b, \-\-bind
|
||||
] [
|
||||
.B \-rN, \-\-depth[=N]
|
||||
] [
|
||||
.B \-%eN, \-\-ext\-depth[=N]
|
||||
] [
|
||||
.B \-mN, \-\-max\-files[=N]
|
||||
] [
|
||||
.B \-MN, \-\-max\-size[=N]
|
||||
] [
|
||||
.B \-EN, \-\-max\-time[=N]
|
||||
] [
|
||||
.B \-AN, \-\-max\-rate[=N]
|
||||
] [
|
||||
.B \-%cN, \-\-connection\-per\-second[=N]
|
||||
] [
|
||||
.B \-GN, \-\-max\-pause[=N]
|
||||
] [
|
||||
.B \-cN, \-\-sockets[=N]
|
||||
] [
|
||||
.B \-TN, \-\-timeout[=N]
|
||||
] [
|
||||
.B \-RN, \-\-retries[=N]
|
||||
] [
|
||||
.B \-JN, \-\-min\-rate[=N]
|
||||
] [
|
||||
.B \-HN, \-\-host\-control[=N]
|
||||
] [
|
||||
.B \-%P, \-\-extended\-parsing[=N]
|
||||
] [
|
||||
.B \-n, \-\-near
|
||||
] [
|
||||
.B \-t, \-\-test
|
||||
] [
|
||||
.B \-%L, \-\-list
|
||||
] [
|
||||
.B \-%S, \-\-urllist
|
||||
] [
|
||||
.B \-NN, \-\-structure[=N]
|
||||
] [
|
||||
.B \-%D, \-\-cached\-delayed\-type\-check
|
||||
] [
|
||||
.B \-%M, \-\-mime\-html
|
||||
] [
|
||||
.B \-LN, \-\-long\-names[=N]
|
||||
] [
|
||||
.B \-KN, \-\-keep\-links[=N]
|
||||
] [
|
||||
.B \-x, \-\-replace\-external
|
||||
] [
|
||||
.B \-%x, \-\-disable\-passwords
|
||||
] [
|
||||
.B \-%q, \-\-include\-query\-string
|
||||
] [
|
||||
.B \-o, \-\-generate\-errors
|
||||
] [
|
||||
.B \-X, \-\-purge\-old[=N]
|
||||
] [
|
||||
.B \-%p, \-\-preserve
|
||||
] [
|
||||
.B \-%T, \-\-utf8\-conversion
|
||||
] [
|
||||
.B \-bN, \-\-cookies[=N]
|
||||
] [
|
||||
.B \-u, \-\-check\-type[=N]
|
||||
] [
|
||||
.B \-j, \-\-parse\-java[=N]
|
||||
] [
|
||||
.B \-sN, \-\-robots[=N]
|
||||
] [
|
||||
.B \-%h, \-\-http\-10
|
||||
] [
|
||||
.B \-%k, \-\-keep\-alive
|
||||
] [
|
||||
.B \-%B, \-\-tolerant
|
||||
] [
|
||||
.B \-%s, \-\-updatehack
|
||||
] [
|
||||
.B \-%u, \-\-urlhack
|
||||
] [
|
||||
.B \-%A, \-\-assume
|
||||
] [
|
||||
.B \-@iN, \-\-protocol[=N]
|
||||
] [
|
||||
.B \-%w, \-\-disable\-module
|
||||
] [
|
||||
.B \-F, \-\-user\-agent
|
||||
] [
|
||||
.B \-%R, \-\-referer
|
||||
] [
|
||||
.B \-%E, \-\-from
|
||||
] [
|
||||
.B \-%F, \-\-footer
|
||||
] [
|
||||
.B \-%l, \-\-language
|
||||
] [
|
||||
.B \-%a, \-\-accept
|
||||
] [
|
||||
.B \-%X, \-\-headers
|
||||
] [
|
||||
.B \-C, \-\-cache[=N]
|
||||
] [
|
||||
.B \-k, \-\-store\-all\-in\-cache
|
||||
] [
|
||||
.B \-%n, \-\-do\-not\-recatch
|
||||
] [
|
||||
.B \-%v, \-\-display
|
||||
] [
|
||||
.B \-Q, \-\-do\-not\-log
|
||||
] [
|
||||
.B \-q, \-\-quiet
|
||||
] [
|
||||
.B \-z, \-\-extra\-log
|
||||
] [
|
||||
.B \-Z, \-\-debug\-log
|
||||
] [
|
||||
.B \-v, \-\-verbose
|
||||
] [
|
||||
.B \-f, \-\-file\-log
|
||||
] [
|
||||
.B \-f2, \-\-single\-log
|
||||
] [
|
||||
.B \-I, \-\-index
|
||||
] [
|
||||
.B \-%i, \-\-build\-top\-index
|
||||
] [
|
||||
.B \-%I, \-\-search\-index
|
||||
] [
|
||||
.B \-pN, \-\-priority[=N]
|
||||
] [
|
||||
.B \-S, \-\-stay\-on\-same\-dir
|
||||
] [
|
||||
.B \-D, \-\-can\-go\-down
|
||||
] [
|
||||
.B \-U, \-\-can\-go\-up
|
||||
] [
|
||||
.B \-B, \-\-can\-go\-up\-and\-down
|
||||
] [
|
||||
.B \-a, \-\-stay\-on\-same\-address
|
||||
] [
|
||||
.B \-d, \-\-stay\-on\-same\-domain
|
||||
] [
|
||||
.B \-l, \-\-stay\-on\-same\-tld
|
||||
] [
|
||||
.B \-e, \-\-go\-everywhere
|
||||
] [
|
||||
.B \-%H, \-\-debug\-headers
|
||||
] [
|
||||
.B \-%!, \-\-disable\-security\-limits
|
||||
] [
|
||||
.B \-V, \-\-userdef\-cmd
|
||||
] [
|
||||
.B \-%W, \-\-callback
|
||||
] [
|
||||
.B \-K, \-\-keep\-links[=N]
|
||||
] [
|
||||
.B
|
||||
.B httrack [ url ]... [ \-filter ]... [ +filter ]...
|
||||
[ \fB\-O, \-\-path\fR ]
|
||||
[ \fB\-w, \-\-mirror\fR ]
|
||||
[ \fB\-W, \-\-mirror\-wizard\fR ]
|
||||
[ \fB\-g, \-\-get\-files\fR ]
|
||||
[ \fB\-i, \-\-continue\fR ]
|
||||
[ \fB\-Y, \-\-mirrorlinks\fR ]
|
||||
[ \fB\-P, \-\-proxy\fR ]
|
||||
[ \fB\-%f, \-\-httpproxy\-ftp[=N]\fR ]
|
||||
[ \fB\-%b, \-\-bind\fR ]
|
||||
[ \fB\-rN, \-\-depth[=N]\fR ]
|
||||
[ \fB\-%eN, \-\-ext\-depth[=N]\fR ]
|
||||
[ \fB\-mN, \-\-max\-files[=N]\fR ]
|
||||
[ \fB\-MN, \-\-max\-size[=N]\fR ]
|
||||
[ \fB\-EN, \-\-max\-time[=N]\fR ]
|
||||
[ \fB\-AN, \-\-max\-rate[=N]\fR ]
|
||||
[ \fB\-%cN, \-\-connection\-per\-second[=N]\fR ]
|
||||
[ \fB\-GN, \-\-max\-pause[=N]\fR ]
|
||||
[ \fB\-cN, \-\-sockets[=N]\fR ]
|
||||
[ \fB\-TN, \-\-timeout[=N]\fR ]
|
||||
[ \fB\-RN, \-\-retries[=N]\fR ]
|
||||
[ \fB\-JN, \-\-min\-rate[=N]\fR ]
|
||||
[ \fB\-HN, \-\-host\-control[=N]\fR ]
|
||||
[ \fB\-%P, \-\-extended\-parsing[=N]\fR ]
|
||||
[ \fB\-n, \-\-near\fR ]
|
||||
[ \fB\-t, \-\-test\fR ]
|
||||
[ \fB\-%L, \-\-list\fR ]
|
||||
[ \fB\-%S, \-\-urllist\fR ]
|
||||
[ \fB\-NN, \-\-structure[=N]\fR ]
|
||||
[ \fB\-%D, \-\-cached\-delayed\-type\-check\fR ]
|
||||
[ \fB\-%M, \-\-mime\-html\fR ]
|
||||
[ \fB\-LN, \-\-long\-names[=N]\fR ]
|
||||
[ \fB\-KN, \-\-keep\-links[=N]\fR ]
|
||||
[ \fB\-x, \-\-replace\-external\fR ]
|
||||
[ \fB\-%x, \-\-disable\-passwords\fR ]
|
||||
[ \fB\-%q, \-\-include\-query\-string\fR ]
|
||||
[ \fB\-o, \-\-generate\-errors\fR ]
|
||||
[ \fB\-X, \-\-purge\-old[=N]\fR ]
|
||||
[ \fB\-%p, \-\-preserve\fR ]
|
||||
[ \fB\-%T, \-\-utf8\-conversion\fR ]
|
||||
[ \fB\-bN, \-\-cookies[=N]\fR ]
|
||||
[ \fB\-u, \-\-check\-type[=N]\fR ]
|
||||
[ \fB\-j, \-\-parse\-java[=N]\fR ]
|
||||
[ \fB\-sN, \-\-robots[=N]\fR ]
|
||||
[ \fB\-%h, \-\-http\-10\fR ]
|
||||
[ \fB\-%k, \-\-keep\-alive\fR ]
|
||||
[ \fB\-%B, \-\-tolerant\fR ]
|
||||
[ \fB\-%s, \-\-updatehack\fR ]
|
||||
[ \fB\-%u, \-\-urlhack\fR ]
|
||||
[ \fB\-%A, \-\-assume\fR ]
|
||||
[ \fB\-@iN, \-\-protocol[=N]\fR ]
|
||||
[ \fB\-%w, \-\-disable\-module\fR ]
|
||||
[ \fB\-F, \-\-user\-agent\fR ]
|
||||
[ \fB\-%R, \-\-referer\fR ]
|
||||
[ \fB\-%E, \-\-from\fR ]
|
||||
[ \fB\-%F, \-\-footer\fR ]
|
||||
[ \fB\-%l, \-\-language\fR ]
|
||||
[ \fB\-%a, \-\-accept\fR ]
|
||||
[ \fB\-%X, \-\-headers\fR ]
|
||||
[ \fB\-C, \-\-cache[=N]\fR ]
|
||||
[ \fB\-k, \-\-store\-all\-in\-cache\fR ]
|
||||
[ \fB\-%n, \-\-do\-not\-recatch\fR ]
|
||||
[ \fB\-%v, \-\-display\fR ]
|
||||
[ \fB\-Q, \-\-do\-not\-log\fR ]
|
||||
[ \fB\-q, \-\-quiet\fR ]
|
||||
[ \fB\-z, \-\-extra\-log\fR ]
|
||||
[ \fB\-Z, \-\-debug\-log\fR ]
|
||||
[ \fB\-v, \-\-verbose\fR ]
|
||||
[ \fB\-f, \-\-file\-log\fR ]
|
||||
[ \fB\-f2, \-\-single\-log\fR ]
|
||||
[ \fB\-I, \-\-index\fR ]
|
||||
[ \fB\-%i, \-\-build\-top\-index\fR ]
|
||||
[ \fB\-%I, \-\-search\-index\fR ]
|
||||
[ \fB\-pN, \-\-priority[=N]\fR ]
|
||||
[ \fB\-S, \-\-stay\-on\-same\-dir\fR ]
|
||||
[ \fB\-D, \-\-can\-go\-down\fR ]
|
||||
[ \fB\-U, \-\-can\-go\-up\fR ]
|
||||
[ \fB\-B, \-\-can\-go\-up\-and\-down\fR ]
|
||||
[ \fB\-a, \-\-stay\-on\-same\-address\fR ]
|
||||
[ \fB\-d, \-\-stay\-on\-same\-domain\fR ]
|
||||
[ \fB\-l, \-\-stay\-on\-same\-tld\fR ]
|
||||
[ \fB\-e, \-\-go\-everywhere\fR ]
|
||||
[ \fB\-%H, \-\-debug\-headers\fR ]
|
||||
[ \fB\-%!, \-\-disable\-security\-limits\fR ]
|
||||
[ \fB\-V, \-\-userdef\-cmd\fR ]
|
||||
[ \fB\-%W, \-\-callback\fR ]
|
||||
[ \fB\-K, \-\-keep\-links[=N]\fR ]
|
||||
.SH DESCRIPTION
|
||||
.B httrack
|
||||
allows you to download a World Wide Web site from the Internet to a local directory, building recursively all directories, getting HTML, images, and other files from the server to your computer. HTTrack arranges the original site's relative link-structure. Simply open a page of the "mirrored" website in your browser, and you can browse the site from link to link, as if you were viewing it online. HTTrack can also update an existing mirrored site, and resume interrupted downloads.
|
||||
.SH EXAMPLES
|
||||
.TP
|
||||
.B httrack www.someweb.com/bob/
|
||||
mirror site www.someweb.com/bob/ and only this site
|
||||
mirror site www.someweb.com/bob/ and only this site
|
||||
.TP
|
||||
.B httrack www.someweb.com/bob/ www.anothertest.com/mike/ +*.com/*.jpg \-mime:application/*
|
||||
mirror the two sites together (with shared links) and accept any .jpg files on .com sites
|
||||
mirror the two sites together (with shared links) and accept any .jpg files on .com sites
|
||||
.TP
|
||||
.B httrack www.someweb.com/bob/bobby.html +* \-r6
|
||||
means get all files starting from bobby.html, with 6 link\-depth, and possibility of going everywhere on the web
|
||||
.TP
|
||||
.B httrack www.someweb.com/bob/bobby.html \-\-spider \-P proxy.myhost.com:8080
|
||||
runs the spider on www.someweb.com/bob/bobby.html using a proxy
|
||||
.TP
|
||||
.B httrack \-\-update
|
||||
updates a mirror in the current folder
|
||||
.TP
|
||||
.B httrack
|
||||
will bring you to the interactive mode
|
||||
.TP
|
||||
.B httrack \-\-continue
|
||||
continues a mirror in the current folder
|
||||
.SH OPTIONS
|
||||
.SS General options:
|
||||
.IP \-O
|
||||
path for mirror/logfiles+cache (\-O path
|
||||
mirror[,path
|
||||
cache
|
||||
and
|
||||
logfiles]) (\-\-path <param>)
|
||||
|
||||
path for mirror/logfiles+cache (\-O path_mirror[,path_cache_and_logfiles]) (\-\-path <param>)
|
||||
.SS Action options:
|
||||
.IP \-w
|
||||
*mirror web sites (\-\-mirror)
|
||||
@@ -224,15 +128,13 @@ just get files (saved in the current directory) (\-\-get\-files)
|
||||
continue an interrupted mirror using the cache (\-\-continue)
|
||||
.IP \-Y
|
||||
mirror ALL links located in the first level pages (mirror links) (\-\-mirrorlinks)
|
||||
|
||||
.SS Proxy options:
|
||||
.IP \-P
|
||||
proxy use (\-P proxy:port or \-P user:pass@proxy:port) (\-\-proxy <param>)
|
||||
.IP \-%f
|
||||
*use proxy for ftp (f0 don t use) (\-\-httpproxy\-ftp[=N])
|
||||
*use proxy for ftp (f0 don't use) (\-\-httpproxy\-ftp[=N])
|
||||
.IP \-%b
|
||||
use this local hostname to make/send requests (\-%b hostname) (\-\-bind <param>)
|
||||
|
||||
.SS Limits options:
|
||||
.IP \-rN
|
||||
set the mirror depth to N (* r9999) (\-\-depth[=N])
|
||||
@@ -252,7 +154,6 @@ maximum transfer rate in bytes/seconds (1000=1KB/s max) (\-\-max\-rate[=N])
|
||||
maximum number of connections/seconds (*%c10) (\-\-connection\-per\-second[=N])
|
||||
.IP \-GN
|
||||
pause transfer if N bytes reached, and wait until lock file is deleted (\-\-max\-pause[=N])
|
||||
|
||||
.SS Flow control:
|
||||
.IP \-cN
|
||||
number of multiple connections (*c8) (\-\-sockets[=N])
|
||||
@@ -264,28 +165,26 @@ number of retries, in case of timeout or non\-fatal errors (*R1) (\-\-retries[=N
|
||||
traffic jam control, minimum transfer rate (bytes/seconds) tolerated for a link (\-\-min\-rate[=N])
|
||||
.IP \-HN
|
||||
host is abandoned if: 0=never, 1=timeout, 2=slow, 3=timeout or slow (\-\-host\-control[=N])
|
||||
|
||||
.SS Links options:
|
||||
.IP \-%P
|
||||
*extended parsing, attempt to parse all links, even in unknown tags or Javascript (%P0 don t use) (\-\-extended\-parsing[=N])
|
||||
*extended parsing, attempt to parse all links, even in unknown tags or Javascript (%P0 don't use) (\-\-extended\-parsing[=N])
|
||||
.IP \-n
|
||||
get non\-html files near an html file (ex: an image located outside) (\-\-near)
|
||||
get non\-html files 'near' an html file (ex: an image located outside) (\-\-near)
|
||||
.IP \-t
|
||||
test all URLs (even forbidden ones) (\-\-test)
|
||||
.IP \-%L
|
||||
<file> add all URL located in this text file (one URL per line) (\-\-list <param>)
|
||||
.IP \-%S
|
||||
<file> add all scan rules located in this text file (one scan rule per line) (\-\-urllist <param>)
|
||||
|
||||
.SS Build options:
|
||||
.IP \-NN
|
||||
structure type (0 *original structure, 1+: see below) (\-\-structure[=N])
|
||||
.IP \-or
|
||||
user defined structure (\-N "%h%p/%n%q.%t")
|
||||
.br
|
||||
or user defined structure (\-N "%h%p/%n%q.%t")
|
||||
.IP \-%N
|
||||
delayed type check, don t make any link test but wait for files download to start instead (experimental) (%N0 don t use, %N1 use for unknown extensions, * %N2 always use)
|
||||
delayed type check, don't make any link test but wait for files download to start instead (experimental) (%N0 don't use, %N1 use for unknown extensions, * %N2 always use)
|
||||
.IP \-%D
|
||||
cached delayed type check, don t wait for remote type during updates, to speedup them (%D0 wait, * %D1 don t wait) (\-\-cached\-delayed\-type\-check)
|
||||
cached delayed type check, don't wait for remote type during updates, to speedup them (%D0 wait, * %D1 don't wait) (\-\-cached\-delayed\-type\-check)
|
||||
.IP \-%M
|
||||
generate a RFC MIME\-encapsulated full\-archive (.mht) (\-\-mime\-html)
|
||||
.IP \-LN
|
||||
@@ -297,29 +196,28 @@ replace external html links by error pages (\-\-replace\-external)
|
||||
.IP \-%x
|
||||
do not include any password for external password protected websites (%x0 include) (\-\-disable\-passwords)
|
||||
.IP \-%q
|
||||
*include query string for local files (useless, for information purpose only) (%q0 don t include) (\-\-include\-query\-string)
|
||||
*include query string for local files (useless, for information purpose only) (%q0 don't include) (\-\-include\-query\-string)
|
||||
.IP \-o
|
||||
*generate output html file in case of error (404..) (o0 don t generate) (\-\-generate\-errors)
|
||||
*generate output html file in case of error (404..) (o0 don't generate) (\-\-generate\-errors)
|
||||
.IP \-X
|
||||
*purge old files after update (X0 keep delete) (\-\-purge\-old[=N])
|
||||
.IP \-%p
|
||||
preserve html files as is (identical to \-K4 \-%F "" ) (\-\-preserve)
|
||||
preserve html files 'as is' (identical to '\-K4 \-%F ""') (\-\-preserve)
|
||||
.IP \-%T
|
||||
links conversion to UTF\-8 (\-\-utf8\-conversion)
|
||||
|
||||
.SS Spider options:
|
||||
.IP \-bN
|
||||
accept cookies in cookies.txt (0=do not accept,* 1=accept) (\-\-cookies[=N])
|
||||
.IP \-u
|
||||
check document type if unknown (cgi,asp..) (u0 don t check, * u1 check but /, u2 check always) (\-\-check\-type[=N])
|
||||
check document type if unknown (cgi,asp..) (u0 don't check, * u1 check but /, u2 check always) (\-\-check\-type[=N])
|
||||
.IP \-j
|
||||
*parse Java Classes (j0 don t parse, bitmask: |1 parse default, |2 don t parse .class |4 don t parse .js |8 don t be aggressive) (\-\-parse\-java[=N])
|
||||
*parse Java Classes (j0 don't parse, bitmask: |1 parse default, |2 don't parse .class |4 don't parse .js |8 don't be aggressive) (\-\-parse\-java[=N])
|
||||
.IP \-sN
|
||||
follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always, 3=always (even strict rules)) (\-\-robots[=N])
|
||||
.IP \-%h
|
||||
force HTTP/1.0 requests (reduce update features, only for old servers or proxies) (\-\-http\-10)
|
||||
.IP \-%k
|
||||
use keep\-alive if possible, greately reducing latency for small files and test requests (%k0 don t use) (\-\-keep\-alive)
|
||||
use keep\-alive if possible, greatly reducing latency for small files and test requests (%k0 don't use) (\-\-keep\-alive)
|
||||
.IP \-%B
|
||||
tolerant requests (accept bogus responses on some servers, but not standard!) (\-\-tolerant)
|
||||
.IP \-%s
|
||||
@@ -328,13 +226,14 @@ update hacks: various hacks to limit re\-transfers when updating (identical size
|
||||
url hacks: various hacks to limit duplicate URLs (strip //, www.foo.com==foo.com..) (\-\-urlhack)
|
||||
.IP \-%A
|
||||
assume that a type (cgi,asp..) is always linked with a mime type (\-%A php3,cgi=text/html;dat,bin=application/x\-zip) (\-\-assume <param>)
|
||||
.IP \-can
|
||||
also be used to force a specific file type: \-\-assume foo.cgi=text/html
|
||||
.br
|
||||
shortcut: '\-\-assume standard' is equivalent to \-%A php2 php3 php4 php cgi asp jsp pl cfm nsf=text/html
|
||||
.br
|
||||
can also be used to force a specific file type: \-\-assume foo.cgi=text/html
|
||||
.IP \-@iN
|
||||
internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6 only) (\-\-protocol[=N])
|
||||
.IP \-%w
|
||||
disable a specific external mime module (\-%w htsswf \-%w htsjava) (\-\-disable\-module <param>)
|
||||
|
||||
.SS Browser ID:
|
||||
.IP \-F
|
||||
user\-agent field sent in HTTP headers (\-F "user\-agent name") (\-\-user\-agent <param>)
|
||||
@@ -350,7 +249,6 @@ preffered language (\-%l "fr, en, jp, *" (\-\-language <param>)
|
||||
accepted formats (\-%a "text/html,image/png;q=0.9,*/*;q=0.1") (\-\-accept <param>)
|
||||
.IP \-%X
|
||||
additional HTTP header line (\-%X "X\-Magic: 42") (\-\-headers <param>)
|
||||
|
||||
.SS Log, index, cache
|
||||
.IP \-C
|
||||
create/use a cache for updates and retries (C0 no cache,C1 cache is prioritary,* C2 test update before) (\-\-cache[=N])
|
||||
@@ -375,25 +273,24 @@ log on screen (\-\-verbose)
|
||||
.IP \-f2
|
||||
one single log file (\-\-single\-log)
|
||||
.IP \-I
|
||||
*make an index (I0 don t make) (\-\-index)
|
||||
*make an index (I0 don't make) (\-\-index)
|
||||
.IP \-%i
|
||||
make a top index for a project folder (* %i0 don t make) (\-\-build\-top\-index)
|
||||
make a top index for a project folder (* %i0 don't make) (\-\-build\-top\-index)
|
||||
.IP \-%I
|
||||
make an searchable index for this mirror (* %I0 don t make) (\-\-search\-index)
|
||||
|
||||
make a searchable index for this mirror (* %I0 don't make) (\-\-search\-index)
|
||||
.SS Expert options:
|
||||
.IP \-pN
|
||||
priority mode: (* p3) (\-\-priority[=N])
|
||||
.IP \-p0
|
||||
just scan, don t save anything (for checking links)
|
||||
.IP \-p1
|
||||
save only html files
|
||||
.IP \-p2
|
||||
save only non html files
|
||||
.IP \-*p3
|
||||
save all files
|
||||
.IP \-p7
|
||||
get html files before, then treat other files
|
||||
.br
|
||||
p0 just scan, don't save anything (for checking links)
|
||||
.br
|
||||
p1 save only html files
|
||||
.br
|
||||
p2 save only non html files
|
||||
.br
|
||||
*p3 save all files
|
||||
.br
|
||||
p7 get html files before, then treat other files
|
||||
.IP \-S
|
||||
stay on the same directory (\-\-stay\-on\-same\-dir)
|
||||
.IP \-D
|
||||
@@ -412,18 +309,17 @@ stay on the same TLD (eg: .com) (\-\-stay\-on\-same\-tld)
|
||||
go everywhere on the web (\-\-go\-everywhere)
|
||||
.IP \-%H
|
||||
debug HTTP headers in logfile (\-\-debug\-headers)
|
||||
|
||||
.SS Guru options: (do NOT use if possible)
|
||||
.IP \-#X
|
||||
*use optimized engine (limited memory boundary checks) (\-\-fast\-engine)
|
||||
.IP \-#0
|
||||
filter test (\-#0 *.gif www.bar.com/foo.gif ) (\-\-debug\-testfilters <param>)
|
||||
filter test (\-#0 '*.gif' 'www.bar.com/foo.gif') (\-\-debug\-testfilters <param>)
|
||||
.IP \-#1
|
||||
simplify test (\-#1 ./foo/bar/../foobar)
|
||||
.IP \-#2
|
||||
type test (\-#2 /foo/bar.php)
|
||||
.IP \-#C
|
||||
cache list (\-#C *.com/spider*.gif (\-\-debug\-cache <param>)
|
||||
cache list (\-#C '*.com/spider*.gif') (\-\-debug\-cache <param>)
|
||||
.IP \-#R
|
||||
cache repair (damaged cache) (\-\-repair\-cache)
|
||||
.IP \-#d
|
||||
@@ -452,7 +348,6 @@ generate transfer ops. log every minutes (\-\-debug\-xfrstats)
|
||||
wait time (\-\-advanced\-wait)
|
||||
.IP \-#Z
|
||||
generate transfer rate statistics every minutes (\-\-debug\-ratestats)
|
||||
|
||||
.SS Dangerous options: (do NOT use unless you exactly know what you are doing)
|
||||
.IP \-%!
|
||||
bypass built\-in security limits aimed to avoid bandwidth abuses (bandwidth, simultaneous connections) (\-\-disable\-security\-limits)
|
||||
@@ -460,13 +355,11 @@ bypass built\-in security limits aimed to avoid bandwidth abuses (bandwidth, sim
|
||||
IMPORTANT NOTE: DANGEROUS OPTION, ONLY SUITABLE FOR EXPERTS
|
||||
.br
|
||||
USE IT WITH EXTREME CARE
|
||||
|
||||
.SS Command\-line specific options:
|
||||
.IP \-V
|
||||
execute system command after each files ($0 is the filename: \-V "rm \\$0") (\-\-userdef\-cmd <param>)
|
||||
.IP \-%W
|
||||
use an external library function as a wrapper (\-%W myfoo.so[,myparameters]) (\-\-callback <param>)
|
||||
|
||||
.SS Details: Option N
|
||||
.IP \-N0
|
||||
Site\-structure (default)
|
||||
@@ -485,17 +378,17 @@ All files in web/, with random names (gadget !)
|
||||
.IP \-N100
|
||||
Site\-structure, without www.domain.xxx/
|
||||
.IP \-N101
|
||||
Identical to N1 except that "web" is replaced by the site s name
|
||||
Identical to N1 except that "web" is replaced by the site's name
|
||||
.IP \-N102
|
||||
Identical to N2 except that "web" is replaced by the site s name
|
||||
Identical to N2 except that "web" is replaced by the site's name
|
||||
.IP \-N103
|
||||
Identical to N3 except that "web" is replaced by the site s name
|
||||
Identical to N3 except that "web" is replaced by the site's name
|
||||
.IP \-N104
|
||||
Identical to N4 except that "web" is replaced by the site s name
|
||||
Identical to N4 except that "web" is replaced by the site's name
|
||||
.IP \-N105
|
||||
Identical to N5 except that "web" is replaced by the site s name
|
||||
Identical to N5 except that "web" is replaced by the site's name
|
||||
.IP \-N199
|
||||
Identical to N99 except that "web" is replaced by the site s name
|
||||
Identical to N99 except that "web" is replaced by the site's name
|
||||
.IP \-N1001
|
||||
Identical to N1 except that there is no "web" directory
|
||||
.IP \-N1002
|
||||
@@ -509,34 +402,47 @@ Identical to N5 except that there is no "web" directory
|
||||
.IP \-N1099
|
||||
Identical to N99 except that there is no "web" directory
|
||||
.SS Details: User\-defined option N
|
||||
%n Name of file without file type (ex: image)
|
||||
%N Name of file, including file type (ex: image.gif)
|
||||
%t File type (ex: gif)
|
||||
%p Path [without ending /] (ex: /someimages)
|
||||
%h Host name (ex: www.someweb.com)
|
||||
%M URL MD5 (128 bits, 32 ascii bytes)
|
||||
%Q query string MD5 (128 bits, 32 ascii bytes)
|
||||
%k full query string
|
||||
%r protocol name (ex: http)
|
||||
%q small query string MD5 (16 bits, 4 ascii bytes)
|
||||
%s? Short name version (ex: %sN)
|
||||
%[param] param variable in query string
|
||||
%[param:before:after:empty:notfound] advanced variable extraction
|
||||
.IP \-%n
|
||||
Name of file without file type (ex: image)
|
||||
.IP \-%N
|
||||
Name of file, including file type (ex: image.gif)
|
||||
.IP \-%t
|
||||
File type (ex: gif)
|
||||
.IP \-%p
|
||||
Path [without ending /] (ex: /someimages)
|
||||
.IP \-%h
|
||||
Host name (ex: www.someweb.com)
|
||||
.IP \-%M
|
||||
URL MD5 (128 bits, 32 ascii bytes)
|
||||
.IP \-%Q
|
||||
query string MD5 (128 bits, 32 ascii bytes)
|
||||
.IP \-%k
|
||||
full query string
|
||||
.IP \-%r
|
||||
protocol name (ex: http)
|
||||
.IP \-%q
|
||||
small query string MD5 (16 bits, 4 ascii bytes)
|
||||
.br
|
||||
\&'%s?' Short name version (ex: %sN)
|
||||
.IP \-%[param]
|
||||
param variable in query string
|
||||
.IP \-%[param:before:after:empty:notfound]
|
||||
advanced variable extraction
|
||||
.SS Details: User\-defined option N and advanced variable extraction
|
||||
%[param:before:after:empty:notfound]
|
||||
.IP \-param
|
||||
: parameter name
|
||||
.IP \-before
|
||||
: string to prepend if the parameter was found
|
||||
.IP \-after
|
||||
: string to append if the parameter was found
|
||||
.IP \-notfound
|
||||
: string replacement if the parameter could not be found
|
||||
.IP \-empty
|
||||
: string replacement if the parameter was empty
|
||||
.IP \-all
|
||||
fields, except the first one (the parameter name), can be empty
|
||||
|
||||
.br
|
||||
%[param:before:after:empty:notfound]
|
||||
.br
|
||||
param : parameter name
|
||||
.br
|
||||
before : string to prepend if the parameter was found
|
||||
.br
|
||||
after : string to append if the parameter was found
|
||||
.br
|
||||
notfound : string replacement if the parameter could not be found
|
||||
.br
|
||||
empty : string replacement if the parameter was empty
|
||||
.br
|
||||
all fields, except the first one (the parameter name), can be empty
|
||||
.SS Details: Option K
|
||||
.IP \-K0
|
||||
foo.cgi?q=45 \-> foo4B54.html?q=45 (relative URI, default)
|
||||
@@ -548,37 +454,33 @@ foo.cgi?q=45 \-> foo4B54.html?q=45 (relative URI, default)
|
||||
\-> foo.cgi?q=45 (original URL)
|
||||
.IP \-K5
|
||||
\-> http://www.foobar.com/folder/foo4B54.html?q=45 (transparent proxy URL)
|
||||
|
||||
.SS Shortcuts:
|
||||
.IP \-\-mirror
|
||||
<URLs> *make a mirror of site(s) (default)
|
||||
<URLs> *make a mirror of site(s) (default)
|
||||
.IP \-\-get
|
||||
<URLs> get the files indicated, do not seek other URLs (\-qg)
|
||||
<URLs> get the files indicated, do not seek other URLs (\-qg)
|
||||
.IP \-\-list
|
||||
<text file> add all URL located in this text file (\-%L)
|
||||
<text file> add all URL located in this text file (\-%L)
|
||||
.IP \-\-mirrorlinks
|
||||
<URLs> mirror all links in 1st level pages (\-Y)
|
||||
.IP \-\-testlinks
|
||||
<URLs> test links in pages (\-r1p0C0I0t)
|
||||
<URLs> test links in pages (\-r1p0C0I0t)
|
||||
.IP \-\-spider
|
||||
<URLs> spider site(s), to test links: reports Errors & Warnings (\-p0C0I0t)
|
||||
<URLs> spider site(s), to test links: reports Errors & Warnings (\-p0C0I0t)
|
||||
.IP \-\-testsite
|
||||
<URLs> identical to \-\-spider
|
||||
<URLs> identical to \-\-spider
|
||||
.IP \-\-skeleton
|
||||
<URLs> make a mirror, but gets only html files (\-p1)
|
||||
<URLs> make a mirror, but gets only html files (\-p1)
|
||||
.IP \-\-update
|
||||
update a mirror, without confirmation (\-iC2)
|
||||
update a mirror, without confirmation (\-iC2)
|
||||
.IP \-\-continue
|
||||
continue a mirror, without confirmation (\-iC1)
|
||||
|
||||
continue a mirror, without confirmation (\-iC1)
|
||||
.IP \-\-catchurl
|
||||
create a temporary proxy to capture an URL or a form post URL
|
||||
create a temporary proxy to capture an URL or a form post URL
|
||||
.IP \-\-clean
|
||||
erase cache & log files
|
||||
|
||||
erase cache & log files
|
||||
.IP \-\-http10
|
||||
force http/1.0 requests (\-%h)
|
||||
|
||||
force http/1.0 requests (\-%h)
|
||||
.SS Details: Option %W: External callbacks prototypes
|
||||
.SS see htsdefines.h
|
||||
.SH FILES
|
||||
@@ -588,29 +490,28 @@ The system wide configuration file.
|
||||
.SH ENVIRONMENT
|
||||
.IP HOME
|
||||
Is being used if you defined in /etc/httrack.conf the line
|
||||
.I path ~/websites/#
|
||||
.I path ~/websites/#
|
||||
.SH DIAGNOSTICS
|
||||
Errors/Warnings are reported to
|
||||
Errors/Warnings are reported to
|
||||
.I hts\-log.txt
|
||||
by default, or to stderr if the
|
||||
.I -v
|
||||
.I \-v
|
||||
option was specified.
|
||||
.SH LIMITS
|
||||
These are the principals limits of HTTrack for that moment. Note that we did not heard about any other utility
|
||||
that would have solved them.
|
||||
|
||||
|
||||
.SM - Several scripts generating complex filenames may not find them (ex: img.src='image'+a+Mobj.dst+'.gif')
|
||||
|
||||
.SM - Some java classes may not find some files on them (class included)
|
||||
|
||||
.SM - Cgi-bin links may not work properly in some cases (parameters needed). To avoid them: use filters like -*cgi-bin*
|
||||
.SM
|
||||
\- Several scripts generating complex filenames may not find them (ex: img.src='image'+a+Mobj.dst+'.gif')
|
||||
.SM
|
||||
\- Some java classes may not find some files on them (class included)
|
||||
.SM
|
||||
\- Cgi\-bin links may not work properly in some cases (parameters needed). To avoid them: use filters like \-*cgi\-bin*
|
||||
.SH BUGS
|
||||
Please reports bugs to
|
||||
.B <bugs@httrack.com>.
|
||||
Include a complete, self-contained example that will allow the bug to be reproduced, and say which version of httrack you are using. Do not forget to detail options used, OS version, and any other information you deem necessary.
|
||||
.SH COPYRIGHT
|
||||
Copyright (C) 1998-2024 Xavier Roche and other contributors
|
||||
Copyright (C) 1998-2026 Xavier Roche and other contributors
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@@ -631,8 +532,8 @@ The most recent released version of httrack can be found at:
|
||||
.SH AUTHOR
|
||||
Xavier Roche <roche@httrack.com>
|
||||
.SH "SEE ALSO"
|
||||
The
|
||||
.B HTML
|
||||
The
|
||||
.B HTML
|
||||
documentation (available online at
|
||||
.B http://www.httrack.com/html/
|
||||
) contains more detailed information. Please also refer to the
|
||||
|
||||
189
man/makeman.sh
Executable file
189
man/makeman.sh
Executable file
@@ -0,0 +1,189 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Regenerate man/httrack.1 from "httrack --help" and the top-level README.
|
||||
#
|
||||
# Usage:
|
||||
# man/makeman.sh [HTTRACK_BINARY] > man/httrack.1
|
||||
#
|
||||
# HTTRACK_BINARY defaults to "httrack" (looked up in $PATH). Set SOURCE_DATE_EPOCH
|
||||
# for a reproducible page date.
|
||||
#
|
||||
# The OPTIONS section is derived from --help by indentation, which is what makes
|
||||
# it robust (no more prose turning into bogus options, see Debian #1061053):
|
||||
# column 0 starting with "--" -> long option (.IP)
|
||||
# column 0 otherwise -> section header (.SS)
|
||||
# 1-2 leading spaces -> option (.IP)
|
||||
# 3+ leading spaces -> continuation / sub-value (description text)
|
||||
#
|
||||
# This replaces the previous out-of-tree script that grepped the first token of
|
||||
# every indented line and mislabelled continuations as options.
|
||||
|
||||
# Abort on any failing command (-e) and on expansion of an unset variable (-u).
set -eu
|
||||
|
||||
# Binary to query: the first argument, or "httrack" when none is given.
httrack=${1:-httrack}
|
||||
# Absolute directory containing this script. CDPATH is emptied for the cd so
# the lookup is not redirected through a user-set CDPATH.
script_dir=$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)
|
||||
# Top of the source tree: $TOPDIR when set and non-empty, otherwise the parent
# directory of script_dir.
topdir=${TOPDIR:-$(CDPATH='' cd -- "$script_dir/.." && pwd)}
|
||||
# README to read: $README when set and non-empty, otherwise $topdir/README.
readme=${README:-$topdir/README}
|
||||
|
||||
# Page date in "DD Month YYYY" form (C locale). When SOURCE_DATE_EPOCH is set
# the date is derived from it (UTC) so the output is reproducible; otherwise
# today's local date is used.
date_fmt='+%d %B %Y'
case ${SOURCE_DATE_EPOCH:-} in
'')
  date_str=$(LC_ALL=C date "$date_fmt")
  ;;
*)
  # Try the "-d @SECONDS" spelling first and fall back to "-r SECONDS"
  # (presumably for date implementations without -d @ support -- TODO confirm).
  date_str=$(
    LC_ALL=C date -u -d "@${SOURCE_DATE_EPOCH}" "$date_fmt" 2>/dev/null ||
      LC_ALL=C date -u -r "${SOURCE_DATE_EPOCH}" "$date_fmt"
  )
  ;;
esac
# The year is the last space-separated word of the formatted date.
year=${date_str##* }
|
||||
|
||||
# Capture the help text once; every generated section is sliced out of it.
# Report the failure explicitly instead of exiting without a message.
help=$("$httrack" --quiet --help 2>/dev/null) || {
  printf '%s: cannot run "%s --quiet --help"\n' "${0##*/}" "$httrack" >&2
  exit 1
}

# Line numbers of the three landmarks used to slice the help text:
#   st  - first line containing "General options" (start of the options block)
#   en  - first line beginning with "example"      (start of the examples)
#   en2 - last line beginning with "HTTrack version" (end of the examples)
st=$(printf '%s\n' "$help" | grep -n 'General options' | head -1 | cut -d: -f1)
en=$(printf '%s\n' "$help" | grep -nE '^example' | head -1 | cut -d: -f1)
en2=$(printf '%s\n' "$help" | grep -nE '^HTTrack version' | tail -1 | cut -d: -f1)

# A landmark that was not found leaves its variable empty. The line ranges
# computed from it later would then be invalid and the affected section would
# silently come out empty, so stop here with a clear diagnostic instead.
for landmark in "General options:$st" "example:$en" "HTTrack version:$en2"; do
  case ${landmark##*:} in
  '' | *[!0-9]*)
    printf '%s: "%s" not found in "%s --help" output\n' \
      "${0##*/}" "${landmark%:*}" "$httrack" >&2
    exit 1
    ;;
  esac
done
|
||||
|
||||
# SYNOPSIS: one "[ -x, --long ]" per option carrying a long name (skip "#" guru
# options, as the original did).
#
# The awk filter only acts on help lines that contain "(--" and do not contain
# " #". For such a line the short name is the first whitespace-separated field
# and the long name is the text following "(--" up to the first space or ")".
# Hyphens in both names are escaped as "\-" for roff, and one line of the form
#   [ \fB\-<short>, \-\-<long>\fR ]
# is printed per option. Lines that match the filter but carry no "(--name"
# token produce no output.
|
||||
# The result is stored in $synopsis for the page assembly further down.
|
||||
|
||||
synopsis=$(printf '%s\n' "$help" | awk '
|
||||
$0 ~ /\(--/ && $0 !~ / #/ {
|
||||
short = $1
|
||||
if (match($0, /\(--[^ )]+/)) {
|
||||
lng = substr($0, RSTART + 3, RLENGTH - 3)
|
||||
gsub(/-/, "\\-", short); gsub(/-/, "\\-", lng)
|
||||
printf "[ \\fB\\-%s, \\-\\-%s\\fR ]\n", short, lng
|
||||
}
|
||||
}')
|
||||
|
||||
# OPTIONS: indentation-driven classifier (see header comment).
#
# Input is the slice of the help text from line $st up to two lines before
# line $en. Blank lines are dropped. Every other line is classified by its
# number of leading spaces (ind):
#   ind == 0, starts with "--" : ".IP <first field>" followed by the rest of
#                                the line as description text
#   ind == 0, anything else    : ".SS <whole line>" (section header)
#   ind <= 2                   : ".IP \-<first field>" (surrounding apostrophes
#                                removed from the field), followed by the rest
#                                of the line when it is not empty
#   otherwise                  : ".br" followed by the line with its leading
#                                whitespace removed (continuation / sub-value)
# Helper functions inside the awk program:
#   esc(s)  - doubles backslashes, then escapes every "-" as "\-"
#   emit(s) - prints esc(s), prefixed with "\&" when the text starts with "."
#             or an apostrophe so roff does not read it as a request
# The result is stored in $options for the page assembly further down.
|
||||
options=$(printf '%s\n' "$help" | sed -n "${st},$((en - 2))p" | awk '
|
||||
function esc(s) {
|
||||
gsub(/\\/, "\\\\", s)
|
||||
gsub(/-/, "\\-", s)
|
||||
return s
|
||||
}
|
||||
function emit(s) { # body text: escape + guard ./%apostrophe leaders
|
||||
s = esc(s)
|
||||
if (substr(s, 1, 1) == "." || substr(s, 1, 1) == "\x27") s = "\\&" s
|
||||
print s
|
||||
}
|
||||
/^[ \t]*$/ { next }
|
||||
{
|
||||
match($0, /^ */); ind = RLENGTH
|
||||
if (ind == 0 && substr($0, 1, 2) == "--") { # long option
|
||||
opt = $1
|
||||
rest = $0; sub(/^[^ \t]+[ \t]+/, "", rest)
|
||||
printf ".IP %s\n", esc(opt)
|
||||
emit(rest)
|
||||
} else if (ind == 0) { # section header
|
||||
printf ".SS %s\n", esc($0)
|
||||
} else if (ind <= 2) { # option
|
||||
opt = $1
|
||||
gsub(/^\x27|\x27$/, "", opt) # drop quotes around tokens like %t
|
||||
rest = $0; sub(/^[ \t]+[^ \t]+[ \t]*/, "", rest)
|
||||
printf ".IP \\-%s\n", esc(opt)
|
||||
if (rest != "") emit(rest)
|
||||
} else { # continuation / sub-value
|
||||
line = $0; sub(/^[ \t]+/, "", line)
|
||||
print ".br"
|
||||
emit(line)
|
||||
}
|
||||
}')
|
||||
|
||||
# EXAMPLES: "example: <cmd>" / "means: <text>" pairs after the options block.
#
# Input is the slice of the help text from line $en up to the line before
# line $en2. A line starting with "example:" becomes ".TP" followed by
# ".B <cmd>"; a line starting with "means:" becomes its text (skipped when
# empty). Backslashes are doubled and hyphens escaped as "\-" by esc(). Lines
# matching neither prefix produce no output. The result is stored in $examples.
|
||||
examples=$(printf '%s\n' "$help" | sed -n "${en},$((en2 - 1))p" | awk '
|
||||
function esc(s) { gsub(/\\/, "\\\\", s); gsub(/-/, "\\-", s); return s }
|
||||
/^example:/ { sub(/^example:[ \t]*/, ""); printf ".TP\n.B %s\n", esc($0); next }
|
||||
/^means:/ { sub(/^means:[ \t]*/, ""); if ($0 != "") print esc($0); next }
|
||||
')
|
||||
|
||||
# LIMITS: the "Engine limits" block from the README.
#
# Reads the file named by $readme. Copying starts on the line after the one
# beginning with "Engine limits" and stops at the line beginning with
# "Advanced options" (neither marker line is copied). Within that range a line
# starting with "-" is preceded by a ".SM" request, blank lines are dropped,
# and all copied text has backslashes doubled and hyphens escaped as "\-".
# The result is stored in $limits.
|
||||
limits=$(awk '
|
||||
function esc(s) { gsub(/\\/, "\\\\", s); gsub(/-/, "\\-", s); return s }
|
||||
/^Engine limits/ { grab = 1; next }
|
||||
/^Advanced options/ { grab = 0 }
|
||||
grab {
|
||||
if ($0 ~ /^-/) { print ".SM"; print esc($0) }
|
||||
else if ($0 !~ /^[ \t]*$/) print esc($0)
|
||||
}' "$readme")
|
||||
|
||||
# --- assemble the page: static prose in quoted heredocs, dynamic parts printf'd ---
|
||||
cat <<'EOF'
|
||||
.\" Process this file with
|
||||
.\" groff -man -Tascii httrack.1
|
||||
.\"
|
||||
.\" This file is generated by man/makeman.sh; do not edit by hand.
|
||||
EOF
|
||||
printf '.TH httrack 1 "%s" "httrack website copier"\n' "$date_str"
|
||||
cat <<'EOF'
|
||||
.SH NAME
|
||||
httrack \- offline browser : copy websites to a local directory
|
||||
.SH SYNOPSIS
|
||||
.B httrack [ url ]... [ \-filter ]... [ +filter ]...
|
||||
EOF
|
||||
printf '%s\n' "$synopsis"
|
||||
cat <<'EOF'
|
||||
.SH DESCRIPTION
|
||||
.B httrack
|
||||
allows you to download a World Wide Web site from the Internet to a local directory, building recursively all directories, getting HTML, images, and other files from the server to your computer. HTTrack arranges the original site's relative link-structure. Simply open a page of the "mirrored" website in your browser, and you can browse the site from link to link, as if you were viewing it online. HTTrack can also update an existing mirrored site, and resume interrupted downloads.
|
||||
.SH EXAMPLES
|
||||
EOF
|
||||
printf '%s\n' "$examples"
|
||||
cat <<'EOF'
|
||||
.SH OPTIONS
|
||||
EOF
|
||||
printf '%s\n' "$options"
|
||||
cat <<'EOF'
|
||||
.SH FILES
|
||||
.I /etc/httrack.conf
|
||||
.RS
|
||||
The system wide configuration file.
|
||||
.SH ENVIRONMENT
|
||||
.IP HOME
|
||||
Is being used if you defined in /etc/httrack.conf the line
|
||||
.I path ~/websites/#
|
||||
.SH DIAGNOSTICS
|
||||
Errors/Warnings are reported to
|
||||
.I hts\-log.txt
|
||||
by default, or to stderr if the
|
||||
.I \-v
|
||||
option was specified.
|
||||
.SH LIMITS
|
||||
EOF
|
||||
printf '%s\n' "$limits"
|
||||
cat <<'EOF'
|
||||
.SH BUGS
|
||||
Please reports bugs to
|
||||
.B <bugs@httrack.com>.
|
||||
Include a complete, self-contained example that will allow the bug to be reproduced, and say which version of httrack you are using. Do not forget to detail options used, OS version, and any other information you deem necessary.
|
||||
.SH COPYRIGHT
|
||||
EOF
|
||||
printf 'Copyright (C) 1998-%s Xavier Roche and other contributors\n' "$year"
|
||||
cat <<'EOF'
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
.SH AVAILABILITY
|
||||
The most recent released version of httrack can be found at:
|
||||
.B http://www.httrack.com
|
||||
.SH AUTHOR
|
||||
Xavier Roche <roche@httrack.com>
|
||||
.SH "SEE ALSO"
|
||||
The
|
||||
.B HTML
|
||||
documentation (available online at
|
||||
.B http://www.httrack.com/html/
|
||||
) contains more detailed information. Please also refer to the
|
||||
.B httrack FAQ
|
||||
(available online at
|
||||
.B http://www.httrack.com/html/faq.html
|
||||
)
|
||||
EOF
|
||||
39
tests/02_manpage-regen.test
Executable file
39
tests/02_manpage-regen.test
Executable file
@@ -0,0 +1,39 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Guard against a stale man page: man/httrack.1 as committed has to be what
# man/makeman.sh generates from the current "httrack --help" output. A --help
# change that was not followed by "make -C man regen-man" makes this fail.
#
# Exit status: 0 = up to date, 1 = stale or generator failure, 77 = skipped.

: "${top_srcdir:=..}"

gen="$top_srcdir/man/makeman.sh"
committed="$top_srcdir/man/httrack.1"

# Prerequisites: the generator script and an httrack binary in PATH.
if ! test -f "$gen"; then
  echo "makeman.sh not found; skipping" >&2
  exit 77
fi
if ! command -v httrack >/dev/null 2>&1; then
  echo "httrack not in PATH; skipping" >&2
  exit 77
fi

# Freshly generated page goes to a temp file that is removed on exit.
tmp=$(mktemp) || exit 1
trap 'rm -f "$tmp"' EXIT

if ! README="$top_srcdir/README" bash "$gen" httrack >"$tmp" 2>/dev/null; then
  echo "makeman.sh failed" >&2
  exit 1
fi

# Filter out the two intentionally date-dependent lines (page date, copyright
# year) so they never count as a difference.
strip_volatile() { grep -vE '^\.TH httrack |^Copyright \(C\) 1998-'; }

# Diff of the committed page against the regenerated one, volatile lines removed.
compare() { diff <(strip_volatile <"$committed") <(strip_volatile <"$tmp"); }

if compare >/dev/null; then
  exit 0
fi

# Stale: say how to fix it and show the beginning of the difference.
echo "man/httrack.1 is out of date. Regenerate with: make -C man regen-man" >&2
compare | head -40 >&2
exit 1
|
||||
@@ -6,8 +6,9 @@ TESTS_ENVIRONMENT += PATH=$(top_builddir)/src$(PATH_SEPARATOR)$$PATH
|
||||
### TESTS_ENVIRONMENT += $(SHLIBPATH_VAR)="$(top_builddir)/src/$(LT_CV_OBJDIR)$${$(SHLIBPATH_VAR):+$(PATH_SEPARATOR)}$$$(SHLIBPATH_VAR)"
|
||||
TESTS_ENVIRONMENT += ONLINE_UNIT_TESTS=$(ONLINE_UNIT_TESTS)
|
||||
TESTS_ENVIRONMENT += HTTPS_SUPPORT=$(HTTPS_SUPPORT)
|
||||
TESTS_ENVIRONMENT += top_srcdir=$(top_srcdir)
|
||||
|
||||
TEST_EXTENSIONS = .test
|
||||
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-entities.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-simplify.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
|
||||
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-entities.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-simplify.test 02_manpage-regen.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
|
||||
|
||||
CLEANFILES = check-network_sh.cache
|
||||
|
||||
@@ -470,9 +470,9 @@ EXTRA_DIST = $(TESTS) crawl-test.sh run-all-tests.sh check-network.sh
|
||||
### TESTS_ENVIRONMENT += $(SHLIBPATH_VAR)="$(top_builddir)/src/$(LT_CV_OBJDIR)$${$(SHLIBPATH_VAR):+$(PATH_SEPARATOR)}$$$(SHLIBPATH_VAR)"
|
||||
TESTS_ENVIRONMENT = PATH=$(top_builddir)/src$(PATH_SEPARATOR)$$PATH \
|
||||
ONLINE_UNIT_TESTS=$(ONLINE_UNIT_TESTS) \
|
||||
HTTPS_SUPPORT=$(HTTPS_SUPPORT)
|
||||
HTTPS_SUPPORT=$(HTTPS_SUPPORT) top_srcdir=$(top_srcdir)
|
||||
TEST_EXTENSIONS = .test
|
||||
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-entities.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-simplify.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
|
||||
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-entities.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-simplify.test 02_manpage-regen.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
|
||||
CLEANFILES = check-network_sh.cache
|
||||
all: all-am
|
||||
|
||||
|
||||
195
tools/mkdeb.sh
Executable file
195
tools/mkdeb.sh
Executable file
@@ -0,0 +1,195 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Build the httrack Debian packages from a clean, committed source export.
|
||||
#
|
||||
# It exports HEAD (plus the coucal submodule) into a scratch directory, refreshes
|
||||
# the build system and man page, builds the upstream tarball, overlays debian/,
|
||||
# and runs debuild (which builds, runs lintian, and signs). Nothing is built in
|
||||
# the working tree, and no hard-coded paths are used.
|
||||
#
|
||||
# Output (in --outdir, default <repo>/dist):
|
||||
# httrack_<ver>.orig.tar.gz upstream tarball (Debian orig name)
|
||||
# httrack_<ver>-*.dsc / .debian.tar.* source package
|
||||
# *.deb binary packages
|
||||
# *.changes / *.buildinfo build metadata
|
||||
# httrack_<ver>.orig.tar.gz.{asc,md5,sha1} release artifacts (unless disabled)
|
||||
#
|
||||
# Usage:
|
||||
# tools/mkdeb.sh [options]
|
||||
#
|
||||
# Options:
|
||||
# -k, --key KEYID GPG key for signing (default: $DEBSIGN_KEYID)
|
||||
# -o, --outdir DIR output directory (default: <repo>/dist)
|
||||
# -s, --source-only build only the source package
|
||||
# -u, --unsigned do not sign anything (implies no release sigs)
|
||||
# --no-release-artifacts skip the orig tarball .asc/.md5/.sha1
|
||||
# -h, --help show this help
|
||||
#
|
||||
# SOURCE_DATE_EPOCH is honored for reproducible output.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
readonly PROGNAME=${0##*/}
|
||||
|
||||
# Scratch dir, global so the EXIT trap can see it.
|
||||
scratch=""
|
||||
|
||||
# die MESSAGE...
# Write "<program name>: error: <message>" to stderr and terminate the script
# with exit status 1. All arguments are joined into the message.
die() {
  printf >&2 '%s\n' "$PROGNAME: error: $*"
  exit 1
}
|
||||
|
||||
# info MESSAGE...
# Progress message: writes "==> <message>" to stderr, leaving stdout untouched.
info() {
  printf >&2 '%s\n' "==> $*"
}
|
||||
|
||||
# usage
# Print the help text, which is the comment header of this very script: every
# line from line 2 up to (not including) the "set -euo" line, with the leading
# "#" and at most one following space removed.
usage() {
  sed -n \
    -e '2,/^set -euo/{' \
    -e '/^set -euo/d' \
    -e 's/^# \{0,1\}//' \
    -e 'p' \
    -e '}' \
    "$0"
}
|
||||
|
||||
# need TOOL...
# Verify that every named command can be resolved by the shell; abort through
# die() naming the first one that cannot.
need() {
  local cmd
  for cmd; do
    if ! command -v "$cmd" >/dev/null 2>&1; then
      die "required tool not found: $cmd"
    fi
  done
}
|
||||
|
||||
# main [options]
#
# Entry point. Parses the command line (options are described in the header of
# this file and printed by usage), then:
#   1. exports committed HEAD plus the src/coucal submodule into a scratch dir,
#   2. regenerates the build system and man page, runs the test suite and
#      builds the upstream tarball with "make dist" from a cleaned tree,
#   3. unpacks that tarball under its Debian orig name and overlays debian/,
#   4. runs debuild (signed with the key, or unsigned),
#   5. copies every file referenced by the .changes file, and the orig
#      tarball, into the output directory,
#   6. optionally writes the detached signature and checksums of the tarball.
#
# Exits non-zero through die() on bad usage, a missing tool or a missing build
# product, and through "set -e" when any build step fails. The scratch
# directory is removed by an EXIT trap.
main() {
  local key=${DEBSIGN_KEYID:-}
  local outdir=""
  local source_only=0
  local unsigned=0
  local release_artifacts=1

  while [[ $# -gt 0 ]]; do
    case $1 in
    -k | --key)
      [[ $# -ge 2 ]] || die "missing argument for $1"
      key=$2
      shift 2
      ;;
    -o | --outdir)
      [[ $# -ge 2 ]] || die "missing argument for $1"
      outdir=$2
      shift 2
      ;;
    -s | --source-only)
      source_only=1
      shift
      ;;
    -u | --unsigned)
      unsigned=1
      shift
      ;;
    --no-release-artifacts)
      release_artifacts=0
      shift
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      die "unknown option: $1 (try --help)"
      ;;
    esac
  done

  # Check every external tool up front so a missing one is reported before any
  # work is done rather than halfway through the build.
  need git tar make autoreconf debuild dcmd
  if [[ $unsigned -eq 0 ]]; then
    need gpg
    [[ -n $key ]] || die "no signing key (pass --key or set DEBSIGN_KEYID, or use --unsigned)"
    if [[ $release_artifacts -eq 1 ]]; then
      need md5sum sha1sum
    fi
  fi

  local repo
  repo=$(git rev-parse --show-toplevel) || die "not inside a git repository"
  : "${outdir:=$repo/dist}"
  mkdir -p "$outdir"
  # Make the path absolute: later steps change directory.
  outdir=$(cd "$outdir" && pwd)

  scratch=$(mktemp -d "${TMPDIR:-/tmp}/httrack-mkdeb.XXXXXX")
  trap 'rm -rf -- "$scratch"' EXIT

  # Parallel job count. A failing command substitution inside "-j$(...)" would
  # silently turn into a bare "-j" (unlimited jobs), so resolve it first and
  # fall back to a single job.
  local jobs
  jobs=$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)

  # Pristine export of committed HEAD plus the coucal submodule.
  info "exporting committed sources"
  local export_dir=$scratch/src
  mkdir -p "$export_dir"
  git -C "$repo" archive --format=tar HEAD | tar -x -C "$export_dir"
  git -C "$repo/src/coucal" archive --format=tar --prefix=src/coucal/ HEAD |
    tar -x -C "$export_dir"

  # Refresh build system and man page, then build and validate the tarball.
  info "regenerating build system and man page"
  (
    cd "$export_dir"
    autoreconf -fi
    ./configure --quiet
    make -s -j"$jobs"
    make -s -C man regen-man
    info "running test suite"
    make -s check
    # Build the tarball from a clean tree so no object files leak into it.
    make -s clean
    make -s dist
  )

  # Derive the version from the name of the tarball "make dist" produced.
  local tarball ver
  local -a tarballs
  shopt -s nullglob
  tarballs=("$export_dir"/httrack-*.tar.gz)
  shopt -u nullglob
  [[ ${#tarballs[@]} -ge 1 ]] || die "make dist produced no tarball"
  tarball=${tarballs[0]##*/}
  ver=${tarball#httrack-}
  ver=${ver%.tar.gz}
  info "version $ver"

  # 3.0 (quilt): orig tarball is upstream-only; debian/ is overlaid on top.
  local orig=httrack_${ver}.orig.tar.gz
  cp -- "$export_dir/$tarball" "$scratch/$orig"
  (
    cd "$scratch"
    tar -xf "$orig"
    cp -a "$export_dir/debian" "httrack-$ver/debian"
  )

  # Build (debuild also runs lintian and signs).
  local -a debuild_opts=(--lintian-opts -I -i)
  local -a build_opts=()
  [[ $source_only -eq 1 ]] && build_opts+=(-S)
  if [[ $unsigned -eq 1 ]]; then
    build_opts+=(-us -uc)
  else
    build_opts+=("-k$key")
  fi
  info "building packages with debuild"
  (
    cd "$scratch/httrack-$ver"
    debuild "${build_opts[@]}" "${debuild_opts[@]}"
  )

  # Collect every file the .changes references (orig, dsc, debs, ddebs, buildinfo).
  info "collecting artifacts into $outdir"
  local -a changes
  shopt -s nullglob
  changes=("$scratch"/*.changes)
  shopt -u nullglob
  [[ ${#changes[@]} -ge 1 ]] || die "debuild produced no .changes file"
  dcmd cp -- "${changes[@]}" "$outdir/"

  # The .changes file does not always list the orig tarball (by default dpkg
  # includes it only for a new upstream version), so copy it explicitly: it is
  # part of the documented output and the signing step below operates on it.
  cp -- "$scratch/$orig" "$outdir/$orig"

  # Release artifacts for the upstream tarball (detached sig + checksums).
  if [[ $release_artifacts -eq 1 && $unsigned -eq 0 ]]; then
    info "signing upstream tarball"
    (
      cd "$outdir"
      gpg --armor --detach-sign --yes -u "$key" -- "$orig"
      md5sum -- "$orig" >"$orig.md5"
      sha1sum -- "$orig" >"$orig.sha1"
    )
  fi

  info "done. artifacts in $outdir:"
  ls -1 "$outdir" >&2
}
|
||||
|
||||
main "$@"
|
||||
Reference in New Issue
Block a user