16 Commits

Author SHA1 Message Date
Xavier Roche
cad9ccb7aa created tag 3.48.8 2014-05-15 17:25:58 +00:00
Xavier Roche
3059bd6f53 3.48.8-1 2014-05-15 17:22:16 +00:00
Xavier Roche
e5b3c85156 3.48.8 2014-05-15 17:08:29 +00:00
Xavier Roche
843faaf83d WIN32: zlib 1.2.8 2014-05-15 17:06:59 +00:00
Xavier Roche
75969b1147 Cosmetic 2014-05-14 20:27:55 +00:00
Xavier Roche
6ad604624d Fixed macro. 2014-05-14 20:20:01 +00:00
Xavier Roche
c05f54ae04 Clean hts_set_error_callback() and hts_get_error_callback() 2014-05-14 20:12:42 +00:00
Xavier Roche
ae1db762e7 Missing htsCallbackErr definition 2014-05-14 19:51:33 +00:00
Xavier Roche
16aec722bf 3.48.7 2014-05-14 17:57:18 +00:00
Xavier Roche
4ff55249ed 3.48.7 2014-05-14 17:51:09 +00:00
Xavier Roche
fea8122ed3 Fixed hashtable corruption caused by dirty code directly modifying the host address in memory, which left hashtable positions no longer valid.
This issue was especially triggered when a redirect was processed ("Warning moved treated for .." messages)
  * closes: #43
2014-05-14 17:48:04 +00:00
Xavier Roche
7323230eb3 Added debug logging facility. 2014-05-14 17:45:51 +00:00
Xavier Roche
c9f656fdeb Better stdarg.h 2014-05-13 20:12:10 +00:00
Xavier Roche
042525a1db #include <stdarg.h> 2014-05-13 20:11:41 +00:00
Xavier Roche
3fde59c090 #include <stdarg.h> 2014-05-13 20:11:02 +00:00
Xavier Roche
edaaa73328 #include <stdarg.h> 2014-05-13 20:08:53 +00:00
18 changed files with 548 additions and 340 deletions

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for httrack 3.48.5.
# Generated by GNU Autoconf 2.69 for httrack 3.48.8.
#
# Report bugs to <roche+packaging@httrack.com>.
#
@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='httrack'
PACKAGE_TARNAME='httrack'
PACKAGE_VERSION='3.48.5'
PACKAGE_STRING='httrack 3.48.5'
PACKAGE_VERSION='3.48.8'
PACKAGE_STRING='httrack 3.48.8'
PACKAGE_BUGREPORT='roche+packaging@httrack.com'
PACKAGE_URL='http://www.httrack.com/'
@@ -1337,7 +1337,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures httrack 3.48.5 to adapt to many kinds of systems.
\`configure' configures httrack 3.48.8 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1407,7 +1407,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of httrack 3.48.5:";;
short | recursive ) echo "Configuration of httrack 3.48.8:";;
esac
cat <<\_ACEOF
@@ -1521,7 +1521,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
httrack configure 3.48.5
httrack configure 3.48.8
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2248,7 +2248,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by httrack $as_me 3.48.5, which was
It was created by httrack $as_me 3.48.8, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -3067,7 +3067,7 @@ fi
# Define the identity of the package.
PACKAGE='httrack'
VERSION='3.48.5'
VERSION='3.48.8'
cat >>confdefs.h <<_ACEOF
@@ -16892,7 +16892,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by httrack $as_me 3.48.5, which was
This file was extended by httrack $as_me 3.48.8, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -16959,7 +16959,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
httrack config.status 3.48.5
httrack config.status 3.48.8
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([httrack], [3.48.5], [roche+packaging@httrack.com], [httrack], [http://www.httrack.com/])
AC_INIT([httrack], [3.48.8], [roche+packaging@httrack.com], [httrack], [http://www.httrack.com/])
AC_CONFIG_SRCDIR(src/httrack.c)
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_HEADERS(config.h)

18
debian/changelog vendored
View File

@@ -1,3 +1,21 @@
httrack (3.48.8-1) unstable; urgency=low
* Updated to 3.48.8 (3.48-8)
-- Xavier Roche <xavier@debian.org> Thu, 15 May 2014 19:21:21 +0200
httrack (3.48.7-1) unstable; urgency=low
* Updated to 3.48.7 (3.48-7)
-- Xavier Roche <xavier@debian.org> Wed, 14 May 2014 19:51:57 +0200
httrack (3.48.6-1) unstable; urgency=low
* Updated to 3.48.6 (3.48-6)
-- Xavier Roche <xavier@debian.org> Tue, 13 May 2014 21:23:02 +0200
httrack (3.48.5-1) unstable; urgency=low
* Updated to 3.48.5 (3.48-5)

View File

@@ -4,7 +4,11 @@ HTTrack Website Copier release history:
This file lists all changes and fixes that have been made for HTTrack.
3.48-3
3.48-8
+ Fixed: new zlib version fixing CVE-2004-0797 and CVE-2005-2096
+ Fixed: more reliable crash reporting
+ Fixed: fixed infamous "hashtable internal error: cuckoo/stash collision" errors
+ Fixed: safety cleanup in many strings operations
+ Fixed: buggy option panels
+ New: Enforce check against CVE-2014-0160
+ New: improved hashtables to speedup large mirrors

View File

@@ -51,98 +51,99 @@ offline browser : copy websites to a local directory</p>
<p style="margin-left:11%; margin-top: 1em"><b>httrack [
url ]... [ &minus;filter ]... [ +filter ]... [ &minus;O
&minus;&minus;path</b> ] [ <b>&minus;w
&minus;&minus;mirror</b> ] [ <b>&minus;W
&minus;&minus;mirror&minus;wizard</b> ] [ <b>&minus;g
&minus;&minus;get&minus;files</b> ] [ <b>&minus;i
&minus;&minus;continue</b> ] [ <b>&minus;Y
&minus;&minus;mirrorlinks</b> ] [ <b>&minus;P
&minus;&minus;proxy</b> ] [ <b>&minus;%f
&minus;&minus;httpproxy&minus;ftp[=N]</b> ] [ <b>&minus;%b
&minus;&minus;bind</b> ] [ <b>&minus;rN
&minus;&minus;depth[=N]</b> ] [ <b>&minus;%eN
&minus;&minus;ext&minus;depth[=N]</b> ] [ <b>&minus;mN
&minus;&minus;max&minus;files[=N]</b> ] [ <b>&minus;MN
&minus;&minus;max&minus;size[=N]</b> ] [ <b>&minus;EN
&minus;&minus;max&minus;time[=N]</b> ] [ <b>&minus;AN
&minus;&minus;max&minus;rate[=N]</b> ] [ <b>&minus;%cN
url ]... [ &minus;filter ]... [ +filter ]... [ &minus;O,
&minus;&minus;path</b> ] [ <b>&minus;w,
&minus;&minus;mirror</b> ] [ <b>&minus;W,
&minus;&minus;mirror&minus;wizard</b> ] [ <b>&minus;g,
&minus;&minus;get&minus;files</b> ] [ <b>&minus;i,
&minus;&minus;continue</b> ] [ <b>&minus;Y,
&minus;&minus;mirrorlinks</b> ] [ <b>&minus;P,
&minus;&minus;proxy</b> ] [ <b>&minus;%f,
&minus;&minus;httpproxy&minus;ftp[=N]</b> ] [ <b>&minus;%b,
&minus;&minus;bind</b> ] [ <b>&minus;rN,
&minus;&minus;depth[=N]</b> ] [ <b>&minus;%eN,
&minus;&minus;ext&minus;depth[=N]</b> ] [ <b>&minus;mN,
&minus;&minus;max&minus;files[=N]</b> ] [ <b>&minus;MN,
&minus;&minus;max&minus;size[=N]</b> ] [ <b>&minus;EN,
&minus;&minus;max&minus;time[=N]</b> ] [ <b>&minus;AN,
&minus;&minus;max&minus;rate[=N]</b> ] [ <b>&minus;%cN,
&minus;&minus;connection&minus;per&minus;second[=N]</b> ] [
<b>&minus;GN &minus;&minus;max&minus;pause[=N]</b> ] [
<b>&minus;cN &minus;&minus;sockets[=N]</b> ] [ <b>&minus;TN
&minus;&minus;timeout[=N]</b> ] [ <b>&minus;RN
&minus;&minus;retries[=N]</b> ] [ <b>&minus;JN
&minus;&minus;min&minus;rate[=N]</b> ] [ <b>&minus;HN
&minus;&minus;host&minus;control[=N]</b> ] [ <b>&minus;%P
&minus;&minus;extended&minus;parsing[=N]</b> ] [ <b>&minus;n
&minus;&minus;near</b> ] [ <b>&minus;t
&minus;&minus;test</b> ] [ <b>&minus;%L
&minus;&minus;list</b> ] [ <b>&minus;%S
&minus;&minus;urllist</b> ] [ <b>&minus;NN
&minus;&minus;structure[=N]</b> ] [ <b>&minus;%D
<b>&minus;GN, &minus;&minus;max&minus;pause[=N]</b> ] [
<b>&minus;cN, &minus;&minus;sockets[=N]</b> ] [
<b>&minus;TN, &minus;&minus;timeout[=N]</b> ] [
<b>&minus;RN, &minus;&minus;retries[=N]</b> ] [
<b>&minus;JN, &minus;&minus;min&minus;rate[=N]</b> ] [
<b>&minus;HN, &minus;&minus;host&minus;control[=N]</b> ] [
<b>&minus;%P, &minus;&minus;extended&minus;parsing[=N]</b> ]
[ <b>&minus;n, &minus;&minus;near</b> ] [ <b>&minus;t,
&minus;&minus;test</b> ] [ <b>&minus;%L,
&minus;&minus;list</b> ] [ <b>&minus;%S,
&minus;&minus;urllist</b> ] [ <b>&minus;NN,
&minus;&minus;structure[=N]</b> ] [ <b>&minus;%D,
&minus;&minus;cached&minus;delayed&minus;type&minus;check</b>
] [ <b>&minus;%M &minus;&minus;mime&minus;html</b> ] [
<b>&minus;LN &minus;&minus;long&minus;names[=N]</b> ] [
<b>&minus;KN &minus;&minus;keep&minus;links[=N]</b> ] [
<b>&minus;x &minus;&minus;replace&minus;external</b> ] [
<b>&minus;%x &minus;&minus;disable&minus;passwords</b> ] [
<b>&minus;%q
] [ <b>&minus;%M, &minus;&minus;mime&minus;html</b> ] [
<b>&minus;LN, &minus;&minus;long&minus;names[=N]</b> ] [
<b>&minus;KN, &minus;&minus;keep&minus;links[=N]</b> ] [
<b>&minus;x, &minus;&minus;replace&minus;external</b> ] [
<b>&minus;%x, &minus;&minus;disable&minus;passwords</b> ] [
<b>&minus;%q,
&minus;&minus;include&minus;query&minus;string</b> ] [
<b>&minus;o &minus;&minus;generate&minus;errors</b> ] [
<b>&minus;X &minus;&minus;purge&minus;old[=N]</b> ] [
<b>&minus;%p &minus;&minus;preserve</b> ] [ <b>&minus;%T
&minus;&minus;utf8&minus;conversion</b> ] [ <b>&minus;bN
&minus;&minus;cookies[=N]</b> ] [ <b>&minus;u
&minus;&minus;check&minus;type[=N]</b> ] [ <b>&minus;j
&minus;&minus;parse&minus;java[=N]</b> ] [ <b>&minus;sN
&minus;&minus;robots[=N]</b> ] [ <b>&minus;%h
&minus;&minus;http&minus;10</b> ] [ <b>&minus;%k
&minus;&minus;keep&minus;alive</b> ] [ <b>&minus;%B
&minus;&minus;tolerant</b> ] [ <b>&minus;%s
&minus;&minus;updatehack</b> ] [ <b>&minus;%u
&minus;&minus;urlhack</b> ] [ <b>&minus;%A
&minus;&minus;assume</b> ] [ <b>&minus;@iN
&minus;&minus;protocol[=N]</b> ] [ <b>&minus;%w
&minus;&minus;disable&minus;module</b> ] [ <b>&minus;F
&minus;&minus;user&minus;agent</b> ] [ <b>&minus;%R
&minus;&minus;referer</b> ] [ <b>&minus;%E
&minus;&minus;from</b> ] [ <b>&minus;%F
&minus;&minus;footer</b> ] [ <b>&minus;%l
&minus;&minus;language</b> ] [ <b>&minus;%a
&minus;&minus;accept</b> ] [ <b>&minus;%X
&minus;&minus;headers</b> ] [ <b>&minus;C
&minus;&minus;cache[=N]</b> ] [ <b>&minus;k
<b>&minus;o, &minus;&minus;generate&minus;errors</b> ] [
<b>&minus;X, &minus;&minus;purge&minus;old[=N]</b> ] [
<b>&minus;%p, &minus;&minus;preserve</b> ] [ <b>&minus;%T,
&minus;&minus;utf8&minus;conversion</b> ] [ <b>&minus;bN,
&minus;&minus;cookies[=N]</b> ] [ <b>&minus;u,
&minus;&minus;check&minus;type[=N]</b> ] [ <b>&minus;j,
&minus;&minus;parse&minus;java[=N]</b> ] [ <b>&minus;sN,
&minus;&minus;robots[=N]</b> ] [ <b>&minus;%h,
&minus;&minus;http&minus;10</b> ] [ <b>&minus;%k,
&minus;&minus;keep&minus;alive</b> ] [ <b>&minus;%B,
&minus;&minus;tolerant</b> ] [ <b>&minus;%s,
&minus;&minus;updatehack</b> ] [ <b>&minus;%u,
&minus;&minus;urlhack</b> ] [ <b>&minus;%A,
&minus;&minus;assume</b> ] [ <b>&minus;@iN,
&minus;&minus;protocol[=N]</b> ] [ <b>&minus;%w,
&minus;&minus;disable&minus;module</b> ] [ <b>&minus;F,
&minus;&minus;user&minus;agent</b> ] [ <b>&minus;%R,
&minus;&minus;referer</b> ] [ <b>&minus;%E,
&minus;&minus;from</b> ] [ <b>&minus;%F,
&minus;&minus;footer</b> ] [ <b>&minus;%l,
&minus;&minus;language</b> ] [ <b>&minus;%a,
&minus;&minus;accept</b> ] [ <b>&minus;%X,
&minus;&minus;headers</b> ] [ <b>&minus;C,
&minus;&minus;cache[=N]</b> ] [ <b>&minus;k,
&minus;&minus;store&minus;all&minus;in&minus;cache</b> ] [
<b>&minus;%n &minus;&minus;do&minus;not&minus;recatch</b> ]
[ <b>&minus;%v &minus;&minus;display</b> ] [ <b>&minus;Q
&minus;&minus;do&minus;not&minus;log</b> ] [ <b>&minus;q
&minus;&minus;quiet</b> ] [ <b>&minus;z
&minus;&minus;extra&minus;log</b> ] [ <b>&minus;Z
&minus;&minus;debug&minus;log</b> ] [ <b>&minus;v
&minus;&minus;verbose</b> ] [ <b>&minus;f
&minus;&minus;file&minus;log</b> ] [ <b>&minus;f2
&minus;&minus;single&minus;log</b> ] [ <b>&minus;I
&minus;&minus;index</b> ] [ <b>&minus;%i
<b>&minus;%n, &minus;&minus;do&minus;not&minus;recatch</b> ]
[ <b>&minus;%v, &minus;&minus;display</b> ] [ <b>&minus;Q,
&minus;&minus;do&minus;not&minus;log</b> ] [ <b>&minus;q,
&minus;&minus;quiet</b> ] [ <b>&minus;z,
&minus;&minus;extra&minus;log</b> ] [ <b>&minus;Z,
&minus;&minus;debug&minus;log</b> ] [ <b>&minus;v,
&minus;&minus;verbose</b> ] [ <b>&minus;f,
&minus;&minus;file&minus;log</b> ] [ <b>&minus;f2,
&minus;&minus;single&minus;log</b> ] [ <b>&minus;I,
&minus;&minus;index</b> ] [ <b>&minus;%i,
&minus;&minus;build&minus;top&minus;index</b> ] [
<b>&minus;%I &minus;&minus;search&minus;index</b> ] [
<b>&minus;pN &minus;&minus;priority[=N]</b> ] [ <b>&minus;S
<b>&minus;%I, &minus;&minus;search&minus;index</b> ] [
<b>&minus;pN, &minus;&minus;priority[=N]</b> ] [
<b>&minus;S,
&minus;&minus;stay&minus;on&minus;same&minus;dir</b> ] [
<b>&minus;D &minus;&minus;can&minus;go&minus;down</b> ] [
<b>&minus;U &minus;&minus;can&minus;go&minus;up</b> ] [
<b>&minus;B
<b>&minus;D, &minus;&minus;can&minus;go&minus;down</b> ] [
<b>&minus;U, &minus;&minus;can&minus;go&minus;up</b> ] [
<b>&minus;B,
&minus;&minus;can&minus;go&minus;up&minus;and&minus;down</b>
] [ <b>&minus;a
] [ <b>&minus;a,
&minus;&minus;stay&minus;on&minus;same&minus;address</b> ] [
<b>&minus;d
<b>&minus;d,
&minus;&minus;stay&minus;on&minus;same&minus;domain</b> ] [
<b>&minus;l
<b>&minus;l,
&minus;&minus;stay&minus;on&minus;same&minus;tld</b> ] [
<b>&minus;e &minus;&minus;go&minus;everywhere</b> ] [
<b>&minus;%H &minus;&minus;debug&minus;headers</b> ] [
<b>&minus;%!
<b>&minus;e, &minus;&minus;go&minus;everywhere</b> ] [
<b>&minus;%H, &minus;&minus;debug&minus;headers</b> ] [
<b>&minus;%!,
&minus;&minus;disable&minus;security&minus;limits</b> ] [
<b>&minus;V &minus;&minus;userdef&minus;cmd</b> ] [
<b>&minus;%W &minus;&minus;callback</b> ] [ <b>&minus;K
<b>&minus;V, &minus;&minus;userdef&minus;cmd</b> ] [
<b>&minus;%W, &minus;&minus;callback</b> ] [ <b>&minus;K,
&minus;&minus;keep&minus;links[=N]</b> ] [</p>
<h2>DESCRIPTION
@@ -184,7 +185,7 @@ sites</p>
www.someweb.com/bob/bobby.html +* &minus;r6</b></p>
<p style="margin-left:22%;">means get all files starting
from bobby.html with 6 link&minus;depth and possibility of
from bobby.html, with 6 link&minus;depth, and possibility of
going everywhere on the web</p>
<p style="margin-left:11%;"><b>httrack
@@ -233,7 +234,7 @@ options:</b></p>
<p>path for mirror/logfiles+cache (&minus;O path
mirror[path cache and logfiles]) (&minus;&minus;path
mirror[,path cache and logfiles]) (&minus;&minus;path
&lt;param&gt;)</p> </td></tr>
</table>
@@ -264,7 +265,7 @@ options:</b></p>
<td width="78%">
<p>mirror web sites semi&minus;automatic (asks questions)
<p>mirror web sites, semi&minus;automatic (asks questions)
(&minus;&minus;mirror&minus;wizard)</p> </td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -355,11 +356,11 @@ options:</b></p>
cellspacing="0" cellpadding="0">
<tr valign="top" align="left">
<td width="11%"></td>
<td width="7%">
<td width="9%">
<p style="margin-top: 1em">&minus;rN</p></td>
<td width="4%"></td>
<td width="2%"></td>
<td width="78%">
@@ -367,11 +368,11 @@ options:</b></p>
r9999) (&minus;&minus;depth[=N])</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="7%">
<td width="9%">
<p>&minus;%eN</p></td>
<td width="4%"></td>
<td width="2%"></td>
<td width="78%">
@@ -379,11 +380,11 @@ r9999) (&minus;&minus;depth[=N])</p></td></tr>
(&minus;&minus;ext&minus;depth[=N])</p> </td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="7%">
<td width="9%">
<p>&minus;mN</p></td>
<td width="4%"></td>
<td width="2%"></td>
<td width="78%">
@@ -391,22 +392,22 @@ r9999) (&minus;&minus;depth[=N])</p></td></tr>
(&minus;&minus;max&minus;files[=N])</p> </td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="7%">
<td width="9%">
<p>&minus;mNN2</p></td>
<td width="4%"></td>
<p>&minus;mN,N2</p></td>
<td width="2%"></td>
<td width="78%">
<p>maximum file length for non html (N) and html (N2)</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="7%">
<td width="9%">
<p>&minus;MN</p></td>
<td width="4%"></td>
<td width="2%"></td>
<td width="78%">
@@ -414,23 +415,23 @@ r9999) (&minus;&minus;depth[=N])</p></td></tr>
(&minus;&minus;max&minus;size[=N])</p> </td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="7%">
<td width="9%">
<p>&minus;EN</p></td>
<td width="4%"></td>
<td width="2%"></td>
<td width="78%">
<p>maximum mirror time in seconds (60=1 minute 3600=1 hour)
(&minus;&minus;max&minus;time[=N])</p> </td></tr>
<p>maximum mirror time in seconds (60=1 minute, 3600=1
hour) (&minus;&minus;max&minus;time[=N])</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="7%">
<td width="9%">
<p>&minus;AN</p></td>
<td width="4%"></td>
<td width="2%"></td>
<td width="78%">
@@ -438,11 +439,11 @@ r9999) (&minus;&minus;depth[=N])</p></td></tr>
(&minus;&minus;max&minus;rate[=N])</p> </td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="7%">
<td width="9%">
<p>&minus;%cN</p></td>
<td width="4%"></td>
<td width="2%"></td>
<td width="78%">
@@ -450,15 +451,15 @@ r9999) (&minus;&minus;depth[=N])</p></td></tr>
(&minus;&minus;connection&minus;per&minus;second[=N])</p> </td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="7%">
<td width="9%">
<p>&minus;GN</p></td>
<td width="4%"></td>
<td width="2%"></td>
<td width="78%">
<p>pause transfer if N bytes reached and wait until lock
<p>pause transfer if N bytes reached, and wait until lock
file is deleted (&minus;&minus;max&minus;pause[=N])</p></td></tr>
</table>
@@ -489,7 +490,7 @@ control:</b></p>
<td width="78%">
<p>timeout number of seconds after a non&minus;responding
<p>timeout, number of seconds after a non&minus;responding
link is shutdown (&minus;&minus;timeout[=N])</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -501,7 +502,7 @@ link is shutdown (&minus;&minus;timeout[=N])</p></td></tr>
<td width="78%">
<p>number of retries in case of timeout or non&minus;fatal
<p>number of retries, in case of timeout or non&minus;fatal
errors (*R1) (&minus;&minus;retries[=N])</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -513,7 +514,7 @@ errors (*R1) (&minus;&minus;retries[=N])</p></td></tr>
<td width="78%">
<p>traffic jam control minimum transfer rate
<p>traffic jam control, minimum transfer rate
(bytes/seconds) tolerated for a link
(&minus;&minus;min&minus;rate[=N])</p> </td></tr>
<tr valign="top" align="left">
@@ -526,7 +527,7 @@ errors (*R1) (&minus;&minus;retries[=N])</p></td></tr>
<td width="78%">
<p>host is abandoned if: 0=never 1=timeout 2=slow
<p>host is abandoned if: 0=never, 1=timeout, 2=slow,
3=timeout or slow (&minus;&minus;host&minus;control[=N])</p></td></tr>
</table>
@@ -545,8 +546,8 @@ options:</b></p>
<td width="78%">
<p style="margin-top: 1em">*extended parsing attempt to
parse all links even in unknown tags or Javascript (%P0 don
<p style="margin-top: 1em">*extended parsing, attempt to
parse all links, even in unknown tags or Javascript (%P0 don
t use) (&minus;&minus;extended&minus;parsing[=N])</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -615,7 +616,7 @@ options:</b></p>
<p style="margin-top: 1em">structure type (0 *original
structure 1+: see below) (&minus;&minus;structure[=N])</p></td></tr>
structure, 1+: see below) (&minus;&minus;structure[=N])</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="4%">
@@ -638,9 +639,9 @@ structure 1+: see below) (&minus;&minus;structure[=N])</p></td></tr>
<td width="78%">
<p>delayed type check don t make any link test but wait for
files download to start instead (experimental) (%N0 don t
use %N1 use for unknown extensions * %N2 always use)</p></td></tr>
<p>delayed type check, don t make any link test but wait
for files download to start instead (experimental) (%N0 don
t use, %N1 use for unknown extensions, * %N2 always use)</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="4%">
@@ -651,8 +652,8 @@ use %N1 use for unknown extensions * %N2 always use)</p></td></tr>
<td width="78%">
<p>cached delayed type check don t wait for remote type
during updates to speedup them (%D0 wait * %D1 don t wait)
<p>cached delayed type check, don t wait for remote type
during updates, to speedup them (%D0 wait, * %D1 don t wait)
(&minus;&minus;cached&minus;delayed&minus;type&minus;check)</p> </td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -691,8 +692,8 @@ L2 ISO9660 compatible)
<p>keep original links (e.g. http://www.adr/link) (K0
*relative link K absolute links K4 original links K3
absolute URI links K5 transparent proxy link)
*relative link, K absolute links, K4 original links, K3
absolute URI links, K5 transparent proxy link)
(&minus;&minus;keep&minus;links[=N])</p> </td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -729,7 +730,7 @@ protected websites (%x0 include)
<td width="78%">
<p>*include query string for local files (useless for
<p>*include query string for local files (useless, for
information purpose only) (%q0 don t include)
(&minus;&minus;include&minus;query&minus;string)</p> </td></tr>
<tr valign="top" align="left">
@@ -798,7 +799,7 @@ options:</b></p>
<p style="margin-top: 1em">accept cookies in cookies.txt
(0=do not accept* 1=accept) (&minus;&minus;cookies[=N])</p></td></tr>
(0=do not accept,* 1=accept) (&minus;&minus;cookies[=N])</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="6%">
@@ -809,8 +810,8 @@ options:</b></p>
<td width="78%">
<p>check document type if unknown (cgiasp..) (u0 don t
check * u1 check but / u2 check always)
<p>check document type if unknown (cgi,asp..) (u0 don t
check, * u1 check but /, u2 check always)
(&minus;&minus;check&minus;type[=N])</p> </td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -822,9 +823,9 @@ check * u1 check but / u2 check always)
<td width="78%">
<p>*parse Java Classes (j0 don t parse bitmask: |1 parse
default |2 don t parse .class |4 don t parse .js |8 don t be
aggressive) (&minus;&minus;parse&minus;java[=N])</p></td></tr>
<p>*parse Java Classes (j0 don t parse, bitmask: |1 parse
default, |2 don t parse .class |4 don t parse .js |8 don t
be aggressive) (&minus;&minus;parse&minus;java[=N])</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="6%">
@@ -836,8 +837,8 @@ aggressive) (&minus;&minus;parse&minus;java[=N])</p></td></tr>
<p>follow robots.txt and meta robots tags
(0=never1=sometimes* 2=always 3=always (even strict rules))
(&minus;&minus;robots[=N])</p> </td></tr>
(0=never,1=sometimes,* 2=always, 3=always (even strict
rules)) (&minus;&minus;robots[=N])</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="6%">
@@ -848,8 +849,9 @@ aggressive) (&minus;&minus;parse&minus;java[=N])</p></td></tr>
<td width="78%">
<p>force HTTP/1.0 requests (reduce update features only for
old servers or proxies) (&minus;&minus;http&minus;10)</p></td></tr>
<p>force HTTP/1.0 requests (reduce update features, only
for old servers or proxies)
(&minus;&minus;http&minus;10)</p> </td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="6%">
@@ -860,7 +862,7 @@ old servers or proxies) (&minus;&minus;http&minus;10)</p></td></tr>
<td width="78%">
<p>use keep&minus;alive if possible greatly reducing
<p>use keep&minus;alive if possible, greatly reducing
latency for small files and test requests (%k0 don t use)
(&minus;&minus;keep&minus;alive)</p> </td></tr>
<tr valign="top" align="left">
@@ -874,7 +876,7 @@ latency for small files and test requests (%k0 don t use)
<p>tolerant requests (accept bogus responses on some
servers but not standard!) (&minus;&minus;tolerant)</p></td></tr>
servers, but not standard!) (&minus;&minus;tolerant)</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="6%">
@@ -886,7 +888,7 @@ servers but not standard!) (&minus;&minus;tolerant)</p></td></tr>
<p>update hacks: various hacks to limit re&minus;transfers
when updating (identical size bogus response..)
when updating (identical size, bogus response..)
(&minus;&minus;updatehack)</p> </td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -899,7 +901,7 @@ when updating (identical size bogus response..)
<p>url hacks: various hacks to limit duplicate URLs (strip
// www.foo.com==foo.com..) (&minus;&minus;urlhack)</p></td></tr>
//, www.foo.com==foo.com..) (&minus;&minus;urlhack)</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="6%">
@@ -910,9 +912,9 @@ when updating (identical size bogus response..)
<td width="78%">
<p>assume that a type (cgiasp..) is always linked with a
<p>assume that a type (cgi,asp..) is always linked with a
mime type (&minus;%A
php3cgi=text/html;datbin=application/x&minus;zip)
php3,cgi=text/html;dat,bin=application/x&minus;zip)
(&minus;&minus;assume &lt;param&gt;)</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -936,7 +938,7 @@ php3cgi=text/html;datbin=application/x&minus;zip)
<td width="78%">
<p>internet protocol (0=both ipv6+ipv4 4=ipv4 only 6=ipv6
<p>internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6
only) (&minus;&minus;protocol[=N])</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -1018,7 +1020,7 @@ HTTP headers (&minus;F &quot;user&minus;agent name&quot;)
<td width="78%">
<p>preferred language (&minus;%l &quot;fr en jp *&quot;
<p>preferred language (&minus;%l &quot;fr, en, jp, *&quot;
(&minus;&minus;language &lt;param&gt;)</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -1031,7 +1033,7 @@ HTTP headers (&minus;F &quot;user&minus;agent name&quot;)
<p>accepted formats (&minus;%a
&quot;text/htmlimage/pngimage/jpegimage/gif;q=0.9*/*;q=0.1&quot;
&quot;text/html,image/png,image/jpeg,image/gif;q=0.9,*/*;q=0.1&quot;
(&minus;&minus;accept &lt;param&gt;)</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -1048,7 +1050,7 @@ HTTP headers (&minus;F &quot;user&minus;agent name&quot;)
&lt;param&gt;)</p> </td></tr>
</table>
<p style="margin-left:11%; margin-top: 1em"><b>Log index
<p style="margin-left:11%; margin-top: 1em"><b>Log, index,
cache</b></p>
<table width="100%" border="0" rules="none" frame="void"
@@ -1064,7 +1066,7 @@ cache</b></p>
<p style="margin-top: 1em">create/use a cache for updates
and retries (C0 no cacheC1 cache is prioritary* C2 test
and retries (C0 no cache,C1 cache is prioritary,* C2 test
update before) (&minus;&minus;cache[=N])</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -1247,7 +1249,7 @@ options:</b></p>
<td width="78%">
<p>just scan don t save anything (for checking links)</p></td></tr>
<p>just scan, don t save anything (for checking links)</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="6%">
@@ -1291,7 +1293,7 @@ options:</b></p>
<td width="78%">
<p>get html files before then treat other files</p></td></tr>
<p>get html files before, then treat other files</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="6%">
@@ -1648,14 +1650,14 @@ doing)</b></p>
<p style="margin-top: 1em">bypass built&minus;in security
limits aimed to avoid bandwidth abuses (bandwidth
limits aimed to avoid bandwidth abuses (bandwidth,
simultaneous connections)
(&minus;&minus;disable&minus;security&minus;limits)</p> </td></tr>
</table>
<p style="margin-left:11%;">&minus;IMPORTANT</p>
<p style="margin-left:22%;">NOTE: DANGEROUS OPTION ONLY
<p style="margin-left:22%;">NOTE: DANGEROUS OPTION, ONLY
SUITABLE FOR EXPERTS</p>
<table width="100%" border="0" rules="none" frame="void"
@@ -1692,7 +1694,7 @@ specific options:</b></p>
<p style="margin-top: 1em">execute system command after
each files ($0 is the filename: &minus;V &quot;rm &quot;)
each files ($0 is the filename: &minus;V &quot;rm \$0&quot;)
(&minus;&minus;userdef&minus;cmd &lt;param&gt;)</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -1705,7 +1707,7 @@ each files ($0 is the filename: &minus;V &quot;rm &quot;)
<p>use an external library function as a wrapper (&minus;%W
myfoo.so[myparameters]) (&minus;&minus;callback
myfoo.so[,myparameters]) (&minus;&minus;callback
&lt;param&gt;)</p> </td></tr>
</table>
@@ -1736,7 +1738,7 @@ Option N</b></p>
<td width="78%">
<p>HTML in web/ images/other files in web/images/</p></td></tr>
<p>HTML in web/, images/other files in web/images/</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="9%">
@@ -1747,7 +1749,7 @@ Option N</b></p>
<td width="78%">
<p>HTML in web/HTML images/other in web/images</p></td></tr>
<p>HTML in web/HTML, images/other in web/images</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="9%">
@@ -1758,7 +1760,7 @@ Option N</b></p>
<td width="78%">
<p>HTML in web/ images/other in web/</p></td></tr>
<p>HTML in web/, images/other in web/</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="9%">
@@ -1769,8 +1771,8 @@ Option N</b></p>
<td width="78%">
<p>HTML in web/ images/other in web/xxx where xxx is the
file extension (all gif will be placed onto web/gif for
<p>HTML in web/, images/other in web/xxx, where xxx is the
file extension (all gif will be placed onto web/gif, for
example)</p> </td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -1793,7 +1795,7 @@ example)</p> </td></tr>
<td width="78%">
<p>All files in web/ with random names (gadget !)</p></td></tr>
<p>All files in web/, with random names (gadget !)</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="9%">
@@ -1804,7 +1806,7 @@ example)</p> </td></tr>
<td width="78%">
<p>Site&minus;structure without www.domain.xxx/</p></td></tr>
<p>Site&minus;structure, without www.domain.xxx/</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="9%">
@@ -1954,15 +1956,15 @@ directory</p> </td></tr>
<p style="margin-left:11%; margin-top: 1em"><b>Details:
User&minus;defined option N</b> <br>
%n Name of file without file type (ex: image) <br>
%N Name of file including file type (ex: image.gif) <br>
%N Name of file, including file type (ex: image.gif) <br>
%t File type (ex: gif) <br>
%p Path [without ending /] (ex: /someimages) <br>
%h Host name (ex: www.someweb.com) <br>
%M URL MD5 (128 bits 32 ascii bytes) <br>
%Q query string MD5 (128 bits 32 ascii bytes) <br>
%M URL MD5 (128 bits, 32 ascii bytes) <br>
%Q query string MD5 (128 bits, 32 ascii bytes) <br>
%k full query string <br>
%r protocol name (ex: http) <br>
%q small query string MD5 (16 bits 4 ascii bytes) <br>
%q small query string MD5 (16 bits, 4 ascii bytes) <br>
%s? Short name version (ex: %sN) <br>
%[param] param variable in query string <br>
%[param:before:after:empty:notfound] advanced variable
@@ -2040,8 +2042,8 @@ parameter could not be found</p>
<td width="78%">
<p>fields except the first one (the parameter name) can be
empty</p> </td></tr>
<p>fields, except the first one (the parameter name), can
be empty</p></td></tr>
</table>
<p style="margin-left:11%; margin-top: 1em"><b>Details:
@@ -2060,7 +2062,7 @@ Option K</b></p>
<p style="margin-top: 1em">foo.cgi?q=45 &minus;&gt;
foo4B54.html?q=45 (relative URI default)</p></td></tr>
foo4B54.html?q=45 (relative URI, default)</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
<td width="4%">
@@ -2130,7 +2132,7 @@ site(s) (default)</p>
<td width="78%">
<p>&lt;URLs&gt; get the files indicated do not seek other
<p>&lt;URLs&gt; get the files indicated, do not seek other
URLs (&minus;qg)</p></td></tr>
<tr valign="top" align="left">
<td width="11%"></td>
@@ -2158,7 +2160,7 @@ pages (&minus;r1p0C0I0t)</p>
<p style="margin-left:11%;">&minus;&minus;spider</p>
<p style="margin-left:22%;">&lt;URLs&gt; spider site(s) to
<p style="margin-left:22%;">&lt;URLs&gt; spider site(s), to
test links: reports Errors &amp; Warnings
(&minus;p0C0I0t)</p>
@@ -2169,17 +2171,17 @@ test links: reports Errors &amp; Warnings
<p style="margin-left:11%;">&minus;&minus;skeleton</p>
<p style="margin-left:22%;">&lt;URLs&gt; make a mirror but
<p style="margin-left:22%;">&lt;URLs&gt; make a mirror, but
gets only html files (&minus;p1)</p>
<p style="margin-left:11%;">&minus;&minus;update</p>
<p style="margin-left:22%;">update a mirror without
<p style="margin-left:22%;">update a mirror, without
confirmation (&minus;iC2)</p>
<p style="margin-left:11%;">&minus;&minus;continue</p>
<p style="margin-left:22%;">continue a mirror without
<p style="margin-left:22%;">continue a mirror, without
confirmation (&minus;iC1)</p>
<p style="margin-left:11%;">&minus;&minus;catchurl</p>

View File

@@ -1,182 +1,182 @@
.\" Process this file with
.\" groff -man -Tascii httrack.1
.\"
.TH httrack 1 "02 May 2014" "httrack website copier"
.TH httrack 1 "14 May 2014" "httrack website copier"
.SH NAME
httrack \- offline browser : copy websites to a local directory
.SH SYNOPSIS
.B httrack [ url ]... [ \-filter ]... [ +filter ]... [
.B \-O\, \-\-path
.B \-O, \-\-path
] [
.B \-w\, \-\-mirror
.B \-w, \-\-mirror
] [
.B \-W\, \-\-mirror\-wizard
.B \-W, \-\-mirror\-wizard
] [
.B \-g\, \-\-get\-files
.B \-g, \-\-get\-files
] [
.B \-i\, \-\-continue
.B \-i, \-\-continue
] [
.B \-Y\, \-\-mirrorlinks
.B \-Y, \-\-mirrorlinks
] [
.B \-P\, \-\-proxy
.B \-P, \-\-proxy
] [
.B \-%f\, \-\-httpproxy\-ftp[=N]
.B \-%f, \-\-httpproxy\-ftp[=N]
] [
.B \-%b\, \-\-bind
.B \-%b, \-\-bind
] [
.B \-rN\, \-\-depth[=N]
.B \-rN, \-\-depth[=N]
] [
.B \-%eN\, \-\-ext\-depth[=N]
.B \-%eN, \-\-ext\-depth[=N]
] [
.B \-mN\, \-\-max\-files[=N]
.B \-mN, \-\-max\-files[=N]
] [
.B \-MN\, \-\-max\-size[=N]
.B \-MN, \-\-max\-size[=N]
] [
.B \-EN\, \-\-max\-time[=N]
.B \-EN, \-\-max\-time[=N]
] [
.B \-AN\, \-\-max\-rate[=N]
.B \-AN, \-\-max\-rate[=N]
] [
.B \-%cN\, \-\-connection\-per\-second[=N]
.B \-%cN, \-\-connection\-per\-second[=N]
] [
.B \-GN\, \-\-max\-pause[=N]
.B \-GN, \-\-max\-pause[=N]
] [
.B \-cN\, \-\-sockets[=N]
.B \-cN, \-\-sockets[=N]
] [
.B \-TN\, \-\-timeout[=N]
.B \-TN, \-\-timeout[=N]
] [
.B \-RN\, \-\-retries[=N]
.B \-RN, \-\-retries[=N]
] [
.B \-JN\, \-\-min\-rate[=N]
.B \-JN, \-\-min\-rate[=N]
] [
.B \-HN\, \-\-host\-control[=N]
.B \-HN, \-\-host\-control[=N]
] [
.B \-%P\, \-\-extended\-parsing[=N]
.B \-%P, \-\-extended\-parsing[=N]
] [
.B \-n\, \-\-near
.B \-n, \-\-near
] [
.B \-t\, \-\-test
.B \-t, \-\-test
] [
.B \-%L\, \-\-list
.B \-%L, \-\-list
] [
.B \-%S\, \-\-urllist
.B \-%S, \-\-urllist
] [
.B \-NN\, \-\-structure[=N]
.B \-NN, \-\-structure[=N]
] [
.B \-%D\, \-\-cached\-delayed\-type\-check
.B \-%D, \-\-cached\-delayed\-type\-check
] [
.B \-%M\, \-\-mime\-html
.B \-%M, \-\-mime\-html
] [
.B \-LN\, \-\-long\-names[=N]
.B \-LN, \-\-long\-names[=N]
] [
.B \-KN\, \-\-keep\-links[=N]
.B \-KN, \-\-keep\-links[=N]
] [
.B \-x\, \-\-replace\-external
.B \-x, \-\-replace\-external
] [
.B \-%x\, \-\-disable\-passwords
.B \-%x, \-\-disable\-passwords
] [
.B \-%q\, \-\-include\-query\-string
.B \-%q, \-\-include\-query\-string
] [
.B \-o\, \-\-generate\-errors
.B \-o, \-\-generate\-errors
] [
.B \-X\, \-\-purge\-old[=N]
.B \-X, \-\-purge\-old[=N]
] [
.B \-%p\, \-\-preserve
.B \-%p, \-\-preserve
] [
.B \-%T\, \-\-utf8\-conversion
.B \-%T, \-\-utf8\-conversion
] [
.B \-bN\, \-\-cookies[=N]
.B \-bN, \-\-cookies[=N]
] [
.B \-u\, \-\-check\-type[=N]
.B \-u, \-\-check\-type[=N]
] [
.B \-j\, \-\-parse\-java[=N]
.B \-j, \-\-parse\-java[=N]
] [
.B \-sN\, \-\-robots[=N]
.B \-sN, \-\-robots[=N]
] [
.B \-%h\, \-\-http\-10
.B \-%h, \-\-http\-10
] [
.B \-%k\, \-\-keep\-alive
.B \-%k, \-\-keep\-alive
] [
.B \-%B\, \-\-tolerant
.B \-%B, \-\-tolerant
] [
.B \-%s\, \-\-updatehack
.B \-%s, \-\-updatehack
] [
.B \-%u\, \-\-urlhack
.B \-%u, \-\-urlhack
] [
.B \-%A\, \-\-assume
.B \-%A, \-\-assume
] [
.B \-@iN\, \-\-protocol[=N]
.B \-@iN, \-\-protocol[=N]
] [
.B \-%w\, \-\-disable\-module
.B \-%w, \-\-disable\-module
] [
.B \-F\, \-\-user\-agent
.B \-F, \-\-user\-agent
] [
.B \-%R\, \-\-referer
.B \-%R, \-\-referer
] [
.B \-%E\, \-\-from
.B \-%E, \-\-from
] [
.B \-%F\, \-\-footer
.B \-%F, \-\-footer
] [
.B \-%l\, \-\-language
.B \-%l, \-\-language
] [
.B \-%a\, \-\-accept
.B \-%a, \-\-accept
] [
.B \-%X\, \-\-headers
.B \-%X, \-\-headers
] [
.B \-C\, \-\-cache[=N]
.B \-C, \-\-cache[=N]
] [
.B \-k\, \-\-store\-all\-in\-cache
.B \-k, \-\-store\-all\-in\-cache
] [
.B \-%n\, \-\-do\-not\-recatch
.B \-%n, \-\-do\-not\-recatch
] [
.B \-%v\, \-\-display
.B \-%v, \-\-display
] [
.B \-Q\, \-\-do\-not\-log
.B \-Q, \-\-do\-not\-log
] [
.B \-q\, \-\-quiet
.B \-q, \-\-quiet
] [
.B \-z\, \-\-extra\-log
.B \-z, \-\-extra\-log
] [
.B \-Z\, \-\-debug\-log
.B \-Z, \-\-debug\-log
] [
.B \-v\, \-\-verbose
.B \-v, \-\-verbose
] [
.B \-f\, \-\-file\-log
.B \-f, \-\-file\-log
] [
.B \-f2\, \-\-single\-log
.B \-f2, \-\-single\-log
] [
.B \-I\, \-\-index
.B \-I, \-\-index
] [
.B \-%i\, \-\-build\-top\-index
.B \-%i, \-\-build\-top\-index
] [
.B \-%I\, \-\-search\-index
.B \-%I, \-\-search\-index
] [
.B \-pN\, \-\-priority[=N]
.B \-pN, \-\-priority[=N]
] [
.B \-S\, \-\-stay\-on\-same\-dir
.B \-S, \-\-stay\-on\-same\-dir
] [
.B \-D\, \-\-can\-go\-down
.B \-D, \-\-can\-go\-down
] [
.B \-U\, \-\-can\-go\-up
.B \-U, \-\-can\-go\-up
] [
.B \-B\, \-\-can\-go\-up\-and\-down
.B \-B, \-\-can\-go\-up\-and\-down
] [
.B \-a\, \-\-stay\-on\-same\-address
.B \-a, \-\-stay\-on\-same\-address
] [
.B \-d\, \-\-stay\-on\-same\-domain
.B \-d, \-\-stay\-on\-same\-domain
] [
.B \-l\, \-\-stay\-on\-same\-tld
.B \-l, \-\-stay\-on\-same\-tld
] [
.B \-e\, \-\-go\-everywhere
.B \-e, \-\-go\-everywhere
] [
.B \-%H\, \-\-debug\-headers
.B \-%H, \-\-debug\-headers
] [
.B \-%!\, \-\-disable\-security\-limits
.B \-%!, \-\-disable\-security\-limits
] [
.B \-V\, \-\-userdef\-cmd
.B \-V, \-\-userdef\-cmd
] [
.B \-%W\, \-\-callback
.B \-%W, \-\-callback
] [
.B \-K\, \-\-keep\-links[=N]
.B \-K, \-\-keep\-links[=N]
] [
.B
.SH DESCRIPTION
@@ -191,7 +191,7 @@ allows you to download a World Wide Web site from the Internet to a local direct
mirror the two sites together (with shared links) and accept any .jpg files on .com sites
.TP
.B httrack www.someweb.com/bob/bobby.html +* \-r6
means get all files starting from bobby.html\, with 6 link\-depth\, and possibility of going everywhere on the web
means get all files starting from bobby.html, with 6 link\-depth, and possibility of going everywhere on the web
.TP
.B httrack www.someweb.com/bob/bobby.html \-\-spider \-P proxy.myhost.com:8080
runs the spider on www.someweb.com/bob/bobby.html using a proxy
@@ -208,7 +208,7 @@ continues a mirror in the current folder
.SS General options:
.IP \-O
path for mirror/logfiles+cache (\-O path
mirror[\,path
mirror[,path
cache
and
logfiles]) (\-\-path <param>)
@@ -217,7 +217,7 @@ logfiles]) (\-\-path <param>)
.IP \-w
*mirror web sites (\-\-mirror)
.IP \-W
mirror web sites\, semi\-automatic (asks questions) (\-\-mirror\-wizard)
mirror web sites, semi\-automatic (asks questions) (\-\-mirror\-wizard)
.IP \-g
just get files (saved in the current directory) (\-\-get\-files)
.IP \-i
@@ -240,34 +240,34 @@ set the mirror depth to N (* r9999) (\-\-depth[=N])
set the external links depth to N (* %e0) (\-\-ext\-depth[=N])
.IP \-mN
maximum file length for a non\-html file (\-\-max\-files[=N])
.IP \-mN\,N2
.IP \-mN,N2
maximum file length for non html (N) and html (N2)
.IP \-MN
maximum overall size that can be uploaded/scanned (\-\-max\-size[=N])
.IP \-EN
maximum mirror time in seconds (60=1 minute\, 3600=1 hour) (\-\-max\-time[=N])
maximum mirror time in seconds (60=1 minute, 3600=1 hour) (\-\-max\-time[=N])
.IP \-AN
maximum transfer rate in bytes/seconds (1000=1KB/s max) (\-\-max\-rate[=N])
.IP \-%cN
maximum number of connections/seconds (*%c10) (\-\-connection\-per\-second[=N])
.IP \-GN
pause transfer if N bytes reached\, and wait until lock file is deleted (\-\-max\-pause[=N])
pause transfer if N bytes reached, and wait until lock file is deleted (\-\-max\-pause[=N])
.SS Flow control:
.IP \-cN
number of multiple connections (*c8) (\-\-sockets[=N])
.IP \-TN
timeout\, number of seconds after a non\-responding link is shutdown (\-\-timeout[=N])
timeout, number of seconds after a non\-responding link is shutdown (\-\-timeout[=N])
.IP \-RN
number of retries\, in case of timeout or non\-fatal errors (*R1) (\-\-retries[=N])
number of retries, in case of timeout or non\-fatal errors (*R1) (\-\-retries[=N])
.IP \-JN
traffic jam control\, minimum transfer rate (bytes/seconds) tolerated for a link (\-\-min\-rate[=N])
traffic jam control, minimum transfer rate (bytes/seconds) tolerated for a link (\-\-min\-rate[=N])
.IP \-HN
host is abandoned if: 0=never\, 1=timeout\, 2=slow\, 3=timeout or slow (\-\-host\-control[=N])
host is abandoned if: 0=never, 1=timeout, 2=slow, 3=timeout or slow (\-\-host\-control[=N])
.SS Links options:
.IP \-%P
*extended parsing\, attempt to parse all links\, even in unknown tags or Javascript (%P0 don t use) (\-\-extended\-parsing[=N])
*extended parsing, attempt to parse all links, even in unknown tags or Javascript (%P0 don t use) (\-\-extended\-parsing[=N])
.IP \-n
get non\-html files near an html file (ex: an image located outside) (\-\-near)
.IP \-t
@@ -279,25 +279,25 @@ test all URLs (even forbidden ones) (\-\-test)
.SS Build options:
.IP \-NN
structure type (0 *original structure\, 1+: see below) (\-\-structure[=N])
structure type (0 *original structure, 1+: see below) (\-\-structure[=N])
.IP \-or
user defined structure (\-N "%h%p/%n%q.%t")
.IP \-%N
delayed type check\, don t make any link test but wait for files download to start instead (experimental) (%N0 don t use\, %N1 use for unknown extensions\, * %N2 always use)
delayed type check, don t make any link test but wait for files download to start instead (experimental) (%N0 don t use, %N1 use for unknown extensions, * %N2 always use)
.IP \-%D
cached delayed type check\, don t wait for remote type during updates\, to speedup them (%D0 wait\, * %D1 don t wait) (\-\-cached\-delayed\-type\-check)
cached delayed type check, don t wait for remote type during updates, to speedup them (%D0 wait, * %D1 don t wait) (\-\-cached\-delayed\-type\-check)
.IP \-%M
generate a RFC MIME\-encapsulated full\-archive (.mht) (\-\-mime\-html)
.IP \-LN
long names (L1 *long names / L0 8\-3 conversion / L2 ISO9660 compatible) (\-\-long\-names[=N])
.IP \-KN
keep original links (e.g. http://www.adr/link) (K0 *relative link\, K absolute links\, K4 original links\, K3 absolute URI links\, K5 transparent proxy link) (\-\-keep\-links[=N])
keep original links (e.g. http://www.adr/link) (K0 *relative link, K absolute links, K4 original links, K3 absolute URI links, K5 transparent proxy link) (\-\-keep\-links[=N])
.IP \-x
replace external html links by error pages (\-\-replace\-external)
.IP \-%x
do not include any password for external password protected websites (%x0 include) (\-\-disable\-passwords)
.IP \-%q
*include query string for local files (useless\, for information purpose only) (%q0 don t include) (\-\-include\-query\-string)
*include query string for local files (useless, for information purpose only) (%q0 don t include) (\-\-include\-query\-string)
.IP \-o
*generate output html file in case of error (404..) (o0 don t generate) (\-\-generate\-errors)
.IP \-X
@@ -309,29 +309,29 @@ links conversion to UTF\-8 (\-\-utf8\-conversion)
.SS Spider options:
.IP \-bN
accept cookies in cookies.txt (0=do not accept\,* 1=accept) (\-\-cookies[=N])
accept cookies in cookies.txt (0=do not accept,* 1=accept) (\-\-cookies[=N])
.IP \-u
check document type if unknown (cgi\,asp..) (u0 don t check\, * u1 check but /\, u2 check always) (\-\-check\-type[=N])
check document type if unknown (cgi,asp..) (u0 don t check, * u1 check but /, u2 check always) (\-\-check\-type[=N])
.IP \-j
*parse Java Classes (j0 don t parse\, bitmask: |1 parse default\, |2 don t parse .class |4 don t parse .js |8 don t be aggressive) (\-\-parse\-java[=N])
*parse Java Classes (j0 don t parse, bitmask: |1 parse default, |2 don t parse .class |4 don t parse .js |8 don t be aggressive) (\-\-parse\-java[=N])
.IP \-sN
follow robots.txt and meta robots tags (0=never\,1=sometimes\,* 2=always\, 3=always (even strict rules)) (\-\-robots[=N])
follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always, 3=always (even strict rules)) (\-\-robots[=N])
.IP \-%h
force HTTP/1.0 requests (reduce update features\, only for old servers or proxies) (\-\-http\-10)
force HTTP/1.0 requests (reduce update features, only for old servers or proxies) (\-\-http\-10)
.IP \-%k
use keep\-alive if possible\, greatly reducing latency for small files and test requests (%k0 don t use) (\-\-keep\-alive)
use keep\-alive if possible, greatly reducing latency for small files and test requests (%k0 don t use) (\-\-keep\-alive)
.IP \-%B
tolerant requests (accept bogus responses on some servers\, but not standard!) (\-\-tolerant)
tolerant requests (accept bogus responses on some servers, but not standard!) (\-\-tolerant)
.IP \-%s
update hacks: various hacks to limit re\-transfers when updating (identical size\, bogus response..) (\-\-updatehack)
update hacks: various hacks to limit re\-transfers when updating (identical size, bogus response..) (\-\-updatehack)
.IP \-%u
url hacks: various hacks to limit duplicate URLs (strip //\, www.foo.com==foo.com..) (\-\-urlhack)
url hacks: various hacks to limit duplicate URLs (strip //, www.foo.com==foo.com..) (\-\-urlhack)
.IP \-%A
assume that a type (cgi\,asp..) is always linked with a mime type (\-%A php3\,cgi=text/html;dat\,bin=application/x\-zip) (\-\-assume <param>)
assume that a type (cgi,asp..) is always linked with a mime type (\-%A php3,cgi=text/html;dat,bin=application/x\-zip) (\-\-assume <param>)
.IP \-can
also be used to force a specific file type: \-\-assume foo.cgi=text/html
.IP \-@iN
internet protocol (0=both ipv6+ipv4\, 4=ipv4 only\, 6=ipv6 only) (\-\-protocol[=N])
internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6 only) (\-\-protocol[=N])
.IP \-%w
disable a specific external mime module (\-%w htsswf \-%w htsjava) (\-\-disable\-module <param>)
@@ -345,15 +345,15 @@ from email address sent in HTTP headers (\-\-from <param>)
.IP \-%F
footer string in Html code (\-%F "Mirrored [from host %s [file %s [at %s]]]") (\-\-footer <param>)
.IP \-%l
preferred language (\-%l "fr\, en\, jp\, *") (\-\-language <param>)
preferred language (\-%l "fr, en, jp, *") (\-\-language <param>)
.IP \-%a
accepted formats (\-%a "text/html\,image/png\,image/jpeg\,image/gif;q=0.9\,*/*;q=0.1") (\-\-accept <param>)
accepted formats (\-%a "text/html,image/png,image/jpeg,image/gif;q=0.9,*/*;q=0.1") (\-\-accept <param>)
.IP \-%X
additional HTTP header line (\-%X "X\-Magic: 42") (\-\-headers <param>)
.SS Log\, index\, cache
.SS Log, index, cache
.IP \-C
create/use a cache for updates and retries (C0 no cache\,C1 cache is prioritary\,* C2 test update before) (\-\-cache[=N])
create/use a cache for updates and retries (C0 no cache,C1 cache is prioritary,* C2 test update before) (\-\-cache[=N])
.IP \-k
store all files in cache (not useful if files on disk) (\-\-store\-all\-in\-cache)
.IP \-%n
@@ -385,7 +385,7 @@ make an searchable index for this mirror (* %I0 don t make) (\-\-search\-index)
.IP \-pN
priority mode: (* p3) (\-\-priority[=N])
.IP \-p0
just scan\, don t save anything (for checking links)
just scan, don t save anything (for checking links)
.IP \-p1
save only html files
.IP \-p2
@@ -393,7 +393,7 @@ save only non html files
.IP \-*p3
save all files
.IP \-p7
get html files before\, then treat other files
get html files before, then treat other files
.IP \-S
stay on the same directory (\-\-stay\-on\-same\-dir)
.IP \-D
@@ -455,35 +455,35 @@ generate transfer rate statictics every minutes (\-\-debug\-ratestats)
.SS Dangerous options: (do NOT use unless you exactly know what you are doing)
.IP \-%!
bypass built\-in security limits aimed to avoid bandwidth abuses (bandwidth\, simultaneous connections) (\-\-disable\-security\-limits)
bypass built\-in security limits aimed to avoid bandwidth abuses (bandwidth, simultaneous connections) (\-\-disable\-security\-limits)
.IP \-IMPORTANT
NOTE: DANGEROUS OPTION\, ONLY SUITABLE FOR EXPERTS
NOTE: DANGEROUS OPTION, ONLY SUITABLE FOR EXPERTS
.IP \-USE
IT WITH EXTREME CARE
.SS Command\-line specific options:
.IP \-V
execute system command after each files ($0 is the filename: \-V "rm \$0") (\-\-userdef\-cmd <param>)
execute system command after each files ($0 is the filename: \-V "rm \\$0") (\-\-userdef\-cmd <param>)
.IP \-%W
use an external library function as a wrapper (\-%W myfoo.so[\,myparameters]) (\-\-callback <param>)
use an external library function as a wrapper (\-%W myfoo.so[,myparameters]) (\-\-callback <param>)
.SS Details: Option N
.IP \-N0
Site\-structure (default)
.IP \-N1
HTML in web/\, images/other files in web/images/
HTML in web/, images/other files in web/images/
.IP \-N2
HTML in web/HTML\, images/other in web/images
HTML in web/HTML, images/other in web/images
.IP \-N3
HTML in web/\, images/other in web/
HTML in web/, images/other in web/
.IP \-N4
HTML in web/\, images/other in web/xxx\, where xxx is the file extension (all gif will be placed onto web/gif\, for example)
HTML in web/, images/other in web/xxx, where xxx is the file extension (all gif will be placed onto web/gif, for example)
.IP \-N5
Images/other in web/xxx and HTML in web/HTML
.IP \-N99
All files in web/\, with random names (gadget !)
All files in web/, with random names (gadget !)
.IP \-N100
Site\-structure\, without www.domain.xxx/
Site\-structure, without www.domain.xxx/
.IP \-N101
Identical to N1 exept that "web" is replaced by the site s name
.IP \-N102
@@ -510,15 +510,15 @@ Identical to N5 exept that there is no "web" directory
Identical to N99 exept that there is no "web" directory
.SS Details: User\-defined option N
%n Name of file without file type (ex: image)
%N Name of file\, including file type (ex: image.gif)
%N Name of file, including file type (ex: image.gif)
%t File type (ex: gif)
%p Path [without ending /] (ex: /someimages)
%h Host name (ex: www.someweb.com)
%M URL MD5 (128 bits\, 32 ascii bytes)
%Q query string MD5 (128 bits\, 32 ascii bytes)
%M URL MD5 (128 bits, 32 ascii bytes)
%Q query string MD5 (128 bits, 32 ascii bytes)
%k full query string
%r protocol name (ex: http)
%q small query string MD5 (16 bits\, 4 ascii bytes)
%q small query string MD5 (16 bits, 4 ascii bytes)
%s? Short name version (ex: %sN)
%[param] param variable in query string
%[param:before:after:empty:notfound] advanced variable extraction
@@ -535,11 +535,11 @@ Identical to N99 exept that there is no "web" directory
.IP \-empty
: string replacement if the parameter was empty
.IP \-all
fields\, except the first one (the parameter name)\, can be empty
fields, except the first one (the parameter name), can be empty
.SS Details: Option K
.IP \-K0
foo.cgi?q=45 \-> foo4B54.html?q=45 (relative URI\, default)
foo.cgi?q=45 \-> foo4B54.html?q=45 (relative URI, default)
.IP \-K
\-> http://www.foobar.com/folder/foo.cgi?q=45 (absolute URL) (\-\-keep\-links[=N])
.IP \-K3
@@ -553,7 +553,7 @@ foo.cgi?q=45 \-> foo4B54.html?q=45 (relative URI\, default)
.IP \-\-mirror
<URLs> *make a mirror of site(s) (default)
.IP \-\-get
<URLs> get the files indicated\, do not seek other URLs (\-qg)
<URLs> get the files indicated, do not seek other URLs (\-qg)
.IP \-\-list
<text file> add all URL located in this text file (\-%L)
.IP \-\-mirrorlinks
@@ -561,15 +561,15 @@ foo.cgi?q=45 \-> foo4B54.html?q=45 (relative URI\, default)
.IP \-\-testlinks
<URLs> test links in pages (\-r1p0C0I0t)
.IP \-\-spider
<URLs> spider site(s)\, to test links: reports Errors & Warnings (\-p0C0I0t)
<URLs> spider site(s), to test links: reports Errors & Warnings (\-p0C0I0t)
.IP \-\-testsite
<URLs> identical to \-\-spider
.IP \-\-skeleton
<URLs> make a mirror\, but gets only html files (\-p1)
<URLs> make a mirror, but gets only html files (\-p1)
.IP \-\-update
update a mirror\, without confirmation (\-iC2)
update a mirror, without confirmation (\-iC2)
.IP \-\-continue
continue a mirror\, without confirmation (\-iC1)
continue a mirror, without confirmation (\-iC1)
.IP \-\-catchurl
create a temporary proxy to capture an URL or a form post URL

View File

@@ -2355,7 +2355,7 @@ void host_ban(httrackp * opt, lien_url ** liens, int ptr, int lien_tot,
if (strfield2(jump_identification(liens[i]->adr), host)) { // host
hts_log_print(opt, LOG_DEBUG, "Cancel: %s%s", liens[i]->adr,
liens[i]->fil);
strcpybuff(liens[i]->adr, "!"); // cancel (invalide hash)
hash_invalidate_entry(opt->hash, i); // invalidate hashtable entry
// on efface pas le hash, because si on rencontre le lien, reverif sav..
}
} else {

View File

@@ -36,8 +36,8 @@ Please visit our Website: http://www.httrack.com
#define HTTRACK_GLOBAL_DEFH
// Version (also check external version information)
#define HTTRACK_VERSION "3.48-5"
#define HTTRACK_VERSIONID "3.48.5"
#define HTTRACK_VERSION "3.48-8"
#define HTTRACK_VERSIONID "3.48.8"
#define HTTRACK_AFF_VERSION "3.x"
#define HTTRACK_LIB_VERSION "2.0"

View File

@@ -85,6 +85,14 @@ static int key_sav_equals(void *arg, const char *a, const char *b) {
return strcasecmp(a, b) == 0;
}
/* Debug printer for "sav" keys: the key is handed back unchanged as the
   printable string. The opaque argument is not used. */
static const char* key_sav_debug_print(void *arg, const char *a) {
  (void) arg;
  return a;
}
/* Debug printer for "sav" values: the stored pointer is returned as a string
   pointer, without any copy. The opaque argument is not used.
   NOTE(review): this presumes values stored in the "sav" table are C strings;
   confirm against the code writing into that table. */
static const char* value_sav_debug_print(void *arg, void *a) {
  const char *const text = (char*) a;
  (void) arg;
  return text;
}
/* Pseudo-key (lien_url structure) hash function */
static inthash_keys key_adrfil_hashes_generic(void *arg, const char *value_,
const int former) {
@@ -151,6 +159,32 @@ static int key_adrfil_equals_generic(void *arg, const char *a_, const char *b_,
}
}
/* Debug printer for lien_url pseudo-keys.
   arg: the owning hash_struct, whose normfil buffer receives the text
   a_: the lien_url structure, passed as an opaque key pointer
   former: non-zero to print former_adr/former_fil instead of adr/fil
   Returns hash->normfil; the content is overwritten by the next call. */
static const char* key_adrfil_debug_print_(void *arg, const char *a_, const int former) {
  hash_struct *const hash = (hash_struct*) arg;
  const lien_url *const a = (const lien_url*) a_;
  const char *const a_adr = !former ? a->adr : a->former_adr;
  const char *const a_fil = !former ? a->fil : a->former_fil;

  /* address immediately followed by file, truncated to the buffer size */
  snprintf(hash->normfil, sizeof(hash->normfil), "%s%s", a_adr, a_fil);
  return hash->normfil;
}
/* Key printer for the "adr"/"fil" members (former flag cleared). */
static const char* key_adrfil_debug_print(void *arg, const char *a_) {
  const int former = 0;

  return key_adrfil_debug_print_(arg, a_, former);
}
/* Key printer for the "former_adr"/"former_fil" members (former flag set). */
static const char* key_former_adrfil_debug_print(void *arg, const char *a_) {
  const int former = 1;

  return key_adrfil_debug_print_(arg, a_, former);
}
/* Value printer for the adrfil tables: the value pointer is reinterpreted
   through the inthash_value union and printed as a decimal integer.
   arg: the owning hash_struct, whose normfil2 buffer receives the text
   Returns hash->normfil2; the content is overwritten by the next call. */
static const char* value_adrfil_debug_print(void *arg, void *value) {
  hash_struct *const hash = (hash_struct*) arg;
  inthash_value boxed;
  int number;

  boxed.ptr = value;
  number = (int) boxed.intg;
  snprintf(hash->normfil2, sizeof(hash->normfil2), "%d", number);
  return hash->normfil2;
}
/* "adr"/"fil" lien_url structure members hashing function */
static inthash_keys key_adrfil_hashes(void *arg, const char *value_) {
return key_adrfil_hashes_generic(arg, value_, 0);
@@ -207,6 +241,20 @@ void hash_init(httrackp *opt, hash_struct * hash, int normalized) {
key_former_adrfil_hashes,
key_former_adrfil_equals,
hash);
/* pretty-printing */
inthash_set_print_handler(hash->sav,
key_sav_debug_print,
value_sav_debug_print,
NULL);
inthash_set_print_handler(hash->adrfil,
key_adrfil_debug_print,
value_adrfil_debug_print,
hash);
inthash_set_print_handler(hash->former_adrfil,
key_former_adrfil_debug_print,
value_adrfil_debug_print,
hash);
}
void hash_free(hash_struct *hash) {
@@ -272,3 +320,14 @@ void hash_write(hash_struct * hash, int lpos) {
inthash_write(hash->former_adrfil, (char*) hash->liens[lpos], lpos);
}
}
/* Cancel the link at position lpos: its address is replaced by "!" and the
   adrfil hashtable is updated accordingly. The entry is removed while its key
   is still intact, then written back once the address has been changed. */
void hash_invalidate_entry(hash_struct * hash, int lpos) {
  /* the entry is expected to be in the table; anything else is fatal */
  if (!inthash_remove(hash->adrfil, (char*) hash->liens[lpos])) {
    assertf(! "error invalidating hash entry");
    return;
  }

  /* now out of the hashtable: the address can be overwritten */
  strcpybuff(hash->liens[lpos]->adr, "!");

  /* register the entry again, under its new key */
  inthash_write(hash->adrfil, (char*) hash->liens[lpos], lpos);
}

View File

@@ -56,6 +56,7 @@ void hash_free(hash_struct *hash);
int hash_read(const hash_struct * hash, const char *nom1, const char *nom2,
hash_struct_type type);
void hash_write(hash_struct * hash, int lpos);
void hash_invalidate_entry(hash_struct * hash, int lpos);
int *hash_calc_chaine(hash_struct * hash, hash_struct_type type, int pos);
unsigned long int hash_cle(const char *nom1, const char *nom2);
#endif

View File

@@ -187,6 +187,16 @@ struct struct_inthash {
/** hashtable name for logging **/
const char *name;
} error;
/** How to handle pretty-print (debug) (might be NULL). **/
struct {
/** key print() **/
t_inthash_printkeyhandler key;
/** value print() **/
t_inthash_printvaluehandler value;
/** opaque argument **/
void *arg;
} print;
} custom;
};
@@ -889,10 +899,62 @@ static int inthash_add_item_(inthash hashtable, inthash_item item) {
(int) hashtable->stash.size);
return 1; /* added */
} else {
/* debugging */
if (hashtable->custom.print.key != NULL
&& hashtable->custom.print.value != NULL) {
size_t i;
for(i = 0 ; i < hashtable->stash.size ; i++) {
inthash_item *const item = &hashtable->stash.items[i];
const size_t pos1 = inthash_hash_to_pos(hashtable, item->hashes.hash1);
const size_t pos2 = inthash_hash_to_pos(hashtable, item->hashes.hash2);
inthash_crit(hashtable,
"stash[%u]: key='%s' value='%s' pos1=%d pos2=%d hash1=%04x hash2=%04x",
(int) i,
hashtable->custom.print.key(hashtable->custom.print.arg, item->name),
hashtable->custom.print.value(hashtable->custom.print.arg, item->value.ptr),
(int) pos1, (int) pos2,
item->hashes.hash1, item->hashes.hash2);
if (!inthash_is_free(hashtable, pos1)) {
inthash_item *const item = &hashtable->items[pos1];
const size_t pos1 = inthash_hash_to_pos(hashtable, item->hashes.hash1);
const size_t pos2 = inthash_hash_to_pos(hashtable, item->hashes.hash2);
inthash_crit(hashtable,
"\t.. collisionning with key='%s' value='%s' pos1=%d pos2=%d hash1=%04x hash2=%04x",
hashtable->custom.print.key(hashtable->custom.print.arg, item->name),
hashtable->custom.print.value(hashtable->custom.print.arg, item->value.ptr),
(int) pos1, (int) pos2,
item->hashes.hash1, item->hashes.hash2);
} else {
inthash_crit(hashtable, "\t.. collisionning with a free slot (%d)!", (int) pos1);
}
if (!inthash_is_free(hashtable, pos2)) {
inthash_item *const item = &hashtable->items[pos2];
const size_t pos1 = inthash_hash_to_pos(hashtable, item->hashes.hash1);
const size_t pos2 = inthash_hash_to_pos(hashtable, item->hashes.hash2);
inthash_crit(hashtable,
"\t.. collisionning with key='%s' value='%s' pos1=%d pos2=%d hash1=%04x hash2=%04x",
hashtable->custom.print.key(hashtable->custom.print.arg, item->name),
hashtable->custom.print.value(hashtable->custom.print.arg, item->value.ptr),
(int) pos1, (int) pos2,
item->hashes.hash1, item->hashes.hash2);
} else {
inthash_crit(hashtable, "\t.. collisionning with a free slot (%d)!", (int) pos2);
}
}
//struct_inthash_enum e = inthash_enum_new(hashtable);
//while((item = inthash_enum_next(&e)) != NULL) {
// inthash_crit(hashtable, "element key='%s' value='%s' hash1=%04x hash2=%04x",
// hashtable->custom.print.key(hashtable->custom.print.arg, item->name),
// hashtable->custom.print.value(hashtable->custom.print.arg, item->value.ptr),
// item->hashes.hash1, item->hashes.hash2);
//}
}
/* we are doomed. hopefully the probability is lower than being killed
by a wandering radioactive monkey */
inthash_log_stats(hashtable);
inthash_assert(hashtable, ! "hashtable internal error: cuckoo/stash collision");
/* not reachable code */
return -1;
}
@@ -1235,9 +1297,13 @@ inthash inthash_new(size_t initial_size) {
hashtable->custom.key.hash = NULL;
hashtable->custom.key.equals = NULL;
hashtable->custom.key.arg = NULL;
hashtable->custom.error.log = NULL;
hashtable->custom.error.fatal = NULL;
hashtable->custom.error.name = NULL;
hashtable->custom.error.arg = NULL;
hashtable->custom.print.key = NULL;
hashtable->custom.print.value = NULL;
hashtable->custom.print.arg = NULL;
}
return hashtable;
}
@@ -1288,12 +1354,20 @@ void inthash_set_assert_handler(inthash hashtable,
t_inthash_loghandler log,
t_inthash_asserthandler fatal,
void *arg) {
inthash_assert(hashtable, fatal != NULL);
hashtable->custom.error.log = log;
hashtable->custom.error.fatal = fatal;
hashtable->custom.error.arg = arg;
}
/* Register the debug pretty-printers of a hashtable.
   key: handler returning the string representation of a key
   value: handler returning the string representation of a value
   arg: opaque argument stored along with the two handlers */
void inthash_set_print_handler(inthash hashtable,
                               t_inthash_printkeyhandler key,
                               t_inthash_printvaluehandler value,
                               void *arg) {
  /* opaque user argument first, then the two printers */
  hashtable->custom.print.arg = arg;
  hashtable->custom.print.value = value;
  hashtable->custom.print.key = key;
}
size_t inthash_nitems(inthash hashtable) {
if (hashtable != NULL)
return hashtable->used;

View File

@@ -67,6 +67,7 @@ typedef unsigned __int64 uint64_t;
#else
#include <stdint.h>
#endif
#include <stdarg.h>
/** Value. **/
typedef union inthash_value {
@@ -140,6 +141,12 @@ typedef void (*t_inthash_loghandler)(void *arg, inthash_loglevel level,
/** Hashtable fatal assertion failure. **/
typedef void (*t_inthash_asserthandler)(void *arg, const char* exp, const char* file, int line);
/** Key printer (debug) **/
typedef const char* (*t_inthash_printkeyhandler)(void *arg, const char *name);
/** Value printer (debug) **/
typedef const char* (*t_inthash_printvaluehandler)(void *arg, void *value);
/**
* Value comparison handler (returns non-zero value if strings are equal).
**/
@@ -240,6 +247,18 @@ void inthash_set_assert_handler(inthash hashtable,
t_inthash_asserthandler fatal,
void *arg);
/**
 * Set pretty print loggers (debug). Both handlers must return a string
 * pointer which shall be valid until the next call. Both key and value
 * pointers shall be valid at the same time.
 * key: handler called to print the string representation of the key
 * value: handler called to print the string representation of the value
 * arg: opaque argument, passed as first parameter to both handlers
 **/
void inthash_set_print_handler(inthash hashtable,
t_inthash_printkeyhandler key,
t_inthash_printvaluehandler value,
void *arg);
/**
* Set the hashtable name, for debugging purpose.
* name: the hashtable name (ASCII or UTF-8)

View File

@@ -62,6 +62,7 @@ Please visit our Website: http://www.httrack.com
#include <unistd.h>
#endif
#endif /* _WIN32 */
#include <stdarg.h>
#include <string.h>
#include <time.h>
@@ -5112,6 +5113,17 @@ static int ssl_vulnerable(const char *version) {
return 0;
}
/* Optional user-defined callback upon fatal error (NULL when unset) */
htsErrorCallback htsCallbackErr = NULL;

/* Install the fatal error callback; NULL removes any previous one. */
HTSEXT_API void hts_set_error_callback(htsErrorCallback handler) {
  htsCallbackErr = handler;
}

/* Return the fatal error callback currently installed, or NULL if none.
   Defined with an explicit (void) parameter list, as in its prototypes. */
HTSEXT_API htsErrorCallback hts_get_error_callback(void) {
  return htsCallbackErr;
}
/* Default hashtable assertion handler: hands the failed expression and its
   location over to abortf_(). The opaque argument is not used. */
static void default_inthash_asserthandler(void *arg, const char* exp, const char* file, int line) {
  (void) arg;
  abortf_(exp, file, line);
}

View File

@@ -3606,7 +3606,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
urladr, urlfil, mov_adr, mov_fil);
// canceller lien actuel
error = 1;
strcpybuff(liens[ptr]->adr, "!"); // caractère bidon (invalide hash)
hash_invalidate_entry(hashptr, ptr); // invalidate hashtable entry
// noter NOUVEAU lien
//xxc xxc
// set_prio_to=0+1; // protection if the moved URL is an html page!!
@@ -3742,7 +3742,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
//
// canceller lien actuel
error = 1;
strcpybuff(liens[ptr]->adr, "!"); // caractère bidon (invalide hash)
hash_invalidate_entry(hashptr, ptr); // invalidate hashtable entry
//
} else { // oups erreur, plus de mémoire!!
printf("PANIC! : Not enough memory [%d]\n", __LINE__);

View File

@@ -35,17 +35,26 @@ Please visit our Website: http://www.httrack.com
#include "htsglobal.h"
/**
* Optional user-defined callback upon fatal error.
*/
typedef void (*htsErrorCallback) (const char *msg, const char *file, int line);
/**
* Emergency logging.
* Default is to use libhttrack one.
*/
#ifndef HTSSAFE_ABORT_FUNCTION
HTSEXT_API htsErrorCallback htsCallbackErr;
#define HTSSAFE_ABORT_FUNCTION(A,B,C) do { if (htsCallbackErr != NULL) { htsCallbackErr(A,B,C); } } while(0)
/** Assert error callback. **/
#ifndef HTS_DEF_FWSTRUCT_htsErrorCallback
#define HTS_DEF_FWSTRUCT_htsErrorCallback
typedef void (*htsErrorCallback) (const char *msg, const char *file, int line);
HTSEXT_API htsErrorCallback hts_get_error_callback(void);
#endif
#define HTSSAFE_ABORT_FUNCTION(A,B,C) do { \
htsErrorCallback callback = hts_get_error_callback(); \
if (callback != NULL) { \
callback(A,B,C); \
} \
} while(0)
#endif
/**
@@ -61,7 +70,7 @@ HTSEXT_API htsErrorCallback htsCallbackErr;
/**
* Fatal assertion check.
*/
#define assertf_(exp, file, line) assertf__(exp, #exp, __FILE__, __LINE__)
#define assertf_(exp, file, line) assertf__(exp, #exp, file, line)
/**
* Fatal assertion check.

View File

@@ -36,6 +36,7 @@ Please visit our Website: http://www.httrack.com
#include "htsglobal.h"
#include "htsbase.h"
#include "htsthread.h"
#include "httrack-library.h"
#if USE_BEGINTHREAD
#ifdef _WIN32

View File

@@ -38,6 +38,7 @@ Please visit our Website: http://www.httrack.com
#ifndef _WIN32
#include <inttypes.h>
#endif
#include <stdarg.h>
#ifndef HTS_DEF_FWSTRUCT_httrackp
#define HTS_DEF_FWSTRUCT_httrackp
@@ -65,6 +66,12 @@ typedef enum hts_log_type {
typedef struct hts_stat_struct hts_stat_struct;
#endif
/** Assert error callback. **/
#ifndef HTS_DEF_FWSTRUCT_htsErrorCallback
#define HTS_DEF_FWSTRUCT_htsErrorCallback
typedef void (*htsErrorCallback) (const char *msg, const char *file, int line);
#endif
/* Helpers for plugging callbacks
requires: htsdefines.h */
@@ -111,6 +118,8 @@ HTSEXT_API const hts_stat_struct* hts_get_stats(httrackp * opt);
HTSEXT_API void set_wrappers(httrackp * opt); /* LEGACY */
HTSEXT_API int plug_wrapper(httrackp * opt, const char *moduleName,
const char *argv);
HTSEXT_API void hts_set_error_callback(htsErrorCallback handler);
HTSEXT_API htsErrorCallback hts_get_error_callback(void);
/* Logging */
HTSEXT_API int hts_log(httrackp * opt, const char *prefix, const char *msg);

View File

@@ -53,7 +53,7 @@
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
OmitFramePointers="true"
AdditionalIncludeDirectories="&quot;C:\Dev\zlib-1.2.3\&quot;;&quot;C:\Dev\openssl-1.0.1e\include&quot;;C:\Dev\Winhttrack"
AdditionalIncludeDirectories="&quot;C:\Dev\zlib-1.2.8\&quot;;&quot;C:\Dev\openssl-1.0.1e\include&quot;;C:\Dev\Winhttrack"
PreprocessorDefinitions="WIN32;_CONSOLE;HTS_ANALYSTE_CONSOLE;_CRT_SECURE_NO_WARNINGS"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
@@ -152,7 +152,7 @@
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
OmitFramePointers="true"
AdditionalIncludeDirectories="&quot;C:\Dev\zlib-1.2.3\&quot;;&quot;C:\Dev\openssl-1.0.1e\include&quot;;C:\Dev\Winhttrack"
AdditionalIncludeDirectories="&quot;C:\Dev\zlib-1.2.8\&quot;;&quot;C:\Dev\openssl-1.0.1e\include&quot;;C:\Dev\Winhttrack"
PreprocessorDefinitions="WIN32;_CONSOLE;HTS_ANALYSTE_CONSOLE"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
@@ -246,7 +246,7 @@
Name="VCCLCompilerTool"
AdditionalOptions="/Zm200 "
Optimization="0"
AdditionalIncludeDirectories="&quot;C:\Dev\zlib-1.2.3\&quot;;&quot;C:\Dev\openssl-1.0.1e\include&quot;;C:\Dev\Winhttrack"
AdditionalIncludeDirectories="&quot;C:\Dev\zlib-1.2.8\&quot;;&quot;C:\Dev\openssl-1.0.1e\include&quot;;C:\Dev\Winhttrack"
PreprocessorDefinitions="_DEBUG;WIN32;_CONSOLE;HTS_ANALYSTE_CONSOLE;_CRT_SECURE_NO_WARNINGS"
RuntimeLibrary="3"
BufferSecurityCheck="true"
@@ -343,7 +343,7 @@
Name="VCCLCompilerTool"
AdditionalOptions="/Zm200 "
Optimization="0"
AdditionalIncludeDirectories="&quot;C:\Dev\zlib-1.2.3\&quot;;&quot;C:\Dev\openssl-1.0.1e\include&quot;;C:\Dev\Winhttrack"
AdditionalIncludeDirectories="&quot;C:\Dev\zlib-1.2.8\&quot;;&quot;C:\Dev\openssl-1.0.1e\include&quot;;C:\Dev\Winhttrack"
PreprocessorDefinitions="_DEBUG;WIN32;_CONSOLE;HTS_ANALYSTE_CONSOLE;_CRT_SECURE_NO_WARNINGS"
RuntimeLibrary="3"
BufferSecurityCheck="true"
@@ -440,7 +440,7 @@
AdditionalOptions="/Zm200 "
Optimization="3"
InlineFunctionExpansion="2"
AdditionalIncludeDirectories="&quot;C:\Dev\zlib-1.2.3\&quot;;&quot;C:\Dev\openssl-1.0.1e\include&quot;;C:\Dev\Winhttrack"
AdditionalIncludeDirectories="&quot;C:\Dev\zlib-1.2.8\&quot;;&quot;C:\Dev\openssl-1.0.1e\include&quot;;C:\Dev\Winhttrack"
PreprocessorDefinitions="NDEBUG;WIN32;_CONSOLE;HTS_ANALYSTE_CONSOLE;_CRT_SECURE_NO_WARNINGS"
StringPooling="true"
RuntimeLibrary="2"
@@ -535,7 +535,7 @@
AdditionalOptions="/Zm200 "
Optimization="3"
InlineFunctionExpansion="2"
AdditionalIncludeDirectories="&quot;C:\Dev\zlib-1.2.3\&quot;;&quot;C:\Dev\openssl-1.0.1e\include&quot;;C:\Dev\Winhttrack"
AdditionalIncludeDirectories="&quot;C:\Dev\zlib-1.2.8\&quot;;&quot;C:\Dev\openssl-1.0.1e\include&quot;;C:\Dev\Winhttrack"
PreprocessorDefinitions="NDEBUG;WIN32;_CONSOLE;HTS_ANALYSTE_CONSOLE;_CRT_SECURE_NO_WARNINGS"
StringPooling="true"
RuntimeLibrary="2"