mirror of
https://github.com/xroche/httrack.git
synced 2026-06-20 17:18:14 +03:00
Compare commits
18 Commits
fix/css-im
...
3.49.8
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0cbd5279f2 | ||
|
|
05306ee4fd | ||
|
|
1d0fc0a566 | ||
|
|
a4452592b4 | ||
|
|
62c2364b59 | ||
|
|
fe7041ddbf | ||
|
|
f5543df1af | ||
|
|
fee30aa95d | ||
|
|
f9f4700ee1 | ||
|
|
f030fa21e3 | ||
|
|
bdd1c1bc2c | ||
|
|
56665a268f | ||
|
|
2e948b9acd | ||
|
|
cae11499f1 | ||
|
|
02c7f4ebf6 | ||
|
|
9070b44a70 | ||
|
|
799c045061 | ||
|
|
fb1ee3bf2e |
@@ -16,6 +16,7 @@ BasedOnStyle: LLVM
|
||||
SpaceAfterCStyleCast: true # "(int) x", overwhelmingly dominant (542 vs 7)
|
||||
SortIncludes: false # C include order can be significant; never reorder
|
||||
IncludeBlocks: Preserve # do not merge/reflow include groups
|
||||
SeparateDefinitionBlocks: Always # blank line between definitions (readability)
|
||||
|
||||
# Stated explicitly for robustness against base-style drift (these match LLVM):
|
||||
IndentWidth: 2
|
||||
|
||||
20
.github/workflows/ci.yml
vendored
20
.github/workflows/ci.yml
vendored
@@ -320,6 +320,21 @@ jobs:
|
||||
lint:
|
||||
name: lint (shellcheck, shfmt)
|
||||
runs-on: ubuntu-24.04
|
||||
# Every tracked shell script; the globs expand at run time. Kept here so the
|
||||
# shellcheck and shfmt steps below cannot drift apart.
|
||||
env:
|
||||
SHELL_SCRIPTS: >-
|
||||
.githooks/pre-commit
|
||||
bootstrap
|
||||
build.sh
|
||||
html/div/search.sh
|
||||
man/makeman.sh
|
||||
src/htsbasiccharsets.sh
|
||||
src/htsentities.sh
|
||||
src/webhttrack
|
||||
tests/*.sh
|
||||
tests/*.test
|
||||
tools/mkdeb.sh
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
@@ -332,12 +347,11 @@ jobs:
|
||||
sudo apt-get install -y --no-install-recommends shellcheck shfmt
|
||||
shfmt --version
|
||||
|
||||
# Lint the scripts we maintain; the legacy scripts are a separate cleanup.
|
||||
- name: shellcheck
|
||||
run: shellcheck man/makeman.sh tools/mkdeb.sh .githooks/pre-commit tests/*.test tests/check-network.sh
|
||||
run: shellcheck $SHELL_SCRIPTS
|
||||
|
||||
- name: shfmt
|
||||
run: shfmt -d -i 4 man/makeman.sh tools/mkdeb.sh .githooks/pre-commit
|
||||
run: shfmt -d -i 4 $SHELL_SCRIPTS
|
||||
|
||||
# Check clang-format on CHANGED LINES ONLY. The engine predates clang-format
|
||||
# (it was shaped by an old Visual Studio formatter) and does not round-trip,
|
||||
|
||||
7
debian/changelog
vendored
7
debian/changelog
vendored
@@ -1,6 +1,9 @@
|
||||
httrack (3.49.8-1) unstable; urgency=medium
|
||||
|
||||
* New upstream release.
|
||||
* New upstream release: HTTPS-proxy CONNECT tunnelling and wider srcset
|
||||
parsing, a batch of crawler and parser fixes (CSS @import, xmlns
|
||||
namespaces, relative paths, RFC 6265 cookies), and security hardening of
|
||||
the parser and of buffer copies throughout the engine.
|
||||
* Drop the OpenSSL linking exception from the license: OpenSSL 3.0+ is
|
||||
Apache-2.0 and GPL-compatible, so it is no longer needed. httrack is now
|
||||
plain GPL-3.0-or-later. Updated debian/copyright accordingly.
|
||||
@@ -14,7 +17,7 @@ httrack (3.49.8-1) unstable; urgency=medium
|
||||
the QA debcheck page. Depend on firefox-esr | chromium | www-browser
|
||||
instead.
|
||||
|
||||
-- Xavier Roche <xavier@debian.org> Sun, 07 Jun 2026 14:29:24 +0200
|
||||
-- Xavier Roche <xavier@debian.org> Sat, 20 Jun 2026 13:02:08 +0200
|
||||
|
||||
httrack (3.49.7-2) unstable; urgency=medium
|
||||
|
||||
|
||||
21
history.txt
21
history.txt
@@ -5,12 +5,31 @@ HTTrack Website Copier release history:
|
||||
This file lists all changes and fixes that have been made for HTTrack
|
||||
|
||||
3.49-8
|
||||
+ New: tunnel HTTPS downloads through the configured HTTP proxy via CONNECT (#85)
|
||||
+ New: parse every candidate URL in <img> and <source> srcset lists (#326)
|
||||
+ Changed: dropped the obsolete OpenSSL linking exception (OpenSSL 3.0+ is Apache-2.0 and GPL-compatible); httrack is now plain GPLv3-or-later
|
||||
+ Fixed: link libhtsjava and the libtest examples directly against libc
|
||||
+ Fixed: several out-of-bounds reads in the HTML/CSS parser on hostile input (#94, #396)
|
||||
+ Fixed: stored XSS via an unescaped URL in the generated page footer (#165)
|
||||
+ Fixed: hardened buffer copies throughout the engine against overflow
|
||||
+ Fixed: capture conditional CSS @import URLs (#94)
|
||||
+ Fixed: don't crawl xmlns namespace declarations as links (#191)
|
||||
+ Fixed: don't mistake the method argument of XMLHttpRequest.open for a URL (#218)
|
||||
+ Fixed: percent-encode parentheses when rewriting CSS url() targets (#163)
|
||||
+ Fixed: collapse ../ in file:// URLs and widen relative-link handling (#137, #162)
|
||||
+ Fixed: drop the obsolete $Version/$Path attributes from the request Cookie header, per RFC 6265 (#151)
|
||||
+ Fixed: keep empty quoted arguments when reloading doit.log for --update/--continue (#106)
|
||||
+ Fixed: raise the User-Agent and custom-header length limits (#152)
|
||||
+ Fixed: abort on a long log path (lock-file buffer too small) (#183)
|
||||
+ Fixed: race in lazy mutex initialization (#297)
|
||||
+ Fixed: sub-second mtime precision when comparing local files on POSIX (#383)
|
||||
+ Fixed: modernize OpenSSL TLS initialization for the 3.x to 4.x transition (#308)
|
||||
+ Fixed: in-place changes made by the postprocess callback were not applied (Roman Sęk)
|
||||
+ Fixed: "preffered" typo in the help text and man page (yosinn1-blip)
|
||||
+ Fixed: corrections and updates of the Russian translation (German Aizek)
|
||||
+ Fixed: corrections and updates of the Danish translation (scootergrisen)
|
||||
+ Fixed: link libhtsjava and the libtest examples directly against libc
|
||||
+ New: documented the public library API headers and typed the option fields as named enums
|
||||
+ Fixed: numerous build, packaging, CI and test-coverage improvements (out-of-tree builds, sanitizer/distcheck CI, shell and Python linting, AppStream metainfo)
|
||||
|
||||
3.49-7
|
||||
+ Fixed: keep generated config.h architecture-independent (Debian #1133728)
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
|
||||
#!/bin/sh
|
||||
|
||||
# Simple indexing test using HTTrack
|
||||
# A "real" script/program would use advanced search, and
|
||||
# A "real" script/program would use advanced search, and
|
||||
# use dichotomy to find the word in the index.txt file
|
||||
# This script is really basic and NOT optimized, and
|
||||
# should not be used for professional purpose :)
|
||||
@@ -11,50 +10,49 @@ TESTSITE="http://localhost/"
|
||||
|
||||
# Create an index if necessary
|
||||
if ! test -f "index.txt"; then
|
||||
echo "Building the index .."
|
||||
rm -rf test
|
||||
httrack --display "$TESTSITE" -%I -O test
|
||||
mv test/index.txt ./
|
||||
echo "Building the index .."
|
||||
rm -rf test
|
||||
httrack --display "$TESTSITE" -%I -O test
|
||||
mv test/index.txt ./
|
||||
fi
|
||||
|
||||
# Convert crlf to lf
|
||||
if test "`head index.txt -n 1 | tr '\r' '#' | grep -c '#'`" = "1"; then
|
||||
echo "Converting index to Unix LF style (not CR/LF) .."
|
||||
mv -f index.txt index.txt.old
|
||||
cat index.txt.old|tr -d '\r' > index.txt
|
||||
if test "$(head index.txt -n 1 | tr '\r' '#' | grep -c '#')" = "1"; then
|
||||
echo "Converting index to Unix LF style (not CR/LF) .."
|
||||
mv -f index.txt index.txt.old
|
||||
tr -d '\r' <index.txt.old >index.txt
|
||||
fi
|
||||
|
||||
keyword=-
|
||||
while test -n "$keyword"; do
|
||||
printf "Enter a keyword: "
|
||||
read keyword
|
||||
printf "Enter a keyword: "
|
||||
read -r keyword
|
||||
|
||||
if test -n "$keyword"; then
|
||||
FOUNDK="`grep -niE \"^$keyword\" index.txt`"
|
||||
if test -n "$keyword"; then
|
||||
FOUNDK="$(grep -niE "^$keyword" index.txt)"
|
||||
|
||||
if test -n "$FOUNDK"; then
|
||||
if ! test `echo "$FOUNDK"|wc -l` = "1"; then
|
||||
# Multiple matches
|
||||
printf "Found multiple keywords: "
|
||||
echo "$FOUNDK"|cut -f2 -d':'|tr '\n' ' '
|
||||
echo ""
|
||||
echo "Use keyword$ to find only one"
|
||||
else
|
||||
# One match
|
||||
N=`echo "$FOUNDK"|cut -f1 -d':'`
|
||||
PM=`tail +$N index.txt|grep -nE "\("|head -n 1`
|
||||
if ! echo "$PM"|grep "ignored">/dev/null; then
|
||||
M=`echo $PM|cut -f1 -d':'`
|
||||
echo "Found in:"
|
||||
cat index.txt | tail "+$N" | head -n "$M" | grep -E "[0-9]* " | cut -f2 -d' '
|
||||
else
|
||||
echo "keyword ignored (too many hits)"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo "not found"
|
||||
fi
|
||||
if test -n "$FOUNDK"; then
|
||||
if ! test "$(echo "$FOUNDK" | wc -l)" = "1"; then
|
||||
# Multiple matches
|
||||
printf "Found multiple keywords: "
|
||||
echo "$FOUNDK" | cut -f2 -d':' | tr '\n' ' '
|
||||
echo ""
|
||||
echo "Use keyword$ to find only one"
|
||||
else
|
||||
# One match
|
||||
N=$(echo "$FOUNDK" | cut -f1 -d':')
|
||||
PM=$(tail "+$N" index.txt | grep -nE "\(" | head -n 1)
|
||||
if ! echo "$PM" | grep "ignored" >/dev/null; then
|
||||
M=$(echo "$PM" | cut -f1 -d':')
|
||||
echo "Found in:"
|
||||
tail "+$N" index.txt | head -n "$M" | grep -E "[0-9]* " | cut -f2 -d' '
|
||||
else
|
||||
echo "keyword ignored (too many hits)"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo "not found"
|
||||
fi
|
||||
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
137
src/htsarrays.h
137
src/htsarrays.h
@@ -48,112 +48,115 @@ Please visit our Website: http://www.httrack.com
|
||||
/* Abort (with the failed byte count) when a growth allocation fails. The
|
||||
array macros never return an out-of-memory error; they assert and abort. */
|
||||
static void hts_record_assert_memory_failed(const size_t size) {
|
||||
fprintf(stderr, "memory allocation failed (%lu bytes)", \
|
||||
(long int) size); \
|
||||
assertf(! "memory allocation failed"); \
|
||||
fprintf(stderr, "memory allocation failed (%lu bytes)", (long int) size);
|
||||
assertf(!"memory allocation failed");
|
||||
}
|
||||
|
||||
/** Dynamic array of T elements. **/
|
||||
#define TypedArray(T) \
|
||||
struct { \
|
||||
/** Elements. **/ \
|
||||
union { \
|
||||
/** Typed. **/ \
|
||||
T* elts; \
|
||||
/** Opaque. **/ \
|
||||
void* ptr; \
|
||||
} data; \
|
||||
/** Count. **/ \
|
||||
size_t size; \
|
||||
/** Capacity. **/ \
|
||||
size_t capa; \
|
||||
#define TypedArray(T) \
|
||||
struct { \
|
||||
/** Elements. **/ \
|
||||
union { \
|
||||
/** Typed. **/ \
|
||||
T *elts; \
|
||||
/** Opaque. **/ \
|
||||
void *ptr; \
|
||||
} data; \
|
||||
/** Count. **/ \
|
||||
size_t size; \
|
||||
/** Capacity. **/ \
|
||||
size_t capa; \
|
||||
}
|
||||
|
||||
/** Initializer for an empty array (no backing store, size and capacity 0). **/
|
||||
#define EMPTY_TYPED_ARRAY { { NULL }, 0, 0 }
|
||||
#define EMPTY_TYPED_ARRAY {{NULL}, 0, 0}
|
||||
|
||||
/** Array size, in elements. **/
|
||||
#define TypedArraySize(A) ((A).size)
|
||||
#define TypedArraySize(A) ((A).size)
|
||||
|
||||
/** Array capacity, in elements. **/
|
||||
#define TypedArrayCapa(A) ((A).capa)
|
||||
#define TypedArrayCapa(A) ((A).capa)
|
||||
|
||||
/**
|
||||
* Remaining free space, in elements.
|
||||
* Remaining free space, in elements.
|
||||
* Macro, first element evaluated multiple times.
|
||||
**/
|
||||
#define TypedArrayRoom(A) ( TypedArrayCapa(A) - TypedArraySize(A) )
|
||||
#define TypedArrayRoom(A) (TypedArrayCapa(A) - TypedArraySize(A))
|
||||
|
||||
/** Array elements, of type T*. **/
|
||||
#define TypedArrayElts(A) ((A).data.elts)
|
||||
#define TypedArrayElts(A) ((A).data.elts)
|
||||
|
||||
/** Array pointer, of type void*. **/
|
||||
#define TypedArrayPtr(A) ((A).data.ptr)
|
||||
#define TypedArrayPtr(A) ((A).data.ptr)
|
||||
|
||||
/** Size of T. **/
|
||||
#define TypedArrayWidth(A) (sizeof(*TypedArrayElts(A)))
|
||||
#define TypedArrayWidth(A) (sizeof(*TypedArrayElts(A)))
|
||||
|
||||
/** Nth element of the array, as an lvalue. No bounds check; N must be
|
||||
< TypedArraySize(A). **/
|
||||
#define TypedArrayNth(A, N) (TypedArrayElts(A)[N])
|
||||
|
||||
/**
|
||||
* Tail of the array (outside the array).
|
||||
* Tail of the array (outside the array).
|
||||
* The returned pointer points to the beginning of TypedArrayRoom(A)
|
||||
* free elements.
|
||||
**/
|
||||
#define TypedArrayTail(A) (TypedArrayNth(A, TypedArraySize(A)))
|
||||
|
||||
/**
|
||||
* Ensure at least 'ROOM' elements can be put in the remaining space.
|
||||
* After a call to this macro, TypedArrayRoom(A) is guaranteed to be at
|
||||
* least equal to 'ROOM'.
|
||||
**/
|
||||
#define TypedArrayEnsureRoom(A, ROOM) do { \
|
||||
const size_t room_ = (ROOM); \
|
||||
while (TypedArrayRoom(A) < room_) { \
|
||||
TypedArrayCapa(A) = TypedArrayCapa(A) < 16 ? 16 : TypedArrayCapa(A) * 2; \
|
||||
} \
|
||||
TypedArrayPtr(A) = realloc(TypedArrayPtr(A), \
|
||||
TypedArrayCapa(A)*TypedArrayWidth(A)); \
|
||||
if (TypedArrayPtr(A) == NULL) { \
|
||||
hts_record_assert_memory_failed(TypedArrayCapa(A)*TypedArrayWidth(A)); \
|
||||
} \
|
||||
} while(0)
|
||||
* Ensure at least 'ROOM' elements can be put in the remaining space.
|
||||
* After a call to this macro, TypedArrayRoom(A) is guaranteed to be at
|
||||
* least equal to 'ROOM'.
|
||||
**/
|
||||
#define TypedArrayEnsureRoom(A, ROOM) \
|
||||
do { \
|
||||
const size_t room_ = (ROOM); \
|
||||
while (TypedArrayRoom(A) < room_) { \
|
||||
TypedArrayCapa(A) = TypedArrayCapa(A) < 16 ? 16 : TypedArrayCapa(A) * 2; \
|
||||
} \
|
||||
TypedArrayPtr(A) = \
|
||||
realloc(TypedArrayPtr(A), TypedArrayCapa(A) * TypedArrayWidth(A)); \
|
||||
if (TypedArrayPtr(A) == NULL) { \
|
||||
hts_record_assert_memory_failed(TypedArrayCapa(A) * TypedArrayWidth(A)); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/** Add an element. Macro, first element evaluated multiple times. **/
|
||||
#define TypedArrayAdd(A, E) do { \
|
||||
TypedArrayEnsureRoom(A, 1); \
|
||||
assertf(TypedArraySize(A) < TypedArrayCapa(A)); \
|
||||
TypedArrayTail(A) = (E); \
|
||||
TypedArraySize(A)++; \
|
||||
} while(0)
|
||||
#define TypedArrayAdd(A, E) \
|
||||
do { \
|
||||
TypedArrayEnsureRoom(A, 1); \
|
||||
assertf(TypedArraySize(A) < TypedArrayCapa(A)); \
|
||||
TypedArrayTail(A) = (E); \
|
||||
TypedArraySize(A)++; \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Add 'COUNT' elements from 'PTR'.
|
||||
* Add 'COUNT' elements from 'PTR'.
|
||||
* Macro, first element evaluated multiple times.
|
||||
**/
|
||||
#define TypedArrayAppend(A, PTR, COUNT) do { \
|
||||
const size_t count_ = (COUNT); \
|
||||
/* This 1-case is to benefit from type safety. */ \
|
||||
if (count_ == 1) { \
|
||||
TypedArrayAdd(A, *(PTR)); \
|
||||
} else { \
|
||||
const void *const source_ = (PTR); \
|
||||
TypedArrayEnsureRoom(A, count_); \
|
||||
assertf(count_ <= TypedArrayRoom(A)); \
|
||||
memcpy(&TypedArrayTail(A), source_, count_ * TypedArrayWidth(A)); \
|
||||
TypedArraySize(A) += count_; \
|
||||
} \
|
||||
} while(0)
|
||||
#define TypedArrayAppend(A, PTR, COUNT) \
|
||||
do { \
|
||||
const size_t count_ = (COUNT); \
|
||||
/* This 1-case is to benefit from type safety. */ \
|
||||
if (count_ == 1) { \
|
||||
TypedArrayAdd(A, *(PTR)); \
|
||||
} else { \
|
||||
const void *const source_ = (PTR); \
|
||||
TypedArrayEnsureRoom(A, count_); \
|
||||
assertf(count_ <= TypedArrayRoom(A)); \
|
||||
memcpy(&TypedArrayTail(A), source_, count_ *TypedArrayWidth(A)); \
|
||||
TypedArraySize(A) += count_; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/** Clear an array, freeing memory and clearing size and capacity. **/
|
||||
#define TypedArrayFree(A) do { \
|
||||
if (TypedArrayPtr(A) != NULL) { \
|
||||
TypedArrayCapa(A) = TypedArraySize(A) = 0; \
|
||||
free(TypedArrayPtr(A)); \
|
||||
TypedArrayPtr(A) = NULL; \
|
||||
} \
|
||||
} while(0)
|
||||
#define TypedArrayFree(A) \
|
||||
do { \
|
||||
if (TypedArrayPtr(A) != NULL) { \
|
||||
TypedArrayCapa(A) = TypedArraySize(A) = 0; \
|
||||
free(TypedArrayPtr(A)); \
|
||||
TypedArrayPtr(A) = NULL; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
@@ -41,7 +41,7 @@ Please visit our Website: http://www.httrack.com
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#if HTS_INET6==0
|
||||
#if HTS_INET6 == 0
|
||||
#include <winsock2.h>
|
||||
#else
|
||||
|
||||
@@ -49,13 +49,14 @@ Please visit our Website: http://www.httrack.com
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
// KB955045 (http://support.microsoft.com/kb/955045)
|
||||
// To execute an application using this function on earlier versions of Windows
|
||||
// (Windows 2000, Windows NT, and Windows Me/98/95), then it is mandatory to #include Ws2tcpip.h
|
||||
// and also Wspiapi.h. When the Wspiapi.h header file is included, the 'getaddrinfo' function is
|
||||
// #defined to the 'WspiapiGetAddrInfo' inline function in Wspiapi.h.
|
||||
// (Windows 2000, Windows NT, and Windows Me/98/95), then it is mandatory to
|
||||
// #include Ws2tcpip.h and also Wspiapi.h. When the Wspiapi.h header file is
|
||||
// included, the 'getaddrinfo' function is #defined to the 'WspiapiGetAddrInfo'
|
||||
// inline function in Wspiapi.h.
|
||||
#include <ws2tcpip.h>
|
||||
#include <Wspiapi.h>
|
||||
//#include <winsock2.h>
|
||||
//#include <tpipv6.h>
|
||||
// #include <winsock2.h>
|
||||
// #include <tpipv6.h>
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -3,57 +3,59 @@
|
||||
|
||||
# Change this to download files
|
||||
if false; then
|
||||
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-*.TXT" | lftp
|
||||
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP*.TXT" | lftp
|
||||
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP*.TXT" | lftp
|
||||
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP*.TXT" | lftp
|
||||
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/CP*.TXT" | lftp
|
||||
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8*.TXT" | lftp
|
||||
rm -f CP932.TXT CP936.TXT CP949.TXT CP950.TXT
|
||||
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-*.TXT" | lftp
|
||||
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP*.TXT" | lftp
|
||||
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP*.TXT" | lftp
|
||||
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP*.TXT" | lftp
|
||||
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/CP*.TXT" | lftp
|
||||
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8*.TXT" | lftp
|
||||
rm -f CP932.TXT CP936.TXT CP949.TXT CP950.TXT
|
||||
fi
|
||||
|
||||
# Produce code
|
||||
printf "/** GENERATED FILE ($0), DO NOT EDIT **/\n\n"
|
||||
for i in *.TXT ; do
|
||||
echo "processing $i" >&2
|
||||
grep -vE "^(#|$)" $i | grep -E "^0x" | sed -e 's/[[:space:]]/ /g' | cut -f1,2 -d' ' | \
|
||||
(
|
||||
unset arr
|
||||
while read LINE ; do
|
||||
from=$[$(echo $LINE | cut -f1 -d' ')]
|
||||
if ! test -n "$from"; then
|
||||
echo "error with $i" >&2
|
||||
exit 1
|
||||
elif test $from -ge 256; then
|
||||
echo "out-of-range ($LINE) with $i" >&2
|
||||
exit 1
|
||||
fi
|
||||
to=$(echo $LINE | cut -f2 -d' ')
|
||||
arr[$from]=$to
|
||||
done
|
||||
name=$(echo $i | tr 'A-Z' 'a-z' | tr '-' '_' | sed -e 's/\.txt//' -e 's/8859/iso_8859/')
|
||||
printf "/* Table for $i */\nstatic const hts_UCS4 table_${name}[256] = {\n "
|
||||
i=0
|
||||
while test "$i" -lt 256; do
|
||||
if test "$i" -gt 0; then
|
||||
printf ", "
|
||||
if test $[${i}%8] -eq 0; then
|
||||
printf "\n "
|
||||
fi
|
||||
fi
|
||||
value=${arr[$i]:-0}
|
||||
printf "0x%04x" $value
|
||||
i=$[${i}+1]
|
||||
done
|
||||
printf " };\n\n"
|
||||
)
|
||||
echo "processed $i" >&2
|
||||
printf '/** GENERATED FILE (%s), DO NOT EDIT **/\n\n' "$0"
|
||||
for i in *.TXT; do
|
||||
echo "processing $i" >&2
|
||||
grep -vE "^(#|$)" "$i" | grep -E "^0x" | sed -e 's/[[:space:]]/ /g' | cut -f1,2 -d' ' |
|
||||
(
|
||||
unset arr
|
||||
while read -r LINE; do
|
||||
from=$(($(echo "$LINE" | cut -f1 -d' ')))
|
||||
if ! test -n "$from"; then
|
||||
echo "error with $i" >&2
|
||||
exit 1
|
||||
elif test $from -ge 256; then
|
||||
echo "out-of-range ($LINE) with $i" >&2
|
||||
exit 1
|
||||
fi
|
||||
to=$(echo "$LINE" | cut -f2 -d' ')
|
||||
arr[from]=$to
|
||||
done
|
||||
# shellcheck disable=SC2018,SC2019 # charset filenames are ASCII; keep C-locale A-Z/a-z
|
||||
name=$(echo "$i" | tr 'A-Z' 'a-z' | tr '-' '_' | sed -e 's/\.txt//' -e 's/8859/iso_8859/')
|
||||
printf '/* Table for %s */\nstatic const hts_UCS4 table_%s[256] = {\n ' "$i" "$name"
|
||||
idx=0
|
||||
while test "$idx" -lt 256; do
|
||||
if test "$idx" -gt 0; then
|
||||
printf ", "
|
||||
if test $((idx % 8)) -eq 0; then
|
||||
printf "\n "
|
||||
fi
|
||||
fi
|
||||
value=${arr[$idx]:-0}
|
||||
printf "0x%04x" "$value"
|
||||
idx=$((idx + 1))
|
||||
done
|
||||
printf " };\n\n"
|
||||
)
|
||||
echo "processed $i" >&2
|
||||
done
|
||||
|
||||
# Indexes
|
||||
printf "static const struct {\n const char *name;\n const hts_UCS4 *table;\n} table_mappings[] = {\n"
|
||||
for i in *.TXT ; do
|
||||
name=$(echo $i | tr 'A-Z' 'a-z' | tr '-' '_' | sed -e 's/\.txt//' -e 's/8859/iso_8859/')
|
||||
printf " { \"$(echo $name | tr -d '_')\", table_${name} },\n"
|
||||
for i in *.TXT; do
|
||||
# shellcheck disable=SC2018,SC2019 # charset filenames are ASCII; keep C-locale A-Z/a-z
|
||||
name=$(echo "$i" | tr 'A-Z' 'a-z' | tr '-' '_' | sed -e 's/\.txt//' -e 's/8859/iso_8859/')
|
||||
printf ' { "%s", table_%s },\n' "$(echo "$name" | tr -d '_')" "$name"
|
||||
done
|
||||
printf " { NULL, NULL }\n};\n"
|
||||
|
||||
@@ -68,14 +68,15 @@ struct t_cookie {
|
||||
#ifdef HTS_INTERNAL_BYTECODE
|
||||
|
||||
/* cookies */
|
||||
int cookie_add(t_cookie * cookie, const char *cook_name, const char *cook_value,
|
||||
const char *domain, const char *path);
|
||||
int cookie_add(t_cookie *cookie, const char *cook_name, const char *cook_value,
|
||||
const char *domain, const char *path);
|
||||
|
||||
int cookie_del(t_cookie * cookie, const char *cook_name, const char *domain, const char *path);
|
||||
int cookie_del(t_cookie *cookie, const char *cook_name, const char *domain,
|
||||
const char *path);
|
||||
|
||||
int cookie_load(t_cookie * cookie, const char *path, const char *name);
|
||||
int cookie_load(t_cookie *cookie, const char *path, const char *name);
|
||||
|
||||
int cookie_save(t_cookie * cookie, const char *name);
|
||||
int cookie_save(t_cookie *cookie, const char *name);
|
||||
|
||||
void cookie_insert(char *s, size_t s_size, const char *ins);
|
||||
|
||||
@@ -83,7 +84,8 @@ void cookie_delete(char *s, size_t s_size, size_t pos);
|
||||
|
||||
const char *cookie_get(char *buffer, const char *cookie_base, int param);
|
||||
|
||||
char *cookie_find(char *s, const char *cook_name, const char *domain, const char *path);
|
||||
char *cookie_find(char *s, const char *cook_name, const char *domain,
|
||||
const char *path);
|
||||
|
||||
char *cookie_nextfield(char *a);
|
||||
|
||||
@@ -92,12 +94,13 @@ char *cookie_nextfield(char *a);
|
||||
/** Register credentials (auth = base-64 user:pass) for the prefix derived from
|
||||
adr (host) and fil (path). No-op returning 0 if cookie is NULL, allocation
|
||||
fails, or a matching prefix is already stored; returns 1 on insertion. */
|
||||
int bauth_add(t_cookie * cookie, const char *adr, const char *fil, const char *auth);
|
||||
int bauth_add(t_cookie *cookie, const char *adr, const char *fil,
|
||||
const char *auth);
|
||||
|
||||
/** Return the stored base-64 credentials whose prefix matches adr+fil, or NULL
|
||||
if none (or cookie is NULL). Returned pointer aliases the jar's bauth_chain;
|
||||
caller must not free it. */
|
||||
char *bauth_check(t_cookie * cookie, const char *adr, const char *fil);
|
||||
char *bauth_check(t_cookie *cookie, const char *adr, const char *fil);
|
||||
|
||||
/** Build the auth lookup key (host + path, query string stripped, truncated at
|
||||
the last '/') from adr and fil into prefix; returns prefix. Caller must
|
||||
|
||||
@@ -52,12 +52,12 @@ Please visit our Website: http://www.httrack.com
|
||||
#define DEFAULT_FTP "index.txt"
|
||||
|
||||
// extension par défaut pour fichiers n'en ayant pas
|
||||
#define DEFAULT_EXT ".html"
|
||||
#define DEFAULT_EXT ".html"
|
||||
#define DEFAULT_EXT_SHORT ".htm"
|
||||
//#define DEFAULT_BIN_EXT ".bin"
|
||||
//#define DEFAULT_BIN_EXT_SHORT ".bin"
|
||||
//#define DEFAULT_EXT ".txt"
|
||||
//#define DEFAULT_EXT_SHORT ".txt"
|
||||
// #define DEFAULT_BIN_EXT ".bin"
|
||||
// #define DEFAULT_BIN_EXT_SHORT ".bin"
|
||||
// #define DEFAULT_EXT ".txt"
|
||||
// #define DEFAULT_EXT_SHORT ".txt"
|
||||
|
||||
// éviter les /nul, /con..
|
||||
#define HTS_OVERRIDE_DOS_FOLDERS 1
|
||||
@@ -87,7 +87,8 @@ Please visit our Website: http://www.httrack.com
|
||||
// fast cache (build hash table)
|
||||
#define HTS_FAST_CACHE 1
|
||||
|
||||
// le > peut être considéré comme un tag de fermeture de commentaire (<!-- > est valide)
|
||||
// le > peut être considéré comme un tag de fermeture de commentaire (<!-- > est
|
||||
// valide)
|
||||
#define GT_ENDS_COMMENT 1
|
||||
|
||||
// always adds a '/' at the end if a '~' is encountered (/~smith -> /~smith/)
|
||||
@@ -97,7 +98,8 @@ Please visit our Website: http://www.httrack.com
|
||||
#define HTS_STRIP_DOUBLE_SLASH 0
|
||||
|
||||
// case-sensitive pour les dossiers et fichiers (0/1)
|
||||
// [normalement 1, mais pose des problèmes (url malformée par exemple) et n'est pas très utile..
|
||||
// [normalement 1, mais pose des problèmes (url malformée par exemple) et n'est
|
||||
// pas très utile..
|
||||
// ..et pas bcp respecté]
|
||||
// REMOVED
|
||||
// #define HTS_CASSE 0
|
||||
|
||||
@@ -2787,6 +2787,47 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
case 'l': /* lienrelatif: relative link from curr_fil to link */
|
||||
if (na + 2 >= argc) {
|
||||
HTS_PANIC_PRINTF(
|
||||
"Option #l needs a link and a current-file path");
|
||||
printf(
|
||||
"Example: '-#l' 'host/dir/img.gif' 'host/dir/p.html'\n");
|
||||
htsmain_free();
|
||||
return -1;
|
||||
} else {
|
||||
char s[HTS_URLMAXSIZE * 2];
|
||||
|
||||
if (lienrelatif(s, sizeof(s), argv[na + 1], argv[na + 2]) ==
|
||||
0)
|
||||
printf("relative=%s\n", s);
|
||||
else
|
||||
printf("relative=<ERROR>\n");
|
||||
htsmain_free();
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
case 'i': /* ident_url_relatif: resolve a link -> adr/fil */
|
||||
if (na + 3 >= argc) {
|
||||
HTS_PANIC_PRINTF(
|
||||
"Option #i needs a link, an origin address and file");
|
||||
printf("Example: '-#i' '../img.gif' 'www.foo.com' "
|
||||
"'/d/p.html'\n");
|
||||
htsmain_free();
|
||||
return -1;
|
||||
} else {
|
||||
lien_adrfil af;
|
||||
const int r = ident_url_relatif(argv[na + 1], argv[na + 2],
|
||||
argv[na + 3], &af);
|
||||
|
||||
if (r == 0)
|
||||
printf("adr=%s fil=%s\n", af.adr, af.fil);
|
||||
else
|
||||
printf("error=%d\n", r);
|
||||
htsmain_free();
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
case '2': // mimedefs
|
||||
if (na + 1 >= argc) {
|
||||
HTS_PANIC_PRINTF("Option #2 needs to be followed by an URL");
|
||||
|
||||
164
src/htsdefines.h
164
src/htsdefines.h
@@ -82,9 +82,9 @@ typedef struct t_hts_callbackarg t_hts_callbackarg;
|
||||
/* Entry points of a --wrapper plug-in: hts_plug(opt, argv) is called once to
|
||||
install the wrapper (argv is the wrapper's argument string), hts_unplug(opt)
|
||||
once to tear it down. Both return non-zero on success. */
|
||||
typedef int (*t_hts_plug) (httrackp * opt, const char *argv);
|
||||
typedef int (*t_hts_plug)(httrackp *opt, const char *argv);
|
||||
|
||||
typedef int (*t_hts_unplug) (httrackp * opt);
|
||||
typedef int (*t_hts_unplug)(httrackp *opt);
|
||||
|
||||
/* Engine callback prototypes. Each is one hook the engine fires at a defined
|
||||
point of a mirror; a wrapper installs the ones it cares about in the
|
||||
@@ -92,27 +92,27 @@ typedef int (*t_hts_unplug) (httrackp * opt);
|
||||
returns are 1 to continue/accept, 0 to abort/refuse unless noted. */
|
||||
|
||||
/* Called once when the wrapper is installed; allocate per-run state here. */
|
||||
typedef void (*t_hts_htmlcheck_init) (t_hts_callbackarg * carg);
|
||||
typedef void (*t_hts_htmlcheck_init)(t_hts_callbackarg *carg);
|
||||
|
||||
/* Called once when the wrapper is removed; release per-run state here. */
|
||||
typedef void (*t_hts_htmlcheck_uninit) (t_hts_callbackarg * carg);
|
||||
typedef void (*t_hts_htmlcheck_uninit)(t_hts_callbackarg *carg);
|
||||
|
||||
/* Fired at the start of a mirror, after options are parsed. */
|
||||
typedef int (*t_hts_htmlcheck_start) (t_hts_callbackarg * carg, httrackp * opt);
|
||||
typedef int (*t_hts_htmlcheck_start)(t_hts_callbackarg *carg, httrackp *opt);
|
||||
|
||||
/* Fired at the end of a mirror. */
|
||||
typedef int (*t_hts_htmlcheck_end) (t_hts_callbackarg * carg, httrackp * opt);
|
||||
typedef int (*t_hts_htmlcheck_end)(t_hts_callbackarg *carg, httrackp *opt);
|
||||
|
||||
/* Fired while options are being changed, to validate or adjust them. */
|
||||
typedef int (*t_hts_htmlcheck_chopt) (t_hts_callbackarg * carg, httrackp * opt);
|
||||
typedef int (*t_hts_htmlcheck_chopt)(t_hts_callbackarg *carg, httrackp *opt);
|
||||
|
||||
/* Rewrite hook over an in-memory page: the html and len arguments point at the
|
||||
buffer and its length (the callback may reallocate and resize it),
|
||||
url_adresse and url_fichier name it. */
|
||||
typedef int (*t_hts_htmlcheck_process) (t_hts_callbackarg * carg,
|
||||
httrackp * opt, char **html, int *len,
|
||||
const char *url_adresse,
|
||||
const char *url_fichier);
|
||||
typedef int (*t_hts_htmlcheck_process)(t_hts_callbackarg *carg, httrackp *opt,
|
||||
char **html, int *len,
|
||||
const char *url_adresse,
|
||||
const char *url_fichier);
|
||||
|
||||
/* Same shape as process, run before HTML parsing. */
|
||||
typedef t_hts_htmlcheck_process t_hts_htmlcheck_preprocess;
|
||||
@@ -121,113 +121,111 @@ typedef t_hts_htmlcheck_process t_hts_htmlcheck_preprocess;
|
||||
typedef t_hts_htmlcheck_process t_hts_htmlcheck_postprocess;
|
||||
|
||||
/* Inspect a page (read-only html/len) without rewriting it. */
|
||||
typedef int (*t_hts_htmlcheck_check_html) (t_hts_callbackarg * carg,
|
||||
httrackp * opt, char *html, int len,
|
||||
const char *url_adresse,
|
||||
const char *url_fichier);
|
||||
typedef int (*t_hts_htmlcheck_check_html)(t_hts_callbackarg *carg,
|
||||
httrackp *opt, char *html, int len,
|
||||
const char *url_adresse,
|
||||
const char *url_fichier);
|
||||
|
||||
/* Answer an engine query identified by 'question'; returns the answer string
|
||||
(owned by the callback, must stay valid until the next call). */
|
||||
typedef const char *(*t_hts_htmlcheck_query) (t_hts_callbackarg * carg,
|
||||
httrackp * opt,
|
||||
const char *question);
|
||||
typedef const char *(*t_hts_htmlcheck_query)(t_hts_callbackarg *carg,
|
||||
httrackp *opt,
|
||||
const char *question);
|
||||
|
||||
/* Second query channel, same contract as query. */
|
||||
typedef const char *(*t_hts_htmlcheck_query2) (t_hts_callbackarg * carg,
|
||||
httrackp * opt,
|
||||
const char *question);
|
||||
typedef const char *(*t_hts_htmlcheck_query2)(t_hts_callbackarg *carg,
|
||||
httrackp *opt,
|
||||
const char *question);
|
||||
|
||||
/* Third query channel, same contract as query. */
|
||||
typedef const char *(*t_hts_htmlcheck_query3) (t_hts_callbackarg * carg,
|
||||
httrackp * opt,
|
||||
const char *question);
|
||||
typedef const char *(*t_hts_htmlcheck_query3)(t_hts_callbackarg *carg,
|
||||
httrackp *opt,
|
||||
const char *question);
|
||||
|
||||
/* Per-tick progress hook: 'back' is the transfer slot array of 'back_max'
|
||||
entries, back_index the active one; lien_tot/lien_ntot and stats report
|
||||
queue size and running totals, stat_time the elapsed time. */
|
||||
typedef int (*t_hts_htmlcheck_loop) (t_hts_callbackarg * carg, httrackp * opt,
|
||||
lien_back * back, int back_max,
|
||||
int back_index, int lien_tot,
|
||||
int lien_ntot, int stat_time,
|
||||
hts_stat_struct * stats);
|
||||
typedef int (*t_hts_htmlcheck_loop)(t_hts_callbackarg *carg, httrackp *opt,
|
||||
lien_back *back, int back_max,
|
||||
int back_index, int lien_tot, int lien_ntot,
|
||||
int stat_time, hts_stat_struct *stats);
|
||||
|
||||
/* Veto a link (adr host, fil path) after its transfer; status is the result.
|
||||
Return 0 to drop the link. */
|
||||
typedef int (*t_hts_htmlcheck_check_link) (t_hts_callbackarg * carg,
|
||||
httrackp * opt, const char *adr,
|
||||
const char *fil, int status);
|
||||
typedef int (*t_hts_htmlcheck_check_link)(t_hts_callbackarg *carg,
|
||||
httrackp *opt, const char *adr,
|
||||
const char *fil, int status);
|
||||
|
||||
/* Veto a link by its MIME type before download; return 0 to skip it. */
|
||||
typedef int (*t_hts_htmlcheck_check_mime) (t_hts_callbackarg * carg,
|
||||
httrackp * opt, const char *adr,
|
||||
const char *fil, const char *mime,
|
||||
int status);
|
||||
typedef int (*t_hts_htmlcheck_check_mime)(t_hts_callbackarg *carg,
|
||||
httrackp *opt, const char *adr,
|
||||
const char *fil, const char *mime,
|
||||
int status);
|
||||
|
||||
/* Fired when the mirror pauses, waiting on 'lockfile' to be removed. */
|
||||
typedef void (*t_hts_htmlcheck_pause) (t_hts_callbackarg * carg, httrackp * opt,
|
||||
const char *lockfile);
|
||||
typedef void (*t_hts_htmlcheck_pause)(t_hts_callbackarg *carg, httrackp *opt,
|
||||
const char *lockfile);
|
||||
|
||||
/* Fired after a file is written to disk; 'file' is the local path. */
|
||||
typedef void (*t_hts_htmlcheck_filesave) (t_hts_callbackarg * carg,
|
||||
httrackp * opt, const char *file);
|
||||
typedef void (*t_hts_htmlcheck_filesave)(t_hts_callbackarg *carg, httrackp *opt,
|
||||
const char *file);
|
||||
|
||||
/* Richer file-saved notification: source host/filename, local path, and flags
|
||||
telling whether the file is new, modified, or left unchanged. */
|
||||
typedef void (*t_hts_htmlcheck_filesave2) (t_hts_callbackarg * carg,
|
||||
httrackp * opt, const char *hostname,
|
||||
const char *filename,
|
||||
const char *localfile, int is_new,
|
||||
int is_modified, int not_updated);
|
||||
typedef void (*t_hts_htmlcheck_filesave2)(t_hts_callbackarg *carg,
|
||||
httrackp *opt, const char *hostname,
|
||||
const char *filename,
|
||||
const char *localfile, int is_new,
|
||||
int is_modified, int not_updated);
|
||||
|
||||
/* Fired for each link parsed out of a page; 'link' may be edited in place. */
|
||||
typedef int (*t_hts_htmlcheck_linkdetected) (t_hts_callbackarg * carg,
|
||||
httrackp * opt, char *link);
|
||||
typedef int (*t_hts_htmlcheck_linkdetected)(t_hts_callbackarg *carg,
|
||||
httrackp *opt, char *link);
|
||||
|
||||
/* As linkdetected, plus tag_start, the markup the link was found in. */
|
||||
typedef int (*t_hts_htmlcheck_linkdetected2) (t_hts_callbackarg * carg,
|
||||
httrackp * opt, char *link,
|
||||
const char *tag_start);
|
||||
typedef int (*t_hts_htmlcheck_linkdetected2)(t_hts_callbackarg *carg,
|
||||
httrackp *opt, char *link,
|
||||
const char *tag_start);
|
||||
|
||||
/* Fired on each transfer-status change of slot 'back'. */
|
||||
typedef int (*t_hts_htmlcheck_xfrstatus) (t_hts_callbackarg * carg,
|
||||
httrackp * opt, lien_back * back);
|
||||
typedef int (*t_hts_htmlcheck_xfrstatus)(t_hts_callbackarg *carg, httrackp *opt,
|
||||
lien_back *back);
|
||||
|
||||
/* Choose the local save path for a URL; write it into 'save'. adr/fil name the
|
||||
target, referer_adr/referer_fil the page that linked it. */
|
||||
typedef int (*t_hts_htmlcheck_savename) (t_hts_callbackarg * carg,
|
||||
httrackp * opt,
|
||||
const char *adr_complete,
|
||||
const char *fil_complete,
|
||||
const char *referer_adr,
|
||||
const char *referer_fil, char *save);
|
||||
typedef int (*t_hts_htmlcheck_savename)(t_hts_callbackarg *carg, httrackp *opt,
|
||||
const char *adr_complete,
|
||||
const char *fil_complete,
|
||||
const char *referer_adr,
|
||||
const char *referer_fil, char *save);
|
||||
|
||||
/* Extended save-name hook, same signature as savename. */
|
||||
typedef t_hts_htmlcheck_savename t_hts_htmlcheck_extsavename;
|
||||
|
||||
/* Inspect or edit the outgoing request headers in 'buff' before they are sent.
|
||||
*/
|
||||
typedef int (*t_hts_htmlcheck_sendhead) (t_hts_callbackarg * carg,
|
||||
httrackp * opt, char *buff,
|
||||
const char *adr, const char *fil,
|
||||
const char *referer_adr,
|
||||
const char *referer_fil,
|
||||
htsblk * outgoing);
|
||||
typedef int (*t_hts_htmlcheck_sendhead)(t_hts_callbackarg *carg, httrackp *opt,
|
||||
char *buff, const char *adr,
|
||||
const char *fil,
|
||||
const char *referer_adr,
|
||||
const char *referer_fil,
|
||||
htsblk *outgoing);
|
||||
|
||||
/* Inspect the incoming response headers in 'buff' after they are received. */
|
||||
typedef int (*t_hts_htmlcheck_receivehead) (t_hts_callbackarg * carg,
|
||||
httrackp * opt, char *buff,
|
||||
const char *adr, const char *fil,
|
||||
const char *referer_adr,
|
||||
const char *referer_fil,
|
||||
htsblk * incoming);
|
||||
typedef int (*t_hts_htmlcheck_receivehead)(t_hts_callbackarg *carg,
|
||||
httrackp *opt, char *buff,
|
||||
const char *adr, const char *fil,
|
||||
const char *referer_adr,
|
||||
const char *referer_fil,
|
||||
htsblk *incoming);
|
||||
|
||||
/* External parser module hooks: detect claims a document type (return 1 to
|
||||
take it), parse then extracts its links. 'str' carries the document. */
|
||||
typedef int (*t_hts_htmlcheck_detect) (t_hts_callbackarg * carg, httrackp * opt,
|
||||
htsmoduleStruct * str);
|
||||
typedef int (*t_hts_htmlcheck_detect)(t_hts_callbackarg *carg, httrackp *opt,
|
||||
htsmoduleStruct *str);
|
||||
|
||||
typedef int (*t_hts_htmlcheck_parse) (t_hts_callbackarg * carg, httrackp * opt,
|
||||
htsmoduleStruct * str);
|
||||
typedef int (*t_hts_htmlcheck_parse)(t_hts_callbackarg *carg, httrackp *opt,
|
||||
htsmoduleStruct *str);
|
||||
|
||||
/* Callbacks */
|
||||
#ifndef HTS_DEF_FWSTRUCT_t_hts_htmlcheck_callbacks
|
||||
@@ -237,10 +235,10 @@ typedef struct t_hts_htmlcheck_callbacks t_hts_htmlcheck_callbacks;
|
||||
|
||||
/* Declares one named callback slot: its function pointer (typed
|
||||
t_hts_htmlcheck_<NAME>) paired with the carg passed to it. */
|
||||
#define DEFCALLBACK(NAME) \
|
||||
struct NAME { \
|
||||
t_hts_htmlcheck_ ##NAME fun; \
|
||||
t_hts_callbackarg *carg; \
|
||||
#define DEFCALLBACK(NAME) \
|
||||
struct NAME { \
|
||||
t_hts_htmlcheck_##NAME fun; \
|
||||
t_hts_callbackarg *carg; \
|
||||
} NAME
|
||||
|
||||
/* Generic, type-erased callback slot used where the hook type is opaque. */
|
||||
@@ -324,18 +322,18 @@ extern const t_hts_htmlcheck_callbacks default_callbacks;
|
||||
/* Internal helpers for building an HTTP request/response into the engine's
|
||||
scratch buffer (opt->state.HTbuff): START resets it, PRINT appends; the
|
||||
PANIC variant records a fatal error message. */
|
||||
#define HT_PRINT(A) strcatbuff(opt->state.HTbuff,A);
|
||||
#define HT_PRINT(A) strcatbuff(opt->state.HTbuff, A);
|
||||
|
||||
#define HT_REQUEST_START opt->state.HTbuff[0]='\0';
|
||||
#define HT_REQUEST_START opt->state.HTbuff[0] = '\0';
|
||||
|
||||
#define HT_REQUEST_END
|
||||
#define HTT_REQUEST_START opt->state.HTbuff[0]='\0';
|
||||
#define HTT_REQUEST_START opt->state.HTbuff[0] = '\0';
|
||||
|
||||
#define HTT_REQUEST_END
|
||||
#define HTS_REQUEST_START opt->state.HTbuff[0]='\0';
|
||||
#define HTS_REQUEST_START opt->state.HTbuff[0] = '\0';
|
||||
|
||||
#define HTS_REQUEST_END
|
||||
#define HTS_PANIC_PRINTF(S) strcpybuff(opt->state._hts_errmsg,S);
|
||||
#define HTS_PANIC_PRINTF(S) strcpybuff(opt->state._hts_errmsg, S);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -33,43 +33,43 @@ EOF
|
||||
else
|
||||
GET "${url}"
|
||||
fi
|
||||
) \
|
||||
| grep -E '^<!ENTITY [a-zA-Z0-9_]' \
|
||||
| sed \
|
||||
-e 's/<!ENTITY //' -e "s/[[:space:]][[:space:]]*/ /g" \
|
||||
-e 's/-->$//' \
|
||||
-e 's/\([^ ]*\) CDATA "&#\([^\"]*\);" -- \(.*\)/\1 \2 \3/'\
|
||||
| ( \
|
||||
read A
|
||||
while test -n "$A"; do
|
||||
ent="${A%% *}"
|
||||
code=$(echo "$A"|cut -f2 -d' ')
|
||||
# compute hash
|
||||
hash=0
|
||||
i=0
|
||||
a=1664525
|
||||
c=1013904223
|
||||
m="$[1 << 32]"
|
||||
while test "$i" -lt ${#ent}; do
|
||||
d="$(echo -n "${ent:${i}:1}"|hexdump -v -e '/1 "%d"')"
|
||||
hash="$[((${hash}*${a})%(${m})+${d}+${c})%(${m})]"
|
||||
i=$[${i}+1]
|
||||
done
|
||||
echo -e " /* $A */"
|
||||
echo -e " case ${hash}u:"
|
||||
echo -e " if (len == ${#ent} /* && strncmp(ent, \"${ent}\") == 0 */) {"
|
||||
echo -e " return ${code};"
|
||||
echo -e " }"
|
||||
echo -e " break;"
|
||||
) |
|
||||
grep -E '^<!ENTITY [a-zA-Z0-9_]' |
|
||||
sed \
|
||||
-e 's/<!ENTITY //' -e "s/[[:space:]][[:space:]]*/ /g" \
|
||||
-e 's/-->$//' \
|
||||
-e 's/\([^ ]*\) CDATA "&#\([^\"]*\);" -- \(.*\)/\1 \2 \3/' |
|
||||
(
|
||||
read -r A
|
||||
while test -n "$A"; do
|
||||
ent="${A%% *}"
|
||||
code=$(echo "$A" | cut -f2 -d' ')
|
||||
# compute hash
|
||||
hash=0
|
||||
i=0
|
||||
a=1664525
|
||||
c=1013904223
|
||||
m="$((1 << 32))"
|
||||
while test "$i" -lt ${#ent}; do
|
||||
d="$(echo -n "${ent:${i}:1}" | hexdump -v -e '/1 "%d"')"
|
||||
hash="$((((hash * a) % (m) + d + c) % (m)))"
|
||||
i=$((i + 1))
|
||||
done
|
||||
echo -e " /* $A */"
|
||||
echo -e " case ${hash}u:"
|
||||
echo -e " if (len == ${#ent} /* && strncmp(ent, \"${ent}\") == 0 */) {"
|
||||
echo -e " return ${code};"
|
||||
echo -e " }"
|
||||
echo -e " break;"
|
||||
|
||||
# next
|
||||
read A
|
||||
done
|
||||
)
|
||||
# next
|
||||
read -r A
|
||||
done
|
||||
)
|
||||
cat <<EOF
|
||||
}
|
||||
/* unknown */
|
||||
return -1;
|
||||
}
|
||||
EOF
|
||||
) > ${dest}
|
||||
) >${dest}
|
||||
|
||||
@@ -43,10 +43,10 @@ Please visit our Website: http://www.httrack.com
|
||||
configure.ac, decoupled from these). VERSION is the display form, VERSIONID
|
||||
the dotted numeric form, AFF_VERSION the short form shown in footers,
|
||||
LIB_VERSION the data/cache format generation. */
|
||||
#define HTTRACK_VERSION "3.49-8"
|
||||
#define HTTRACK_VERSIONID "3.49.8"
|
||||
#define HTTRACK_AFF_VERSION "3.x"
|
||||
#define HTTRACK_LIB_VERSION "2.0"
|
||||
#define HTTRACK_VERSION "3.49-8"
|
||||
#define HTTRACK_VERSIONID "3.49.8"
|
||||
#define HTTRACK_AFF_VERSION "3.x"
|
||||
#define HTTRACK_LIB_VERSION "2.0"
|
||||
|
||||
#ifndef HTS_NOINCLUDES
|
||||
#include <stdio.h>
|
||||
@@ -71,11 +71,11 @@ Please visit our Website: http://www.httrack.com
|
||||
varargs starting at arg. */
|
||||
#ifndef HTS_UNUSED
|
||||
#ifdef __GNUC__
|
||||
#define HTS_UNUSED __attribute__ ((unused))
|
||||
#define HTS_UNUSED __attribute__((unused))
|
||||
|
||||
#define HTS_STATIC static __attribute__ ((unused))
|
||||
#define HTS_STATIC static __attribute__((unused))
|
||||
|
||||
#define HTS_PRINTF_FUN(fmt, arg) __attribute__ ((format (printf, fmt, arg)))
|
||||
#define HTS_PRINTF_FUN(fmt, arg) __attribute__((format(printf, fmt, arg)))
|
||||
#else
|
||||
#define HTS_UNUSED
|
||||
#define HTS_STATIC static
|
||||
@@ -113,7 +113,7 @@ Please visit our Website: http://www.httrack.com
|
||||
|
||||
#ifndef HTS_LONGLONG
|
||||
#ifdef SIZEOF_LONG_LONG
|
||||
#if SIZEOF_LONG_LONG==8
|
||||
#if SIZEOF_LONG_LONG == 8
|
||||
#define HTS_LONGLONG 1
|
||||
#endif
|
||||
#endif
|
||||
@@ -204,12 +204,12 @@ Please visit our Website: http://www.httrack.com
|
||||
#endif
|
||||
|
||||
#define HTS_HTTRACKRC ".httrackrc"
|
||||
#define HTS_HTTRACKCNF HTS_ETCPATH"/httrack.conf"
|
||||
#define HTS_HTTRACKCNF HTS_ETCPATH "/httrack.conf"
|
||||
|
||||
#ifdef DATADIR
|
||||
#define HTS_HTTRACKDIR DATADIR"/httrack/"
|
||||
#define HTS_HTTRACKDIR DATADIR "/httrack/"
|
||||
#else
|
||||
#define HTS_HTTRACKDIR HTS_PREFIX"/share/httrack/"
|
||||
#define HTS_HTTRACKDIR HTS_PREFIX "/share/httrack/"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -226,12 +226,17 @@ Please visit our Website: http://www.httrack.com
|
||||
|
||||
/* Copyright (C) 1998 Xavier Roche and other contributors */
|
||||
#define HTTRACK_AFF_AUTHORS "[XR&CO'2014]"
|
||||
#define HTS_DEFAULT_FOOTER "<!-- Mirrored from %s%s by HTTrack Website Copier/" HTTRACK_AFF_VERSION " " HTTRACK_AFF_AUTHORS ", %s -->"
|
||||
#define HTS_DEFAULT_FOOTER \
|
||||
"<!-- Mirrored from %s%s by HTTrack Website Copier/" HTTRACK_AFF_VERSION \
|
||||
" " HTTRACK_AFF_AUTHORS ", %s -->"
|
||||
#define HTTRACK_WEB "http://www.httrack.com"
|
||||
#define HTS_UPDATE_WEBSITE "http://www.httrack.com/update.php3?Product=HTTrack&Version=" HTTRACK_VERSIONID "&VersionStr=" HTTRACK_VERSION "&Platform=%d&Language=%s"
|
||||
#define HTS_UPDATE_WEBSITE \
|
||||
"http://www.httrack.com/" \
|
||||
"update.php3?Product=HTTrack&Version=" HTTRACK_VERSIONID \
|
||||
"&VersionStr=" HTTRACK_VERSION "&Platform=%d&Language=%s"
|
||||
|
||||
#define H_CRLF "\x0d\x0a"
|
||||
#define CRLF "\x0d\x0a"
|
||||
#define CRLF "\x0d\x0a"
|
||||
#ifdef _WIN32
|
||||
#define LF "\x0d\x0a"
|
||||
#else
|
||||
@@ -247,13 +252,14 @@ Please visit our Website: http://www.httrack.com
|
||||
return type stays compatible with the int it replaces. */
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_boolean
|
||||
#define HTS_DEF_DEFSTRUCT_hts_boolean
|
||||
|
||||
typedef enum hts_boolean { HTS_FALSE = 0, HTS_TRUE = 1 } hts_boolean;
|
||||
#endif
|
||||
|
||||
/* Larger/smaller of two values. Macros: arguments are evaluated twice. */
|
||||
#define maximum(A,B) ( (A) > (B) ? (A) : (B) )
|
||||
#define maximum(A, B) ((A) > (B) ? (A) : (B))
|
||||
|
||||
#define minimum(A,B) ( (A) < (B) ? (A) : (B) )
|
||||
#define minimum(A, B) ((A) < (B) ? (A) : (B))
|
||||
|
||||
/* True when A is a non-NULL, non-empty string. */
|
||||
#define strnotempty(A) (((A) != NULL && (A)[0] != '\0'))
|
||||
@@ -278,10 +284,10 @@ typedef enum hts_boolean { HTS_FALSE = 0, HTS_TRUE = 1 } hts_boolean;
|
||||
#endif
|
||||
#else
|
||||
/* See <http://gcc.gnu.org/wiki/Visibility> */
|
||||
#if ( ( defined(__GNUC__) && ( __GNUC__ >= 4 ) ) \
|
||||
|| ( defined(HAVE_VISIBILITY) && HAVE_VISIBILITY ) )
|
||||
#if ((defined(__GNUC__) && (__GNUC__ >= 4)) || \
|
||||
(defined(HAVE_VISIBILITY) && HAVE_VISIBILITY))
|
||||
|
||||
#define HTSEXT_API __attribute__ ((visibility ("default")))
|
||||
#define HTSEXT_API __attribute__((visibility("default")))
|
||||
#else
|
||||
#define HTSEXT_API
|
||||
#endif
|
||||
@@ -335,8 +341,8 @@ typedef __int64 LLint;
|
||||
typedef __int64 TStamp;
|
||||
|
||||
#define LLintP "%I64d"
|
||||
#elif (defined(_LP64) || defined(__x86_64__) \
|
||||
|| defined(__powerpc64__) || defined(__64BIT__))
|
||||
#elif (defined(_LP64) || defined(__x86_64__) || defined(__powerpc64__) || \
|
||||
defined(__64BIT__))
|
||||
|
||||
typedef long int LLint;
|
||||
|
||||
@@ -400,16 +406,17 @@ typedef int T_SOC;
|
||||
/* Permission bits for created folders and files (mkdir and chmod).
|
||||
PROTECT_FOLDER is owner-only. With HTS_ACCESS set (the default) the ACCESS_
|
||||
modes also grant group/other read; otherwise they stay owner-only. */
|
||||
#define HTS_PROTECT_FOLDER (S_IRUSR|S_IWUSR|S_IXUSR)
|
||||
#define HTS_PROTECT_FOLDER (S_IRUSR | S_IWUSR | S_IXUSR)
|
||||
|
||||
#if HTS_ACCESS
|
||||
#define HTS_ACCESS_FILE (S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)
|
||||
#define HTS_ACCESS_FILE (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)
|
||||
|
||||
#define HTS_ACCESS_FOLDER (S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
|
||||
#define HTS_ACCESS_FOLDER \
|
||||
(S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)
|
||||
#else
|
||||
#define HTS_ACCESS_FILE (S_IRUSR|S_IWUSR)
|
||||
#define HTS_ACCESS_FILE (S_IRUSR | S_IWUSR)
|
||||
|
||||
#define HTS_ACCESS_FOLDER (S_IRUSR|S_IWUSR|S_IXUSR)
|
||||
#define HTS_ACCESS_FOLDER (S_IRUSR | S_IWUSR | S_IXUSR)
|
||||
#endif
|
||||
|
||||
/* Sanity-check that the required preprocessor switches are defined */
|
||||
@@ -427,7 +434,11 @@ typedef int T_SOC;
|
||||
#endif
|
||||
|
||||
/* Flush stdout; also calls fflush(stdin), which ISO C leaves undefined for input streams */
|
||||
#define io_flush { fflush(stdout); fflush(stdin); }
|
||||
#define io_flush \
|
||||
{ \
|
||||
fflush(stdout); \
|
||||
fflush(stdin); \
|
||||
}
|
||||
|
||||
/* HTSLib */
|
||||
|
||||
@@ -447,7 +458,7 @@ typedef int T_SOC;
|
||||
|
||||
#ifdef _DEBUG
|
||||
// trace mallocs
|
||||
//#define HTS_TRACE_MALLOC
|
||||
// #define HTS_TRACE_MALLOC
|
||||
#ifdef HTS_TRACE_MALLOC
|
||||
typedef unsigned long int t_htsboundary;
|
||||
|
||||
@@ -524,7 +535,13 @@ static const t_htsboundary htsboundary = 0xDEADBEEF;
|
||||
#if _HTS_WIDE
|
||||
extern FILE *DEBUG_fp;
|
||||
|
||||
#define DEBUG_W(A) { if (DEBUG_fp==NULL) DEBUG_fp=fopen("bug.out","wb"); fprintf(DEBUG_fp,":>"A); fflush(DEBUG_fp); }
|
||||
#define DEBUG_W(A) \
|
||||
{ \
|
||||
if (DEBUG_fp == NULL) \
|
||||
DEBUG_fp = fopen("bug.out", "wb"); \
|
||||
fprintf(DEBUG_fp, ":>" A); \
|
||||
fflush(DEBUG_fp); \
|
||||
}
|
||||
#undef _
|
||||
#define _ ,
|
||||
#endif
|
||||
|
||||
@@ -2605,6 +2605,8 @@ int ident_url_absolute(const char *url, lien_adrfil *adrfil) {
|
||||
for(i = 0; adrfil->fil[i] != '\0'; i++)
|
||||
if (adrfil->fil[i] == '\\')
|
||||
adrfil->fil[i] = '/';
|
||||
// collapse ../ like the http branch above (path-traversal safety)
|
||||
fil_simplifie(adrfil->fil);
|
||||
}
|
||||
|
||||
// no hostname
|
||||
|
||||
@@ -69,41 +69,41 @@ typedef struct hash_struct hash_struct;
|
||||
#define HTS_DEF_FWSTRUCT_htsmoduleStruct
|
||||
typedef struct htsmoduleStruct htsmoduleStruct;
|
||||
#endif
|
||||
typedef int (*t_htsAddLink) (htsmoduleStruct * str, char *link);
|
||||
typedef int (*t_htsAddLink)(htsmoduleStruct *str, char *link);
|
||||
|
||||
/** Per-object context passed to a parser module for one downloaded file.
|
||||
Field access classes are noted; engine owns all pointers unless stated. */
|
||||
struct htsmoduleStruct {
|
||||
/* Read-only elements */
|
||||
const char *filename; /* filename (C:\My Web Sites\...) */
|
||||
int size; /* size of filename (should be > 0) */
|
||||
const char *mime; /* MIME type of the object */
|
||||
const char *url_host; /* incoming hostname (www.foo.com) */
|
||||
const char *url_file; /* incoming filename (/bar/bar.gny) */
|
||||
const char *filename; /* filename (C:\My Web Sites\...) */
|
||||
int size; /* size of filename (should be > 0) */
|
||||
const char *mime; /* MIME type of the object */
|
||||
const char *url_host; /* incoming hostname (www.foo.com) */
|
||||
const char *url_file; /* incoming filename (/bar/bar.gny) */
|
||||
|
||||
/* Write-only */
|
||||
const char *wrapper_name; /* name of wrapper (static string) */
|
||||
char *err_msg; /* if an error occurred, the error message (max. 1KB) */
|
||||
const char *wrapper_name; /* name of wrapper (static string) */
|
||||
char *err_msg; /* if an error occurred, the error message (max. 1KB) */
|
||||
|
||||
/* Read/Write */
|
||||
int relativeToHtmlLink; /* set this to 1 if all urls you pass to addLink
|
||||
are in fact relative to the html file where your
|
||||
module was originally */
|
||||
int relativeToHtmlLink; /* set this to 1 if all urls you pass to addLink
|
||||
are in fact relative to the html file where your
|
||||
module was originally */
|
||||
|
||||
/* Callbacks */
|
||||
t_htsAddLink addLink; /* call this function when links are
|
||||
being detected. it if not your responsability to decide
|
||||
if the engine will keep them, or not. */
|
||||
t_htsAddLink addLink; /* call this function when links are
|
||||
being detected. it is not your responsibility to
|
||||
decide if the engine will keep them, or not. */
|
||||
|
||||
/* Optional */
|
||||
char *localLink; /* if non null, the engine will write there the local
|
||||
relative filename of the link added by addLink(), or
|
||||
the absolute path if the link was refused by the wizard */
|
||||
int localLinkSize; /* size of the optionnal buffer */
|
||||
char *localLink; /* if non null, the engine will write there the local
|
||||
relative filename of the link added by addLink(), or
|
||||
the absolute path if the link was refused by the wizard */
|
||||
int localLinkSize; /* size of the optional buffer */
|
||||
|
||||
/* User-defined */
|
||||
void *userdef; /* can be used by callback routines
|
||||
*/
|
||||
void *userdef; /* can be used by callback routines
|
||||
*/
|
||||
|
||||
/* The parser httrackp structure (may be used) */
|
||||
httrackp *opt;
|
||||
@@ -117,7 +117,6 @@ struct htsmoduleStruct {
|
||||
int *ptr_;
|
||||
const char *page_charset_;
|
||||
/* Internal use - please don't touch */
|
||||
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -126,11 +125,11 @@ extern "C" {
|
||||
|
||||
/** Module lifecycle hooks. Init/PlugInit return 1 on success, 0 on failure;
|
||||
Exit returns its own status (ignored by the engine). */
|
||||
typedef int (*t_htsWrapperInit) (char *fn, char *args);
|
||||
typedef int (*t_htsWrapperInit)(char *fn, char *args);
|
||||
|
||||
typedef int (*t_htsWrapperExit) (void);
|
||||
typedef int (*t_htsWrapperExit)(void);
|
||||
|
||||
typedef int (*t_htsWrapperPlugInit) (char *args);
|
||||
typedef int (*t_htsWrapperPlugInit)(char *args);
|
||||
|
||||
/* Library internal definitions */
|
||||
#ifdef HTS_INTERNAL_BYTECODE
|
||||
@@ -138,7 +137,7 @@ typedef int (*t_htsWrapperPlugInit) (char *args);
|
||||
/** Capabilities string ("-noV6", "-nossl", ...) followed by "+name" for each
|
||||
loaded module. Returned pointer aliases opt->state.HTbuff; do not free, and
|
||||
it is overwritten by the next call. */
|
||||
HTSEXT_API const char *hts_get_version_info(httrackp * opt);
|
||||
HTSEXT_API const char *hts_get_version_info(httrackp *opt);
|
||||
|
||||
/** Static capabilities string set by htspe_init(); valid for the process
|
||||
lifetime, do not free. */
|
||||
@@ -154,7 +153,7 @@ extern void htspe_uninit(void);
|
||||
/** Run the external-parser callbacks for the object described by str.
|
||||
Returns the parse callback result (>=0) on a handled object, or -1 if no
|
||||
module claimed it or its wrapper_name is blacklisted. */
|
||||
extern int hts_parse_externals(htsmoduleStruct * str);
|
||||
extern int hts_parse_externals(htsmoduleStruct *str);
|
||||
|
||||
/** Nonzero if IPv6 support was compiled in (== HTS_INET6). */
|
||||
extern int V6_is_available;
|
||||
|
||||
86
src/htsnet.h
86
src/htsnet.h
@@ -112,10 +112,10 @@ struct SOCaddr {
|
||||
|
||||
/** Pointer to the port field (network byte order) for the active family.
|
||||
Asserts on NULL or an unset/unknown family. */
|
||||
static HTS_INLINE HTS_UNUSED in_port_t* SOCaddr_sinport_(SOCaddr *const addr,
|
||||
const char *file, const int line) {
|
||||
static HTS_INLINE HTS_UNUSED in_port_t *
|
||||
SOCaddr_sinport_(SOCaddr *const addr, const char *file, const int line) {
|
||||
assertf_(addr != NULL, file, line);
|
||||
switch(addr->m_addr.sa.sa_family) {
|
||||
switch (addr->m_addr.sa.sa_family) {
|
||||
case AF_INET:
|
||||
return &addr->m_addr.in.sin_port;
|
||||
break;
|
||||
@@ -125,7 +125,7 @@ static HTS_INLINE HTS_UNUSED in_port_t* SOCaddr_sinport_(SOCaddr *const addr,
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
assertf_(! "invalid structure", file, line);
|
||||
assertf_(!"invalid structure", file, line);
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
@@ -133,10 +133,11 @@ static HTS_INLINE HTS_UNUSED in_port_t* SOCaddr_sinport_(SOCaddr *const addr,
|
||||
|
||||
/** Length of the active sockaddr (sockaddr_in or sockaddr_in6), or 0 if the
|
||||
family is unset/unknown. The 0 case doubles as the "not valid" test. */
|
||||
static HTS_INLINE HTS_UNUSED socklen_t SOCaddr_size_(const SOCaddr*const addr,
|
||||
const char *file, const int line) {
|
||||
static HTS_INLINE HTS_UNUSED socklen_t SOCaddr_size_(const SOCaddr *const addr,
|
||||
const char *file,
|
||||
const int line) {
|
||||
assertf_(addr != NULL, file, line);
|
||||
switch(addr->m_addr.sa.sa_family) {
|
||||
switch (addr->m_addr.sa.sa_family) {
|
||||
case AF_INET:
|
||||
return sizeof(addr->m_addr.in);
|
||||
break;
|
||||
@@ -152,8 +153,8 @@ static HTS_INLINE HTS_UNUSED socklen_t SOCaddr_size_(const SOCaddr*const addr,
|
||||
}
|
||||
|
||||
/** Reset to the unset state (family AF_UNSPEC), making the address invalid. */
|
||||
static HTS_INLINE HTS_UNUSED void SOCaddr_clear_(SOCaddr*const addr,
|
||||
const char *file, const int line) {
|
||||
static HTS_INLINE HTS_UNUSED void
|
||||
SOCaddr_clear_(SOCaddr *const addr, const char *file, const int line) {
|
||||
assertf_(addr != NULL, file, line);
|
||||
addr->m_addr.sa.sa_family = AF_UNSPEC;
|
||||
}
|
||||
@@ -191,14 +192,16 @@ static HTS_INLINE HTS_UNUSED void SOCaddr_clear_(SOCaddr*const addr,
|
||||
|
||||
/** Set the port (host-order argument, stored network-order) on the active
|
||||
* family. */
|
||||
#define SOCaddr_initport(server, port) do { \
|
||||
SOCaddr_sinport(server) = htons((in_port_t) (port)); \
|
||||
} while(0)
|
||||
#define SOCaddr_initport(server, port) \
|
||||
do { \
|
||||
SOCaddr_sinport(server) = htons((in_port_t) (port)); \
|
||||
} while (0)
|
||||
|
||||
/** Initialize as an all-zero IPv4 wildcard (INADDR_ANY) address; returns its
|
||||
sockaddr length. */
|
||||
static HTS_INLINE HTS_UNUSED socklen_t SOCaddr_initany_(SOCaddr*const addr,
|
||||
const char *file, const int line) {
|
||||
static HTS_INLINE HTS_UNUSED socklen_t SOCaddr_initany_(SOCaddr *const addr,
|
||||
const char *file,
|
||||
const int line) {
|
||||
assertf_(addr != NULL, file, line);
|
||||
memset(&addr->m_addr.in, 0, sizeof(addr->m_addr.in));
|
||||
addr->m_addr.in.sin_family = AF_INET;
|
||||
@@ -206,17 +209,20 @@ static HTS_INLINE HTS_UNUSED socklen_t SOCaddr_initany_(SOCaddr*const addr,
|
||||
}
|
||||
|
||||
/** Initialize server as an IPv4 wildcard (INADDR_ANY) address. */
|
||||
#define SOCaddr_initany(server) do { \
|
||||
SOCaddr_initany_(&(server), __FILE__, __LINE__); \
|
||||
} while(0)
|
||||
#define SOCaddr_initany(server) \
|
||||
do { \
|
||||
SOCaddr_initany_(&(server), __FILE__, __LINE__); \
|
||||
} while (0)
|
||||
|
||||
/** Populate server from data. data_size selects the source form: a full
|
||||
sockaddr_in / sockaddr_in6, or a raw 4-byte (IPv4) / 16-byte (IPv6) address
|
||||
with port zeroed. Any other size leaves an AF_INET shell. Returns the
|
||||
resulting sockaddr length. */
|
||||
static HTS_UNUSED socklen_t SOCaddr_copyaddr_(SOCaddr*const server,
|
||||
const void *data, const size_t data_size,
|
||||
const char *file, const int line) {
|
||||
static HTS_UNUSED socklen_t SOCaddr_copyaddr_(SOCaddr *const server,
|
||||
const void *data,
|
||||
const size_t data_size,
|
||||
const char *file,
|
||||
const int line) {
|
||||
assertf_(server != NULL, file, line);
|
||||
assertf_(data != NULL, file, line);
|
||||
|
||||
@@ -248,32 +254,35 @@ static HTS_UNUSED socklen_t SOCaddr_copyaddr_(SOCaddr*const server,
|
||||
|
||||
/** Copy hpaddr (length hpsize) into server, writing the result length into the
|
||||
lvalue server_len (int). See SOCaddr_copyaddr_ for accepted forms. */
|
||||
#define SOCaddr_copyaddr(server, server_len, hpaddr, hpsize) do { \
|
||||
server_len = (int) SOCaddr_copyaddr_(&(server), hpaddr, hpsize, __FILE__, __LINE__); \
|
||||
} while(0)
|
||||
#define SOCaddr_copyaddr(server, server_len, hpaddr, hpsize) \
|
||||
do { \
|
||||
server_len = (int) SOCaddr_copyaddr_(&(server), hpaddr, hpsize, __FILE__, \
|
||||
__LINE__); \
|
||||
} while (0)
|
||||
|
||||
/** Like SOCaddr_copyaddr but discards the result length. */
|
||||
#define SOCaddr_copyaddr2(server, hpaddr, hpsize) do { \
|
||||
(void) SOCaddr_copyaddr_(&(server), hpaddr, hpsize, __FILE__, __LINE__); \
|
||||
} while(0)
|
||||
#define SOCaddr_copyaddr2(server, hpaddr, hpsize) \
|
||||
do { \
|
||||
(void) SOCaddr_copyaddr_(&(server), hpaddr, hpsize, __FILE__, __LINE__); \
|
||||
} while (0)
|
||||
|
||||
/** Copy one SOCaddr (src) into another (dest), preserving family and port. */
|
||||
#define SOCaddr_copy_SOCaddr(dest, src) do { \
|
||||
SOCaddr_copyaddr_(&(dest), &(src).m_addr.sa, SOCaddr_size(src), __FILE__, __LINE__); \
|
||||
} while(0)
|
||||
#define SOCaddr_copy_SOCaddr(dest, src) \
|
||||
do { \
|
||||
SOCaddr_copyaddr_(&(dest), &(src).m_addr.sa, SOCaddr_size(src), __FILE__, \
|
||||
__LINE__); \
|
||||
} while (0)
|
||||
|
||||
/** Write the numeric (dotted/colon) host of ss into namebuf (capacity
|
||||
namebuflen), scope id stripped. On failure namebuf becomes "". */
|
||||
static HTS_UNUSED void SOCaddr_inetntoa_(char *namebuf, size_t namebuflen,
|
||||
SOCaddr *const ss,
|
||||
const char *file, const int line) {
|
||||
static HTS_UNUSED void SOCaddr_inetntoa_(char *namebuf, size_t namebuflen,
|
||||
SOCaddr *const ss, const char *file,
|
||||
const int line) {
|
||||
assertf_(namebuf != NULL, file, line);
|
||||
assertf_(ss != NULL, file, line);
|
||||
|
||||
if (getnameinfo(&ss->m_addr.sa, sizeof(ss->m_addr),
|
||||
namebuf, namebuflen,
|
||||
NULL, 0,
|
||||
NI_NUMERICHOST) == 0) {
|
||||
if (getnameinfo(&ss->m_addr.sa, sizeof(ss->m_addr), namebuf, namebuflen, NULL,
|
||||
0, NI_NUMERICHOST) == 0) {
|
||||
/* remove scope id(s) */
|
||||
char *const pos = strchr(namebuf, '%');
|
||||
if (pos != NULL) {
|
||||
@@ -285,11 +294,12 @@ static HTS_UNUSED void SOCaddr_inetntoa_(char *namebuf, size_t namebuflen,
|
||||
}
|
||||
|
||||
/** Numeric host of ss into namebuf (capacity namebuflen); "" on failure. */
|
||||
#define SOCaddr_inetntoa(namebuf, namebuflen, ss) \
|
||||
#define SOCaddr_inetntoa(namebuf, namebuflen, ss) \
|
||||
SOCaddr_inetntoa_(namebuf, namebuflen, &(ss), __FILE__, __LINE__)
|
||||
|
||||
/** Single-char family tag: '1' for IPv4, '2' otherwise (used in the cache). */
|
||||
#define SOCaddr_getproto(ss) ( SOCaddr_size(ss) == sizeof(struct sockaddr_in) ? '1' : '2')
|
||||
#define SOCaddr_getproto(ss) \
|
||||
(SOCaddr_size(ss) == sizeof(struct sockaddr_in) ? '1' : '2')
|
||||
|
||||
/** Length type for socket APIs (getsockname, accept, ...). */
|
||||
typedef socklen_t SOClen;
|
||||
|
||||
162
src/htsopt.h
162
src/htsopt.h
@@ -72,6 +72,7 @@ typedef struct String String;
|
||||
#endif
|
||||
#ifndef HTS_DEF_STRUCT_String
|
||||
#define HTS_DEF_STRUCT_String
|
||||
|
||||
struct String {
|
||||
char *buffer_;
|
||||
size_t length_;
|
||||
@@ -80,7 +81,7 @@ struct String {
|
||||
#endif
|
||||
|
||||
/* Defines */
|
||||
#define CATBUFF_SIZE (STRING_SIZE*2*2)
|
||||
#define CATBUFF_SIZE (STRING_SIZE * 2 * 2)
|
||||
|
||||
#define STRING_SIZE 2048
|
||||
|
||||
@@ -108,7 +109,7 @@ struct htsfilters {
|
||||
};
|
||||
|
||||
/* User callbacks chain */
|
||||
typedef int (*htscallbacksfncptr) (void);
|
||||
typedef int (*htscallbacksfncptr)(void);
|
||||
|
||||
typedef struct htscallbacks htscallbacks;
|
||||
|
||||
@@ -179,6 +180,7 @@ typedef struct lien_url lien_url;
|
||||
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_log_type
|
||||
#define HTS_DEF_DEFSTRUCT_hts_log_type
|
||||
|
||||
typedef enum hts_log_type {
|
||||
LOG_PANIC,
|
||||
LOG_ERROR,
|
||||
@@ -278,16 +280,17 @@ struct htslibhandles {
|
||||
|
||||
/* Javascript parser flags */
|
||||
typedef enum htsparsejava_flags {
|
||||
HTSPARSE_NONE = 0, // don't parse
|
||||
HTSPARSE_DEFAULT = 1, // parse default (all)
|
||||
HTSPARSE_NO_CLASS = 2, // don't parse .java
|
||||
HTSPARSE_NO_JAVASCRIPT = 4, // don't parse .js
|
||||
HTSPARSE_NO_AGGRESSIVE = 8 // don't aggressively parse .js or .java
|
||||
HTSPARSE_NONE = 0, // don't parse
|
||||
HTSPARSE_DEFAULT = 1, // parse default (all)
|
||||
HTSPARSE_NO_CLASS = 2, // don't parse .java
|
||||
HTSPARSE_NO_JAVASCRIPT = 4, // don't parse .js
|
||||
HTSPARSE_NO_AGGRESSIVE = 8 // don't aggressively parse .js or .java
|
||||
} htsparsejava_flags;
|
||||
|
||||
/* Link-rewriting style for saved pages (opt->urlmode). */
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_urlmode
|
||||
#define HTS_DEF_DEFSTRUCT_hts_urlmode
|
||||
|
||||
typedef enum hts_urlmode {
|
||||
HTS_URLMODE_ABSOLUTE = 0, /**< absolute URL (http://host/path) everywhere */
|
||||
HTS_URLMODE_ABSOLUTE_FILE = 1, /**< legacy file: form, unused */
|
||||
@@ -301,6 +304,7 @@ typedef enum hts_urlmode {
|
||||
/* Cache policy for updates and retries (opt->cache). */
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_cachemode
|
||||
#define HTS_DEF_DEFSTRUCT_hts_cachemode
|
||||
|
||||
typedef enum hts_cachemode {
|
||||
HTS_CACHE_NONE = 0, /**< no cache */
|
||||
HTS_CACHE_PRIORITY = 1, /**< cache takes priority over the network */
|
||||
@@ -311,6 +315,7 @@ typedef enum hts_cachemode {
|
||||
/* Interactive wizard level (opt->wizard). */
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_wizard
|
||||
#define HTS_DEF_DEFSTRUCT_hts_wizard
|
||||
|
||||
typedef enum hts_wizard {
|
||||
HTS_WIZARD_NONE = 0, /**< no wizard */
|
||||
HTS_WIZARD_ASK = 1, /**< wizard asks questions */
|
||||
@@ -321,6 +326,7 @@ typedef enum hts_wizard {
|
||||
/* robots.txt / meta-robots obedience level (opt->robots). */
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_robots
|
||||
#define HTS_DEF_DEFSTRUCT_hts_robots
|
||||
|
||||
typedef enum hts_robots {
|
||||
HTS_ROBOTS_NEVER = 0, /**< ignore robots rules */
|
||||
HTS_ROBOTS_SOMETIMES = 1, /**< partial obedience (default) */
|
||||
@@ -406,34 +412,34 @@ struct httrackp {
|
||||
/* */
|
||||
hts_wizard wizard; /**< interactive wizard level (none/ask/auto) */
|
||||
hts_boolean flush; /**< fflush() log files after each write */
|
||||
int travel; /**< link-following scope (same domain, etc.) */
|
||||
int seeker; /**< allowed direction: go up and/or down the tree */
|
||||
int depth; /**< maximum recursion depth (-rN) */
|
||||
int extdepth; /**< maximum recursion depth outside the start domain */
|
||||
int travel; /**< link-following scope (same domain, etc.) */
|
||||
int seeker; /**< allowed direction: go up and/or down the tree */
|
||||
int depth; /**< maximum recursion depth (-rN) */
|
||||
int extdepth; /**< maximum recursion depth outside the start domain */
|
||||
hts_urlmode
|
||||
urlmode; /**< saved-link rewriting style (relative, absolute, etc.) */
|
||||
hts_boolean no_type_change; // do not change file type according to MIME
|
||||
hts_log_type debug; /**< debug logging level */
|
||||
int getmode; /**< what to fetch (HTML, images, ...) bitmask */
|
||||
FILE *log; /**< informational log stream; NULL mutes it */
|
||||
FILE *errlog; /**< error log stream; NULL mutes it */
|
||||
LLint maxsite; /**< max total bytes for the whole mirror */
|
||||
LLint maxfile_nonhtml; /**< max bytes per non-HTML file */
|
||||
LLint maxfile_html; /**< max bytes per HTML file */
|
||||
int maxsoc; /**< max simultaneous sockets (-cN) */
|
||||
LLint fragment; /**< split site after this many bytes */
|
||||
hts_boolean no_type_change; // do not change file type according to MIME
|
||||
hts_log_type debug; /**< debug logging level */
|
||||
int getmode; /**< what to fetch (HTML, images, ...) bitmask */
|
||||
FILE *log; /**< informational log stream; NULL mutes it */
|
||||
FILE *errlog; /**< error log stream; NULL mutes it */
|
||||
LLint maxsite; /**< max total bytes for the whole mirror */
|
||||
LLint maxfile_nonhtml; /**< max bytes per non-HTML file */
|
||||
LLint maxfile_html; /**< max bytes per HTML file */
|
||||
int maxsoc; /**< max simultaneous sockets (-cN) */
|
||||
LLint fragment; /**< split site after this many bytes */
|
||||
hts_boolean
|
||||
nearlink; /**< also fetch images/data adjacent to a page but off-site */
|
||||
hts_boolean makeindex; /**< build a top-level index.html */
|
||||
hts_boolean kindex; /**< build a keyword index */
|
||||
hts_boolean delete_old; /**< delete locally obsolete files after update */
|
||||
int timeout; /**< connection timeout in seconds */
|
||||
int rateout; /**< minimum transfer rate (bytes/s) before abort */
|
||||
int maxtime; /**< max total mirror duration in seconds */
|
||||
int maxrate; /**< max transfer rate cap (bytes/s) */
|
||||
float maxconn; /**< max connections per second */
|
||||
int waittime; /**< scheduled start time (wall-clock seconds) */
|
||||
hts_cachemode cache; /**< cache generation mode */
|
||||
int timeout; /**< connection timeout in seconds */
|
||||
int rateout; /**< minimum transfer rate (bytes/s) before abort */
|
||||
int maxtime; /**< max total mirror duration in seconds */
|
||||
int maxrate; /**< max transfer rate cap (bytes/s) */
|
||||
float maxconn; /**< max connections per second */
|
||||
int waittime; /**< scheduled start time (wall-clock seconds) */
|
||||
hts_cachemode cache; /**< cache generation mode */
|
||||
// int aff_progress; // progress bar
|
||||
hts_boolean shell; /**< driven by a shell over stdin/stdout pipes */
|
||||
t_proxy proxy; /**< proxy configuration */
|
||||
@@ -446,12 +452,12 @@ struct httrackp {
|
||||
hts_boolean
|
||||
delayed_cached; // delayed type check can be cached to speedup updates
|
||||
hts_boolean mimehtml; /**< produce a single MIME/MHTML archive */
|
||||
hts_boolean user_agent_send; /**< send a User-Agent header */
|
||||
String user_agent; /**< User-Agent value (e.g. httrack/1.0) */
|
||||
String referer; /**< Referer value to send */
|
||||
String from; /**< From value to send */
|
||||
String path_log; /**< directory for cache and logs */
|
||||
String path_html; /**< output directory for the mirror */
|
||||
hts_boolean user_agent_send; /**< send a User-Agent header */
|
||||
String user_agent; /**< User-Agent value (e.g. httrack/1.0) */
|
||||
String referer; /**< Referer value to send */
|
||||
String from; /**< From value to send */
|
||||
String path_log; /**< directory for cache and logs */
|
||||
String path_html; /**< output directory for the mirror */
|
||||
String path_html_utf8; /**< output directory for the mirror, UTF-8 form */
|
||||
String path_bin; /**< directory for HTML templates */
|
||||
int retry; /**< extra retries on a failed transfer */
|
||||
@@ -459,49 +465,49 @@ struct httrackp {
|
||||
hts_boolean maketrack; /**< maintain an operations-statistics log */
|
||||
int parsejava; /**< Java/JS parsing mode; see htsparsejava_flags */
|
||||
int hostcontrol; /**< ban slow/timing-out hosts; see hts_hostcontrol bits */
|
||||
hts_boolean errpage; /**< generate an error page on 404 and similar */
|
||||
hts_boolean errpage; /**< generate an error page on 404 and similar */
|
||||
hts_boolean
|
||||
check_type; /**< probe unknown-type links (cgi/asp/dir) and follow moves
|
||||
*/
|
||||
hts_boolean all_in_cache; /**< keep all retrieved data in the cache */
|
||||
hts_robots robots; /**< robots.txt handling level */
|
||||
hts_boolean all_in_cache; /**< keep all retrieved data in the cache */
|
||||
hts_robots robots; /**< robots.txt handling level */
|
||||
hts_boolean external; /**< render external links as error pages */
|
||||
hts_boolean passprivacy; /**< strip passwords from external links */
|
||||
hts_boolean includequery; /**< include the query string in saved names */
|
||||
hts_boolean mirror_first_page; /**< only mirror the links of the first page */
|
||||
String sys_com; /**< system command to run */
|
||||
hts_boolean sys_com_exec; /**< actually execute sys_com */
|
||||
hts_boolean accept_cookie; /**< accept and send cookies */
|
||||
t_cookie *cookie; /**< cookie store */
|
||||
hts_boolean http10; /**< force HTTP/1.0 */
|
||||
hts_boolean nokeepalive; /**< disable keep-alive */
|
||||
hts_boolean nocompression; /**< disable content compression */
|
||||
hts_boolean sizehack; /**< treat same-size response as "updated" */
|
||||
hts_boolean urlhack; // force "url normalization" to avoid loops
|
||||
hts_boolean tolerant; /**< accept an incorrect Content-Length */
|
||||
String sys_com; /**< system command to run */
|
||||
hts_boolean sys_com_exec; /**< actually execute sys_com */
|
||||
hts_boolean accept_cookie; /**< accept and send cookies */
|
||||
t_cookie *cookie; /**< cookie store */
|
||||
hts_boolean http10; /**< force HTTP/1.0 */
|
||||
hts_boolean nokeepalive; /**< disable keep-alive */
|
||||
hts_boolean nocompression; /**< disable content compression */
|
||||
hts_boolean sizehack; /**< treat same-size response as "updated" */
|
||||
hts_boolean urlhack; // force "url normalization" to avoid loops
|
||||
hts_boolean tolerant; /**< accept an incorrect Content-Length */
|
||||
hts_boolean
|
||||
parseall; /**< parse aggressively, including unknown tags with links */
|
||||
hts_boolean parsedebug; /**< parser debug mode */
|
||||
hts_boolean norecatch; /**< do not re-fetch files the user deleted locally */
|
||||
hts_verbosedisplay verbosedisplay; /**< animated text progress display */
|
||||
String footer; /**< footer/info line injected into pages */
|
||||
int maxcache; /**< in-memory cache backing limit (bytes) */
|
||||
String footer; /**< footer/info line injected into pages */
|
||||
int maxcache; /**< in-memory cache backing limit (bytes) */
|
||||
// int maxcache_anticipate; // maximum links to anticipate (upper bound)
|
||||
hts_boolean ftp_proxy; /**< use the HTTP proxy for FTP too */
|
||||
String filelist; /**< file listing URLs to include */
|
||||
String urllist; /**< file listing filters to include */
|
||||
htsfilters filters; /**< filter pointers (+/-pattern rules) */
|
||||
hash_struct *hash; // hash structure
|
||||
lien_url **liens; // links
|
||||
int lien_tot; // top index of "links" heap (always out-of-range)
|
||||
lien_buffers *liensbuf; // links buffers
|
||||
robots_wizard *robotsptr; // robots ptr
|
||||
String lang_iso; /**< Accept-Language value (en, fr, ...) */
|
||||
String accept; // Accept:
|
||||
String headers; // Additional headers
|
||||
String mimedefs; // ext1=mimetype1\next2=mimetype2..
|
||||
String mod_blacklist; /**< blacklisted modules */
|
||||
hts_boolean convert_utf8; // filenames UTF-8 conversion (3.46)
|
||||
hts_boolean ftp_proxy; /**< use the HTTP proxy for FTP too */
|
||||
String filelist; /**< file listing URLs to include */
|
||||
String urllist; /**< file listing filters to include */
|
||||
htsfilters filters; /**< filter pointers (+/-pattern rules) */
|
||||
hash_struct *hash; // hash structure
|
||||
lien_url **liens; // links
|
||||
int lien_tot; // top index of "links" heap (always out-of-range)
|
||||
lien_buffers *liensbuf; // links buffers
|
||||
robots_wizard *robotsptr; // robots ptr
|
||||
String lang_iso; /**< Accept-Language value (en, fr, ...) */
|
||||
String accept; // Accept:
|
||||
String headers; // Additional headers
|
||||
String mimedefs; // ext1=mimetype1\next2=mimetype2..
|
||||
String mod_blacklist; /**< blacklisted modules */
|
||||
hts_boolean convert_utf8; // filenames UTF-8 conversion (3.46)
|
||||
//
|
||||
int maxlink; /**< max number of links */
|
||||
int maxfilter; /**< max number of filters */
|
||||
@@ -587,17 +593,17 @@ typedef struct htsrequest htsrequest;
|
||||
struct htsrequest {
|
||||
short int user_agent_send; /**< send a User-Agent header */
|
||||
short int http11; /**< sign the request as HTTP/1.1 rather than HTTP/1.0 */
|
||||
short int nokeepalive; /**< disable keep-alive */
|
||||
short int range_used; /**< a Range header is in use */
|
||||
short int nocompression; /**< disable compression */
|
||||
short int flush_garbage; // recycled
|
||||
const char *user_agent; /**< User-Agent value */
|
||||
const char *referer; /**< Referer value */
|
||||
const char *from; /**< From value */
|
||||
const char *lang_iso; /**< Accept-Language value */
|
||||
const char *accept; /**< Accept value */
|
||||
const char *headers; /**< extra request headers */
|
||||
htsrequest_proxy proxy; /**< proxy for this request */
|
||||
short int nokeepalive; /**< disable keep-alive */
|
||||
short int range_used; /**< a Range header is in use */
|
||||
short int nocompression; /**< disable compression */
|
||||
short int flush_garbage; // recycled
|
||||
const char *user_agent; /**< User-Agent value */
|
||||
const char *referer; /**< Referer value */
|
||||
const char *from; /**< From value */
|
||||
const char *lang_iso; /**< Accept-Language value */
|
||||
const char *accept; /**< Accept value */
|
||||
const char *headers; /**< extra request headers */
|
||||
htsrequest_proxy proxy; /**< proxy for this request */
|
||||
};
|
||||
|
||||
/* Result of a connection / header fetch. */
|
||||
@@ -629,8 +635,8 @@ struct htsblk {
|
||||
short int is_file; /**< 1 if a file descriptor rather than a socket */
|
||||
T_SOC soc; /**< socket id */
|
||||
SOCaddr address; /**< peer IP address */
|
||||
int address_size; // IP address structure length (unused internally)
|
||||
FILE *fp; /**< file handle for file:// */
|
||||
int address_size; // IP address structure length (unused internally)
|
||||
FILE *fp; /**< file handle for file:// */
|
||||
#if HTS_USEOPENSSL
|
||||
short int ssl; /**< nonzero if this is an SSL connection (https) */
|
||||
// BIO* ssl_soc; // SSL structure
|
||||
@@ -712,7 +718,7 @@ struct lien_back {
|
||||
LLint chunk_blocksize; /**< data size declared by the chunk */
|
||||
LLint compressed_size; /**< compressed size (stats only) */
|
||||
//
|
||||
//int links_index; // to access liens[links_index]
|
||||
// int links_index; // to access liens[links_index]
|
||||
//
|
||||
char info[256]; /**< status text, e.g. for FTP */
|
||||
int stop_ftp; /**< stop flag for FTP */
|
||||
|
||||
@@ -296,6 +296,48 @@ static const char *html_inline_safe(const char *src, char *dst, size_t size) {
|
||||
return dst;
|
||||
}
|
||||
|
||||
/* Byte before html, or a space sentinel at the buffer start where html[-1]
|
||||
would underflow; space reads as the word boundary the guards want there. */
|
||||
static HTS_INLINE char html_prevc(const char *html, const char *start) {
|
||||
return html > start ? html[-1] : ' ';
|
||||
}
|
||||
|
||||
/* True if [s, s+len) is exactly an HTTP method token (XHR.open's first
|
||||
argument is a method, not a URL: #218). Case-insensitive. */
|
||||
static int is_http_method(const char *s, size_t len) {
|
||||
static const char *const methods[] = {"GET", "POST", "PUT",
|
||||
"DELETE", "HEAD", "OPTIONS",
|
||||
"PATCH", "TRACE", NULL};
|
||||
int i;
|
||||
|
||||
for (i = 0; methods[i] != NULL; i++) {
|
||||
if (strlen(methods[i]) == len && strfield(s, methods[i]) == (int) len)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Percent-encode '(' and ')' in a link emitted into an unquoted url(...) (CSS
|
||||
or JS): a literal ')' closes the token early and the UA mis-parses the value
|
||||
(#163). The UA decodes %28/%29 back to the saved-on-disk name. */
|
||||
static void escape_url_parens(char *const s, const size_t size) {
|
||||
char BIGSTK buff[HTS_URLMAXSIZE * 2];
|
||||
size_t i, j;
|
||||
|
||||
for (i = 0, j = 0; s[i] != '\0' && j + 3 < size && j + 3 < sizeof(buff);
|
||||
i++) {
|
||||
if (s[i] == '(' || s[i] == ')') {
|
||||
buff[j++] = '%';
|
||||
buff[j++] = '2';
|
||||
buff[j++] = s[i] == '(' ? '8' : '9';
|
||||
} else {
|
||||
buff[j++] = s[i];
|
||||
}
|
||||
}
|
||||
buff[j] = '\0';
|
||||
strlcpybuff(s, buff, size);
|
||||
}
|
||||
|
||||
/* Main parser */
|
||||
int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
char catbuff[CATBUFF_SIZE];
|
||||
@@ -556,7 +598,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
if (opt->getmode & HTS_GETMODE_HTML) {
|
||||
p = strfield(html, "title");
|
||||
if (p) {
|
||||
if (*(html - 1) == '/')
|
||||
if (html_prevc(html, r->adr) == '/')
|
||||
p = 0; // /title
|
||||
} else {
|
||||
if (strfield(html, "/html"))
|
||||
@@ -1341,6 +1383,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
int can_avoid_quotes = 0;
|
||||
char quotes_replacement = '\0';
|
||||
int ensure_not_mime = 0;
|
||||
// .open(method,url): reject an HTTP-method first arg (#218)
|
||||
int ensure_not_method = 0;
|
||||
// @import: the quoted token is the URL; a trailing
|
||||
// media/supports/layer condition is not part of it
|
||||
int is_import = 0;
|
||||
@@ -1360,9 +1404,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
if (!nc)
|
||||
nc = strfield(html, ":location"); // javascript:location="doc"
|
||||
if (!nc) { // location="doc"
|
||||
if ((nc = strfield(html, "location"))
|
||||
&& !isspace(*(html - 1))
|
||||
)
|
||||
if ((nc = strfield(html, "location")) &&
|
||||
!isspace(html_prevc(html, r->adr)))
|
||||
nc = 0;
|
||||
}
|
||||
if (!nc)
|
||||
@@ -1372,6 +1415,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
expected = '('; // parenthèse
|
||||
expected_end = "),"; // fin: virgule ou parenthèse
|
||||
ensure_not_mime = 1; //* ensure the url is not a mime type */
|
||||
ensure_not_method = 1; // xhr.open: don't grab method
|
||||
}
|
||||
if (!nc)
|
||||
if ((nc = strfield(html, ".replace"))) { // window.replace("url")
|
||||
@@ -1383,7 +1427,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
expected = '('; // parenthèse
|
||||
expected_end = ")"; // fin: parenthèse
|
||||
}
|
||||
if (!nc && (nc = strfield(html, "url")) && (!isalnum(*(html - 1))) && *(html - 1) != '_') { // url(url)
|
||||
if (!nc && (nc = strfield(html, "url")) &&
|
||||
(!isalnum(html_prevc(html, r->adr))) &&
|
||||
html_prevc(html, r->adr) != '_') { // url(url)
|
||||
expected = '('; // parenthèse
|
||||
expected_end = ")"; // fin: parenthèse
|
||||
can_avoid_quotes = 1;
|
||||
@@ -1455,6 +1501,11 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
// XHR.open's "GET" etc. is a method, not a URL
|
||||
if (a != NULL && ensure_not_method &&
|
||||
is_http_method(a, (size_t) (c - a + 1))) {
|
||||
a = NULL;
|
||||
}
|
||||
// Check for bogus links (Vasiliy)
|
||||
if (a != NULL) {
|
||||
const size_t size = c - a + 1;
|
||||
@@ -2997,6 +3048,10 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
/* Never escape high-chars (we don't know the encoding!!) */
|
||||
inplace_escape_uri_utf(tempo, sizeof(tempo));
|
||||
|
||||
// unquoted url() (CSS/JS): keep parens escaped
|
||||
if (ending_p == ')')
|
||||
escape_url_parens(tempo, sizeof(tempo));
|
||||
|
||||
//if (!no_esc_utf)
|
||||
// escape_uri(tempo); // escape with %xx
|
||||
//else {
|
||||
|
||||
201
src/htssafe.h
201
src/htssafe.h
@@ -48,7 +48,7 @@ Please visit our Website: http://www.httrack.com
|
||||
/** Assert error callback. **/
|
||||
#ifndef HTS_DEF_FWSTRUCT_htsErrorCallback
|
||||
#define HTS_DEF_FWSTRUCT_htsErrorCallback
|
||||
typedef void (*htsErrorCallback) (const char *msg, const char *file, int line);
|
||||
typedef void (*htsErrorCallback)(const char *msg, const char *file, int line);
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@@ -58,12 +58,13 @@ HTSEXT_API htsErrorCallback hts_get_error_callback(void);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define HTSSAFE_ABORT_FUNCTION(A,B,C) do { \
|
||||
htsErrorCallback callback = hts_get_error_callback(); \
|
||||
if (callback != NULL) { \
|
||||
callback(A,B,C); \
|
||||
} \
|
||||
} while(0)
|
||||
#define HTSSAFE_ABORT_FUNCTION(A, B, C) \
|
||||
do { \
|
||||
htsErrorCallback callback = hts_get_error_callback(); \
|
||||
if (callback != NULL) { \
|
||||
callback(A, B, C); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
@@ -75,7 +76,8 @@ HTSEXT_API htsErrorCallback hts_get_error_callback(void);
|
||||
/**
|
||||
* Fatal assertion check.
|
||||
*/
|
||||
#define assertf__(exp, sexp, file, line) (void) ( (exp) || (abortf_(sexp, file, line), 0) )
|
||||
#define assertf__(exp, sexp, file, line) \
|
||||
(void) ((exp) || (abortf_(sexp, file, line), 0))
|
||||
|
||||
/**
|
||||
* Fatal assertion check.
|
||||
@@ -106,12 +108,13 @@ static HTS_UNUSED void abortf_(const char *exp, const char *file, int line) {
|
||||
#if (defined(__GNUC__) && !defined(__cplusplus))
|
||||
|
||||
/* Note: char[] and const char[] are compatible */
|
||||
#define HTS_IS_CHAR_BUFFER(VAR) ( __builtin_types_compatible_p ( typeof (VAR), char[] ) )
|
||||
#define HTS_IS_CHAR_BUFFER(VAR) \
|
||||
(__builtin_types_compatible_p(typeof(VAR), char[]))
|
||||
#else
|
||||
/* Note: a bit lame as char[8] won't be seen. */
|
||||
#define HTS_IS_CHAR_BUFFER(VAR) ( sizeof(VAR) != sizeof(char*) )
|
||||
#define HTS_IS_CHAR_BUFFER(VAR) (sizeof(VAR) != sizeof(char *))
|
||||
#endif
|
||||
#define HTS_IS_NOT_CHAR_BUFFER(VAR) ( ! HTS_IS_CHAR_BUFFER(VAR) )
|
||||
#define HTS_IS_NOT_CHAR_BUFFER(VAR) (!HTS_IS_CHAR_BUFFER(VAR))
|
||||
|
||||
/* Compile-time checks. */
|
||||
static HTS_UNUSED void htssafe_compile_time_check_(void) {
|
||||
@@ -201,60 +204,74 @@ static char *strncatbuff_ptr_(char *dest, const char *src, size_t n) {
|
||||
*/
|
||||
#if (defined(__GNUC__) && !defined(__cplusplus))
|
||||
|
||||
#define strncatbuff(A, B, N) __builtin_choose_expr( HTS_IS_CHAR_BUFFER(A), \
|
||||
strncat_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), N, \
|
||||
"overflow while appending '" #B "' to '"#A"'", __FILE__, __LINE__), \
|
||||
strncatbuff_ptr_((A), (B), (N)) )
|
||||
#define strncatbuff(A, B, N) \
|
||||
__builtin_choose_expr( \
|
||||
HTS_IS_CHAR_BUFFER(A), \
|
||||
strncat_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), N, \
|
||||
"overflow while appending '" #B "' to '" #A "'", __FILE__, \
|
||||
__LINE__), \
|
||||
strncatbuff_ptr_((A), (B), (N)))
|
||||
#else
|
||||
#define strncatbuff(A, B, N) \
|
||||
( HTS_IS_NOT_CHAR_BUFFER(A) \
|
||||
? strncat(A, B, N) \
|
||||
: strncat_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), N, \
|
||||
"overflow while appending '" #B "' to '"#A"'", __FILE__, __LINE__) )
|
||||
#define strncatbuff(A, B, N) \
|
||||
(HTS_IS_NOT_CHAR_BUFFER(A) \
|
||||
? strncat(A, B, N) \
|
||||
: strncat_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), N, \
|
||||
"overflow while appending '" #B "' to '" #A "'", \
|
||||
__FILE__, __LINE__))
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Append characters of "B" to "A".
|
||||
* If "A" is a char[] variable whose size is not sizeof(char*), then the size
|
||||
* If "A" is a char[] variable whose size is not sizeof(char*), then the size
|
||||
* is assumed to be the capacity of this array.
|
||||
*/
|
||||
#if (defined(__GNUC__) && !defined(__cplusplus))
|
||||
|
||||
#define strcatbuff(A, B) __builtin_choose_expr( HTS_IS_CHAR_BUFFER(A), \
|
||||
strncat_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), (size_t) -1, \
|
||||
"overflow while appending '" #B "' to '"#A"'", __FILE__, __LINE__), \
|
||||
strcatbuff_ptr_((A), (B)) )
|
||||
#define strcatbuff(A, B) \
|
||||
__builtin_choose_expr( \
|
||||
HTS_IS_CHAR_BUFFER(A), \
|
||||
strncat_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), \
|
||||
(size_t) -1, \
|
||||
"overflow while appending '" #B "' to '" #A "'", __FILE__, \
|
||||
__LINE__), \
|
||||
strcatbuff_ptr_((A), (B)))
|
||||
#else
|
||||
#define strcatbuff(A, B) \
|
||||
( HTS_IS_NOT_CHAR_BUFFER(A) \
|
||||
? strcat(A, B) \
|
||||
: strncat_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), (size_t) -1, \
|
||||
"overflow while appending '" #B "' to '"#A"'", __FILE__, __LINE__) )
|
||||
#define strcatbuff(A, B) \
|
||||
(HTS_IS_NOT_CHAR_BUFFER(A) \
|
||||
? strcat(A, B) \
|
||||
: strncat_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), \
|
||||
(size_t) -1, \
|
||||
"overflow while appending '" #B "' to '" #A "'", \
|
||||
__FILE__, __LINE__))
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Copy characters from "B" to "A".
|
||||
* If "A" is a char[] variable whose size is not sizeof(char*), then the size
|
||||
* If "A" is a char[] variable whose size is not sizeof(char*), then the size
|
||||
* is assumed to be the capacity of this array.
|
||||
*/
|
||||
#if (defined(__GNUC__) && !defined(__cplusplus))
|
||||
|
||||
#define strcpybuff(A, B) __builtin_choose_expr( HTS_IS_CHAR_BUFFER(A), \
|
||||
strcpy_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), \
|
||||
"overflow while copying '" #B "' to '"#A"'", __FILE__, __LINE__), \
|
||||
strcpybuff_ptr_((A), (B)) )
|
||||
#define strcpybuff(A, B) \
|
||||
__builtin_choose_expr( \
|
||||
HTS_IS_CHAR_BUFFER(A), \
|
||||
strcpy_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), \
|
||||
"overflow while copying '" #B "' to '" #A "'", __FILE__, \
|
||||
__LINE__), \
|
||||
strcpybuff_ptr_((A), (B)))
|
||||
#else
|
||||
#define strcpybuff(A, B) \
|
||||
( HTS_IS_NOT_CHAR_BUFFER(A) \
|
||||
? strcpy(A, B) \
|
||||
: strcpy_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), \
|
||||
"overflow while copying '" #B "' to '"#A"'", __FILE__, __LINE__) )
|
||||
#define strcpybuff(A, B) \
|
||||
(HTS_IS_NOT_CHAR_BUFFER(A) \
|
||||
? strcpy(A, B) \
|
||||
: strcpy_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), \
|
||||
"overflow while copying '" #B "' to '" #A "'", __FILE__, \
|
||||
__LINE__))
|
||||
#endif
|
||||
|
||||
/*
|
||||
@@ -268,10 +285,10 @@ static char *strncatbuff_ptr_(char *dest, const char *src, size_t n) {
|
||||
/**
|
||||
* Append characters of "B" to "A", "A" having a maximum capacity of "S".
|
||||
*/
|
||||
#define strlcatbuff(A, B, S) \
|
||||
strncat_safe_(A, S, B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), (size_t) -1, \
|
||||
"overflow while appending '" #B "' to '"#A"'", __FILE__, __LINE__)
|
||||
#define strlcatbuff(A, B, S) \
|
||||
strncat_safe_(A, S, B, HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), \
|
||||
(size_t) -1, "overflow while appending '" #B "' to '" #A "'", \
|
||||
__FILE__, __LINE__)
|
||||
|
||||
/**
|
||||
* Append at most "N" characters of "B" to "A", "A" having a maximum capacity
|
||||
@@ -285,17 +302,18 @@ static char *strncatbuff_ptr_(char *dest, const char *src, size_t n) {
|
||||
/**
|
||||
* Copy characters of "B" to "A", "A" having a maximum capacity of "S".
|
||||
*/
|
||||
#define strlcpybuff(A, B, S) \
|
||||
strcpy_safe_(A, S, B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), \
|
||||
"overflow while copying '" #B "' to '"#A"'", __FILE__, __LINE__)
|
||||
#define strlcpybuff(A, B, S) \
|
||||
strcpy_safe_(A, S, B, HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), \
|
||||
"overflow while copying '" #B "' to '" #A "'", __FILE__, \
|
||||
__LINE__)
|
||||
|
||||
/** strnlen replacement (autotools). **/
|
||||
#if ( ! defined(_WIN32) && ! defined(HAVE_STRNLEN) )
|
||||
#if (!defined(_WIN32) && !defined(HAVE_STRNLEN))
|
||||
|
||||
static HTS_UNUSED size_t strnlen(const char *s, size_t maxlen) {
|
||||
size_t i;
|
||||
for(i = 0 ; i < maxlen && s[i] != '\0' ; i++) ;
|
||||
for (i = 0; i < maxlen && s[i] != '\0'; i++)
|
||||
;
|
||||
return i;
|
||||
}
|
||||
#endif
|
||||
@@ -304,13 +322,14 @@ static HTS_UNUSED size_t strnlen(const char *s, size_t maxlen) {
|
||||
Aborts if source is NULL or has no NUL within that capacity. The sentinel
|
||||
sizeof_source == (size_t)-1 means "capacity unknown", and falls back to the
|
||||
unbounded strlen (used when the source is a pointer rather than an array). */
|
||||
static HTS_INLINE HTS_UNUSED size_t strlen_safe_(const char *source, const size_t sizeof_source,
|
||||
static HTS_INLINE HTS_UNUSED size_t strlen_safe_(const char *source,
|
||||
const size_t sizeof_source,
|
||||
const char *file, int line) {
|
||||
size_t size;
|
||||
assertf_( source != NULL, file, line );
|
||||
size = sizeof_source != (size_t) -1
|
||||
? strnlen(source, sizeof_source) : strlen(source);
|
||||
assertf_( size < sizeof_source, file, line );
|
||||
assertf_(source != NULL, file, line);
|
||||
size = sizeof_source != (size_t) -1 ? strnlen(source, sizeof_source)
|
||||
: strlen(source);
|
||||
assertf_(size < sizeof_source, file, line);
|
||||
return size;
|
||||
}
|
||||
|
||||
@@ -319,10 +338,10 @@ static HTS_INLINE HTS_UNUSED size_t strlen_safe_(const char *source, const size_
|
||||
source's capacity or (size_t)-1 if unknown. Aborts if the result (existing
|
||||
dest length + appended bytes + NUL) would not fit sizeof_dest: this NEVER
|
||||
truncates. Always NUL-terminates on success. */
|
||||
static HTS_INLINE HTS_UNUSED char* strncat_safe_(char *const dest, const size_t sizeof_dest,
|
||||
const char *const source, const size_t sizeof_source,
|
||||
const size_t n,
|
||||
const char *exp, const char *file, int line) {
|
||||
static HTS_INLINE HTS_UNUSED char *
|
||||
strncat_safe_(char *const dest, const size_t sizeof_dest,
|
||||
const char *const source, const size_t sizeof_source,
|
||||
const size_t n, const char *exp, const char *file, int line) {
|
||||
const size_t source_len = strlen_safe_(source, sizeof_source, file, line);
|
||||
const size_t dest_len = strlen_safe_(dest, sizeof_dest, file, line);
|
||||
/* note: "size_t is an unsigned integral type" ((size_t) -1 is positive) */
|
||||
@@ -337,12 +356,14 @@ static HTS_INLINE HTS_UNUSED char* strncat_safe_(char *const dest, const size_t
|
||||
/* Core bounded copy: empties dest then appends all of source via
|
||||
strncat_safe_. sizeof_dest is dest's total capacity (NUL included). Aborts
|
||||
(no truncation) if source plus its NUL would not fit. */
|
||||
static HTS_INLINE HTS_UNUSED char* strcpy_safe_(char *const dest, const size_t sizeof_dest,
|
||||
const char *const source, const size_t sizeof_source,
|
||||
const char *exp, const char *file, int line) {
|
||||
static HTS_INLINE HTS_UNUSED char *
|
||||
strcpy_safe_(char *const dest, const size_t sizeof_dest,
|
||||
const char *const source, const size_t sizeof_source,
|
||||
const char *exp, const char *file, int line) {
|
||||
assertf_(sizeof_dest != 0, file, line);
|
||||
dest[0] = '\0';
|
||||
return strncat_safe_(dest, sizeof_dest, source, sizeof_source, (size_t) -1, exp, file, line);
|
||||
return strncat_safe_(dest, sizeof_dest, source, sizeof_source, (size_t) -1,
|
||||
exp, file, line);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -360,9 +381,9 @@ static HTS_INLINE HTS_UNUSED char* strcpy_safe_(char *const dest, const size_t s
|
||||
* htsbuff_ptr(). The buffer is kept NUL-terminated; htsbuff_str() returns it.
|
||||
*/
|
||||
typedef struct {
|
||||
char *buf; /* backing buffer (kept NUL-terminated) */
|
||||
size_t cap; /* total capacity of buf, including the NUL */
|
||||
size_t len; /* current length, excluding the NUL */
|
||||
char *buf; /* backing buffer (kept NUL-terminated) */
|
||||
size_t cap; /* total capacity of buf, including the NUL */
|
||||
size_t len; /* current length, excluding the NUL */
|
||||
} htsbuff;
|
||||
|
||||
static HTS_INLINE HTS_UNUSED htsbuff htsbuff_ptr_(char *buf, size_t cap) {
|
||||
@@ -384,23 +405,29 @@ static HTS_INLINE HTS_UNUSED htsbuff htsbuff_ptr_(char *buf, size_t cap) {
|
||||
#if (defined(__GNUC__) && !defined(__cplusplus))
|
||||
|
||||
/* 0 for an array, a -1 array-size compile error for a pointer. */
|
||||
#define htsbuff_must_be_array_(A) \
|
||||
(sizeof(char[1 - 2 * !!__builtin_types_compatible_p(typeof(A), typeof(&(A)[0]))]) - 1)
|
||||
#define htsbuff_must_be_array_(A) \
|
||||
(sizeof(char[1 - 2 * !!__builtin_types_compatible_p(typeof(A), \
|
||||
typeof(&(A)[0]))]) - \
|
||||
1)
|
||||
|
||||
#define htsbuff_array(ARR) htsbuff_ptr_((ARR), sizeof(ARR) + htsbuff_must_be_array_(ARR))
|
||||
#define htsbuff_array(ARR) \
|
||||
htsbuff_ptr_((ARR), sizeof(ARR) + htsbuff_must_be_array_(ARR))
|
||||
#else
|
||||
#define htsbuff_array(ARR) htsbuff_ptr_((ARR), sizeof(ARR))
|
||||
#endif
|
||||
/** Builder over pointer P of known capacity N (N includes the NUL). */
|
||||
#define htsbuff_ptr(P, N) htsbuff_ptr_((P), (N))
|
||||
#define htsbuff_ptr(P, N) htsbuff_ptr_((P), (N))
|
||||
|
||||
/** Append at most n characters of s (stopping at its NUL). Aborts on overflow. */
|
||||
static HTS_INLINE HTS_UNUSED void htsbuff_catn(htsbuff *b, const char *s, size_t n) {
|
||||
/** Append at most n characters of s (stopping at its NUL). Aborts on overflow.
|
||||
*/
|
||||
static HTS_INLINE HTS_UNUSED void htsbuff_catn(htsbuff *b, const char *s,
|
||||
size_t n) {
|
||||
const size_t add = strnlen(s, n);
|
||||
/* Overflow-safe: keep the (potentially huge) 'add' alone on one side. The
|
||||
maintained invariant len < cap makes 'cap - len' >= 1 (no underflow), so
|
||||
'add < cap - len' cannot wrap the way 'len + add < cap' could. */
|
||||
assertf__(add < b->cap - b->len, "htsbuff append overflow", __FILE__, __LINE__);
|
||||
assertf__(add < b->cap - b->len, "htsbuff append overflow", __FILE__,
|
||||
__LINE__);
|
||||
memcpy(b->buf + b->len, s, add);
|
||||
b->len += add;
|
||||
b->buf[b->len] = '\0';
|
||||
@@ -433,15 +460,21 @@ static HTS_INLINE HTS_UNUSED const char *htsbuff_str(const htsbuff *b) {
|
||||
added bounds checking. freet() also NULLs the freed pointer and tolerates
|
||||
NULL. memcpybuff() despite the name is a raw memcpy: the caller owns the
|
||||
bounds. */
|
||||
#define malloct(A) malloc(A)
|
||||
#define malloct(A) malloc(A)
|
||||
|
||||
#define calloct(A,B) calloc((A), (B))
|
||||
#define calloct(A, B) calloc((A), (B))
|
||||
|
||||
#define freet(A) do { if ((A) != NULL) { free(A); (A) = NULL; } } while(0)
|
||||
#define freet(A) \
|
||||
do { \
|
||||
if ((A) != NULL) { \
|
||||
free(A); \
|
||||
(A) = NULL; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define strdupt(A) strdup(A)
|
||||
#define strdupt(A) strdup(A)
|
||||
|
||||
#define realloct(A,B) realloc(A, B)
|
||||
#define realloct(A, B) realloc(A, B)
|
||||
|
||||
#define memcpybuff(A, B, N) memcpy((A), (B), (N))
|
||||
|
||||
|
||||
280
src/htsstrings.h
280
src/htsstrings.h
@@ -41,11 +41,11 @@ Please visit our Website: http://www.httrack.com
|
||||
/* GCC extension */
|
||||
#ifndef HTS_UNUSED
|
||||
#ifdef __GNUC__
|
||||
#define HTS_UNUSED __attribute__ ((unused))
|
||||
#define HTS_UNUSED __attribute__((unused))
|
||||
|
||||
#define HTS_STATIC static __attribute__ ((unused))
|
||||
#define HTS_STATIC static __attribute__((unused))
|
||||
|
||||
#define HTS_PRINTF_FUN(fmt, arg) __attribute__ ((format (printf, fmt, arg)))
|
||||
#define HTS_PRINTF_FUN(fmt, arg) __attribute__((format(printf, fmt, arg)))
|
||||
#else
|
||||
#define HTS_UNUSED
|
||||
#define HTS_STATIC static
|
||||
@@ -60,6 +60,7 @@ typedef struct String String;
|
||||
#endif
|
||||
#ifndef HTS_DEF_STRUCT_String
|
||||
#define HTS_DEF_STRUCT_String
|
||||
|
||||
/**
|
||||
* Growable owned string.
|
||||
*
|
||||
@@ -86,7 +87,7 @@ struct String {
|
||||
|
||||
/** Allocator **/
|
||||
#ifndef STRING_REALLOC
|
||||
#define STRING_REALLOC(BUFF, SIZE) ( (char*) realloc(BUFF, SIZE) )
|
||||
#define STRING_REALLOC(BUFF, SIZE) ((char *) realloc(BUFF, SIZE))
|
||||
|
||||
#define STRING_FREE(BUFF) free(BUFF)
|
||||
#endif
|
||||
@@ -96,11 +97,11 @@ struct String {
|
||||
#endif
|
||||
|
||||
/** Initializer for an empty String (NULL buffer). Use to declare or reset. **/
|
||||
#define STRING_EMPTY { (char*) NULL, 0, 0 }
|
||||
#define STRING_EMPTY {(char *) NULL, 0, 0}
|
||||
|
||||
/** Read-only buffer pointer. NULL until the String has been written to.
|
||||
Invalidated by any subsequent growing operation. **/
|
||||
#define StringBuff(BLK) ( (const char*) ((BLK).buffer_) )
|
||||
#define StringBuff(BLK) ((const char *) ((BLK).buffer_))
|
||||
|
||||
/** Read/write buffer pointer. Same NULL/invalidation rules as StringBuff. **/
|
||||
#define StringBuffRW(BLK) ((BLK).buffer_)
|
||||
@@ -109,56 +110,60 @@ struct String {
|
||||
#define StringLength(BLK) ((BLK).length_)
|
||||
|
||||
/** Non-zero if the String holds at least one byte. **/
|
||||
#define StringNotEmpty(BLK) ( StringLength(BLK) > 0 )
|
||||
#define StringNotEmpty(BLK) (StringLength(BLK) > 0)
|
||||
|
||||
/** Allocated capacity in bytes, including room for the terminating NUL. **/
|
||||
#define StringCapacity(BLK) ((BLK).capacity_)
|
||||
|
||||
/** Byte at POS (read). No bounds check; POS must be < StringLength. **/
|
||||
#define StringSub(BLK, POS) ( StringBuff(BLK)[POS] )
|
||||
#define StringSub(BLK, POS) (StringBuff(BLK)[POS])
|
||||
|
||||
/** Byte at POS (read/write). No bounds check; POS must be < StringLength. **/
|
||||
#define StringSubRW(BLK, POS) ( StringBuffRW(BLK)[POS] )
|
||||
#define StringSubRW(BLK, POS) (StringBuffRW(BLK)[POS])
|
||||
|
||||
/** Subcharacter (read/write) **/
|
||||
#define StringSubRW(BLK, POS) ( StringBuffRW(BLK)[POS] )
|
||||
#define StringSubRW(BLK, POS) (StringBuffRW(BLK)[POS])
|
||||
|
||||
/** Byte POS positions from the end (read). POS==1 is the last byte. **/
|
||||
#define StringRight(BLK, POS) ( StringBuff(BLK)[StringLength(BLK) - POS] )
|
||||
#define StringRight(BLK, POS) (StringBuff(BLK)[StringLength(BLK) - POS])
|
||||
|
||||
/** Byte POS positions from the end (read/write). POS==1 is the last byte. **/
|
||||
#define StringRightRW(BLK, POS) ( StringBuffRW(BLK)[StringLength(BLK) - POS] )
|
||||
#define StringRightRW(BLK, POS) (StringBuffRW(BLK)[StringLength(BLK) - POS])
|
||||
|
||||
/** Drop the last byte and re-terminate. Undefined if the String is empty
|
||||
(no length check; would underflow). **/
|
||||
#define StringPopRight(BLK) do { \
|
||||
StringBuffRW(BLK)[--StringLength(BLK)] = '\0'; \
|
||||
} while(0)
|
||||
#define StringPopRight(BLK) \
|
||||
do { \
|
||||
StringBuffRW(BLK)[--StringLength(BLK)] = '\0'; \
|
||||
} while (0)
|
||||
|
||||
/** Grow so capacity_ >= CAPACITY (total bytes, including the NUL). May realloc
|
||||
(invalidating prior buffer pointers); aborts via STRING_ASSERT on OOM.
|
||||
Never shrinks. **/
|
||||
#define StringRoomTotal(BLK, CAPACITY) do { \
|
||||
const size_t capacity_ = (size_t) (CAPACITY); \
|
||||
while ((BLK).capacity_ < capacity_) { \
|
||||
if ((BLK).capacity_ < 16) { \
|
||||
(BLK).capacity_ = 16; \
|
||||
} else { \
|
||||
(BLK).capacity_ *= 2; \
|
||||
} \
|
||||
(BLK).buffer_ = STRING_REALLOC((BLK).buffer_, (BLK).capacity_); \
|
||||
STRING_ASSERT((BLK).buffer_ != NULL); \
|
||||
} \
|
||||
} while(0)
|
||||
#define StringRoomTotal(BLK, CAPACITY) \
|
||||
do { \
|
||||
const size_t capacity_ = (size_t) (CAPACITY); \
|
||||
while ((BLK).capacity_ < capacity_) { \
|
||||
if ((BLK).capacity_ < 16) { \
|
||||
(BLK).capacity_ = 16; \
|
||||
} else { \
|
||||
(BLK).capacity_ *= 2; \
|
||||
} \
|
||||
(BLK).buffer_ = STRING_REALLOC((BLK).buffer_, (BLK).capacity_); \
|
||||
STRING_ASSERT((BLK).buffer_ != NULL); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/** Reserve room for SIZE more bytes beyond the current length (plus the NUL).
|
||||
May realloc, invalidating prior buffer pointers. **/
|
||||
#define StringRoom(BLK, SIZE) StringRoomTotal(BLK, StringLength(BLK) + (SIZE) + 1)
|
||||
#define StringRoom(BLK, SIZE) \
|
||||
StringRoomTotal(BLK, StringLength(BLK) + (SIZE) + 1)
|
||||
|
||||
/** Reserve room for SIZE more bytes and return the (post-realloc) RW buffer,
|
||||
for appending in place. Does not update length_; the caller must. **/
|
||||
#define StringBuffN(BLK, SIZE) StringBuffN_(&(BLK), SIZE)
|
||||
HTS_STATIC char *StringBuffN_(String * blk, int size) {
|
||||
|
||||
HTS_STATIC char *StringBuffN_(String *blk, int size) {
|
||||
StringRoom(*blk, size);
|
||||
return StringBuffRW(*blk);
|
||||
}
|
||||
@@ -166,40 +171,44 @@ HTS_STATIC char *StringBuffN_(String * blk, int size) {
|
||||
/** Zero the fields (NULL buffer, no allocation). Use on an uninitialized
|
||||
String only; does NOT free an existing buffer (use StringFree to reset
|
||||
an owned one), so calling it on a live String leaks. **/
|
||||
#define StringInit(BLK) do { \
|
||||
(BLK).buffer_ = NULL; \
|
||||
(BLK).capacity_ = 0; \
|
||||
(BLK).length_ = 0; \
|
||||
} while(0)
|
||||
#define StringInit(BLK) \
|
||||
do { \
|
||||
(BLK).buffer_ = NULL; \
|
||||
(BLK).capacity_ = 0; \
|
||||
(BLK).length_ = 0; \
|
||||
} while (0)
|
||||
|
||||
/** Truncate to length 0, keeping the allocation. Forces a non-NULL buffer
|
||||
(allocates if empty) and writes the leading NUL, so StringBuff is "". **/
|
||||
#define StringClear(BLK) do { \
|
||||
(BLK).length_ = 0; \
|
||||
StringRoom(BLK, 0); \
|
||||
(BLK).buffer_[0] = '\0'; \
|
||||
} while(0)
|
||||
#define StringClear(BLK) \
|
||||
do { \
|
||||
(BLK).length_ = 0; \
|
||||
StringRoom(BLK, 0); \
|
||||
(BLK).buffer_[0] = '\0'; \
|
||||
} while (0)
|
||||
|
||||
/** Set length_ to SIZE, or to strlen(buffer_) if SIZE is negative. Caller
|
||||
asserts SIZE fits the existing content; does not (re)allocate. **/
|
||||
#define StringSetLength(BLK, SIZE) do { \
|
||||
if (SIZE >= 0) { \
|
||||
(BLK).length_ = SIZE; \
|
||||
} else { \
|
||||
(BLK).length_ = strlen((BLK).buffer_); \
|
||||
} \
|
||||
} while(0)
|
||||
#define StringSetLength(BLK, SIZE) \
|
||||
do { \
|
||||
if (SIZE >= 0) { \
|
||||
(BLK).length_ = SIZE; \
|
||||
} else { \
|
||||
(BLK).length_ = strlen((BLK).buffer_); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/** Release the owned buffer and reset to the empty state (NULL buffer).
|
||||
Idempotent; safe on an already-empty String. **/
|
||||
#define StringFree(BLK) do { \
|
||||
if ((BLK).buffer_ != NULL) { \
|
||||
STRING_FREE((BLK).buffer_); \
|
||||
(BLK).buffer_ = NULL; \
|
||||
} \
|
||||
(BLK).capacity_ = 0; \
|
||||
(BLK).length_ = 0; \
|
||||
} while(0)
|
||||
#define StringFree(BLK) \
|
||||
do { \
|
||||
if ((BLK).buffer_ != NULL) { \
|
||||
STRING_FREE((BLK).buffer_); \
|
||||
(BLK).buffer_ = NULL; \
|
||||
} \
|
||||
(BLK).capacity_ = 0; \
|
||||
(BLK).length_ = 0; \
|
||||
} while (0)
|
||||
|
||||
/** Take ownership of a NUL-terminated heap string STR (the String will free
|
||||
it). Frees any current buffer first. STR MUST have been allocated by an
|
||||
@@ -207,48 +216,52 @@ HTS_STATIC char *StringBuffN_(String * blk, int size) {
|
||||
freed or used by the caller afterwards. length_/capacity_ are set to
|
||||
strlen(STR) (capacity_ here excludes the NUL, so the next append reallocs).
|
||||
**/
|
||||
#define StringSetBuffer(BLK, STR) do { \
|
||||
size_t len__ = strlen( STR ); \
|
||||
StringFree(BLK); \
|
||||
(BLK).buffer_ = ( STR ); \
|
||||
(BLK).capacity_ = len__; \
|
||||
(BLK).length_ = len__; \
|
||||
} while(0)
|
||||
#define StringSetBuffer(BLK, STR) \
|
||||
do { \
|
||||
size_t len__ = strlen(STR); \
|
||||
StringFree(BLK); \
|
||||
(BLK).buffer_ = (STR); \
|
||||
(BLK).capacity_ = len__; \
|
||||
(BLK).length_ = len__; \
|
||||
} while (0)
|
||||
|
||||
/** Append SIZE raw bytes from STR (NULs allowed as data). Grows as needed and
|
||||
re-terminates with a NUL after the appended bytes. STR must not alias
|
||||
BLK's buffer (a realloc would invalidate it). **/
|
||||
#define StringMemcat(BLK, STR, SIZE) do { \
|
||||
const char* str_mc_ = (STR); \
|
||||
const size_t size_mc_ = (size_t) (SIZE); \
|
||||
StringRoom(BLK, size_mc_); \
|
||||
if (size_mc_ > 0) { \
|
||||
memcpy((BLK).buffer_ + (BLK).length_, str_mc_, size_mc_); \
|
||||
(BLK).length_ += size_mc_; \
|
||||
} \
|
||||
*((BLK).buffer_ + (BLK).length_) = '\0'; \
|
||||
} while(0)
|
||||
#define StringMemcat(BLK, STR, SIZE) \
|
||||
do { \
|
||||
const char *str_mc_ = (STR); \
|
||||
const size_t size_mc_ = (size_t) (SIZE); \
|
||||
StringRoom(BLK, size_mc_); \
|
||||
if (size_mc_ > 0) { \
|
||||
memcpy((BLK).buffer_ + (BLK).length_, str_mc_, size_mc_); \
|
||||
(BLK).length_ += size_mc_; \
|
||||
} \
|
||||
*((BLK).buffer_ + (BLK).length_) = '\0'; \
|
||||
} while (0)
|
||||
|
||||
/** Replace content with SIZE raw bytes from STR (NULs allowed as data).
|
||||
Same non-aliasing requirement as StringMemcat. **/
|
||||
#define StringMemcpy(BLK, STR, SIZE) do { \
|
||||
(BLK).length_ = 0; \
|
||||
StringMemcat(BLK, STR, SIZE); \
|
||||
} while(0)
|
||||
#define StringMemcpy(BLK, STR, SIZE) \
|
||||
do { \
|
||||
(BLK).length_ = 0; \
|
||||
StringMemcat(BLK, STR, SIZE); \
|
||||
} while (0)
|
||||
|
||||
/** Append one byte and re-terminate. Grows as needed. **/
|
||||
#define StringAddchar(BLK, c) do { \
|
||||
String * const s__ = &(BLK); \
|
||||
char c__ = (c); \
|
||||
StringRoom(*s__, 1); \
|
||||
StringBuffRW(*s__)[StringLength(*s__)++] = c__; \
|
||||
StringBuffRW(*s__)[StringLength(*s__) ] = 0; \
|
||||
} while(0)
|
||||
#define StringAddchar(BLK, c) \
|
||||
do { \
|
||||
String *const s__ = &(BLK); \
|
||||
char c__ = (c); \
|
||||
StringRoom(*s__, 1); \
|
||||
StringBuffRW(*s__)[StringLength(*s__)++] = c__; \
|
||||
StringBuffRW(*s__)[StringLength(*s__)] = 0; \
|
||||
} while (0)
|
||||
|
||||
/** Hand the buffer to the caller and reset the String to empty (NULL buffer).
|
||||
The returned pointer is now owned by the caller, who must STRING_FREE() it.
|
||||
Returns NULL if the String was empty. **/
|
||||
HTS_STATIC char *StringAcquire(String * blk) {
|
||||
HTS_STATIC char *StringAcquire(String *blk) {
|
||||
char *buff = StringBuffRW(*blk);
|
||||
|
||||
StringBuffRW(*blk) = NULL;
|
||||
@@ -259,7 +272,7 @@ HTS_STATIC char *StringAcquire(String * blk) {
|
||||
|
||||
/** Return an independent deep copy of *src (its own allocation). The caller
|
||||
owns the result and must StringFree it. **/
|
||||
HTS_STATIC String StringDup(const String * src) {
|
||||
HTS_STATIC String StringDup(const String *src) {
|
||||
String s = STRING_EMPTY;
|
||||
|
||||
StringMemcat(s, StringBuff(*src), StringLength(*src));
|
||||
@@ -270,7 +283,7 @@ HTS_STATIC String StringDup(const String * src) {
|
||||
ownership transfers and the caller keeps no dangling alias. Frees any
|
||||
current buffer first. *str MUST be allocator-compatible (see
|
||||
StringSetBuffer). No-op if str or *str is NULL. **/
|
||||
HTS_STATIC void StringAttach(String * blk, char **str) {
|
||||
HTS_STATIC void StringAttach(String *blk, char **str) {
|
||||
StringFree(*blk);
|
||||
if (str != NULL && *str != NULL) {
|
||||
StringBuffRW(*blk) = *str;
|
||||
@@ -281,43 +294,46 @@ HTS_STATIC void StringAttach(String * blk, char **str) {
|
||||
|
||||
/** Append the C string STR (up to its NUL). No-op if STR is NULL. STR must not
|
||||
alias BLK's buffer. **/
|
||||
#define StringCat(BLK, STR) do { \
|
||||
const char *const str__ = ( STR ); \
|
||||
if (str__ != NULL) { \
|
||||
const size_t size__ = strlen(str__); \
|
||||
StringMemcat(BLK, str__, size__); \
|
||||
} \
|
||||
} while(0)
|
||||
#define StringCat(BLK, STR) \
|
||||
do { \
|
||||
const char *const str__ = (STR); \
|
||||
if (str__ != NULL) { \
|
||||
const size_t size__ = strlen(str__); \
|
||||
StringMemcat(BLK, str__, size__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/** Append at most SIZE leading bytes of the C string STR. No-op if STR is
|
||||
NULL. STR must not alias BLK's buffer. **/
|
||||
#define StringCatN(BLK, STR, SIZE) do { \
|
||||
const char *str__ = ( STR ); \
|
||||
if (str__ != NULL) { \
|
||||
size_t size__ = strlen(str__); \
|
||||
if (size__ > (SIZE)) { \
|
||||
size__ = (SIZE); \
|
||||
} \
|
||||
StringMemcat(BLK, str__, size__); \
|
||||
} \
|
||||
} while(0)
|
||||
#define StringCatN(BLK, STR, SIZE) \
|
||||
do { \
|
||||
const char *str__ = (STR); \
|
||||
if (str__ != NULL) { \
|
||||
size_t size__ = strlen(str__); \
|
||||
if (size__ > (SIZE)) { \
|
||||
size__ = (SIZE); \
|
||||
} \
|
||||
StringMemcat(BLK, str__, size__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/** Replace content with at most SIZE leading bytes of the C string STR.
|
||||
If STR is NULL, clears to "". STR must not alias BLK's buffer. **/
|
||||
#define StringCopyN(BLK, STR, SIZE) do { \
|
||||
const char *str__ = ( STR ); \
|
||||
const size_t usize__ = (SIZE); \
|
||||
(BLK).length_ = 0; \
|
||||
if (str__ != NULL) { \
|
||||
size_t size__ = strlen(str__); \
|
||||
if (size__ > usize__ ) { \
|
||||
size__ = usize__; \
|
||||
} \
|
||||
StringMemcat(BLK, str__, size__); \
|
||||
} else { \
|
||||
StringClear(BLK); \
|
||||
} \
|
||||
} while(0)
|
||||
#define StringCopyN(BLK, STR, SIZE) \
|
||||
do { \
|
||||
const char *str__ = (STR); \
|
||||
const size_t usize__ = (SIZE); \
|
||||
(BLK).length_ = 0; \
|
||||
if (str__ != NULL) { \
|
||||
size_t size__ = strlen(str__); \
|
||||
if (size__ > usize__) { \
|
||||
size__ = usize__; \
|
||||
} \
|
||||
StringMemcat(BLK, str__, size__); \
|
||||
} else { \
|
||||
StringClear(BLK); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/** Replace blk's content with a copy of String blk2. blk and blk2 must be
|
||||
distinct Strings (use StringCopyOverlapped if they may be the same). **/
|
||||
@@ -326,23 +342,25 @@ HTS_STATIC void StringAttach(String * blk, char **str) {
|
||||
/** Replace content with a copy of the C string STR. If STR is NULL, clears to
|
||||
"". STR must not alias BLK's buffer (use StringCopyOverlapped if it might).
|
||||
**/
|
||||
#define StringCopy(BLK, STR) do { \
|
||||
const char *str__ = ( STR ); \
|
||||
if (str__ != NULL) { \
|
||||
size_t size__ = strlen(str__); \
|
||||
StringMemcpy(BLK, str__, size__); \
|
||||
} else { \
|
||||
StringClear(BLK); \
|
||||
} \
|
||||
} while(0)
|
||||
#define StringCopy(BLK, STR) \
|
||||
do { \
|
||||
const char *str__ = (STR); \
|
||||
if (str__ != NULL) { \
|
||||
size_t size__ = strlen(str__); \
|
||||
StringMemcpy(BLK, str__, size__); \
|
||||
} else { \
|
||||
StringClear(BLK); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/** Like StringCopy but safe when STR aliases BLK's own buffer: copies via a
|
||||
temporary, so a self-copy or overlap is well-defined. **/
|
||||
#define StringCopyOverlapped(BLK, STR) do { \
|
||||
String s__ = STRING_EMPTY; \
|
||||
StringCopy(s__, STR); \
|
||||
StringCopyS(BLK, s__); \
|
||||
StringFree(s__); \
|
||||
} while(0)
|
||||
#define StringCopyOverlapped(BLK, STR) \
|
||||
do { \
|
||||
String s__ = STRING_EMPTY; \
|
||||
StringCopy(s__, STR); \
|
||||
StringCopyS(BLK, s__); \
|
||||
StringFree(s__); \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
@@ -57,17 +57,17 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
/** Legacy no-op retained for ABI compatibility; always returns 1. */
|
||||
HTSEXT_API int htswrap_init(void); // LEGACY
|
||||
HTSEXT_API int htswrap_init(void); // LEGACY
|
||||
|
||||
/** Legacy no-op retained for ABI compatibility; always returns 1. */
|
||||
HTSEXT_API int htswrap_free(void); // LEGACY
|
||||
HTSEXT_API int htswrap_free(void); // LEGACY
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
//HTSEXT_API int htswrap_add(httrackp * opt, const char *name, void *fct);
|
||||
//HTSEXT_API uintptr_t htswrap_read(httrackp * opt, const char *name);
|
||||
// HTSEXT_API int htswrap_add(httrackp * opt, const char *name, void *fct);
|
||||
// HTSEXT_API uintptr_t htswrap_read(httrackp * opt, const char *name);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -73,6 +73,7 @@ typedef struct strc_int2bytes2 strc_int2bytes2;
|
||||
#endif
|
||||
#ifndef HTS_DEF_DEFSTRUCT_hts_log_type
|
||||
#define HTS_DEF_DEFSTRUCT_hts_log_type
|
||||
|
||||
/** Log severity levels, most to least severe. A message is emitted only if its
|
||||
level is <= opt->debug. LOG_ERRNO is a flag OR'd into the level to append
|
||||
": <strerror(errno)>" to the message. */
|
||||
@@ -97,7 +98,7 @@ typedef struct hts_stat_struct hts_stat_struct;
|
||||
retain them. */
|
||||
#ifndef HTS_DEF_FWSTRUCT_htsErrorCallback
|
||||
#define HTS_DEF_FWSTRUCT_htsErrorCallback
|
||||
typedef void (*htsErrorCallback) (const char *msg, const char *file, int line);
|
||||
typedef void (*htsErrorCallback)(const char *msg, const char *file, int line);
|
||||
#endif
|
||||
|
||||
/* Helpers for plugging callbacks
|
||||
@@ -111,29 +112,35 @@ requires: htsdefines.h */
|
||||
* CALLBACKARG_USERDEF(). Allocates a t_hts_callbackarg with hts_malloc (not
|
||||
* checked for OOM); it is freed by hts_free_opt().
|
||||
*/
|
||||
#define CHAIN_FUNCTION(OPT, MEMBER, FUNCTION, ARGUMENT) do { \
|
||||
t_hts_callbackarg *carg = (t_hts_callbackarg*) hts_malloc(sizeof(t_hts_callbackarg)); \
|
||||
carg->userdef = ( ARGUMENT ); \
|
||||
carg->prev.fun = (void*) ( OPT )->callbacks_fun-> MEMBER .fun; \
|
||||
carg->prev.carg = ( OPT )->callbacks_fun-> MEMBER .carg; \
|
||||
( OPT )->callbacks_fun-> MEMBER .fun = ( FUNCTION ); \
|
||||
( OPT )->callbacks_fun-> MEMBER .carg = carg; \
|
||||
} while(0)
|
||||
#define CHAIN_FUNCTION(OPT, MEMBER, FUNCTION, ARGUMENT) \
|
||||
do { \
|
||||
t_hts_callbackarg *carg = \
|
||||
(t_hts_callbackarg *) hts_malloc(sizeof(t_hts_callbackarg)); \
|
||||
carg->userdef = (ARGUMENT); \
|
||||
carg->prev.fun = (void *) (OPT)->callbacks_fun->MEMBER.fun; \
|
||||
carg->prev.carg = (OPT)->callbacks_fun->MEMBER.carg; \
|
||||
(OPT)->callbacks_fun->MEMBER.fun = (FUNCTION); \
|
||||
(OPT)->callbacks_fun->MEMBER.carg = carg; \
|
||||
} while (0)
|
||||
|
||||
/* The following helpers are useful only if you know that a callback might already be installed before the call to CHAIN_FUNCTION().
|
||||
If your functions were added just after hts_create_opt(), no need to make the previous function check */
|
||||
/* The following helpers are useful only if you know that an existing callback
|
||||
might already be installed before the call to CHAIN_FUNCTION(). If your functions
|
||||
were added just after hts_create_opt(), no need to make the previous function
|
||||
check */
|
||||
|
||||
/** Inside a chained callback, return the ARGUMENT pointer originally passed to
|
||||
CHAIN_FUNCTION(), or NULL when CARG is NULL. */
|
||||
#define CALLBACKARG_USERDEF(CARG) ( ( (CARG) != NULL ) ? (CARG)->userdef : NULL )
|
||||
#define CALLBACKARG_USERDEF(CARG) (((CARG) != NULL) ? (CARG)->userdef : NULL)
|
||||
|
||||
/** Return the callback of type NAME that this one chained over, cast to its
|
||||
function-pointer type, or NULL. Call it to forward to the prior handler. */
|
||||
#define CALLBACKARG_PREV_FUN(CARG, NAME) ( (t_hts_htmlcheck_ ##NAME) ( ( (CARG) != NULL ) ? (CARG)->prev.fun : NULL ) )
|
||||
#define CALLBACKARG_PREV_FUN(CARG, NAME) \
|
||||
((t_hts_htmlcheck_##NAME)(((CARG) != NULL) ? (CARG)->prev.fun : NULL))
|
||||
|
||||
/** Return the carg of the callback this one chained over (pass it when
|
||||
forwarding to the CALLBACKARG_PREV_FUN result), or NULL. */
|
||||
#define CALLBACKARG_PREV_CARG(CARG) ( ( (CARG) != NULL ) ? (CARG)->prev.carg : NULL )
|
||||
#define CALLBACKARG_PREV_CARG(CARG) \
|
||||
(((CARG) != NULL) ? (CARG)->prev.carg : NULL)
|
||||
|
||||
/* Functions */
|
||||
|
||||
@@ -162,7 +169,7 @@ HTSEXT_API int hts_main(int argc, char **argv);
|
||||
hts_main() to set options or plug callbacks on opt first. Blocks until the
|
||||
mirror ends and returns the engine exit code. The caller keeps ownership of
|
||||
opt and must release it with hts_free_opt(). */
|
||||
HTSEXT_API int hts_main2(int argc, char **argv, httrackp * opt);
|
||||
HTSEXT_API int hts_main2(int argc, char **argv, httrackp *opt);
|
||||
|
||||
/* Options handling */
|
||||
/** Allocate and default-initialize an option set, preloading the bundled parser
|
||||
@@ -174,7 +181,7 @@ HTSEXT_API httrackp *hts_create_opt(void);
|
||||
modules, DNS cache, owned strings, and the structure). NULL is accepted. The
|
||||
pointer is invalid afterward. Do not call while a mirror is running on that
|
||||
opt; wait until hts_has_stopped() is true. */
|
||||
HTSEXT_API void hts_free_opt(httrackp * opt);
|
||||
HTSEXT_API void hts_free_opt(httrackp *opt);
|
||||
|
||||
/** Return sizeof(httrackp) as the library sees it, for caller-vs-library struct
|
||||
ABI mismatch checks. */
|
||||
@@ -184,16 +191,16 @@ HTSEXT_API size_t hts_sizeof_opt(void);
|
||||
Returns NULL if opt is NULL. The result aliases a single process-global
|
||||
static: it is not thread-safe and is overwritten by the next call, so copy
|
||||
out the fields you need. */
|
||||
HTSEXT_API const hts_stat_struct* hts_get_stats(httrackp * opt);
|
||||
HTSEXT_API const hts_stat_struct *hts_get_stats(httrackp *opt);
|
||||
|
||||
/** Legacy no-op retained for API compatibility. */
|
||||
HTSEXT_API void set_wrappers(httrackp * opt); /* LEGACY */
|
||||
HTSEXT_API void set_wrappers(httrackp *opt); /* LEGACY */
|
||||
|
||||
/** Load a plugin shared library and run its hts_plug(opt, argv) entry point. On
|
||||
success the handle is recorded in opt and unloaded by hts_free_opt().
|
||||
@return 1 if loaded and hts_plug succeeded; 0 if loaded but hts_plug was
|
||||
missing or refused; -1 if the library could not be loaded. */
|
||||
HTSEXT_API int plug_wrapper(httrackp * opt, const char *moduleName,
|
||||
HTSEXT_API int plug_wrapper(httrackp *opt, const char *moduleName,
|
||||
const char *argv);
|
||||
|
||||
/** Install the process-global assertion/error callback (NULL clears it). Not
|
||||
@@ -212,12 +219,12 @@ HTSEXT_API hts_boolean hts_log(httrackp *opt, const char *prefix,
|
||||
/** printf-style log at level @p type (an hts_log_type, optionally |LOG_ERRNO).
|
||||
Forwards to the registered log callback, and when the level is <= opt->debug
|
||||
also to opt->log. @p format must be non-NULL. */
|
||||
HTSEXT_API void hts_log_print(httrackp * opt, int type, const char *format,
|
||||
...) HTS_PRINTF_FUN(3, 4);
|
||||
HTSEXT_API void hts_log_print(httrackp *opt, int type, const char *format, ...)
|
||||
HTS_PRINTF_FUN(3, 4);
|
||||
|
||||
/** va_list form of hts_log_print(). @p opt may be NULL (only the callback
|
||||
runs). Preserves errno. @p format must be non-NULL. */
|
||||
HTSEXT_API void hts_log_vprint(httrackp * opt, int type, const char *format,
|
||||
HTSEXT_API void hts_log_vprint(httrackp *opt, int type, const char *format,
|
||||
va_list args);
|
||||
|
||||
/** Install the process-global log callback invoked by hts_log_vprint() for
|
||||
@@ -231,7 +238,7 @@ hts_set_log_vprint_callback(void (*callback)(httrackp *opt, int type,
|
||||
result is written into and aliases a 2048-byte scratch buffer inside opt: it
|
||||
is valid until that buffer is next used, and must not be freed. opt must be
|
||||
non-NULL. */
|
||||
HTSEXT_API const char *hts_get_version_info(httrackp * opt);
|
||||
HTSEXT_API const char *hts_get_version_info(httrackp *opt);
|
||||
|
||||
/** Static build-features string (TLS, zlib, ipv6, and so on). Process-global
|
||||
storage; do not free or modify. */
|
||||
@@ -241,21 +248,22 @@ HTSEXT_API const char *hts_is_available(void);
|
||||
HTSEXT_API const char *hts_version(void);
|
||||
|
||||
/* Wrapper functions */
|
||||
HTSEXT_API int htswrap_init(void); // DEPRECATED - DUMMY FUNCTION
|
||||
HTSEXT_API int htswrap_init(void); // DEPRECATED - DUMMY FUNCTION
|
||||
|
||||
HTSEXT_API int htswrap_free(void); // DEPRECATED - DUMMY FUNCTION
|
||||
HTSEXT_API int htswrap_free(void); // DEPRECATED - DUMMY FUNCTION
|
||||
|
||||
/** Register callback @p fct under @p name in opt's callback table (for example
|
||||
"start", "check-html", "linkdetected"). Returns 1 on success, 0 if @p name
|
||||
is not a known slot. Prefer CHAIN_FUNCTION(), which preserves any prior
|
||||
callback. */
|
||||
HTSEXT_API int htswrap_add(httrackp * opt, const char *name, void *fct);
|
||||
HTSEXT_API int htswrap_add(httrackp *opt, const char *name, void *fct);
|
||||
|
||||
/** Return the function pointer registered under @p name in opt as a uintptr_t,
|
||||
or 0 if none or unknown. */
|
||||
HTSEXT_API uintptr_t htswrap_read(httrackp * opt, const char *name);
|
||||
HTSEXT_API uintptr_t htswrap_read(httrackp *opt, const char *name);
|
||||
|
||||
/* Internal library allocators, if a different libc is being used by the client */
|
||||
/* Internal library allocators, if a different libc is being used by the client
|
||||
*/
|
||||
/** strdup() through the library allocator. Returns a heap copy freed with
|
||||
hts_free(), or NULL on failure. */
|
||||
HTSEXT_API char *hts_strdup(const char *string);
|
||||
@@ -272,13 +280,13 @@ HTSEXT_API void *hts_realloc(void *const data, const size_t size);
|
||||
HTSEXT_API void hts_free(void *data);
|
||||
|
||||
/* Other functions */
|
||||
HTSEXT_API int hts_resetvar(void); // DEPRECATED - DUMMY FUNCTION
|
||||
HTSEXT_API int hts_resetvar(void); // DEPRECATED - DUMMY FUNCTION
|
||||
|
||||
/** (Re)build the top-level index.html aggregating every mirror project found
|
||||
under @p path. @p binpath is the data root used to locate the
|
||||
templates/topindex-*.html files, falling back to built-in templates. Writes
|
||||
<path>/index.html. @return 1 on success, 0 on failure. */
|
||||
HTSEXT_API int hts_buildtopindex(httrackp * opt, const char *path,
|
||||
HTSEXT_API int hts_buildtopindex(httrackp *opt, const char *path,
|
||||
const char *binpath);
|
||||
|
||||
/** Scan every mirror project under @p path and return a CRLF-separated list:
|
||||
@@ -321,14 +329,14 @@ HTSEXT_API hts_boolean catch_url(T_SOC soc, char *url, char *method,
|
||||
/** Whether the engine is parsing HTML. Returns 0 if not, otherwise the percent
|
||||
done (at least 1). @p flag >= 0 also requests a progress refresh; pass a
|
||||
negative value to query without side effects. */
|
||||
HTSEXT_API int hts_is_parsing(httrackp * opt, int flag);
|
||||
HTSEXT_API int hts_is_parsing(httrackp *opt, int flag);
|
||||
|
||||
/** Current background phase: 0 none, 1 testing links, 2 purge, 3, 4 scheduling,
|
||||
5 waiting for a slot. */
|
||||
HTSEXT_API int hts_is_testing(httrackp * opt);
|
||||
HTSEXT_API int hts_is_testing(httrackp *opt);
|
||||
|
||||
/** Nonzero once the engine has begun its exit sequence. */
|
||||
HTSEXT_API int hts_is_exiting(httrackp * opt);
|
||||
HTSEXT_API int hts_is_exiting(httrackp *opt);
|
||||
|
||||
/*HTSEXT_API int hts_setopt(httrackp* opt); DEPRECATED ; see copy_htsopt() */
|
||||
|
||||
@@ -344,15 +352,15 @@ HTSEXT_API hts_boolean hts_resetaddurl(httrackp *opt);
|
||||
/** Apply the runtime-tunable options from @p from onto @p to, to adjust a live
|
||||
mirror. Only fields set to a non-sentinel value are copied; the rest of @p
|
||||
to is left untouched. The user-agent string is deep-copied. @return 0. */
|
||||
HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to);
|
||||
HTSEXT_API int copy_htsopt(const httrackp *from, httrackp *to);
|
||||
|
||||
/** Return the engine's last error message, or NULL. The string is owned by
|
||||
@p opt; do not free it, and use it only while @p opt lives. */
|
||||
HTSEXT_API char *hts_errmsg(httrackp * opt);
|
||||
HTSEXT_API char *hts_errmsg(httrackp *opt);
|
||||
|
||||
/** Get or set the transfer-pause flag. @p p >= 0 sets it (nonzero means
|
||||
paused); a negative value queries. @return the current pause flag. */
|
||||
HTSEXT_API int hts_setpause(httrackp * opt, int);
|
||||
HTSEXT_API int hts_setpause(httrackp *opt, int);
|
||||
|
||||
/** Ask the running mirror to terminate (sets the stop flag under the state
|
||||
lock, so it is safe to call from another thread). @p force is currently
|
||||
@@ -363,15 +371,15 @@ HTSEXT_API int hts_request_stop(httrackp *opt, hts_boolean force);
|
||||
/** Queue a single in-progress file, by URL, to be cancelled by the engine.
|
||||
@p url is copied internally. Takes the state lock, so it is thread-safe.
|
||||
@return the underlying push result. */
|
||||
HTSEXT_API int hts_cancel_file_push(httrackp * opt, const char *url);
|
||||
HTSEXT_API int hts_cancel_file_push(httrackp *opt, const char *url);
|
||||
|
||||
/** Cancel the in-progress link-testing phase. Effective only while a test runs.
|
||||
*/
|
||||
HTSEXT_API void hts_cancel_test(httrackp * opt);
|
||||
HTSEXT_API void hts_cancel_test(httrackp *opt);
|
||||
|
||||
/** Cancel the in-progress HTML parsing. Effective only while parsing is active.
|
||||
*/
|
||||
HTSEXT_API void hts_cancel_parsing(httrackp * opt);
|
||||
HTSEXT_API void hts_cancel_parsing(httrackp *opt);
|
||||
|
||||
/** Nonzero once the mirror has fully ended. Read under the engine state lock,
|
||||
so safe to poll from another thread. Wait for this before hts_free_opt(). */
|
||||
@@ -416,19 +424,19 @@ HTSEXT_API void qsec2str(char *st, TStamp t);
|
||||
is reused, and a given strc is not reentrant. Use one strc per
|
||||
concurrently-live result. */
|
||||
/** Format @p n as a decimal string into @p strc and return it. */
|
||||
HTSEXT_API char *int2char(strc_int2bytes2 * strc, int n);
|
||||
HTSEXT_API char *int2char(strc_int2bytes2 *strc, int n);
|
||||
|
||||
/** Format byte count @p n as "<num><unit>" (B/KiB/MiB/GiB and so on) into
|
||||
@p strc and return it. */
|
||||
HTSEXT_API char *int2bytes(strc_int2bytes2 * strc, LLint n);
|
||||
HTSEXT_API char *int2bytes(strc_int2bytes2 *strc, LLint n);
|
||||
|
||||
/** Format a transfer rate @p n as "<num><unit>/s" into @p strc and return it.
|
||||
*/
|
||||
HTSEXT_API char *int2bytessec(strc_int2bytes2 * strc, long int n);
|
||||
HTSEXT_API char *int2bytessec(strc_int2bytes2 *strc, long int n);
|
||||
|
||||
/** Split byte count @p n into number and unit, returning a 2-element array
|
||||
{number, unit} stored inside @p strc. */
|
||||
HTSEXT_API char **int2bytes2(strc_int2bytes2 * strc, LLint n);
|
||||
HTSEXT_API char **int2bytes2(strc_int2bytes2 *strc, LLint n);
|
||||
|
||||
/** Skip any "user[:pass]@" identification prefix in a URL, returning a pointer
|
||||
into the argument past it (or past the protocol if none). The result aliases
|
||||
@@ -490,40 +498,50 @@ HTSEXT_API void unescape_amp(char *s);
|
||||
|
||||
/** Percent-escape only spaces (' ' becomes "%20"); copy everything else
|
||||
* verbatim. */
|
||||
HTSEXT_API size_t escape_spc_url(const char *const src, char *const dest, const size_t size);
|
||||
HTSEXT_API size_t escape_spc_url(const char *const src, char *const dest,
|
||||
const size_t size);
|
||||
|
||||
/** Aggressively percent-escape @p src for use as a single URL path segment
|
||||
(reserved, delimiter, unwise, special, avoid and mark characters). */
|
||||
HTSEXT_API size_t escape_in_url(const char *const src, char *const dest, const size_t size);
|
||||
HTSEXT_API size_t escape_in_url(const char *const src, char *const dest,
|
||||
const size_t size);
|
||||
|
||||
/** Percent-escape @p src as a URI, escaping only what is necessary and keeping
|
||||
'/' and other reserved characters. */
|
||||
HTSEXT_API size_t escape_uri(const char *const src, char *const dest, const size_t size);
|
||||
HTSEXT_API size_t escape_uri(const char *const src, char *const dest,
|
||||
const size_t size);
|
||||
|
||||
/** Like escape_uri() for a UTF-8 URI: also escapes reserved characters other
|
||||
than '/'. */
|
||||
HTSEXT_API size_t escape_uri_utf(const char *const src, char *const dest, const size_t size);
|
||||
HTSEXT_API size_t escape_uri_utf(const char *const src, char *const dest,
|
||||
const size_t size);
|
||||
|
||||
/** Minimal "make safe" escape: percent-escapes only '"', ' ' and control
|
||||
characters, leaving an already-formed URL otherwise intact. */
|
||||
HTSEXT_API size_t escape_check_url(const char *const src, char *const dest, const size_t size);
|
||||
HTSEXT_API size_t escape_check_url(const char *const src, char *const dest,
|
||||
const size_t size);
|
||||
|
||||
/** Append-variant of escape_spc_url(): escapes @p src after the existing
|
||||
NUL-terminated content of @p dest. Returns the bytes appended (excluding the
|
||||
NUL). */
|
||||
HTSEXT_API size_t append_escape_spc_url(const char *const src, char *const dest, const size_t size);
|
||||
HTSEXT_API size_t append_escape_spc_url(const char *const src, char *const dest,
|
||||
const size_t size);
|
||||
|
||||
/** Append-variant of escape_in_url(). See append_escape_spc_url(). */
|
||||
HTSEXT_API size_t append_escape_in_url(const char *const src, char *const dest, const size_t size);
|
||||
HTSEXT_API size_t append_escape_in_url(const char *const src, char *const dest,
|
||||
const size_t size);
|
||||
|
||||
/** Append-variant of escape_uri(). See append_escape_spc_url(). */
|
||||
HTSEXT_API size_t append_escape_uri(const char *const src, char *const dest, const size_t size);
|
||||
HTSEXT_API size_t append_escape_uri(const char *const src, char *const dest,
|
||||
const size_t size);
|
||||
|
||||
/** Append-variant of escape_uri_utf(). See append_escape_spc_url(). */
|
||||
HTSEXT_API size_t append_escape_uri_utf(const char *const src, char *const dest, const size_t size);
|
||||
HTSEXT_API size_t append_escape_uri_utf(const char *const src, char *const dest,
|
||||
const size_t size);
|
||||
|
||||
/** Append-variant of escape_check_url(). See append_escape_spc_url(). */
|
||||
HTSEXT_API size_t append_escape_check_url(const char *const src, char *const dest, const size_t size);
|
||||
HTSEXT_API size_t append_escape_check_url(const char *const src,
|
||||
char *const dest, const size_t size);
|
||||
|
||||
/** In-place variant of escape_spc_url(): escapes the NUL-terminated string in
|
||||
@p dest back into @p dest. */
|
||||
@@ -543,32 +561,39 @@ HTSEXT_API size_t inplace_escape_check_url(char *const dest, const size_t size);
|
||||
|
||||
/** Same escaping as escape_check_url() but returns @p dest instead of the byte
|
||||
count. */
|
||||
HTSEXT_API char *escape_check_url_addr(const char *const src, char *const dest, const size_t size);
|
||||
HTSEXT_API char *escape_check_url_addr(const char *const src, char *const dest,
|
||||
const size_t size);
|
||||
|
||||
/** Build a MIME/MHTML content-id token in @p dest from @p adr and @p fil:
|
||||
escape_in_url() both, then replace every '%' with 'X' so the result is one
|
||||
opaque token. */
|
||||
HTSEXT_API size_t make_content_id(const char *const adr, const char *const fil, char *const dest, const size_t size);
|
||||
HTSEXT_API size_t make_content_id(const char *const adr, const char *const fil,
|
||||
char *const dest, const size_t size);
|
||||
|
||||
/** Low-level percent-escaper backing the escape_* family. @p mode selects the
|
||||
character class to escape: 0 check_url, 1 in_url, 2 spc_url, 3 uri,
|
||||
30 uri_utf. @p max_size is the dest capacity including the NUL. */
|
||||
HTSEXT_API size_t x_escape_http(const char *const s, char *const dest, const size_t max_size, const int mode);
|
||||
HTSEXT_API size_t x_escape_http(const char *const s, char *const dest,
|
||||
const size_t max_size, const int mode);
|
||||
|
||||
/** Strip all control characters (byte value < 32) from @p s in place. */
|
||||
HTSEXT_API void escape_remove_control(char *const s);
|
||||
|
||||
/** HTML-escape for text output: rewrite '&' to "&amp;" and pass every other
|
||||
byte through unchanged. */
|
||||
HTSEXT_API size_t escape_for_html_print(const char *const s, char *const dest, const size_t size);
|
||||
HTSEXT_API size_t escape_for_html_print(const char *const s, char *const dest,
|
||||
const size_t size);
|
||||
|
||||
/** Like escape_for_html_print() but also convert every high byte (>= 128) to a
|
||||
numeric entity "&#xNN;". */
|
||||
HTSEXT_API size_t escape_for_html_print_full(const char *const s, char *const dest, const size_t size);
|
||||
HTSEXT_API size_t escape_for_html_print_full(const char *const s,
|
||||
char *const dest,
|
||||
const size_t size);
|
||||
|
||||
/** Percent-decode @p s into @p catbuff (capacity @p size) and return @p
|
||||
catbuff. Decodes every "%xx" hex escape. */
|
||||
HTSEXT_API char *unescape_http(char *const catbuff, const size_t size, const char *const s);
|
||||
HTSEXT_API char *unescape_http(char *const catbuff, const size_t size,
|
||||
const char *const s);
|
||||
|
||||
/** Percent-decode @p s into @p catbuff, but only the escapes that are safe to
|
||||
decode while keeping a valid URI (reserved, delimiter, unwise, control and
|
||||
@@ -589,17 +614,16 @@ HTSEXT_API hts_boolean get_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
||||
HTS_MIMETYPE_SIZE capacity. */
|
||||
HTS_DEPRECATED("use get_httptype_sized(opt, s, ssize, fil, flag)")
|
||||
|
||||
HTSEXT_API void get_httptype(httrackp * opt, char *s, const char *fil,
|
||||
int flag);
|
||||
HTSEXT_API void get_httptype(httrackp *opt, char *s, const char *fil, int flag);
|
||||
|
||||
/** Classify @p fil by its extension: 0 unknown, 1 known non-HTML, 2 known HTML.
|
||||
Consults the built-in table then user --assume rules. 0 for a NULL @p fil.
|
||||
*/
|
||||
HTSEXT_API int is_knowntype(httrackp * opt, const char *fil);
|
||||
HTSEXT_API int is_knowntype(httrackp *opt, const char *fil);
|
||||
|
||||
/** Like is_knowntype() but consults only the user --assume rules: 0 no rule,
|
||||
1 non-HTML, 2 HTML. */
|
||||
HTSEXT_API int is_userknowntype(httrackp * opt, const char *fil);
|
||||
HTSEXT_API int is_userknowntype(httrackp *opt, const char *fil);
|
||||
|
||||
/** 1 if @p fil, an extension such as "asp" or "php" (not a full filename), is a
|
||||
known dynamic-page type, else 0. */
|
||||
@@ -624,7 +648,7 @@ HTSEXT_API hts_boolean guess_httptype_sized(httrackp *opt, char *s,
|
||||
HTS_MIMETYPE_SIZE capacity. */
|
||||
HTS_DEPRECATED("use guess_httptype_sized(opt, s, ssize, fil)")
|
||||
|
||||
HTSEXT_API void guess_httptype(httrackp * opt, char *s, const char *fil);
|
||||
HTSEXT_API void guess_httptype(httrackp *opt, char *s, const char *fil);
|
||||
|
||||
/* Ugly string tools */
|
||||
/* These take a caller scratch buffer catbuff of capacity size and return it. On
|
||||
@@ -633,11 +657,13 @@ HTSEXT_API void guess_httptype(httrackp * opt, char *s, const char *fil);
|
||||
time), not a pointer. */
|
||||
/** Concatenate @p a and @p b into @p catbuff (NULL or empty operands are
|
||||
* skipped). */
|
||||
HTSEXT_API char *concat(char *catbuff, size_t size, const char *a, const char *b);
|
||||
HTSEXT_API char *concat(char *catbuff, size_t size, const char *a,
|
||||
const char *b);
|
||||
|
||||
/** Like concat(a, b) but convert '/' to the platform path separator (Windows).
|
||||
*/
|
||||
HTSEXT_API char *fconcat(char *catbuff, size_t size, const char *a, const char *b);
|
||||
HTSEXT_API char *fconcat(char *catbuff, size_t size, const char *a,
|
||||
const char *b);
|
||||
|
||||
/** Copy @p a into @p catbuff, converting '/' to the platform path separator
|
||||
(Windows). */
|
||||
@@ -719,7 +745,7 @@ HTSEXT_API FILE *hts_fopen_utf8(const char *path, const char *mode);
|
||||
#define STAT hts_stat_utf8
|
||||
typedef struct _stat STRUCT_STAT;
|
||||
|
||||
HTSEXT_API int hts_stat_utf8(const char *path, STRUCT_STAT * buf);
|
||||
HTSEXT_API int hts_stat_utf8(const char *path, STRUCT_STAT *buf);
|
||||
|
||||
#define UNLINK hts_unlink_utf8
|
||||
HTSEXT_API int hts_unlink_utf8(const char *pathname);
|
||||
@@ -731,12 +757,12 @@ HTSEXT_API int hts_rename_utf8(const char *oldpath, const char *newpath);
|
||||
|
||||
HTSEXT_API int hts_mkdir_utf8(const char *pathname);
|
||||
|
||||
#define UTIME(A,B) hts_utime_utf8(A,B)
|
||||
#define UTIME(A, B) hts_utime_utf8(A, B)
|
||||
|
||||
typedef struct _utimbuf STRUCT_UTIMBUF;
|
||||
|
||||
HTSEXT_API int hts_utime_utf8(const char *filename,
|
||||
const STRUCT_UTIMBUF * times);
|
||||
const STRUCT_UTIMBUF *times);
|
||||
#else
|
||||
#define FOPEN fopen
|
||||
#define STAT stat
|
||||
@@ -748,7 +774,7 @@ typedef struct stat STRUCT_STAT;
|
||||
|
||||
typedef struct utimbuf STRUCT_UTIMBUF;
|
||||
|
||||
#define UTIME(A,B) utime(A,B)
|
||||
#define UTIME(A, B) utime(A, B)
|
||||
#endif
|
||||
#define HTS_DEF_FILEAPI
|
||||
#endif
|
||||
@@ -756,20 +782,21 @@ typedef struct utimbuf STRUCT_UTIMBUF;
|
||||
/** Macro aimed to break at build-time if a size is not a sizeof() strictly
|
||||
* greater than sizeof(char*). **/
|
||||
#undef COMPILE_TIME_CHECK_SIZE
|
||||
#define COMPILE_TIME_CHECK_SIZE(A) (void) ((void (*)(char[A - sizeof(char*) - 1])) NULL)
|
||||
#define COMPILE_TIME_CHECK_SIZE(A) \
|
||||
(void) ((void (*)(char[A - sizeof(char *) - 1])) NULL)
|
||||
|
||||
/** Macro aimed to break at run-time (through assertf) if a size is equal to
|
||||
* sizeof(void*). **/
|
||||
#undef RUNTIME_TIME_CHECK_SIZE
|
||||
#define RUNTIME_TIME_CHECK_SIZE(A) assertf((A) != sizeof(void*))
|
||||
#define RUNTIME_TIME_CHECK_SIZE(A) assertf((A) != sizeof(void *))
|
||||
|
||||
#define fconv(A,B,C) (COMPILE_TIME_CHECK_SIZE(B), fconv(A,B,C))
|
||||
#define fconv(A, B, C) (COMPILE_TIME_CHECK_SIZE(B), fconv(A, B, C))
|
||||
|
||||
#define concat(A,B,C,D) (COMPILE_TIME_CHECK_SIZE(B), concat(A,B,C,D))
|
||||
#define concat(A, B, C, D) (COMPILE_TIME_CHECK_SIZE(B), concat(A, B, C, D))
|
||||
|
||||
#define fconcat(A,B,C,D) (COMPILE_TIME_CHECK_SIZE(B), fconcat(A,B,C,D))
|
||||
#define fconcat(A, B, C, D) (COMPILE_TIME_CHECK_SIZE(B), fconcat(A, B, C, D))
|
||||
|
||||
#define fslash(A,B,C) (COMPILE_TIME_CHECK_SIZE(B), fslash(A,B,C))
|
||||
#define fslash(A, B, C) (COMPILE_TIME_CHECK_SIZE(B), fslash(A, B, C))
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
149
src/webhttrack
149
src/webhttrack
@@ -4,131 +4,140 @@
|
||||
# Initializes the htsserver GUI frontend and launches the default browser
|
||||
|
||||
BROWSEREXE=
|
||||
SRCHBROWSEREXE="x-www-browser www-browser iceape mozilla firefox-developer-edition firefox icecat iceweasel abrowser firebird galeon konqueror midori opera google-chrome chrome chromium chromium-browser netscape firefox-developer-edition"
|
||||
SRCHBROWSEREXE=(x-www-browser www-browser iceape mozilla firefox-developer-edition firefox icecat iceweasel abrowser firebird galeon konqueror midori opera google-chrome chrome chromium chromium-browser netscape firefox-developer-edition)
|
||||
# shellcheck disable=SC2153 # BROWSER is the standard freedesktop env var, not a typo
|
||||
if test -n "${BROWSER}"; then
|
||||
# sensible-browser will f up if BROWSER is not set
|
||||
SRCHBROWSEREXE="xdg-open sensible-browser ${SRCHBROWSEREXE}"
|
||||
# sensible-browser will f up if BROWSER is not set
|
||||
SRCHBROWSEREXE=(xdg-open sensible-browser "${SRCHBROWSEREXE[@]}")
|
||||
fi
|
||||
# Patch for Darwin/Mac by Ross Williams
|
||||
if test "`uname -s`" == "Darwin"; then
|
||||
# Darwin/Mac OS X uses a system 'open' command to find
|
||||
# the default browser. The -W flag causes it to wait for
|
||||
# the browser to exit
|
||||
BROWSEREXE="/usr/bin/open -W"
|
||||
if test "$(uname -s)" == "Darwin"; then
|
||||
# Darwin/Mac OS X uses a system 'open' command to find
|
||||
# the default browser. The -W flag causes it to wait for
|
||||
# the browser to exit
|
||||
BROWSEREXE="/usr/bin/open -W"
|
||||
fi
|
||||
BINWD=`dirname "$0"`
|
||||
SRCHPATH="$BINWD /usr/local/bin /usr/share/bin /usr/bin /usr/lib/httrack /usr/local/lib/httrack /usr/local/share/httrack /opt/local/bin /sw/bin ${HOME}/usr/bin ${HOME}/bin"
|
||||
SRCHPATH="$SRCHPATH "`echo $PATH | tr ":" " "`
|
||||
SRCHDISTPATH="$BINWD/../share $BINWD/.. /usr/share /usr/local /usr /local /usr/local/share ${HOME}/usr ${HOME}/usr/share /opt/local/share /sw ${HOME}/usr/local ${HOME}/usr/share"
|
||||
BINWD=$(dirname "$0")
|
||||
SRCHPATH=("$BINWD" /usr/local/bin /usr/share/bin /usr/bin /usr/lib/httrack /usr/local/lib/httrack /usr/local/share/httrack /opt/local/bin /sw/bin "${HOME}/usr/bin" "${HOME}/bin")
|
||||
IFS=':' read -ra pathdirs <<<"$PATH"
|
||||
for d in "${pathdirs[@]}"; do
|
||||
# drop empty PATH fields, matching the old echo|tr word-split
|
||||
test -n "$d" && SRCHPATH+=("$d")
|
||||
done
|
||||
SRCHDISTPATH=("$BINWD/../share" "$BINWD/.." /usr/share /usr/local /usr /local /usr/local/share "${HOME}/usr" "${HOME}/usr/share" /opt/local/share /sw "${HOME}/usr/local" "${HOME}/usr/share")
|
||||
|
||||
###
|
||||
# And now some famous cuisine
|
||||
|
||||
function log {
|
||||
echo "$0($$): $@" >&2
|
||||
return 0
|
||||
echo "$0($$): $*" >&2
|
||||
return 0
|
||||
}
|
||||
|
||||
function launch_browser {
|
||||
log "Launching $1"
|
||||
browser=$1
|
||||
url=$2
|
||||
log "Spawning browser.."
|
||||
${browser} "${url}"
|
||||
# note: the browser may silently use the -remote feature of
|
||||
# mozilla and therefore return immediately
|
||||
log "Browser (or helper) exited"
|
||||
log "Launching $1"
|
||||
browser=$1
|
||||
url=$2
|
||||
log "Spawning browser.."
|
||||
${browser} "${url}"
|
||||
# note: the browser may silently use the -remote feature of
|
||||
# mozilla and therefore return immediately
|
||||
log "Browser (or helper) exited"
|
||||
}
|
||||
|
||||
# First ensure that we can launch the server
|
||||
BINPATH=
|
||||
for i in ${SRCHPATH}; do
|
||||
! test -n "${BINPATH}" && test -x ${i}/htsserver && BINPATH=${i}
|
||||
for i in "${SRCHPATH[@]}"; do
|
||||
! test -n "${BINPATH}" && test -x "${i}/htsserver" && BINPATH="${i}"
|
||||
done
|
||||
for i in ${SRCHDISTPATH}; do
|
||||
! test -n "${DISTPATH}" && test -f "${i}/httrack/lang.def" && DISTPATH="${i}/httrack"
|
||||
for i in "${SRCHDISTPATH[@]}"; do
|
||||
! test -n "${DISTPATH}" && test -f "${i}/httrack/lang.def" && DISTPATH="${i}/httrack"
|
||||
done
|
||||
test -n "${BINPATH}" || ! log "Could not find htsserver" || exit 1
|
||||
test -n "${DISTPATH}" || ! log "Could not find httrack directory" || exit 1
|
||||
test -f ${DISTPATH}/lang.def || ! log "Could not find ${DISTPATH}/lang.def" || exit 1
|
||||
test -f ${DISTPATH}/lang.indexes || ! log "Could not find ${DISTPATH}/lang.indexes" || exit 1
|
||||
test -d ${DISTPATH}/lang || ! log "Could not find ${DISTPATH}/lang" || exit 1
|
||||
test -d ${DISTPATH}/html || ! log "Could not find ${DISTPATH}/html" || exit 1
|
||||
test -f "${DISTPATH}/lang.def" || ! log "Could not find ${DISTPATH}/lang.def" || exit 1
|
||||
test -f "${DISTPATH}/lang.indexes" || ! log "Could not find ${DISTPATH}/lang.indexes" || exit 1
|
||||
test -d "${DISTPATH}/lang" || ! log "Could not find ${DISTPATH}/lang" || exit 1
|
||||
test -d "${DISTPATH}/html" || ! log "Could not find ${DISTPATH}/html" || exit 1
|
||||
|
||||
# Locale
|
||||
HTSLANG="${LC_MESSAGES}"
|
||||
! test -n "${HTSLANG}" && HTSLANG="${LC_ALL}"
|
||||
! test -n "${HTSLANG}" && HTSLANG="${LANG}"
|
||||
HTSLANG="`echo $LANG | cut -f1 -d'.' | cut -f1 -d'_'`"
|
||||
LANGN=`grep -E "^${HTSLANG}:" ${DISTPATH}/lang.indexes | cut -f2 -d':'`
|
||||
HTSLANG="$(echo "$LANG" | cut -f1 -d'.' | cut -f1 -d'_')"
|
||||
LANGN=$(grep -E "^${HTSLANG}:" "${DISTPATH}/lang.indexes" | cut -f2 -d':')
|
||||
! test -n "${LANGN}" && LANGN=1
|
||||
|
||||
# Find the browser
|
||||
# note: not all systems have sensible-browser or www-browser alternative
|
||||
# therefore, we have to search a bit further if sensible-browser could not be found
|
||||
|
||||
for i in ${SRCHBROWSEREXE}; do
|
||||
for j in ${SRCHPATH}; do
|
||||
if test -x ${j}/${i}; then
|
||||
BROWSEREXE=${j}/${i}
|
||||
fi
|
||||
test -n "$BROWSEREXE" && break
|
||||
done
|
||||
test -n "$BROWSEREXE" && break
|
||||
for i in "${SRCHBROWSEREXE[@]}"; do
|
||||
for j in "${SRCHPATH[@]}"; do
|
||||
if test -x "${j}/${i}"; then
|
||||
BROWSEREXE="${j}/${i}"
|
||||
fi
|
||||
test -n "$BROWSEREXE" && break
|
||||
done
|
||||
test -n "$BROWSEREXE" && break
|
||||
done
|
||||
test -n "$BROWSEREXE" || ! log "Could not find any suitable browser" || exit 1
|
||||
|
||||
# "browse" command
|
||||
if test "$1" = "browse"; then
|
||||
if test -f "${HOME}/.httrack.ini"; then
|
||||
INDEXF=`cat ${HOME}/.httrack.ini | tr '\r' '\n' | grep -E "^path=" | cut -f2- -d'='`
|
||||
if test -n "${INDEXF}" -a -d "${INDEXF}" -a -f "${INDEXF}/index.html"; then
|
||||
INDEXF="${INDEXF}/index.html"
|
||||
else
|
||||
INDEXF=""
|
||||
fi
|
||||
fi
|
||||
if ! test -n "$INDEXF"; then
|
||||
INDEXF="${HOME}/websites/index.html"
|
||||
fi
|
||||
launch_browser "${BROWSEREXE}" "file://${INDEXF}"
|
||||
exit $?
|
||||
if test -f "${HOME}/.httrack.ini"; then
|
||||
INDEXF=$(tr '\r' '\n' <"${HOME}/.httrack.ini" | grep -E "^path=" | cut -f2- -d'=')
|
||||
if test -n "${INDEXF}" -a -d "${INDEXF}" -a -f "${INDEXF}/index.html"; then
|
||||
INDEXF="${INDEXF}/index.html"
|
||||
else
|
||||
INDEXF=""
|
||||
fi
|
||||
fi
|
||||
if ! test -n "$INDEXF"; then
|
||||
INDEXF="${HOME}/websites/index.html"
|
||||
fi
|
||||
launch_browser "${BROWSEREXE}" "file://${INDEXF}"
|
||||
exit $?
|
||||
fi
|
||||
|
||||
# Create a temporary filename
|
||||
TMPSRVFILE="$(mktemp ${TMPDIR:-/tmp}/.webhttrack.XXXXXXXX)" || ! log "Could not create the temporary file ${TMPSRVFILE}" || exit 1
|
||||
TMPSRVFILE="$(mktemp "${TMPDIR:-/tmp}/.webhttrack.XXXXXXXX")" || ! log "Could not create the temporary file ${TMPSRVFILE}" || exit 1
|
||||
# Launch htsserver binary and setup the server
|
||||
(${BINPATH}/htsserver "${DISTPATH}/" --ppid "$$" path "${HOME}/websites" lang "${LANGN}" $@; echo SRVURL=error) > ${TMPSRVFILE}&
|
||||
(
|
||||
"${BINPATH}/htsserver" "${DISTPATH}/" --ppid "$$" path "${HOME}/websites" lang "${LANGN}" "$@"
|
||||
echo SRVURL=error
|
||||
) >"${TMPSRVFILE}" &
|
||||
# Find the generated SRVURL
|
||||
SRVURL=
|
||||
MAXCOUNT=60
|
||||
while ! test -n "$SRVURL"; do
|
||||
MAXCOUNT=$[$MAXCOUNT - 1]
|
||||
test $MAXCOUNT -gt 0 || exit 1
|
||||
test $MAXCOUNT -lt 50 && echo "waiting for server to reply.."
|
||||
SRVURL=`grep -E URL= ${TMPSRVFILE} | cut -f2- -d=`
|
||||
test ! "$SRVURL" = "error" || ! log "Could not spawn htsserver" || exit 1
|
||||
test -n "$SRVURL" || sleep 1
|
||||
MAXCOUNT=$((MAXCOUNT - 1))
|
||||
test $MAXCOUNT -gt 0 || exit 1
|
||||
test $MAXCOUNT -lt 50 && echo "waiting for server to reply.."
|
||||
SRVURL=$(grep -E URL= "${TMPSRVFILE}" | cut -f2- -d=)
|
||||
test ! "$SRVURL" = "error" || ! log "Could not spawn htsserver" || exit 1
|
||||
test -n "$SRVURL" || sleep 1
|
||||
done
|
||||
|
||||
# Cleanup function
|
||||
# shellcheck disable=SC2120 # $1 is an optional "signal caught" marker; bare calls are intentional
|
||||
function cleanup {
|
||||
test -n "$1" && log "Nasty signal caught, cleaning up.."
|
||||
# Do not kill if browser exited (chrome bug issue) ; server will die itself
|
||||
test -n "$1" && test -f ${TMPSRVFILE} && SRVPID=`grep -E PID= ${TMPSRVFILE} | cut -f2- -d=`
|
||||
test -n "${SRVPID}" && kill -9 ${SRVPID}
|
||||
test -f ${TMPSRVFILE} && rm ${TMPSRVFILE}
|
||||
test -n "$1" && log "..Done"
|
||||
return 0
|
||||
test -n "$1" && log "Nasty signal caught, cleaning up.."
|
||||
# Do not kill if browser exited (chrome bug issue) ; server will die itself
|
||||
test -n "$1" && test -f "${TMPSRVFILE}" && SRVPID=$(grep -E PID= "${TMPSRVFILE}" | cut -f2- -d=)
|
||||
test -n "${SRVPID}" && kill -9 "${SRVPID}"
|
||||
test -f "${TMPSRVFILE}" && rm "${TMPSRVFILE}"
|
||||
test -n "$1" && log "..Done"
|
||||
return 0
|
||||
}
|
||||
|
||||
# Cleanup in case of emergency
|
||||
trap "cleanup now; exit" 1 2 3 4 5 6 7 8 9 11 13 14 15 16 19 24 25
|
||||
trap "cleanup now; exit" HUP INT QUIT ILL TRAP ABRT BUS FPE SEGV PIPE ALRM TERM STKFLT XCPU XFSZ
|
||||
|
||||
# Got SRVURL, launch browser
|
||||
launch_browser "${BROWSEREXE}" "${SRVURL}"
|
||||
|
||||
# That's all, folks!
|
||||
trap "" 1 2 3 4 5 6 7 8 9 11 13 14 15 16 19 24 25
|
||||
trap "" HUP INT QUIT ILL TRAP ABRT BUS FPE SEGV PIPE ALRM TERM STKFLT XCPU XFSZ
|
||||
cleanup
|
||||
exit 0
|
||||
|
||||
@@ -6,11 +6,11 @@ set -euo pipefail
|
||||
# charset -> UTF-8 conversion (hts_convertStringToUTF8).
|
||||
# -#3 <charset> <string> prints the string re-decoded from <charset> as UTF-8.
|
||||
conv() {
|
||||
test "$(httrack -O /dev/null -#3 "$1" "$2")" == "$3" || exit 1
|
||||
test "$(httrack -O /dev/null -#3 "$1" "$2")" == "$3" || exit 1
|
||||
}
|
||||
# crash probe: malformed input must exit cleanly, not abort.
|
||||
runs() {
|
||||
httrack -O /dev/null -#3 "$1" "$2" >/dev/null 2>&1 || exit 1
|
||||
httrack -O /dev/null -#3 "$1" "$2" >/dev/null 2>&1 || exit 1
|
||||
}
|
||||
|
||||
# the source bytes below are UTF-8 (this file is UTF-8); "café" is 0x63 61 66 C3 A9.
|
||||
|
||||
@@ -6,11 +6,11 @@ set -euo pipefail
|
||||
# HTML entity unescaping (hts_unescapeEntitiesWithCharset).
|
||||
# -#6 <string> prints the string with entities decoded (UTF-8 output).
|
||||
ent() {
|
||||
test "$(httrack -O /dev/null -#6 "$1")" == "$2" || exit 1
|
||||
test "$(httrack -O /dev/null -#6 "$1")" == "$2" || exit 1
|
||||
}
|
||||
# crash probe: malformed input must exit cleanly, not abort.
|
||||
runs() {
|
||||
httrack -O /dev/null -#6 "$1" >/dev/null 2>&1 || exit 1
|
||||
httrack -O /dev/null -#6 "$1" >/dev/null 2>&1 || exit 1
|
||||
}
|
||||
|
||||
# named entities
|
||||
|
||||
@@ -7,10 +7,10 @@ set -euo pipefail
|
||||
# -#0 <filter> <string> prints "<string> does match <filter>" or "... does NOT match ...".
|
||||
|
||||
match() {
|
||||
test "$(httrack -O /dev/null -#0 "$1" "$2")" == "$2 does match $1" || exit 1
|
||||
test "$(httrack -O /dev/null -#0 "$1" "$2")" == "$2 does match $1" || exit 1
|
||||
}
|
||||
nomatch() {
|
||||
test "$(httrack -O /dev/null -#0 "$1" "$2")" == "$2 does NOT match $1" || exit 1
|
||||
test "$(httrack -O /dev/null -#0 "$1" "$2")" == "$2 does NOT match $1" || exit 1
|
||||
}
|
||||
|
||||
# bare star matches everything
|
||||
@@ -67,7 +67,7 @@ nomatch '*[\[]' 'a'
|
||||
# filter guide claims (GitHub #148); it parses as the class {'[','\'} followed
|
||||
# by a trailing literal ']'. These assertions document the current (buggy)
|
||||
# behavior so any future matcher fix is a deliberate, visible change.
|
||||
nomatch '*[\[\]]' '[' # not matched, despite the docs
|
||||
match '*[\[\]]' ']' # only via the empty class-match + trailing ']'
|
||||
match '*[\[\]]' '[]' # one of {'[','\'} then the trailing ']'
|
||||
nomatch '*[\[\]]' '[' # not matched, despite the docs
|
||||
match '*[\[\]]' ']' # only via the empty class-match + trailing ']'
|
||||
match '*[\[\]]' '[]' # one of {'[','\'} then the trailing ']'
|
||||
nomatch '*[\[\]]' '[]x'
|
||||
|
||||
@@ -7,10 +7,10 @@ set -euo pipefail
|
||||
# -#2 <path> prints "<path> is '<mime>'" then "and its local type is '.<ext>'".
|
||||
|
||||
mime() {
|
||||
test "$(httrack -O /dev/null -#2 "$1" | head -1)" == "$1 is '$2'" || exit 1
|
||||
test "$(httrack -O /dev/null -#2 "$1" | head -1)" == "$1 is '$2'" || exit 1
|
||||
}
|
||||
unknown() {
|
||||
test "$(httrack -O /dev/null -#2 "$1" | head -1)" == "$1 is of an unknown MIME type" || exit 1
|
||||
test "$(httrack -O /dev/null -#2 "$1" | head -1)" == "$1 is of an unknown MIME type" || exit 1
|
||||
}
|
||||
|
||||
mime '/a/b.html' 'text/html'
|
||||
|
||||
@@ -220,4 +220,107 @@ crawl "$site5/main.css" "$out5"
|
||||
found "good.css" "$out5"
|
||||
notfound "trunc" "$out5"
|
||||
|
||||
# Offset-0 underflow (#396): a token at the buffer start makes the detector's
|
||||
# word-boundary guard read *(html-1) one byte early (aborts under ASan). The
|
||||
# url() target is still captured; here it just must not underflow.
|
||||
site6="$tmp/parse-off0"
|
||||
mkdir -p "$site6"
|
||||
printf 'body{}\n' >"$site6/off0.css"
|
||||
printf 'url(off0.css)\n' >"$site6/main.css"
|
||||
out6="$tmp/parse-off0-out"
|
||||
crawl "$site6/main.css" "$out6"
|
||||
found "off0.css" "$out6"
|
||||
|
||||
# XMLHttpRequest.open(method, url) (#218): the first argument is an HTTP method,
|
||||
# not a URL. Without the fix "GET" is captured as a link and fetched (the offline
|
||||
# fixture saves a bare file named GET; a live server mangles it to GET.html).
|
||||
# window.open(url) detection must be unaffected.
|
||||
site7="$tmp/xhropen"
|
||||
mkdir -p "$site7"
|
||||
gif "$site7/winopen.gif"
|
||||
cat >"$site7/index.html" <<EOF
|
||||
<html><body><script>
|
||||
var x = new XMLHttpRequest();
|
||||
x.open("GET", "ajax_info.txt");
|
||||
var y = new XMLHttpRequest();
|
||||
y.open("Post", "submit.cgi");
|
||||
window.open("file://$site7/winopen.gif");
|
||||
</script></body></html>
|
||||
EOF
|
||||
out7="$tmp/xhropen-out"
|
||||
crawl "$site7/index.html" "$out7"
|
||||
# negative control: without the fix a file named exactly GET is downloaded
|
||||
notfound "GET" "$out7"
|
||||
# methods are matched case-insensitively (XHR spec normalizes them): a mixed-case
|
||||
# method is rejected too, so a file named Post must not appear either
|
||||
notfound "Post" "$out7"
|
||||
# regression guard: window.open(url) is still detected, so its absolute URL is
|
||||
# rewritten to a local link. The rewrite only happens if the parser saw it, so
|
||||
# these two assertions fail if .open detection broke (not a trivial --near save).
|
||||
saved7=$(savedhtml "$out7")
|
||||
test -n "$saved7" || ! echo "FAIL: saved xhr page not found" || exit 1
|
||||
grep -Fq 'window.open("winopen.gif")' "$saved7" ||
|
||||
! echo "FAIL #218: window.open(url) no longer detected/rewritten" || exit 1
|
||||
! grep -Fq 'window.open("file://' "$saved7" ||
|
||||
! echo "FAIL #218: window.open URL left absolute (not rewritten)" || exit 1
|
||||
|
||||
# Parens in an unquoted url(...) (#163): the source %28/%29 decode to literal
|
||||
# '(' ')' in the saved name, but a literal ')' in the rewritten url() closes the
|
||||
# token early, so they must stay encoded. Negative control: without the fix the
|
||||
# %281%29 greps fail (parens are RFC2396 "mark" chars the escaper leaves alone).
|
||||
site8="$tmp/cssparens"
|
||||
mkdir -p "$site8"
|
||||
for f in 'img (1).gif' 'a(b)c(1).gif' 'q (4).gif'; do gif "$site8/$f"; done
|
||||
cat >"$site8/style.css" <<'EOF'
|
||||
.a { background: url(img%20%281%29.gif); }
|
||||
.b { background: url(a%28b%29c%281%29.gif); }
|
||||
.c { background: url("q%20%284%29.gif"); }
|
||||
EOF
|
||||
out8="$tmp/cssparens-out"
|
||||
crawl "$site8/style.css" "$out8"
|
||||
found "img (1).gif" "$out8"
|
||||
found "a(b)c(1).gif" "$out8"
|
||||
found "q (4).gif" "$out8"
|
||||
css8=$(find "$out8" -type f -path '*/file/*' -name style.css -print -quit)
|
||||
test -n "$css8" || ! echo "FAIL: saved style.css not found" || exit 1
|
||||
grep -Fq 'url(img%20%281%29.gif)' "$css8" ||
|
||||
! echo "FAIL #163: parens in unquoted url() not percent-encoded on rewrite" || exit 1
|
||||
grep -Fq 'url(a%28b%29c%281%29.gif)' "$css8" ||
|
||||
! echo "FAIL #163: not every paren in a url() was percent-encoded" || exit 1
|
||||
grep -Fq 'url("q%20%284%29.gif")' "$css8" ||
|
||||
! echo "FAIL #163: quoted url() altered or parens left literal on rewrite" || exit 1
|
||||
|
||||
# The url() detector is not CSS-specific: <script> and inline style= get the
|
||||
# same encoding, but ordinary href/src (ending_p is the quote, not ')') keep
|
||||
# literal parens -- the attribute checks guard the gate against over-firing.
|
||||
site9="$tmp/urlparens"
|
||||
mkdir -p "$site9"
|
||||
for f in 'js (1).gif' 'inl (2).gif' 'asrc (3).gif' 'ahref (4).gif'; do gif "$site9/$f"; done
|
||||
cat >"$site9/index.html" <<EOF
|
||||
<html><body>
|
||||
<script>var bg = "url(js%20%281%29.gif)";</script>
|
||||
<div style="background-image:url(inl%20%282%29.gif)"></div>
|
||||
<img src="asrc%20%283%29.gif">
|
||||
<a href="ahref%20%284%29.gif">link</a>
|
||||
</body></html>
|
||||
EOF
|
||||
out9="$tmp/urlparens-out"
|
||||
crawl "$site9/index.html" "$out9"
|
||||
saved9=$(savedhtml "$out9")
|
||||
test -n "$saved9" || ! echo "FAIL: saved urlparens page not found" || exit 1
|
||||
# rewrite-only: the JS-string asset is not queued for download
|
||||
grep -Fq 'url(js%20%281%29.gif)' "$saved9" ||
|
||||
! echo "FAIL #163: parens in <script> url() not percent-encoded" || exit 1
|
||||
found "inl (2).gif" "$out9"
|
||||
grep -Fq 'url(inl%20%282%29.gif)' "$saved9" ||
|
||||
! echo "FAIL #163: parens in inline style url() not percent-encoded" || exit 1
|
||||
found "asrc (3).gif" "$out9"
|
||||
found "ahref (4).gif" "$out9"
|
||||
grep -Fq 'src="asrc%20(3).gif"' "$saved9" ||
|
||||
! echo "FAIL #163: parens in a plain src attribute were wrongly encoded" || exit 1
|
||||
grep -Fq 'href="ahref%20(4).gif"' "$saved9" ||
|
||||
! echo "FAIL #163: parens in a plain href attribute were wrongly encoded" || exit 1
|
||||
! grep -Eq '(src|href)="[^"]*%28' "$saved9" ||
|
||||
! echo "FAIL #163: gate over-fired onto a non-url() attribute link" || exit 1
|
||||
|
||||
exit 0
|
||||
|
||||
68
tests/01_engine-relative.test
Executable file
68
tests/01_engine-relative.test
Executable file
@@ -0,0 +1,68 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# lienrelatif (build relative path) + ident_url_relatif (resolve a link, collapse
|
||||
# ./ and ../). Regression net for #137/#162; expected values hand-computed.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Check the relative path from <curr>'s directory to <link>.
# Arguments: $1 - link, $2 - current document, $3 - expected relative path
# Outputs:   a "FAIL ..." line on stdout when the check does not hold
# Exits:     1 on mismatch or when httrack itself fails
rel() {
  local got
  # Report a failing httrack explicitly: under `set -e` the bare assignment
  # would abort the script without saying which case broke.
  got=$(httrack -O /dev/null -#l "$1" "$2") || {
    echo "FAIL rel($1, $2): httrack exited with status $?"
    exit 1
  }
  test "$got" == "relative=$3" || {
    echo "FAIL rel($1, $2): got '$got' want 'relative=$3'"
    exit 1
  }
}
|
||||
|
||||
# Resolve <link> against origin <adr>/<fil> and compare with the expected
# "adr=.. fil=.." (or "error=..") line printed by httrack.
# Arguments: $1 - link, $2 - origin address, $3 - origin file, $4 - expected output
# Outputs:   a "FAIL ..." line on stdout when the check does not hold
# Exits:     1 on mismatch or when httrack itself fails
ident() {
  local got
  # Report a failing httrack explicitly: under `set -e` the bare assignment
  # would abort the script without saying which case broke.
  got=$(httrack -O /dev/null -#i "$1" "$2" "$3") || {
    echo "FAIL ident($1, $2, $3): httrack exited with status $?"
    exit 1
  }
  test "$got" == "$4" || {
    echo "FAIL ident($1, $2, $3): got '$got' want '$4'"
    exit 1
  }
}
|
||||
|
||||
### lienrelatif
|
||||
|
||||
rel 'dir/page.html' 'dir/index.html' 'page.html'
|
||||
rel 'dir/page.html' 'dir/page.html' 'page.html' # self-link
|
||||
rel 'a.html' 'dir/index.html' '../a.html'
|
||||
rel 'x.html' 'a/b/c/index.html' '../../../x.html'
|
||||
rel 'h/a/x.jpg' 'h/a/sub/page.html' '../x.jpg'
|
||||
rel 'a/b/c/x.html' 'index.html' 'a/b/c/x.html'
|
||||
rel 'h/sub/x.jpg' 'h/page.html' 'sub/x.jpg'
|
||||
rel 'h/dir2/x.jpg' 'h/dir1/page.html' '../dir2/x.jpg' # sibling dir
|
||||
rel 'h/bc/x.jpg' 'h/b/page.html' '../bc/x.jpg' # b/bc prefix trap
|
||||
rel 'h/b/x.jpg' 'h/bc/page.html' '../b/x.jpg'
|
||||
rel 'h2/img/x.jpg' 'h1/p/page.html' '../../h2/img/x.jpg' # cross-host
|
||||
rel 'img.cdn/photo.jpg' 'www.site/articles/2020/post.html' '../../../img.cdn/photo.jpg'
|
||||
rel 'h/a/' 'h/a/sub/page.html' '../' # link is ancestor dir
|
||||
rel 'x.html' 'page.html' 'x.html'
|
||||
rel 'dir/page.html?x=1' 'dir/index.html?y=2' 'page.html' # ? stripped
|
||||
|
||||
### ident_url_relatif
|
||||
|
||||
ident 'img.gif' 'www.foo.com' '/dir/page.html' 'adr=www.foo.com fil=/dir/img.gif'
|
||||
ident 'sub/img.gif' 'www.foo.com' '/dir/page.html' 'adr=www.foo.com fil=/dir/sub/img.gif'
|
||||
ident '/img.gif' 'www.foo.com' '/dir/page.html' 'adr=www.foo.com fil=/img.gif'
|
||||
# embedded ../ collapses (#137)
|
||||
ident '../img.gif' 'www.foo.com' '/dir/sub/page.html' 'adr=www.foo.com fil=/dir/img.gif'
|
||||
ident 'sub/../logo.png' 'www.foo.com' '/articles/2020/post.html' 'adr=www.foo.com fil=/articles/2020/logo.png'
|
||||
ident '../../pix/sub/../logo.png' 'www.foo.com' '/articles/2020/post.html' 'adr=www.foo.com fil=/pix/logo.png'
|
||||
ident '../../../../x.gif' 'www.foo.com' '/a/b/page.html' 'adr=www.foo.com fil=/x.gif' # above-root clamp
|
||||
ident '?page=2' 'www.foo.com' '/dir/index.html?old=1' 'adr=www.foo.com fil=/dir/index.html?page=2'
|
||||
ident 'http://other.com/a/b/../c/index.html' 'www.foo.com' '/p.html' 'adr=other.com fil=/a/c/index.html'
|
||||
# file:// collapses ../ like the other schemes; traversal contained, // authority kept
|
||||
ident 'file:///var/data/pix/sub/../logo.png' 'www.foo.com' '/p.html' 'adr=file:// fil=/var/data/pix/logo.png'
|
||||
ident 'file:///a/b/c/../../d/e.gif' 'www.foo.com' '/p.html' 'adr=file:// fil=/a/d/e.gif'
|
||||
ident 'file:///a/../../b' 'www.foo.com' '/p.html' 'adr=file:// fil=/b'
|
||||
ident 'file://srv/share/../x' 'www.foo.com' '/p.html' 'adr=file:// fil=//srv/x'
|
||||
ident 'mailto:foo@bar.com' 'www.foo.com' '/p.html' 'error=-1' # unsupported scheme
|
||||
ident 'javascript:void(0)' 'www.foo.com' '/p.html' 'error=-1'
|
||||
|
||||
echo "OK"
|
||||
@@ -5,7 +5,7 @@ set -euo pipefail
|
||||
|
||||
# path simplify engine (fil_simplifie): collapses ./ and ../ segments.
# Arguments: $1 - path to simplify, $2 - expected simplified path
# Outputs:   a "FAIL ..." line on stdout when the result differs
# Exits:     1 on mismatch
simp() {
  local got
  # Only the printed result is compared; httrack's own exit status is
  # deliberately ignored here.
  got=$(httrack -O /dev/null -#1 "$1") || true
  test "$got" == "simplified=$2" || {
    echo "FAIL simp($1): got '$got' want 'simplified=$2'"
    exit 1
  }
}
|
||||
|
||||
simp './foo/bar/' 'foo/bar/'
|
||||
@@ -26,3 +26,17 @@ simp './a/../../b' 'b'
|
||||
|
||||
# empty segments ('//') are not dot-segments and are preserved, per RFC 3986
|
||||
simp 'a//b' 'a//b'
|
||||
simp 'a//b/../c' 'a//c'
|
||||
|
||||
# absolute paths keep the leading '/'; above-root '..' is clamped to it
|
||||
simp '/a/../b' '/b'
|
||||
simp '/a/../../b' '/b'
|
||||
simp '/../x' '/x'
|
||||
|
||||
# collapses to nothing -> './' (relative) or '/' (absolute)
|
||||
simp '..' './'
|
||||
simp 'a/..' './'
|
||||
simp '/' '/'
|
||||
|
||||
simp 'a/b/..' 'a/' # trailing bare '..'
|
||||
simp 'a/../b?x=../y' 'b?x=../y' # '?' freezes simplification
|
||||
|
||||
@@ -21,9 +21,15 @@ test "$out" == "strsafe: OK" || exit 1
|
||||
# the bounded macro aborts (non-zero exit), so don't let set -e trip on it
|
||||
err=$(httrack -#8 overflow "this string is far too long for the buffer" 2>&1) || true
|
||||
case "$err" in
|
||||
*"strsafe: NOT aborted"*) echo "over-capacity write was NOT caught" >&2; exit 1 ;;
|
||||
*"overflow while copying"*) ;;
|
||||
*) echo "expected htssafe overflow abort, got: $err" >&2; exit 1 ;;
|
||||
*"strsafe: NOT aborted"*)
|
||||
echo "over-capacity write was NOT caught" >&2
|
||||
exit 1
|
||||
;;
|
||||
*"overflow while copying"*) ;;
|
||||
*)
|
||||
echo "expected htssafe overflow abort, got: $err" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
# Same guarantee for the htsbuff builder. The source is exactly the buffer
|
||||
@@ -32,7 +38,13 @@ esac
|
||||
# aborted"). Match the specific htsbuff abort message, not just any assert.
|
||||
err=$(httrack -#8 overflow-buff "abcd" 2>&1) || true
|
||||
case "$err" in
|
||||
*"strsafe: NOT aborted"*) echo "htsbuff over-capacity write was NOT caught" >&2; exit 1 ;;
|
||||
*"htsbuff append overflow"*) ;;
|
||||
*) echo "expected htsbuff overflow abort, got: $err" >&2; exit 1 ;;
|
||||
*"strsafe: NOT aborted"*)
|
||||
echo "htsbuff over-capacity write was NOT caught" >&2
|
||||
exit 1
|
||||
;;
|
||||
*"htsbuff append overflow"*) ;;
|
||||
*)
|
||||
echo "expected htsbuff overflow abort, got: $err" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
@@ -3,6 +3,6 @@
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
|
||||
bash crawl-test.sh --errors 0 --files 5 httrack http://ut.httrack.com/simple/basic.html
|
||||
|
||||
@@ -3,10 +3,10 @@
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
|
||||
bash crawl-test.sh --errors 0 --files 3 \
|
||||
--found ut.httrack.com/cookies/third.html \
|
||||
--found ut.httrack.com/cookies/second.html \
|
||||
--found ut.httrack.com/cookies/entrance.html \
|
||||
httrack http://ut.httrack.com/cookies/entrance.php
|
||||
--found ut.httrack.com/cookies/third.html \
|
||||
--found ut.httrack.com/cookies/second.html \
|
||||
--found ut.httrack.com/cookies/entrance.html \
|
||||
httrack http://ut.httrack.com/cookies/entrance.php
|
||||
|
||||
@@ -3,21 +3,21 @@
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
|
||||
# unicode tests
|
||||
bash crawl-test.sh \
|
||||
--errors 1 --files 5 \
|
||||
--found 'café.ut.httrack.com/unicode-links/café3860.html' \
|
||||
--found 'café.ut.httrack.com/unicode-links/café30f4.html' \
|
||||
--found 'café.ut.httrack.com/unicode-links/café5e1f.html' \
|
||||
--found 'café.ut.httrack.com/unicode-links/café7b30.html' \
|
||||
httrack 'http://ut.httrack.com/unicode-links/idna.html' \
|
||||
'+*.ut.httrack.com/*' --robots=0
|
||||
--errors 1 --files 5 \
|
||||
--found 'café.ut.httrack.com/unicode-links/café3860.html' \
|
||||
--found 'café.ut.httrack.com/unicode-links/café30f4.html' \
|
||||
--found 'café.ut.httrack.com/unicode-links/café5e1f.html' \
|
||||
--found 'café.ut.httrack.com/unicode-links/café7b30.html' \
|
||||
httrack 'http://ut.httrack.com/unicode-links/idna.html' \
|
||||
'+*.ut.httrack.com/*' --robots=0
|
||||
|
||||
# unicode tests (bogus links)
|
||||
bash crawl-test.sh \
|
||||
--errors 0 --files 1 \
|
||||
--found 'ut.httrack.com/unicode-links/idna_bogus.html' \
|
||||
httrack 'http://ut.httrack.com/unicode-links/idna_bogus.html' \
|
||||
'-*' --robots=0
|
||||
--errors 0 --files 1 \
|
||||
--found 'ut.httrack.com/unicode-links/idna_bogus.html' \
|
||||
httrack 'http://ut.httrack.com/unicode-links/idna_bogus.html' \
|
||||
'-*' --robots=0
|
||||
|
||||
@@ -3,67 +3,67 @@
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
|
||||
# unicode tests
|
||||
bash crawl-test.sh \
|
||||
--errors 1 --files 10 \
|
||||
--found ut.httrack.com/unicode-links/caf%a91bce.html \
|
||||
--found ut.httrack.com/unicode-links/café30f4.html \
|
||||
--found ut.httrack.com/unicode-links/café3860.html \
|
||||
--found ut.httrack.com/unicode-links/café463e.html \
|
||||
--found ut.httrack.com/unicode-links/café5e1f.html \
|
||||
--found ut.httrack.com/unicode-links/café7b30.html \
|
||||
--found ut.httrack.com/unicode-links/café8007.html \
|
||||
--found ut.httrack.com/unicode-links/café9fa8.html \
|
||||
--found ut.httrack.com/unicode-links/caféae52.html \
|
||||
--found ut.httrack.com/unicode-links/caféc009.html \
|
||||
--found ut.httrack.com/unicode-links/utf8.html \
|
||||
httrack http://ut.httrack.com/unicode-links/utf8.html
|
||||
--errors 1 --files 10 \
|
||||
--found ut.httrack.com/unicode-links/caf%a91bce.html \
|
||||
--found ut.httrack.com/unicode-links/café30f4.html \
|
||||
--found ut.httrack.com/unicode-links/café3860.html \
|
||||
--found ut.httrack.com/unicode-links/café463e.html \
|
||||
--found ut.httrack.com/unicode-links/café5e1f.html \
|
||||
--found ut.httrack.com/unicode-links/café7b30.html \
|
||||
--found ut.httrack.com/unicode-links/café8007.html \
|
||||
--found ut.httrack.com/unicode-links/café9fa8.html \
|
||||
--found ut.httrack.com/unicode-links/caféae52.html \
|
||||
--found ut.httrack.com/unicode-links/caféc009.html \
|
||||
--found ut.httrack.com/unicode-links/utf8.html \
|
||||
httrack http://ut.httrack.com/unicode-links/utf8.html
|
||||
|
||||
bash crawl-test.sh \
|
||||
--errors 4 --files 7 \
|
||||
--found ut.httrack.com/unicode-links/café3860.html \
|
||||
--found ut.httrack.com/unicode-links/café9fa8.html \
|
||||
--found ut.httrack.com/unicode-links/café30f4.html \
|
||||
--found ut.httrack.com/unicode-links/café5e1f.html \
|
||||
--found ut.httrack.com/unicode-links/café7b30.html \
|
||||
--found ut.httrack.com/unicode-links/café8007.html \
|
||||
--found ut.httrack.com/unicode-links/caf%e939bd.html \
|
||||
--found ut.httrack.com/unicode-links/caf%e9ae52.html \
|
||||
--found ut.httrack.com/unicode-links/caféaec2.html \
|
||||
--found ut.httrack.com/unicode-links/caféfad6.html \
|
||||
--found ut.httrack.com/unicode-links/default.html \
|
||||
httrack http://ut.httrack.com/unicode-links/default.html
|
||||
--errors 4 --files 7 \
|
||||
--found ut.httrack.com/unicode-links/café3860.html \
|
||||
--found ut.httrack.com/unicode-links/café9fa8.html \
|
||||
--found ut.httrack.com/unicode-links/café30f4.html \
|
||||
--found ut.httrack.com/unicode-links/café5e1f.html \
|
||||
--found ut.httrack.com/unicode-links/café7b30.html \
|
||||
--found ut.httrack.com/unicode-links/café8007.html \
|
||||
--found ut.httrack.com/unicode-links/caf%e939bd.html \
|
||||
--found ut.httrack.com/unicode-links/caf%e9ae52.html \
|
||||
--found ut.httrack.com/unicode-links/caféaec2.html \
|
||||
--found ut.httrack.com/unicode-links/caféfad6.html \
|
||||
--found ut.httrack.com/unicode-links/default.html \
|
||||
httrack http://ut.httrack.com/unicode-links/default.html
|
||||
|
||||
bash crawl-test.sh \
|
||||
--errors 2 --files 9 \
|
||||
--found ut.httrack.com/unicode-links/caf%a9ae52.html \
|
||||
--found ut.httrack.com/unicode-links/caf%a9bf59.html \
|
||||
--found ut.httrack.com/unicode-links/café30f4.html \
|
||||
--found ut.httrack.com/unicode-links/café3860.html \
|
||||
--found ut.httrack.com/unicode-links/café5e1f.html \
|
||||
--found ut.httrack.com/unicode-links/café647f.html \
|
||||
--found ut.httrack.com/unicode-links/café7b30.html \
|
||||
--found ut.httrack.com/unicode-links/café8007.html \
|
||||
--found ut.httrack.com/unicode-links/caféaec2.html \
|
||||
--found ut.httrack.com/unicode-links/caféfad6.html \
|
||||
--found ut.httrack.com/unicode-links/iso88591.html \
|
||||
httrack http://ut.httrack.com/unicode-links/iso88591.html
|
||||
--errors 2 --files 9 \
|
||||
--found ut.httrack.com/unicode-links/caf%a9ae52.html \
|
||||
--found ut.httrack.com/unicode-links/caf%a9bf59.html \
|
||||
--found ut.httrack.com/unicode-links/café30f4.html \
|
||||
--found ut.httrack.com/unicode-links/café3860.html \
|
||||
--found ut.httrack.com/unicode-links/café5e1f.html \
|
||||
--found ut.httrack.com/unicode-links/café647f.html \
|
||||
--found ut.httrack.com/unicode-links/café7b30.html \
|
||||
--found ut.httrack.com/unicode-links/café8007.html \
|
||||
--found ut.httrack.com/unicode-links/caféaec2.html \
|
||||
--found ut.httrack.com/unicode-links/caféfad6.html \
|
||||
--found ut.httrack.com/unicode-links/iso88591.html \
|
||||
httrack http://ut.httrack.com/unicode-links/iso88591.html
|
||||
|
||||
bash crawl-test.sh \
|
||||
--errors 4 --files 9 \
|
||||
--found ut.httrack.com/unicode-links/caf%a8%a6c72a.html \
|
||||
--found ut.httrack.com/unicode-links/caf%a9bf59.html \
|
||||
--found ut.httrack.com/unicode-links/café8007.html \
|
||||
--found ut.httrack.com/unicode-links/cafébf43.html \
|
||||
--found ut.httrack.com/unicode-links/cafédcd8.html \
|
||||
--found ut.httrack.com/unicode-links/café2461.html \
|
||||
--found ut.httrack.com/unicode-links/caf%a8%a61bce.html \
|
||||
--found ut.httrack.com/unicode-links/caf%a9ae52.html \
|
||||
--found ut.httrack.com/unicode-links/café7b30.html \
|
||||
--found ut.httrack.com/unicode-links/café30f4.html \
|
||||
--found ut.httrack.com/unicode-links/café5e1f.html \
|
||||
--found ut.httrack.com/unicode-links/café3860.html \
|
||||
--found ut.httrack.com/unicode-links/gb18030.html \
|
||||
httrack http://ut.httrack.com/unicode-links/gb18030.html
|
||||
--errors 4 --files 9 \
|
||||
--found ut.httrack.com/unicode-links/caf%a8%a6c72a.html \
|
||||
--found ut.httrack.com/unicode-links/caf%a9bf59.html \
|
||||
--found ut.httrack.com/unicode-links/café8007.html \
|
||||
--found ut.httrack.com/unicode-links/cafébf43.html \
|
||||
--found ut.httrack.com/unicode-links/cafédcd8.html \
|
||||
--found ut.httrack.com/unicode-links/café2461.html \
|
||||
--found ut.httrack.com/unicode-links/caf%a8%a61bce.html \
|
||||
--found ut.httrack.com/unicode-links/caf%a9ae52.html \
|
||||
--found ut.httrack.com/unicode-links/café7b30.html \
|
||||
--found ut.httrack.com/unicode-links/café30f4.html \
|
||||
--found ut.httrack.com/unicode-links/café5e1f.html \
|
||||
--found ut.httrack.com/unicode-links/café3860.html \
|
||||
--found ut.httrack.com/unicode-links/gb18030.html \
|
||||
httrack http://ut.httrack.com/unicode-links/gb18030.html
|
||||
|
||||
@@ -3,10 +3,10 @@
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
|
||||
# http://code.google.com/p/httrack/issues/detail?id=42&can=1
|
||||
# we expect only 2 errors because the other links are too long (to be modified if suitable)
|
||||
bash crawl-test.sh --errors 2 --files 1 \
|
||||
--found ut.httrack.com/overflow/longquerywithaccents.html \
|
||||
httrack http://ut.httrack.com/overflow/longquerywithaccents.php
|
||||
--found ut.httrack.com/overflow/longquerywithaccents.html \
|
||||
httrack http://ut.httrack.com/overflow/longquerywithaccents.php
|
||||
|
||||
@@ -3,45 +3,45 @@
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
|
||||
# http://code.google.com/p/httrack/issues/detail?id=4&can=1
|
||||
bash crawl-test.sh --errors 0 --files 4 \
|
||||
--found ut.httrack.com/parsing/back5e1f.gif \
|
||||
--found ut.httrack.com/parsing/events.html \
|
||||
--found ut.httrack.com/parsing/fade230f4.gif \
|
||||
--found ut.httrack.com/parsing/fade3860.gif \
|
||||
httrack http://ut.httrack.com/parsing/events.html
|
||||
--found ut.httrack.com/parsing/back5e1f.gif \
|
||||
--found ut.httrack.com/parsing/events.html \
|
||||
--found ut.httrack.com/parsing/fade230f4.gif \
|
||||
--found ut.httrack.com/parsing/fade3860.gif \
|
||||
httrack http://ut.httrack.com/parsing/events.html
|
||||
|
||||
# http://code.google.com/p/httrack/issues/detail?id=2&can=1
|
||||
bash crawl-test.sh --errors 0 --files 3 \
|
||||
--found ut.httrack.com/parsing/background-image.css \
|
||||
--found ut.httrack.com/parsing/background-image.html \
|
||||
--found ut.httrack.com/parsing/fade.gif \
|
||||
httrack http://ut.httrack.com/parsing/background-image.html
|
||||
--found ut.httrack.com/parsing/background-image.css \
|
||||
--found ut.httrack.com/parsing/background-image.html \
|
||||
--found ut.httrack.com/parsing/fade.gif \
|
||||
httrack http://ut.httrack.com/parsing/background-image.html
|
||||
|
||||
# javascript parsing
|
||||
bash crawl-test.sh --errors 0 --files 3 \
|
||||
--found ut.httrack.com/parsing/back.gif \
|
||||
--found ut.httrack.com/parsing/fade.gif \
|
||||
--found ut.httrack.com/parsing/javascript.html \
|
||||
httrack http://ut.httrack.com/parsing/javascript.html
|
||||
--found ut.httrack.com/parsing/back.gif \
|
||||
--found ut.httrack.com/parsing/fade.gif \
|
||||
--found ut.httrack.com/parsing/javascript.html \
|
||||
httrack http://ut.httrack.com/parsing/javascript.html
|
||||
|
||||
# handling of + before query string
|
||||
bash crawl-test.sh --errors 0 --files 6 \
|
||||
--found ut.httrack.com/parsing/escaping.html \
|
||||
--found "ut.httrack.com/parsing/foo bar30f4.html" \
|
||||
--found "ut.httrack.com/parsing/foo bar5e1f.html" \
|
||||
--found "ut.httrack.com/parsing/foo+bar+plus3860.html" \
|
||||
--found "ut.httrack.com/parsing/foo barae52.html" \
|
||||
--found "ut.httrack.com/parsing/foo bar7b30.html" \
|
||||
httrack http://ut.httrack.com/parsing/escaping.html
|
||||
--found ut.httrack.com/parsing/escaping.html \
|
||||
--found "ut.httrack.com/parsing/foo bar30f4.html" \
|
||||
--found "ut.httrack.com/parsing/foo bar5e1f.html" \
|
||||
--found "ut.httrack.com/parsing/foo+bar+plus3860.html" \
|
||||
--found "ut.httrack.com/parsing/foo barae52.html" \
|
||||
--found "ut.httrack.com/parsing/foo bar7b30.html" \
|
||||
httrack http://ut.httrack.com/parsing/escaping.html
|
||||
|
||||
# handling of # encoded in filename
|
||||
# see http://code.google.com/p/httrack/issues/detail?id=25
|
||||
bash crawl-test.sh --errors 2 --files 4 \
|
||||
--found "ut.httrack.com/parsing/escaping2.html" \
|
||||
--found "ut.httrack.com/parsing/++foo++bar++plus++.html" \
|
||||
--found "ut.httrack.com/parsing/foo#bar#.html" \
|
||||
--found "ut.httrack.com/parsing/foo bar.html" \
|
||||
httrack http://ut.httrack.com/parsing/escaping2.html
|
||||
--found "ut.httrack.com/parsing/escaping2.html" \
|
||||
--found "ut.httrack.com/parsing/++foo++bar++plus++.html" \
|
||||
--found "ut.httrack.com/parsing/foo#bar#.html" \
|
||||
--found "ut.httrack.com/parsing/foo bar.html" \
|
||||
httrack http://ut.httrack.com/parsing/escaping2.html
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
|
||||
if test "${HTTPS_SUPPORT:-}" == "no"; then
|
||||
echo "no https support compiled, skipping"
|
||||
exit 77
|
||||
echo "no https support compiled, skipping"
|
||||
exit 77
|
||||
fi
|
||||
|
||||
bash crawl-test.sh --errors 0 --files 5 httrack https://ut.httrack.com/simple/basic.html
|
||||
|
||||
@@ -35,6 +35,7 @@ TESTS = \
|
||||
01_engine-mime.test \
|
||||
01_engine-parse.test \
|
||||
01_engine-rcfile.test \
|
||||
01_engine-relative.test \
|
||||
01_engine-simplify.test \
|
||||
01_engine-strsafe.test \
|
||||
02_manpage-regen.test \
|
||||
|
||||
@@ -6,39 +6,39 @@
|
||||
|
||||
# do not enable online tests (./configure --disable-online-unit-tests)
|
||||
# Decide whether the online unit tests may run.
# Exit status: 0 when they are enabled or the test host is reachable,
# 1 otherwise (the reason is printed on stderr).
# ONLINE_UNIT_TESTS may be unset; it is then treated as "auto".
if test "${ONLINE_UNIT_TESTS:-}" == "no"; then
  echo "online tests are disabled" >&2
  exit 1

# enable online tests (--enable-online-unit-tests)
elif test "${ONLINE_UNIT_TESTS:-}" == "yes"; then
  exit 0

# check if online tests are reachable
else

  # test url
  url=http://ut.httrack.com/enabled

  # cache file name (relative to the current directory)
  cache=check-network_sh.cache

  # cached result ?
  if test -f "$cache"; then
    if grep -q "ok" "$cache"; then
      exit 0
    else
      echo "online tests are disabled (cached)" >&2
      exit 1
    fi

  # fetch single file; the probe's own output is irrelevant, only its status
  elif bash crawl-test.sh --errors 0 --files 1 httrack --timeout=3 --max-time=3 "$url" 2>/dev/null >/dev/null; then
    echo "ok" >"$cache"
    exit 0
  else
    echo "error" >"$cache"
    echo "online tests are disabled (auto)" >&2
    exit 1
  fi

fi
|
||||
|
||||
@@ -2,185 +2,184 @@
|
||||
#
|
||||
|
||||
# Print a warning on stderr, prefixed with "** ".
# Arguments: the message words, joined with spaces
# Returns:   always 0, so callers can chain `|| ! warning "..." || return 1`
function warning {
  echo "** $*" >&2
  return 0
}
|
||||
|
||||
# Print a warning built from all arguments, then terminate with status 1.
function die {
  warning "$*"
  exit 1
}
|
||||
|
||||
# Print the arguments on stderr, but only when the global $verbose is
# non-empty. Returns 0 either way.
function debug {
  if test -n "$verbose"; then
    echo "$*" >&2
  fi
}
|
||||
|
||||
# Print "[message] .." followed by a tab on stderr, without a newline.
# The message is passed as data, never as the printf format, so a '%' or
# backslash in it is printed literally.
function info {
  printf '[%s] ..\t' "$*" >&2
}
|
||||
|
||||
# Print the arguments on stderr, followed by a newline.
function result {
  echo "$*" >&2
}
|
||||
|
||||
# Exit/signal handler: remove the working directory and kill a running crawl.
# Globals: tmpdir   (read)    - directory to remove; skipped when empty or missing
#          nopurge  (read)    - when non-empty, the directory is kept
#          crawlpid (written) - pid of the background crawler; cleared after kill
function cleanup {
  debug "cleaning function called"
  if test -n "$tmpdir" && test -d "$tmpdir" && test -z "$nopurge"; then
    debug "cleaning up $tmpdir"
    rm -rf "$tmpdir"
  fi
  if test -n "$crawlpid"; then
    debug "killing $crawlpid"
    kill -9 "$crawlpid"
    crawlpid=
  fi
}
|
||||
|
||||
# Print a one-line usage message (the script name) on stdout.
function usage {
  cat <<EOF
usage: $0
EOF
}
|
||||
|
||||
# Compare an expected value with an actual one and report the outcome.
# Arguments: $1 - label of the check, $2 - expected value, $3 - actual value
# Exits:     1 when the values differ (after reporting both)
function assert_equals {
  info "$1"
  if test "$2" != "$3"; then
    result "expected '$2', got '$3'"
    exit 1
  else
    result "OK ($2)"
  fi
}
|
||||
|
||||
# Run one crawl and audit its result.
#
# Usage: start-crawl [checks...] httrack <httrack arguments...>
# Checks (audited after the crawl, in the order given):
#   --debug             enable debug output
#   --no-purge          keep the crawl directory afterwards
#   --summary           print the "mirror complete" line of hts-log.txt
#   --print-files       list every file below the crawl directory
#   --errors N          require exactly N "Error:" lines in hts-log.txt
#   --files N           require exactly N "files written" in the summary line
#   --found PATH        require PATH to be a regular file in the crawl directory
#   --not-found PATH    require PATH to be absent from the crawl directory
#   --directory PATH    require PATH to be a directory in the crawl directory
#
# Globals read:    tmpdir, http_proxy
# Globals written: pos, verbose, moreargs, tmp, ver, log, crawlpid, result,
#                  nopurge, nFiles, tmpdir (cleared when --no-purge is given)
# Returns: 1 on a usage or start-up error or a non-zero crawler status;
#          a failed check terminates the script with status 1.
function start-crawl {
  # parse args: validate the checks and locate the first httrack argument
  pos=1
  while test "$#" -ge "$pos"; do
    case "${!pos}" in
    --debug)
      verbose=1
      ;;
    --no-purge | --summary | --print-files) ;;
    --errors | --files | --found | --not-found | --directory)
      pos=$((pos + 1))
      # warning returns 0, so it must be negated for the return to happen
      test "$#" -ge "$pos" || ! warning "missing argument" || return 1
      ;;
    httrack)
      pos=$((pos + 1))
      break
      ;;
    *)
      warning "unrecognized option ${!pos}"
      return 1
      ;;
    esac
    pos=$((pos + 1))
  done
  debug "remaining args: ${*:pos}"

  # ut/ won't exceed 2 minutes
  moreargs=(--quiet --max-time=120 --timeout=30 --connection-per-second=5)

  # proxy environment ?
  if test -n "${http_proxy:-}"; then
    moreargs+=(--proxy "$http_proxy")
  fi

  test -n "$tmpdir" || ! warning "no tmpdir" || return 1
  tmp="${tmpdir}/crawl"
  rm -rf "$tmp"
  mkdir "$tmp" || ! warning "could not create $tmp" || return 1

  command -v httrack >/dev/null || ! warning "could not find httrack" || return 1
  ver=$(httrack -O /dev/null --version | sed -e 's/HTTrack version //')
  test -n "$ver" || ! warning "could not run httrack" || return 1

  # start crawl in the background so that cleanup can kill it through crawlpid
  log="${tmp}/log"
  debug starting httrack -O "${tmp}" "${moreargs[@]}" "${@:pos}"
  info "running httrack ${*:pos}"
  httrack -O "${tmp}" --user-agent="httrack $ver ut ($(uname -omrs))" "${moreargs[@]}" "${@:pos}" >"${log}" 2>&1 &
  crawlpid="$!"
  debug "started cralwer on pid $crawlpid"
  wait "$crawlpid"
  result="$?"
  crawlpid=
  test "$result" -eq 0 || ! result "error code $result" || return 1
  result "OK"
  grep -iE "^[0-9\:]*[[:space:]]Error:" "${tmp}/hts-log.txt" >&2

  # now audit
  while test "$#" -gt 0; do
    case "$1" in
    --no-purge)
      nopurge=1
      ;;
    --summary)
      grep -E "^HTTrack Website Copier/[^ ]* mirror complete in " "${tmp}/hts-log.txt"
      ;;
    --print-files)
      find "${tmp}" -mindepth 1 -type f
      ;;
    --errors)
      shift
      assert_equals "checking errors" "$1" "$(grep -iEc "^[0-9\:]*[[:space:]]Error:" "${tmp}/hts-log.txt")"
      ;;
    --found)
      shift
      info "checking for $1"
      if test -f "${tmp}/$1"; then
        result "OK"
      else
        result "not found"
        exit 1
      fi
      ;;
    --not-found)
      shift
      # inverse of --found: the check passes only when nothing exists there
      info "checking for absence of $1"
      if test -e "${tmp}/$1"; then
        result "unexpectedly found"
        exit 1
      else
        result "OK"
      fi
      ;;
    --directory)
      shift
      info "checking for $1"
      if test -d "${tmp}/$1"; then
        result "OK"
      else
        result "not found"
        exit 1
      fi
      ;;
    --files)
      shift
      nFiles=$(grep -E "^HTTrack Website Copier/[^ ]* mirror complete in " "${tmp}/hts-log.txt" |
        sed -e 's/.*[[:space:]]\([^ ]*\)[[:space:]]files written.*/\1/g')
      assert_equals "checking files" "$1" "$nFiles"
      ;;
    httrack)
      break
      ;;
    esac
    shift
  done

  # cleanup; with --no-purge, tmpdir is cleared so cleanup leaves the files alone
  if test -z "$nopurge"; then
    rm -rf "$tmp"
  else
    tmpdir=
  fi
}
|
||||
|
||||
# check args
|
||||
@@ -195,7 +194,7 @@ tmpdir=
|
||||
crawlpid=
|
||||
nopurge=
|
||||
verbose=
|
||||
trap "cleanup" 0 1 2 3 4 5 6 7 8 9 11 13 14 15 16 19 24 25
|
||||
trap cleanup EXIT HUP INT QUIT ILL TRAP ABRT BUS FPE SEGV PIPE ALRM TERM STKFLT XCPU XFSZ
|
||||
|
||||
# working directory
|
||||
tmpdir="${tmptopdir}/httrack_ut.$$"
|
||||
|
||||
@@ -2,19 +2,19 @@
|
||||
#
|
||||
|
||||
# Run every *.test script of the current directory with bash, report each
# outcome on stderr, and exit with the number of failed scripts.
error=0
for i in *.test; do
  if bash "$i"; then
    echo "$i: passed" >&2
  else
    echo "$i: ERROR" >&2
    error=$((error + 1))
  fi
done

if test "$error" -eq 0; then
  echo "all tests passed" >&2
else
  echo "${error} test(s) failed" >&2
fi

# An exit status is taken modulo 256: cap it so that 256 failures cannot be
# reported as success.
exit $((error > 255 ? 255 : error))
|
||||
|
||||
Reference in New Issue
Block a user