mirror of
https://github.com/xroche/httrack.git
synced 2026-06-14 22:33:54 +03:00
Compare commits
3 Commits
parser/loc
...
cleanup/ht
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9c6ff54040 | ||
|
|
4a057514b9 | ||
|
|
d741188980 |
@@ -271,8 +271,11 @@ int optalias_check(int argc, const char *const *argv, int n_arg,
|
||||
*return_argc = 1;
|
||||
if (argv[n_arg][0] == '-')
|
||||
if (argv[n_arg][1] == '-') {
|
||||
char command[1000];
|
||||
char param[1000];
|
||||
/* sized to HTS_CDLMAXSIZE: a long-form option value (--user-agent,
|
||||
--headers, ...) is copied into param, and the value is bounded by the
|
||||
general per-argument check in htscoremain.c (HTS_CDLMAXSIZE) */
|
||||
char command[HTS_CDLMAXSIZE];
|
||||
char param[HTS_CDLMAXSIZE];
|
||||
char addcommand[256];
|
||||
|
||||
/* */
|
||||
|
||||
@@ -201,8 +201,8 @@ HTSEXT_API int catch_url(T_SOC soc, char *url, char *method, char *data) {
|
||||
while(strnotempty(line)) {
|
||||
socinput(soc, line, 1000);
|
||||
treathead(NULL, NULL, NULL, &blkretour, line); // traiter
|
||||
strcatbuff(data, line);
|
||||
strcatbuff(data, "\r\n");
|
||||
strlcatbuff(data, line, CATCH_URL_DATA_SIZE);
|
||||
strlcatbuff(data, "\r\n", CATCH_URL_DATA_SIZE);
|
||||
}
|
||||
// CR/LF final de l'en tête inutile car déja placé via la ligne vide juste au dessus
|
||||
//strcatbuff(data,"\r\n");
|
||||
|
||||
@@ -40,6 +40,9 @@ Please visit our Website: http://www.httrack.com
|
||||
/* Library internal definictions */
|
||||
#ifdef HTS_INTERNAL_BYTECODE
|
||||
|
||||
// Capacity contract for the catch_url() 'data' buffer (32 Kb).
|
||||
#define CATCH_URL_DATA_SIZE 32768
|
||||
|
||||
// Fonctions
|
||||
void socinput(T_SOC soc, char *s, int max);
|
||||
|
||||
|
||||
@@ -140,6 +140,62 @@ static void basic_selftests(void) {
|
||||
md5selftest();
|
||||
}
|
||||
|
||||
/* Self-tests for the htssafe.h bounded string ops (driven by httrack -#8).
|
||||
Returns 0 if every bounded operation behaved correctly, 1 otherwise.
|
||||
The abort-on-overflow guarantee is checked separately by the -#8 "overflow"
|
||||
sub-mode (it aborts the process by design). */
|
||||
static int string_safety_selftests(void) {
|
||||
char buf[8];
|
||||
|
||||
/* strcpybuff into a sized array: exact copy */
|
||||
strcpybuff(buf, "abc");
|
||||
if (strcmp(buf, "abc") != 0)
|
||||
return 1;
|
||||
|
||||
/* strcatbuff append within capacity */
|
||||
strcatbuff(buf, "de");
|
||||
if (strcmp(buf, "abcde") != 0)
|
||||
return 1;
|
||||
|
||||
/* strncatbuff appends at most N source chars */
|
||||
strcpybuff(buf, "ab");
|
||||
strncatbuff(buf, "cdef", 2);
|
||||
if (strcmp(buf, "abcd") != 0)
|
||||
return 1;
|
||||
|
||||
/* strlcpybuff: explicit-capacity copy into a pointer destination, the form
|
||||
the migration moves toward */
|
||||
{
|
||||
char storage[8];
|
||||
char *const p = storage;
|
||||
|
||||
strlcpybuff(p, "hello", sizeof(storage));
|
||||
if (strcmp(p, "hello") != 0)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* strcpybuff into a pointer destination: routes through the unchecked
|
||||
strcpybuff_ptr_ fallback (the path the -#8 warning flags). The warning is
|
||||
intentional here; we only verify the fallback still copies correctly. */
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wattribute-warning"
|
||||
#endif
|
||||
{
|
||||
char storage[8];
|
||||
char *const p = storage;
|
||||
|
||||
strcpybuff(p, "ptr");
|
||||
if (strcmp(p, "ptr") != 0)
|
||||
return 1;
|
||||
}
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hts_main_internal(int argc, char **argv, httrackp * opt);
|
||||
|
||||
// Main, récupère les paramètres et appelle le robot
|
||||
@@ -1787,10 +1843,6 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
HTS_PANIC_PRINTF("Empty string given");
|
||||
htsmain_free();
|
||||
return -1;
|
||||
} else if (strlen(argv[na]) >= 256) {
|
||||
HTS_PANIC_PRINTF("Header line string too long");
|
||||
htsmain_free();
|
||||
return -1;
|
||||
}
|
||||
StringCat(opt->headers, argv[na]);
|
||||
StringCat(opt->headers, "\r\n"); /* separator */
|
||||
@@ -2441,6 +2493,28 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
htsmain_free();
|
||||
return 0;
|
||||
break;
|
||||
case '8': /* string-safety selftest: httrack -#8 [overflow <bigstr>] */
|
||||
if (na + 1 < argc && strcmp(argv[na + 1], "overflow") == 0) {
|
||||
/* Deliberately exceed a sized buffer: the bounded macro must
|
||||
abort. The source comes from argv so its length is opaque
|
||||
to the compiler (no static -Wstringop-overflow, genuine
|
||||
runtime check). */
|
||||
char small[4];
|
||||
const char *const src =
|
||||
(na + 2 < argc) ? argv[na + 2] : "overflowing";
|
||||
|
||||
strcpybuff(small, src);
|
||||
printf("strsafe: NOT aborted\n"); /* must be unreachable */
|
||||
htsmain_free();
|
||||
return 1;
|
||||
} else {
|
||||
const int err = string_safety_selftests();
|
||||
|
||||
printf("strsafe: %s\n", err ? "FAIL" : "OK");
|
||||
htsmain_free();
|
||||
return err;
|
||||
}
|
||||
break;
|
||||
case '7': // hashtable selftest: httrack -#7 nb_entries
|
||||
basic_selftests();
|
||||
if (++na < argc) {
|
||||
@@ -2691,11 +2765,6 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
return -1;
|
||||
} else {
|
||||
na++;
|
||||
if (strlen(argv[na]) >= 126) {
|
||||
HTS_PANIC_PRINTF("User-agent length too long");
|
||||
htsmain_free();
|
||||
return -1;
|
||||
}
|
||||
StringCopy(opt->user_agent, argv[na]);
|
||||
if (StringNotEmpty(opt->user_agent))
|
||||
opt->user_agent_send = 1;
|
||||
|
||||
@@ -409,7 +409,7 @@ void help_catchurl(const char *dest_path) {
|
||||
if (soc != INVALID_SOCKET) {
|
||||
char BIGSTK url[HTS_URLMAXSIZE * 2];
|
||||
char method[32];
|
||||
char BIGSTK data[32768];
|
||||
char BIGSTK data[CATCH_URL_DATA_SIZE];
|
||||
|
||||
url[0] = method[0] = data[0] = '\0';
|
||||
//
|
||||
|
||||
@@ -878,7 +878,7 @@ int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
|
||||
const char *xsend, const char *adr, const char *fil,
|
||||
const char *referer_adr, const char *referer_fil,
|
||||
htsblk * retour) {
|
||||
char BIGSTK buffer_head_request[8192];
|
||||
char BIGSTK buffer_head_request[16384];
|
||||
buff_struct bstr = { buffer_head_request, sizeof(buffer_head_request), 0 };
|
||||
|
||||
//int use_11=0; // HTTP 1.1 utilisé
|
||||
|
||||
@@ -123,41 +123,111 @@ static HTS_UNUSED void htssafe_compile_time_check_(void) {
|
||||
(void) check_pointer;
|
||||
}
|
||||
|
||||
/*
|
||||
* Pointer-destination diagnostics for the buff() macros (GCC/Clang, C only).
|
||||
*
|
||||
* strcpybuff()/strcatbuff()/strncatbuff() bounds-check only when the
|
||||
* destination is a sized char[] array (HTS_IS_CHAR_BUFFER). For a bare char*
|
||||
* the capacity is unknown, so the macro silently falls back to plain
|
||||
* strcpy()/strcat()/strncat() while still looking like a checked call.
|
||||
*
|
||||
* These stubs route that pointer case through __builtin_choose_expr() so the
|
||||
* 'warning' attribute fires only at pointer-destination sites; array sites use
|
||||
* the bounded *_safe_ helpers and stay quiet. The warning names the
|
||||
* explicit-size replacement (strlcpybuff/strlcatbuff). Diagnostic only: no
|
||||
* runtime or ABI change, built only on GCC/Clang in C mode. Other compilers
|
||||
* (MSVC, ...) keep the previous behavior via the #else branches.
|
||||
*/
|
||||
#if (defined(__GNUC__) && !defined(__cplusplus))
|
||||
#if defined(__has_attribute)
|
||||
#if __has_attribute(warning)
|
||||
#define HTS_BUFF_PTR_ATTR(msg) __attribute__((unused, noinline, warning(msg)))
|
||||
#endif
|
||||
#endif
|
||||
#ifndef HTS_BUFF_PTR_ATTR
|
||||
/* 'warning' attribute unavailable: keep noinline so the migration can still
|
||||
grep for these symbols, but no compile-time diagnostic is emitted. */
|
||||
#define HTS_BUFF_PTR_ATTR(msg) __attribute__((unused, noinline))
|
||||
#endif
|
||||
|
||||
HTS_BUFF_PTR_ATTR("strcpybuff() destination is a pointer (capacity unknown): "
|
||||
"NOT bounds-checked; use strlcpybuff(dst, src, size)")
|
||||
static char *strcpybuff_ptr_(char *dest, const char *src) {
|
||||
return strcpy(dest, src);
|
||||
}
|
||||
|
||||
HTS_BUFF_PTR_ATTR("strcatbuff() destination is a pointer (capacity unknown): "
|
||||
"NOT bounds-checked; use strlcatbuff(dst, src, size)")
|
||||
static char *strcatbuff_ptr_(char *dest, const char *src) {
|
||||
return strcat(dest, src);
|
||||
}
|
||||
|
||||
HTS_BUFF_PTR_ATTR("strncatbuff() destination is a pointer (capacity unknown): "
|
||||
"NOT bounds-checked; use strlcatbuff(dst, src, size)")
|
||||
static char *strncatbuff_ptr_(char *dest, const char *src, size_t n) {
|
||||
return strncat(dest, src, n);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Append at most N characters from "B" to "A".
|
||||
* If "A" is a char[] variable whose size is not sizeof(char*), then the size
|
||||
* is assumed to be the capacity of this array.
|
||||
*/
|
||||
#if (defined(__GNUC__) && !defined(__cplusplus))
|
||||
#define strncatbuff(A, B, N) __builtin_choose_expr( HTS_IS_CHAR_BUFFER(A), \
|
||||
strncat_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), N, \
|
||||
"overflow while appending '" #B "' to '"#A"'", __FILE__, __LINE__), \
|
||||
strncatbuff_ptr_((A), (B), (N)) )
|
||||
#else
|
||||
#define strncatbuff(A, B, N) \
|
||||
( HTS_IS_NOT_CHAR_BUFFER(A) \
|
||||
? strncat(A, B, N) \
|
||||
: strncat_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), N, \
|
||||
"overflow while appending '" #B "' to '"#A"'", __FILE__, __LINE__) )
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Append characters of "B" to "A".
|
||||
* If "A" is a char[] variable whose size is not sizeof(char*), then the size
|
||||
* is assumed to be the capacity of this array.
|
||||
*/
|
||||
#if (defined(__GNUC__) && !defined(__cplusplus))
|
||||
#define strcatbuff(A, B) __builtin_choose_expr( HTS_IS_CHAR_BUFFER(A), \
|
||||
strncat_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), (size_t) -1, \
|
||||
"overflow while appending '" #B "' to '"#A"'", __FILE__, __LINE__), \
|
||||
strcatbuff_ptr_((A), (B)) )
|
||||
#else
|
||||
#define strcatbuff(A, B) \
|
||||
( HTS_IS_NOT_CHAR_BUFFER(A) \
|
||||
? strcat(A, B) \
|
||||
: strncat_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), (size_t) -1, \
|
||||
"overflow while appending '" #B "' to '"#A"'", __FILE__, __LINE__) )
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Copy characters from "B" to "A".
|
||||
* If "A" is a char[] variable whose size is not sizeof(char*), then the size
|
||||
* is assumed to be the capacity of this array.
|
||||
*/
|
||||
#if (defined(__GNUC__) && !defined(__cplusplus))
|
||||
#define strcpybuff(A, B) __builtin_choose_expr( HTS_IS_CHAR_BUFFER(A), \
|
||||
strcpy_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), \
|
||||
"overflow while copying '" #B "' to '"#A"'", __FILE__, __LINE__), \
|
||||
strcpybuff_ptr_((A), (B)) )
|
||||
#else
|
||||
#define strcpybuff(A, B) \
|
||||
( HTS_IS_NOT_CHAR_BUFFER(A) \
|
||||
? strcpy(A, B) \
|
||||
: strcpy_safe_(A, sizeof(A), B, \
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), \
|
||||
"overflow while copying '" #B "' to '"#A"'", __FILE__, __LINE__) )
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Append characters of "B" to "A", "A" having a maximum capacity of "S".
|
||||
|
||||
71
tests/01_engine-cmdline.test
Executable file
71
tests/01_engine-cmdline.test
Executable file
@@ -0,0 +1,71 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
# Offline command-line option tests (no network). The -F user-agent and -%X
|
||||
# raw-header values used to be rejected past 126 / 256 bytes (#152); they are
|
||||
# now bounded only by the general per-argument cap (HTS_CDLMAXSIZE). A value up
|
||||
# to that cap is accepted on both the short (-F, -%X) and long (--user-agent,
|
||||
# --headers) forms, and an over-cap value is refused cleanly rather than
|
||||
# overrunning a fixed scratch buffer.
|
||||
|
||||
set -u
|
||||
|
||||
tmp=$(mktemp -d "${TMPDIR:-/tmp}/httrack_cmdline.XXXXXX") || exit 1
|
||||
trap 'rm -rf "$tmp"' EXIT HUP INT QUIT PIPE TERM
|
||||
|
||||
echo '<html><body>hello</body></html>' >"$tmp/index.html"
|
||||
|
||||
# a string of N repeated 'A' characters
|
||||
nchars() {
|
||||
printf 'A%.0s' $(seq 1 "$1")
|
||||
}
|
||||
|
||||
# crawl the local fixture with the given extra args; leaves the exit status in RC
|
||||
run() {
|
||||
local out="$1"
|
||||
shift
|
||||
rm -rf "$out"
|
||||
mkdir -p "$out"
|
||||
httrack "file://$tmp/index.html" -O "$out" --quiet -n "$@" >"$out/.log" 2>&1
|
||||
RC=$?
|
||||
}
|
||||
|
||||
# assert the value was accepted: clean exit and the fixture was mirrored
|
||||
accepted() {
|
||||
{ test "$RC" -eq 0 && test -n "$(find "$1" -type f -path '*/index.html' -print -quit)"; } ||
|
||||
! echo "FAIL: $2 (exit $RC)" || exit 1
|
||||
}
|
||||
|
||||
# assert the value was refused cleanly: a normal error exit, never a crash
|
||||
# (a SIGABRT from an overflowed scratch buffer would surface as exit 134)
|
||||
refused() {
|
||||
{ test "$RC" -ne 0 && test "$RC" -ne 134; } ||
|
||||
! echo "FAIL: $1 (exit $RC)" || exit 1
|
||||
}
|
||||
|
||||
# a value past the old 126/256 caps but within the cap is accepted, on both the
|
||||
# short and long form of each option
|
||||
long=$(nchars 900)
|
||||
run "$tmp/ua-s" -F "$long"
|
||||
accepted "$tmp/ua-s" "#152: long -F user-agent rejected or crashed"
|
||||
run "$tmp/ua-l" --user-agent "$long"
|
||||
accepted "$tmp/ua-l" "#152: long --user-agent rejected or crashed"
|
||||
run "$tmp/hd-s" "-%X" "X-A: $long"
|
||||
accepted "$tmp/hd-s" "#152: long -%X header rejected or crashed"
|
||||
run "$tmp/hd-l" --headers "X-B: $long"
|
||||
accepted "$tmp/hd-l" "#152: long --headers rejected or crashed"
|
||||
|
||||
# a value just under the cap (>1000) must not overflow the long-form alias
|
||||
# scratch buffer (the param[] copy in optalias_check)
|
||||
run "$tmp/ua-n" --user-agent "$(nchars 1010)"
|
||||
accepted "$tmp/ua-n" "#152: near-cap --user-agent overflowed the param[] buffer"
|
||||
|
||||
# a value over the cap is refused cleanly (graceful error, not a SIGABRT), on
|
||||
# both forms
|
||||
over=$(nchars 1100)
|
||||
run "$tmp/ov-s" -F "$over"
|
||||
refused "#152: over-cap -F not refused cleanly"
|
||||
run "$tmp/ov-l" --user-agent "$over"
|
||||
refused "#152: over-cap --user-agent not refused cleanly"
|
||||
|
||||
exit 0
|
||||
23
tests/01_engine-strsafe.test
Executable file
23
tests/01_engine-strsafe.test
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
# htssafe.h bounded string operations (driven by 'httrack -#8').
|
||||
|
||||
# Success path: every bounded op (strcpybuff/strcatbuff/strncatbuff/strlcpybuff)
|
||||
# must behave correctly. Like the other -# debug modes, a trailing token is
|
||||
# required (a bare '-#8' falls through to the usage screen).
|
||||
out=$(httrack -#8 run)
|
||||
test $? -eq 0 || exit 1
|
||||
test "$out" == "strsafe: OK" || exit 1
|
||||
|
||||
# Overflow path: an over-capacity write into a sized buffer must be caught by
|
||||
# the bounded macro and abort the process, not be silently truncated/completed.
|
||||
# Assert the htssafe abort signature specifically, so the test cannot pass for
|
||||
# an unrelated reason (e.g. the -#8 mode being gone and falling through to the
|
||||
# usage screen, which also exits non-zero).
|
||||
err=$(httrack -#8 overflow "this string is far too long for the buffer" 2>&1)
|
||||
case "$err" in
|
||||
*"strsafe: NOT aborted"*) echo "over-capacity write was NOT caught" >&2; exit 1 ;;
|
||||
*"overflow while copying"*) ;;
|
||||
*) echo "expected htssafe overflow abort, got: $err" >&2; exit 1 ;;
|
||||
esac
|
||||
@@ -9,6 +9,25 @@ TESTS_ENVIRONMENT += HTTPS_SUPPORT=$(HTTPS_SUPPORT)
|
||||
TESTS_ENVIRONMENT += top_srcdir=$(top_srcdir)
|
||||
|
||||
TEST_EXTENSIONS = .test
|
||||
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-entities.test 01_engine-filter.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-mime.test 01_engine-parse.test 01_engine-simplify.test 02_manpage-regen.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
|
||||
TESTS = \
|
||||
00_runnable.test \
|
||||
01_engine-charset.test \
|
||||
01_engine-cmdline.test \
|
||||
01_engine-entities.test \
|
||||
01_engine-filter.test \
|
||||
01_engine-hashtable.test \
|
||||
01_engine-idna.test \
|
||||
01_engine-mime.test \
|
||||
01_engine-parse.test \
|
||||
01_engine-simplify.test \
|
||||
01_engine-strsafe.test \
|
||||
02_manpage-regen.test \
|
||||
10_crawl-simple.test \
|
||||
11_crawl-cookies.test \
|
||||
11_crawl-idna.test \
|
||||
11_crawl-international.test \
|
||||
11_crawl-longurl.test \
|
||||
11_crawl-parsing.test \
|
||||
12_crawl_https.test
|
||||
|
||||
CLEANFILES = check-network_sh.cache
|
||||
|
||||
@@ -472,7 +472,7 @@ TESTS_ENVIRONMENT = PATH=$(top_builddir)/src$(PATH_SEPARATOR)$$PATH \
|
||||
ONLINE_UNIT_TESTS=$(ONLINE_UNIT_TESTS) \
|
||||
HTTPS_SUPPORT=$(HTTPS_SUPPORT) top_srcdir=$(top_srcdir)
|
||||
TEST_EXTENSIONS = .test
|
||||
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-entities.test 01_engine-filter.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-mime.test 01_engine-parse.test 01_engine-simplify.test 02_manpage-regen.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
|
||||
TESTS = 00_runnable.test 01_engine-charset.test 01_engine-cmdline.test 01_engine-entities.test 01_engine-filter.test 01_engine-hashtable.test 01_engine-idna.test 01_engine-mime.test 01_engine-parse.test 01_engine-simplify.test 02_manpage-regen.test 10_crawl-simple.test 11_crawl-cookies.test 11_crawl-idna.test 11_crawl-international.test 11_crawl-longurl.test 11_crawl-parsing.test 12_crawl_https.test
|
||||
CLEANFILES = check-network_sh.cache
|
||||
all: all-am
|
||||
|
||||
|
||||
Reference in New Issue
Block a user