mirror of
https://github.com/xroche/httrack.git
synced 2026-07-04 16:14:47 +03:00
Compare commits
1 Commits
phase0-fol
...
phase0-net
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
90e804a712 |
10
configure.ac
10
configure.ac
@@ -63,16 +63,6 @@ AC_SUBST(LT_CV_OBJDIR,$lt_cv_objdir)
|
||||
# Export version info
|
||||
AC_SUBST(VERSION_INFO)
|
||||
|
||||
# Versioned plugin name for dlopen() in hts_create_opt(); soname major is
|
||||
# libtool's current - age, so this tracks VERSION_INFO bumps automatically.
|
||||
HTS_SONAME_MAJOR=$((${VERSION_INFO%%:*} - ${VERSION_INFO##*:}))
|
||||
case "$host_os" in
|
||||
darwin*) HTS_LIBHTSJAVA_NAME="libhtsjava.$HTS_SONAME_MAJOR.dylib" ;;
|
||||
*) HTS_LIBHTSJAVA_NAME="libhtsjava.so.$HTS_SONAME_MAJOR" ;;
|
||||
esac
|
||||
AC_DEFINE_UNQUOTED([HTS_LIBHTSJAVA_NAME], ["$HTS_LIBHTSJAVA_NAME"],
|
||||
[Versioned libhtsjava runtime name, derived from VERSION_INFO])
|
||||
|
||||
### Default CFLAGS
|
||||
DEFAULT_CFLAGS="-Wall -Wformat -Wformat-security \
|
||||
-Wmultichar -Wwrite-strings -Wcast-qual -Wcast-align \
|
||||
|
||||
@@ -175,9 +175,7 @@ HTSEXT_API hts_boolean catch_url(T_SOC soc, char *url, char *method,
|
||||
//
|
||||
socinput(soc, line, 1000);
|
||||
if (strnotempty(line)) {
|
||||
/* widths bound the caller buffers: method[32], url[HTS_URLMAXSIZE*2],
|
||||
protocol[256] */
|
||||
if (sscanf(line, "%31s %2047s %255s", method, url, protocol) == 3) {
|
||||
if (sscanf(line, "%s %s %s", method, url, protocol) == 3) {
|
||||
lien_adrfil af;
|
||||
|
||||
// méthode en majuscule
|
||||
|
||||
@@ -69,15 +69,11 @@ typedef struct t_hts_callbackarg t_hts_callbackarg;
|
||||
typedef struct t_hts_callbackarg t_hts_callbackarg;
|
||||
#endif
|
||||
|
||||
/* Marks a symbol an external wrapper module exports back to the engine.
|
||||
Must override -fvisibility=hidden on ELF, or dlopen()ed plugins (htsjava)
|
||||
hide their own hts_plug()/hts_unplug() entry points. */
|
||||
/* Marks a symbol an external wrapper module exports back to the engine
|
||||
(dllexport on Windows, nothing elsewhere). */
|
||||
#ifndef EXTERNAL_FUNCTION
|
||||
#ifdef _WIN32
|
||||
#define EXTERNAL_FUNCTION __declspec(dllexport)
|
||||
#elif ((defined(__GNUC__) && (__GNUC__ >= 4)) || \
|
||||
(defined(HAVE_VISIBILITY) && HAVE_VISIBILITY))
|
||||
#define EXTERNAL_FUNCTION __attribute__((visibility("default")))
|
||||
#else
|
||||
#define EXTERNAL_FUNCTION
|
||||
#endif
|
||||
|
||||
@@ -190,9 +190,9 @@ int hts_unescapeEntitiesWithCharset(const char *src, char *dest, const size_t ma
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* reserve one byte for the trailing NUL written after the loop */
|
||||
if (j + 1 >= max) {
|
||||
|
||||
/* copy */
|
||||
if (j + 1 > max) {
|
||||
/* overflow */
|
||||
return -1;
|
||||
}
|
||||
@@ -300,11 +300,6 @@ int hts_unescapeUrlSpecial(const char *src, char *dest, const size_t max,
|
||||
|
||||
/* Was the character read successfully ? */
|
||||
if (nRead == utfBufferSize) {
|
||||
/* the 'continue' below skips the NUL-reserve guard: re-check */
|
||||
if (utfBufferJ + utfBufferSize >= max) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Rollback write position to sequence start write position */
|
||||
j = utfBufferJ;
|
||||
|
||||
@@ -319,8 +314,8 @@ int hts_unescapeUrlSpecial(const char *src, char *dest, const size_t max,
|
||||
}
|
||||
}
|
||||
|
||||
/* reserve one byte for the trailing NUL written after the loop */
|
||||
if (j + 1 >= max) {
|
||||
/* Check for overflow */
|
||||
if (j + 1 > max) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
@@ -133,8 +133,6 @@ void ftp_split_userpass(const char *src, const char *end, char *user,
|
||||
size_t user_size, char *pass, size_t pass_size) {
|
||||
size_t n = 0;
|
||||
|
||||
assertf(user_size > 0 && pass_size > 0); /* the size-1 math underflows on 0 */
|
||||
|
||||
while (src[n] != '\0' && src[n] != ':') {
|
||||
if (n < user_size - 1)
|
||||
user[n] = src[n];
|
||||
|
||||
@@ -71,8 +71,7 @@ int run_launch_ftp(FTPDownloadStruct * params);
|
||||
int send_line(T_SOC soc, const char *data);
|
||||
int get_ftp_line(T_SOC soc, char *line, size_t line_size, int timeout);
|
||||
/* Split a "user[:pass]@" prefix (end = jump_identification result) into
|
||||
bounded, NUL-terminated user/pass buffers, truncating to fit.
|
||||
Both sizes must be nonzero. */
|
||||
bounded, NUL-terminated user/pass buffers, truncating to fit. */
|
||||
void ftp_split_userpass(const char *src, const char *end, char *user,
|
||||
size_t user_size, char *pass, size_t pass_size);
|
||||
T_SOC get_datasocket(char *to_send, size_t to_send_size);
|
||||
|
||||
@@ -33,19 +33,15 @@ Please visit our Website: http://www.httrack.com
|
||||
#ifndef HTSJAVA_DEFH
|
||||
#define HTSJAVA_DEFH
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifndef HTS_DEF_FWSTRUCT_JAVA_HEADER
|
||||
#define HTS_DEF_FWSTRUCT_JAVA_HEADER
|
||||
typedef struct JAVA_HEADER JAVA_HEADER;
|
||||
#endif
|
||||
/* 10-byte on-disk .class header image, fread() directly: fields need exact
|
||||
widths (LP64's 8-byte 'unsigned long' magic never matched 0xCAFEBABE). */
|
||||
struct JAVA_HEADER {
|
||||
uint32_t magic;
|
||||
uint16_t minor;
|
||||
uint16_t major;
|
||||
uint16_t count;
|
||||
unsigned long int magic;
|
||||
unsigned short int minor;
|
||||
unsigned short int major;
|
||||
unsigned short int count;
|
||||
};
|
||||
|
||||
#ifndef HTS_DEF_FWSTRUCT_RESP_STRUCT
|
||||
|
||||
11
src/htslib.c
11
src/htslib.c
@@ -1149,8 +1149,7 @@ int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
|
||||
char BIGSTK protocol[256], url[HTS_URLMAXSIZE * 2], method[256];
|
||||
|
||||
linput(fp, line, 1000);
|
||||
/* widths bound method[256], url[HTS_URLMAXSIZE*2], protocol[256] */
|
||||
if (sscanf(line, "%255s %2047s %255s", method, url, protocol) == 3) {
|
||||
if (sscanf(line, "%s %s %s", method, url, protocol) == 3) {
|
||||
size_t ret;
|
||||
// selon que l'on a ou pas un proxy
|
||||
if (retour->req.proxy.active) {
|
||||
@@ -6023,11 +6022,9 @@ HTSEXT_API httrackp *hts_create_opt(void) {
|
||||
"htsswf", "htsjava", "httrack-plugin", NULL
|
||||
};
|
||||
#else
|
||||
#ifndef HTS_LIBHTSJAVA_NAME
|
||||
#define HTS_LIBHTSJAVA_NAME "libhtsjava.so" /* non-autoconf fallback */
|
||||
#endif
|
||||
static const char *defaultModules[] = {"libhtsswf.so.1", HTS_LIBHTSJAVA_NAME,
|
||||
"httrack-plugin", NULL};
|
||||
static const char *defaultModules[] = {
|
||||
"libhtsswf.so.1", "libhtsjava.so.2", "httrack-plugin", NULL
|
||||
};
|
||||
#endif
|
||||
httrackp *opt = malloc(sizeof(httrackp));
|
||||
|
||||
|
||||
@@ -604,14 +604,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
}
|
||||
|
||||
// Decode title with encoding
|
||||
if (str->page_charset_ != NULL &&
|
||||
*str->page_charset_ != '\0') {
|
||||
char *sUtf = hts_convertStringToUTF8(
|
||||
s, strlen(s), str->page_charset_);
|
||||
if (str->page_charset_ != NULL
|
||||
&& *str->page_charset_ != '\0') {
|
||||
char *const sUtf =
|
||||
hts_convertStringToUTF8(s, strlen(s), str->page_charset_);
|
||||
if (sUtf != NULL) {
|
||||
/* UTF-8 can expand past s[]; truncate to fit */
|
||||
snprintf(s, sizeof(s), "%s", sUtf);
|
||||
freet(sUtf);
|
||||
strcpy(s, sUtf);
|
||||
free(sUtf);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -713,8 +713,7 @@ static int st_entities(httrackp *opt, int argc, char **argv) {
|
||||
}
|
||||
s = strdupt(argv[0]);
|
||||
enc = argc >= 2 ? argv[1] : "UTF-8";
|
||||
if (s != NULL &&
|
||||
hts_unescapeEntitiesWithCharset(s, s, strlen(s) + 1, enc) == 0) {
|
||||
if (s != NULL && hts_unescapeEntitiesWithCharset(s, s, strlen(s), enc) == 0) {
|
||||
printf("%s\n", s);
|
||||
freet(s);
|
||||
} else {
|
||||
@@ -723,34 +722,6 @@ static int st_entities(httrackp *opt, int argc, char **argv) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The unescapers must reserve one byte for the trailing NUL: a 'max'-byte
|
||||
dest holding 'max' output chars pre-fix wrote dest[max] (1-byte OOB, caught
|
||||
by ASan). Both unescapeEntities and unescapeUrl share the guard. */
|
||||
static int st_unescape_bounds(httrackp *opt, int argc, char **argv) {
|
||||
char dest[4];
|
||||
|
||||
(void) opt;
|
||||
(void) argc;
|
||||
(void) argv;
|
||||
assertf(hts_unescapeEntities("abcd", dest, sizeof(dest)) == -1);
|
||||
assertf(hts_unescapeUrl("abcd", dest, sizeof(dest)) == -1);
|
||||
assertf(hts_unescapeEntities("abc", dest, sizeof(dest)) == 0);
|
||||
assertf(strcmp(dest, "abc") == 0);
|
||||
/* raw multi-byte UTF-8 flush path (bypasses the per-byte guard) */
|
||||
assertf(hts_unescapeUrl("ab\xC3\xA9", dest, sizeof(dest)) == -1);
|
||||
assertf(hts_unescapeUrl("a\xC3\xA9", dest, sizeof(dest)) == 0);
|
||||
assertf(strcmp(dest, "a\xC3\xA9") == 0);
|
||||
{
|
||||
/* %xx-encoded flush path (utfBufferJ = lastJ rollback) */
|
||||
char wide[8];
|
||||
|
||||
assertf(hts_unescapeUrl("%C3%A9", wide, sizeof(wide)) == 0);
|
||||
assertf(strcmp(wide, "\xC3\xA9") == 0);
|
||||
}
|
||||
printf("unescape-bounds self-test OK\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int st_hashtable(httrackp *opt, int argc, char **argv) {
|
||||
char *snum;
|
||||
unsigned long count = 0;
|
||||
@@ -1853,17 +1824,6 @@ static int st_ftpuser(httrackp *opt, int argc, char **argv) {
|
||||
ftp_split_userpass(in, in + 802, user, sizeof(user), pass, sizeof(pass));
|
||||
assertf(strlen(user) == sizeof(user) - 1);
|
||||
assertf(strlen(pass) == sizeof(pass) - 1);
|
||||
{
|
||||
/* tight sizes + guard byte catch an off-by-one the 256 case can't */
|
||||
char ubuf[16], pbuf[16];
|
||||
|
||||
memset(ubuf, 'Z', sizeof(ubuf));
|
||||
memset(pbuf, 'Z', sizeof(pbuf));
|
||||
ftp_split_userpass(in, in + 802, ubuf, 8, pbuf, 8);
|
||||
assertf(strcmp(ubuf, "uuuuuuu") == 0);
|
||||
assertf(strcmp(pbuf, "ppppppp") == 0);
|
||||
assertf(ubuf[8] == 'Z' && pbuf[8] == 'Z');
|
||||
}
|
||||
printf("ftp-userpass self-test OK\n");
|
||||
return 0;
|
||||
}
|
||||
@@ -1913,8 +1873,6 @@ static const struct selftest_entry {
|
||||
{"idna-decode", "<host>", "decode an IDNA/punycode hostname",
|
||||
st_idna_decode},
|
||||
{"entities", "<string> [encoding]", "unescape HTML entities", st_entities},
|
||||
{"unescape-bounds", "", "unescapers reserve the NUL byte (no 1-byte OOB)",
|
||||
st_unescape_bounds},
|
||||
{"hashtable", "<count|file>", "coucal hashtable stress test", st_hashtable},
|
||||
{"strsafe", "[overflow|overflow-buff [str]]", "bounded string-op self-test",
|
||||
st_strsafe},
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Entity/URL unescapers reserve one byte for the trailing NUL (no 1-byte OOB).
|
||||
httrack -O /dev/null -#test=unescape-bounds run | grep -q "unescape-bounds self-test OK"
|
||||
@@ -1,23 +0,0 @@
|
||||
#!/bin/bash
|
||||
# The java plugin must load (versioned dlopen name) and parse a .class
|
||||
# constant pool: a resource named only inside Foo.class gets crawled.
|
||||
set -e
|
||||
|
||||
: "${top_srcdir:=..}"
|
||||
|
||||
tmproot=$(mktemp -d)
|
||||
trap 'rm -rf "$tmproot"' EXIT
|
||||
mkdir "$tmproot/javaclass"
|
||||
|
||||
cat >"$tmproot/javaclass/index.html" <<'EOF'
|
||||
<html><body><a href="Foo.class">applet</a></body></html>
|
||||
EOF
|
||||
printf 'GIF89a' >"$tmproot/javaclass/hello.gif"
|
||||
# magic/minor/major, count=2, one CONSTANT_Utf8 "hello.gif", class/superclass
|
||||
printf '\xCA\xFE\xBA\xBE\x00\x00\x00\x32\x00\x02\x01\x00\x09hello.gif\x00\x00\x00\x00' \
|
||||
>"$tmproot/javaclass/Foo.class"
|
||||
|
||||
bash "$top_srcdir/tests/local-crawl.sh" --root "$tmproot" --errors 0 \
|
||||
--found 'javaclass/Foo.class' \
|
||||
--found 'javaclass/hello.gif' \
|
||||
httrack 'BASEURL/javaclass/index.html'
|
||||
@@ -57,7 +57,6 @@ TESTS = \
|
||||
01_engine-stripquery.test \
|
||||
01_engine-strsafe.test \
|
||||
01_engine-urlhack.test \
|
||||
01_engine-unescape-bounds.test \
|
||||
01_engine-useragent.test \
|
||||
01_zlib-acceptencoding.test \
|
||||
01_zlib-cache.test \
|
||||
@@ -90,7 +89,6 @@ TESTS = \
|
||||
27_local-cookies-file.test \
|
||||
28_local-pause.test \
|
||||
29_local-redirect-fragment.test \
|
||||
30_local-fragment-link.test \
|
||||
31_local-javaclass.test
|
||||
30_local-fragment-link.test
|
||||
|
||||
CLEANFILES = check-network_sh.cache
|
||||
|
||||
Reference in New Issue
Block a user