mirror of
https://github.com/xroche/httrack.git
synced 2026-06-29 21:45:24 +03:00
Compare commits
1 Commits
master
...
accept-enc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e2e6a4d4e4 |
17
src/htslib.c
17
src/htslib.c
@@ -1326,16 +1326,12 @@ int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
|
||||
|
||||
// Compression accepted ?
|
||||
if (retour->req.http11) {
|
||||
hts_boolean compressible = HTS_FALSE;
|
||||
#if HTS_USEZLIB
|
||||
if ((!retour->req.range_used)
|
||||
&& (!retour->req.nocompression))
|
||||
print_buffer(&bstr, "Accept-Encoding: " "gzip" /* gzip if the preffered encoding */
|
||||
", " "identity;q=0.9" H_CRLF);
|
||||
else
|
||||
print_buffer(&bstr, "Accept-Encoding: identity" H_CRLF); /* no compression */
|
||||
#else
|
||||
print_buffer(&bstr, "Accept-Encoding: identity" H_CRLF); /* no compression */
|
||||
compressible = (!retour->req.range_used && !retour->req.nocompression);
|
||||
#endif
|
||||
print_buffer(&bstr, "Accept-Encoding: %s" H_CRLF,
|
||||
hts_acceptencoding(compressible));
|
||||
}
|
||||
|
||||
/* Authentification */
|
||||
@@ -4414,6 +4410,11 @@ HTSEXT_API void get_httptype(httrackp *opt, char *s, const char *fil,
|
||||
(void) get_httptype_sized(opt, s, HTS_MIMETYPE_SIZE, fil, flag);
|
||||
}
|
||||
|
||||
/* Advertised Accept-Encoding; gzip and deflate both decode via hts_zunpack */
|
||||
const char *hts_acceptencoding(hts_boolean compressible) {
|
||||
return compressible ? "gzip, deflate, identity;q=0.9" : "identity";
|
||||
}
|
||||
|
||||
// get type of fil (php)
|
||||
// s: buffer (text/html) or NULL
|
||||
// return: 1 if known by user
|
||||
|
||||
@@ -285,6 +285,9 @@ int ishttperror(int err);
|
||||
int get_userhttptype(httrackp * opt, char *s, const char *fil);
|
||||
int give_mimext(char *s, size_t ssize, const char *st);
|
||||
|
||||
/* Advertised Accept-Encoding value (no header name/CRLF); see htslib.c. */
|
||||
const char *hts_acceptencoding(hts_boolean compressible);
|
||||
|
||||
int may_bogus_multiple(httrackp * opt, const char *mime, const char *filename);
|
||||
int may_unknown2(httrackp * opt, const char *mime, const char *filename);
|
||||
|
||||
|
||||
@@ -50,6 +50,9 @@ Please visit our Website: http://www.httrack.com
|
||||
#include "htscharset.h"
|
||||
#include "htsencoding.h"
|
||||
#include "htsmd5.h"
|
||||
#if HTS_USEZLIB
|
||||
#include "htszlib.h"
|
||||
#endif
|
||||
#include "coucal/coucal.h"
|
||||
|
||||
#include <ctype.h>
|
||||
@@ -1336,6 +1339,158 @@ static int st_status(httrackp *opt, int argc, char **argv) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if HTS_USEZLIB
|
||||
/* Deflate src->path at windowBits (16+ gzip, + zlib, - raw); 0 on success. */
|
||||
static int ae_write_packed(const char *path, int windowBits,
|
||||
const unsigned char *src, size_t len) {
|
||||
unsigned char out[8192];
|
||||
z_stream strm;
|
||||
FILE *f;
|
||||
int zerr;
|
||||
|
||||
memset(&strm, 0, sizeof(strm));
|
||||
if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, windowBits, 8,
|
||||
Z_DEFAULT_STRATEGY) != Z_OK)
|
||||
return 1;
|
||||
if ((f = FOPEN(path, "wb")) == NULL) {
|
||||
deflateEnd(&strm);
|
||||
return 1;
|
||||
}
|
||||
strm.next_in = (Bytef *) src;
|
||||
strm.avail_in = (uInt) len;
|
||||
do {
|
||||
size_t n;
|
||||
|
||||
strm.next_out = out;
|
||||
strm.avail_out = sizeof(out);
|
||||
zerr = deflate(&strm, Z_FINISH);
|
||||
n = sizeof(out) - strm.avail_out;
|
||||
if (n > 0 && fwrite(out, 1, n, f) != n) {
|
||||
deflateEnd(&strm);
|
||||
fclose(f);
|
||||
return 1;
|
||||
}
|
||||
} while (zerr == Z_OK);
|
||||
deflateEnd(&strm);
|
||||
fclose(f);
|
||||
return (zerr == Z_STREAM_END) ? 0 : 1;
|
||||
}
|
||||
|
||||
/* Write the 5-byte header of a stored (BTYPE=00) deflate block at dst: the
   byte carrying the BFINAL/BTYPE bits, then LEN and its one's complement
   NLEN, each as two little-endian bytes. Returns the bytes written (5). */
static size_t ae_put_stored_header(unsigned char *dst, unsigned char first,
                                   size_t n) {
  dst[0] = first;
  dst[1] = (unsigned char) (n & 0xff);
  dst[2] = (unsigned char) (n >> 8);
  dst[3] = (unsigned char) (~n & 0xff);
  dst[4] = (unsigned char) ((~n >> 8) & 0xff);
  return 5;
}

/* Write src[0..len) to path as a raw deflate stream made of two stored
   blocks, forged so that its first two bytes are 08 1D. Those bytes look
   like a zlib header (low nibble 8, and 0x081D % 31 == 0), so a decoder that
   sniffs the header picks zlib first and only a raw-deflate retry succeeds.
   len must be at least 29 (size of the first block) and the remainder must
   fit a stored block's 16-bit length field.
   Returns 0 on success, 1 on bad length, allocation or I/O failure. */
static int ae_write_collision(const char *path, const unsigned char *src,
                              size_t len) {
  /* block-1 LEN low byte 0x1D: with 0x08, (0x081D)%31==0 */
  const size_t n1 = 29;
  size_t n2, p = 0;
  unsigned char *buf;
  FILE *f;
  int ok;

  if (len < n1 || len - n1 > 0xFFFF)
    return 1;
  n2 = len - n1;
  buf = malloct(10 + len);      /* two 5-byte block headers + payload */
  if (buf == NULL)
    return 1;
  /* BFINAL=0, BTYPE=00; forged padding bits -> zlib CMF nibble */
  p += ae_put_stored_header(buf + p, 0x08, n1);
  memcpy(buf + p, src, n1);
  p += n1;
  /* BFINAL=1, BTYPE=00 */
  p += ae_put_stored_header(buf + p, 0x01, n2);
  memcpy(buf + p, src + n1, n2);
  p += n2;
  f = FOPEN(path, "wb");
  ok = (f != NULL && fwrite(buf, 1, p, f) == p);
  /* fclose() flushes buffered output; treat a failed flush as a failed write */
  if (f != NULL && fclose(f) != 0)
    ok = 0;
  freet(buf);
  return ok ? 0 : 1;
}
|
||||
|
||||
/* Check that the file at path holds exactly the bytes expect[0..len).
   The file is read in fixed-size chunks, so it never has to fit in memory.
   Returns 0 when the content matches, 1 when the file cannot be opened, is
   longer or shorter than len, or differs from expect. */
static int ae_check_decoded(const char *path, const unsigned char *expect,
                            size_t len) {
  unsigned char chunk[8192];
  size_t done = 0;
  int mismatch = 0;
  FILE *fp = FOPEN(path, "rb");

  if (fp == NULL)
    return 1;
  for (;;) {
    const size_t got = fread(chunk, 1, sizeof(chunk), fp);

    if (got == 0)
      break;
    /* more data than expected, or data that differs */
    if (got > len - done || memcmp(chunk, expect + done, got) != 0) {
      mismatch = 1;
      break;
    }
    done += got;
  }
  fclose(fp);
  if (mismatch)
    return 1;
  /* a short file leaves done < len */
  return (done == len) ? 0 : 1;
}
|
||||
#endif
|
||||
|
||||
/* Accept-Encoding (#450): advertise gzip+deflate; both decode (hts_zunpack) */
|
||||
static int st_acceptencoding(httrackp *opt, int argc, char **argv) {
|
||||
const char *off = hts_acceptencoding(HTS_FALSE);
|
||||
const char *on = hts_acceptencoding(HTS_TRUE);
|
||||
|
||||
(void) opt;
|
||||
assertf(strcmp(off, "identity") == 0);
|
||||
assertf(strstr(on, "gzip") != NULL);
|
||||
assertf(strstr(on, "deflate") != NULL); /* fails on the old gzip-only list */
|
||||
#if HTS_USEZLIB
|
||||
if (argc >= 1) {
|
||||
static const int windowBits[] = {16 + MAX_WBITS, MAX_WBITS, -MAX_WBITS};
|
||||
const unsigned char small[] =
|
||||
"deflate round-trip: HTTrack decodes gzip and deflate alike. "
|
||||
"deflate round-trip: HTTrack decodes gzip and deflate alike.";
|
||||
const size_t slen = sizeof(small) - 1;
|
||||
/* 64 KiB of varied (LCG) bytes: forces the multi-fread loop */
|
||||
const size_t blen = 64 * 1024;
|
||||
unsigned char *body = malloct(blen);
|
||||
uint32_t x = 0x1234567u;
|
||||
char inpath[HTS_URLMAXSIZE], outpath[HTS_URLMAXSIZE];
|
||||
size_t i;
|
||||
|
||||
assertf(body != NULL);
|
||||
for (i = 0; i < blen; i++) {
|
||||
x = x * 1103515245u + 12345u;
|
||||
body[i] = (unsigned char) (x >> 16);
|
||||
}
|
||||
/* gzip, zlib (RFC1950) and raw deflate (RFC1951), both small and large. */
|
||||
for (i = 0; i < sizeof(windowBits) / sizeof(windowBits[0]); i++) {
|
||||
snprintf(inpath, sizeof(inpath), "%s/ae-in-%d.z", argv[0], windowBits[i]);
|
||||
snprintf(outpath, sizeof(outpath), "%s/ae-out-%d", argv[0],
|
||||
windowBits[i]);
|
||||
assertf(ae_write_packed(inpath, windowBits[i], small, slen) == 0);
|
||||
assertf(hts_zunpack(inpath, outpath) == (int) slen);
|
||||
assertf(ae_check_decoded(outpath, small, slen) == 0);
|
||||
assertf(ae_write_packed(inpath, windowBits[i], body, blen) == 0);
|
||||
assertf(hts_zunpack(inpath, outpath) == (int) blen);
|
||||
assertf(ae_check_decoded(outpath, body, blen) == 0);
|
||||
}
|
||||
/* Fallback teeth: raw deflate misdetected as zlib; -1 without the retry. */
|
||||
snprintf(inpath, sizeof(inpath), "%s/ae-collide.z", argv[0]);
|
||||
snprintf(outpath, sizeof(outpath), "%s/ae-collide.out", argv[0]);
|
||||
assertf(ae_write_collision(inpath, body, 64) == 0);
|
||||
assertf(hts_zunpack(inpath, outpath) == 64);
|
||||
assertf(ae_check_decoded(outpath, body, 64) == 0);
|
||||
freet(body);
|
||||
}
|
||||
#else
|
||||
(void) argc;
|
||||
(void) argv;
|
||||
#endif
|
||||
printf("acceptencoding self-test OK: %s\n", on);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
/* Registry: name -> handler, with a usage hint and a one-line description. */
|
||||
/* ------------------------------------------------------------ */
|
||||
@@ -1384,6 +1539,8 @@ static const struct selftest_entry {
|
||||
{"cookies", "", "cookie request-header self-test", st_cookies},
|
||||
{"useragent", "", "default User-Agent self-test", st_useragent},
|
||||
{"status", "", "HTTP status code -> reason phrase self-test", st_status},
|
||||
{"acceptencoding", "[dir]",
|
||||
"Accept-Encoding advertises gzip+deflate, both decode", st_acceptencoding},
|
||||
};
|
||||
|
||||
static void list_selftests(void) {
|
||||
|
||||
103
src/htszlib.c
103
src/htszlib.c
@@ -47,48 +47,89 @@ Please visit our Website: http://www.httrack.com
|
||||
*/
|
||||
|
||||
/*
|
||||
Unpack file into a new file
|
||||
Unpack file into a new file (gzip, zlib RFC1950 or raw deflate RFC1951).
|
||||
Return value: size of the new file, or -1 if an error occurred
|
||||
*/
|
||||
/* Note: utf-8 */
|
||||
int hts_zunpack(char *filename, char *newfile) {
|
||||
int ret = -1;
|
||||
|
||||
if (filename != NULL && newfile != NULL) {
|
||||
if (filename[0] && newfile[0]) {
|
||||
char catbuff[CATBUFF_SIZE];
|
||||
FILE *const in = FOPEN(fconv(catbuff, sizeof(catbuff), filename), "rb");
|
||||
const int fd = in != NULL ? fileno(in) : -1;
|
||||
const int dup_fd = fd != -1 ? dup(fd) : -1;
|
||||
// Note: we must dup to be able to flose cleanly.
|
||||
const gzFile gz = dup_fd != -1 ? gzdopen(dup_fd, "rb") : NULL;
|
||||
if (filename != NULL && newfile != NULL && filename[0] && newfile[0]) {
|
||||
char catbuff[CATBUFF_SIZE];
|
||||
FILE *const in = FOPEN(fconv(catbuff, sizeof(catbuff), filename), "rb");
|
||||
|
||||
if (gz) {
|
||||
FILE *const fpout = FOPEN(fconv(catbuff, sizeof(catbuff), newfile), "wb");
|
||||
int size = 0;
|
||||
if (in != NULL) {
|
||||
unsigned char BIGSTK inbuf[8192];
|
||||
size_t navail = fread(inbuf, 1, sizeof(inbuf), in);
|
||||
/* gzip/zlib headers -> +32 windowBits; else raw deflate (RFC1951) */
|
||||
const hts_boolean wrapped =
|
||||
(navail >= 2 &&
|
||||
((inbuf[0] == 0x1f && inbuf[1] == 0x8b) ||
|
||||
((inbuf[0] & 0x0f) == Z_DEFLATED &&
|
||||
(((unsigned) inbuf[0] << 8 | inbuf[1]) % 31) == 0)));
|
||||
int attempt;
|
||||
|
||||
if (fpout) {
|
||||
int nr;
|
||||
/* deflate is ambiguous; on failure retry with the other windowBits */
|
||||
for (attempt = 0; attempt < 2 && ret < 0; attempt++) {
|
||||
const int windowBits =
|
||||
(attempt == 0 ? wrapped : !wrapped) ? (32 + MAX_WBITS) : -MAX_WBITS;
|
||||
FILE *fpout;
|
||||
z_stream strm;
|
||||
|
||||
do {
|
||||
char BIGSTK buff[1024];
|
||||
|
||||
nr = gzread(gz, buff, sizeof(buff));
|
||||
if (nr > 0) {
|
||||
size += nr;
|
||||
if (fwrite(buff, 1, nr, fpout) != nr)
|
||||
nr = size = -1;
|
||||
}
|
||||
} while(nr > 0);
|
||||
if (attempt > 0) {
|
||||
/* rewind input; reopening fpout "wb" discards the partial output */
|
||||
if (fseek(in, 0, SEEK_SET) != 0)
|
||||
break;
|
||||
navail = fread(inbuf, 1, sizeof(inbuf), in);
|
||||
}
|
||||
fpout = FOPEN(fconv(catbuff, sizeof(catbuff), newfile), "wb");
|
||||
if (fpout == NULL)
|
||||
break;
|
||||
memset(&strm, 0, sizeof(strm));
|
||||
if (inflateInit2(&strm, windowBits) != Z_OK) {
|
||||
fclose(fpout);
|
||||
} else
|
||||
size = -1;
|
||||
gzclose(gz);
|
||||
ret = (int) size;
|
||||
}
|
||||
if (in != NULL) {
|
||||
fclose(in);
|
||||
break;
|
||||
}
|
||||
{
|
||||
hts_boolean ok = HTS_TRUE;
|
||||
int size = 0;
|
||||
int zerr = Z_OK;
|
||||
|
||||
/* chunked inflate; first chunk in inbuf, single member */
|
||||
do {
|
||||
strm.next_in = inbuf;
|
||||
strm.avail_in = (uInt) navail;
|
||||
do {
|
||||
unsigned char BIGSTK outbuf[8192];
|
||||
size_t produced;
|
||||
|
||||
strm.next_out = outbuf;
|
||||
strm.avail_out = sizeof(outbuf);
|
||||
zerr = inflate(&strm, Z_NO_FLUSH);
|
||||
if (zerr == Z_NEED_DICT || zerr == Z_DATA_ERROR ||
|
||||
zerr == Z_MEM_ERROR || zerr == Z_STREAM_ERROR) {
|
||||
ok = HTS_FALSE;
|
||||
break;
|
||||
}
|
||||
produced = sizeof(outbuf) - strm.avail_out;
|
||||
if (produced > 0 &&
|
||||
fwrite(outbuf, 1, produced, fpout) != produced) {
|
||||
ok = HTS_FALSE;
|
||||
break;
|
||||
}
|
||||
size += (int) produced;
|
||||
} while (strm.avail_out == 0);
|
||||
if (!ok || zerr == Z_STREAM_END)
|
||||
break;
|
||||
navail = fread(inbuf, 1, sizeof(inbuf), in);
|
||||
} while (navail > 0);
|
||||
if (ok && zerr == Z_STREAM_END)
|
||||
ret = size;
|
||||
}
|
||||
inflateEnd(&strm);
|
||||
fclose(fpout);
|
||||
}
|
||||
fclose(in);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
||||
11
tests/01_zlib-acceptencoding.test
Executable file
11
tests/01_zlib-acceptencoding.test
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
#
# Accept-Encoding (#450): advertise gzip+deflate; decode gzip/zlib/raw-deflate.
# Runs httrack's built-in "acceptencoding" self-test with a scratch directory
# and passes when the self-test prints its OK line.

set -euo pipefail

# Scratch directory for the self-test's files; removed on exit.
scratch=$(mktemp -d)
cleanup() {
  rm -rf "$scratch"
}
trap cleanup EXIT

httrack -O /dev/null -#test=acceptencoding "$scratch" run \
  | grep -q "acceptencoding self-test OK"
|
||||
@@ -49,6 +49,7 @@ TESTS = \
|
||||
01_engine-strsafe.test \
|
||||
01_engine-urlhack.test \
|
||||
01_engine-useragent.test \
|
||||
01_zlib-acceptencoding.test \
|
||||
01_zlib-cache.test \
|
||||
01_zlib-cache-golden.test \
|
||||
01_zlib-cache-writefail.test \
|
||||
|
||||
Reference in New Issue
Block a user