mirror of
https://github.com/xroche/httrack.git
synced 2026-06-21 01:28:35 +03:00
Compare commits
9 Commits
fix/copy-h
...
fix/proxy-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c52a524a63 | ||
|
|
1907621d37 | ||
|
|
3b2d7afdaa | ||
|
|
6ee539619e | ||
|
|
fb098b27b4 | ||
|
|
5f6a3fb917 | ||
|
|
f9e676dbe3 | ||
|
|
1b440c44b5 | ||
|
|
ac6dd1a570 |
@@ -2532,8 +2532,26 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
|
|||||||
#if HTS_USEOPENSSL
|
#if HTS_USEOPENSSL
|
||||||
/* SSL mode */
|
/* SSL mode */
|
||||||
if (back[i].r.ssl) {
|
if (back[i].r.ssl) {
|
||||||
|
int tunnel_ok = 1;
|
||||||
|
|
||||||
|
// https via proxy: CONNECT-tunnel before TLS (#85)
|
||||||
|
if (back[i].r.req.proxy.active && back[i].r.ssl_con == NULL) {
|
||||||
|
const int timeout = back[i].timeout > 0 ? back[i].timeout : 30;
|
||||||
|
|
||||||
|
tunnel_ok =
|
||||||
|
http_proxy_tunnel(opt, &back[i].r, back[i].url_adr, timeout);
|
||||||
|
if (!tunnel_ok) {
|
||||||
|
if (!strnotempty(back[i].r.msg))
|
||||||
|
strcpybuff(back[i].r.msg, "proxy CONNECT failed");
|
||||||
|
deletehttp(&back[i].r);
|
||||||
|
back[i].r.soc = INVALID_SOCKET;
|
||||||
|
back[i].r.statuscode = STATUSCODE_NON_FATAL;
|
||||||
|
back[i].status = STATUS_READY;
|
||||||
|
back_set_finished(sback, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
// handshake not yet launched
|
// handshake not yet launched
|
||||||
if (!back[i].r.ssl_con) {
|
if (tunnel_ok && !back[i].r.ssl_con) {
|
||||||
SSL_CTX_set_options(openssl_ctx, SSL_OP_ALL);
|
SSL_CTX_set_options(openssl_ctx, SSL_OP_ALL);
|
||||||
// new session
|
// new session
|
||||||
back[i].r.ssl_con = SSL_new(openssl_ctx);
|
back[i].r.ssl_con = SSL_new(openssl_ctx);
|
||||||
@@ -2551,7 +2569,7 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
|
|||||||
back[i].r.statuscode = STATUSCODE_SSL_HANDSHAKE;
|
back[i].r.statuscode = STATUSCODE_SSL_HANDSHAKE;
|
||||||
}
|
}
|
||||||
/* Error */
|
/* Error */
|
||||||
if (back[i].r.statuscode == STATUSCODE_SSL_HANDSHAKE) {
|
if (tunnel_ok && back[i].r.statuscode == STATUSCODE_SSL_HANDSHAKE) {
|
||||||
strcpybuff(back[i].r.msg, "bad SSL/TLS handshake");
|
strcpybuff(back[i].r.msg, "bad SSL/TLS handshake");
|
||||||
deletehttp(&back[i].r);
|
deletehttp(&back[i].r);
|
||||||
back[i].r.soc = INVALID_SOCKET;
|
back[i].r.soc = INVALID_SOCKET;
|
||||||
@@ -3838,7 +3856,7 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
|
|||||||
/* funny log for commandline users */
|
/* funny log for commandline users */
|
||||||
//if (!opt->quiet) {
|
//if (!opt->quiet) {
|
||||||
// petite animation
|
// petite animation
|
||||||
if (opt->verbosedisplay == 1) {
|
if (opt->verbosedisplay == HTS_VERBOSE_SIMPLE) {
|
||||||
if (back[i].status == STATUS_READY) {
|
if (back[i].status == STATUS_READY) {
|
||||||
if (back[i].r.statuscode == HTTP_OK)
|
if (back[i].r.statuscode == HTTP_OK)
|
||||||
printf("* %s%s (" LLintP " bytes) - OK" VT_CLREOL "\r",
|
printf("* %s%s (" LLintP " bytes) - OK" VT_CLREOL "\r",
|
||||||
|
|||||||
@@ -3342,7 +3342,8 @@ int back_fill(struct_back * sback, httrackp * opt, cache_back * cache,
|
|||||||
int ptr, int numero_passe) {
|
int ptr, int numero_passe) {
|
||||||
int n = back_pluggable_sockets(sback, opt);
|
int n = back_pluggable_sockets(sback, opt);
|
||||||
|
|
||||||
if (opt->savename_delayed == 2 && !opt->delayed_cached) /* cancel (always delayed) */
|
if (opt->savename_delayed == HTS_SAVENAME_DELAYED_HARD &&
|
||||||
|
!opt->delayed_cached) /* cancel (always delayed) */
|
||||||
return 0;
|
return 0;
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
int p;
|
int p;
|
||||||
@@ -3846,7 +3847,7 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
|
|||||||
a = opt->savename_type;
|
a = opt->savename_type;
|
||||||
b = opt->savename_83;
|
b = opt->savename_83;
|
||||||
opt->savename_type = 0;
|
opt->savename_type = 0;
|
||||||
opt->savename_83 = 0;
|
opt->savename_83 = HTS_SAVENAME_83_LONG;
|
||||||
// note: adr,fil peuvent être patchés
|
// note: adr,fil peuvent être patchés
|
||||||
r =
|
r =
|
||||||
url_savename(&afs, NULL, NULL, NULL, opt, sback, cache, hashptr, ptr, numero_passe,
|
url_savename(&afs, NULL, NULL, NULL, opt, sback, cache, hashptr, ptr, numero_passe,
|
||||||
|
|||||||
@@ -612,12 +612,12 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
|||||||
/* Terminal is a tty, may ask questions and display funny information */
|
/* Terminal is a tty, may ask questions and display funny information */
|
||||||
if (isatty(1)) {
|
if (isatty(1)) {
|
||||||
opt->quiet = 0;
|
opt->quiet = 0;
|
||||||
opt->verbosedisplay = 1;
|
opt->verbosedisplay = HTS_VERBOSE_SIMPLE;
|
||||||
}
|
}
|
||||||
/* Not a tty, no stdin input or funny output! */
|
/* Not a tty, no stdin input or funny output! */
|
||||||
else {
|
else {
|
||||||
opt->quiet = 1;
|
opt->quiet = 1;
|
||||||
opt->verbosedisplay = 0;
|
opt->verbosedisplay = HTS_VERBOSE_NONE;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -953,9 +953,11 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
|||||||
p = buff;
|
p = buff;
|
||||||
do {
|
do {
|
||||||
int insert_after_argc;
|
int insert_after_argc;
|
||||||
|
int quoted; /* "" unquotes to empty but is still a real token (#106) */
|
||||||
|
|
||||||
// read next
|
// read next
|
||||||
lastp = p;
|
lastp = p;
|
||||||
|
quoted = (p != NULL && *p == '"');
|
||||||
if (p) {
|
if (p) {
|
||||||
p = next_token(p, 1);
|
p = next_token(p, 1);
|
||||||
if (p) {
|
if (p) {
|
||||||
@@ -966,7 +968,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
|||||||
|
|
||||||
/* Insert parameters BUT so that they can be in the same order */
|
/* Insert parameters BUT so that they can be in the same order */
|
||||||
if (lastp) {
|
if (lastp) {
|
||||||
if (strnotempty(lastp)) {
|
if (strnotempty(lastp) || quoted) {
|
||||||
insert_after_argc = argc - insert_after;
|
insert_after_argc = argc - insert_after;
|
||||||
cmdl_ins(lastp, insert_after_argc, (argv + insert_after), x_argvblk,
|
cmdl_ins(lastp, insert_after_argc, (argv + insert_after), x_argvblk,
|
||||||
x_argvblk_size, x_ptr);
|
x_argvblk_size, x_ptr);
|
||||||
@@ -1815,24 +1817,22 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
|||||||
com++;
|
com++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'L':
|
case 'L': {
|
||||||
{
|
sscanf(com + 1, "%d", (int *) &opt->savename_83);
|
||||||
sscanf(com + 1, "%d", &opt->savename_83);
|
switch (opt->savename_83) {
|
||||||
switch (opt->savename_83) {
|
case 0: // 8-3 (ISO9660 L1)
|
||||||
case 0: // 8-3 (ISO9660 L1)
|
opt->savename_83 = HTS_SAVENAME_83_DOS;
|
||||||
opt->savename_83 = 1;
|
break;
|
||||||
break;
|
case 1:
|
||||||
case 1:
|
opt->savename_83 = HTS_SAVENAME_83_LONG;
|
||||||
opt->savename_83 = 0;
|
break;
|
||||||
break;
|
default: // 2 == ISO9660 (ISO9660 L2)
|
||||||
default: // 2 == ISO9660 (ISO9660 L2)
|
opt->savename_83 = HTS_SAVENAME_83_ISO9660;
|
||||||
opt->savename_83 = 2;
|
break;
|
||||||
break;
|
|
||||||
}
|
|
||||||
while(isdigit((unsigned char) *(com + 1)))
|
|
||||||
com++;
|
|
||||||
}
|
}
|
||||||
break;
|
while (isdigit((unsigned char) *(com + 1)))
|
||||||
|
com++;
|
||||||
|
} break;
|
||||||
case 's':
|
case 's':
|
||||||
if (isdigit((unsigned char) *(com + 1))) {
|
if (isdigit((unsigned char) *(com + 1))) {
|
||||||
sscanf(com + 1, "%d", (int *) &opt->robots);
|
sscanf(com + 1, "%d", (int *) &opt->robots);
|
||||||
@@ -1989,7 +1989,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
|||||||
}
|
}
|
||||||
break; // url hack
|
break; // url hack
|
||||||
case 'v':
|
case 'v':
|
||||||
opt->verbosedisplay = 2;
|
opt->verbosedisplay = HTS_VERBOSE_FULL;
|
||||||
if (isdigit((unsigned char) *(com + 1))) {
|
if (isdigit((unsigned char) *(com + 1))) {
|
||||||
sscanf(com + 1, "%d", (int *) &opt->verbosedisplay);
|
sscanf(com + 1, "%d", (int *) &opt->verbosedisplay);
|
||||||
while(isdigit((unsigned char) *(com + 1)))
|
while(isdigit((unsigned char) *(com + 1)))
|
||||||
@@ -2004,7 +2004,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'N':
|
case 'N':
|
||||||
opt->savename_delayed = 2;
|
opt->savename_delayed = HTS_SAVENAME_DELAYED_HARD;
|
||||||
if (isdigit((unsigned char) *(com + 1))) {
|
if (isdigit((unsigned char) *(com + 1))) {
|
||||||
sscanf(com + 1, "%d", (int *) &opt->savename_delayed);
|
sscanf(com + 1, "%d", (int *) &opt->savename_delayed);
|
||||||
while(isdigit((unsigned char) *(com + 1)))
|
while(isdigit((unsigned char) *(com + 1)))
|
||||||
@@ -3131,6 +3131,43 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
|||||||
htsmain_free();
|
htsmain_free();
|
||||||
return err;
|
return err;
|
||||||
} break;
|
} break;
|
||||||
|
case 'Q': { // cookie request-header selftest: httrack -#Q
|
||||||
|
static t_cookie cookie;
|
||||||
|
char hdr[1024];
|
||||||
|
/* RFC 6265: bare name=value pairs, no $Version/$Path (#151). */
|
||||||
|
const char *expected = "Cookie: name=value; has_js=1" H_CRLF;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
const char *dom = "www.example.com";
|
||||||
|
int added;
|
||||||
|
|
||||||
|
cookie.max_len = (int) sizeof(cookie.data);
|
||||||
|
cookie.data[0] = '\0';
|
||||||
|
added = cookie_add(&cookie, "name", "value", dom, "/");
|
||||||
|
added |= cookie_add(&cookie, "has_js", "1", dom, "/");
|
||||||
|
/* different domain: must be filtered out */
|
||||||
|
added |= cookie_add(&cookie, "junk", "x", "other.org", "/");
|
||||||
|
if (added) {
|
||||||
|
printf("cookie-header: FAIL (cookie_add setup)\n");
|
||||||
|
htsmain_free();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
http_cookie_header_selftest(&cookie, dom, "/", hdr,
|
||||||
|
sizeof(hdr));
|
||||||
|
if (strcmp(hdr, expected) != 0)
|
||||||
|
err = 1;
|
||||||
|
if (strstr(hdr, "$Version") != NULL ||
|
||||||
|
strstr(hdr, "$Path") != NULL)
|
||||||
|
err = 1;
|
||||||
|
if (strstr(hdr, "junk") != NULL) // wrong-domain cookie leaked
|
||||||
|
err = 1;
|
||||||
|
printf("cookie-header: %s\n", err ? "FAIL" : "OK");
|
||||||
|
if (err)
|
||||||
|
printf(" got: %s\n", hdr);
|
||||||
|
htsmain_free();
|
||||||
|
return err;
|
||||||
|
} break;
|
||||||
case '!':
|
case '!':
|
||||||
HTS_PANIC_PRINTF
|
HTS_PANIC_PRINTF
|
||||||
("Option #! is disabled for security reasons");
|
("Option #! is disabled for security reasons");
|
||||||
|
|||||||
273
src/htslib.c
273
src/htslib.c
@@ -644,6 +644,165 @@ T_SOC http_fopen(httrackp * opt, const char *adr, const char *fil, htsblk * reto
|
|||||||
return http_xfopen(opt, 0, 1, 1, NULL, adr, fil, retour);
|
return http_xfopen(opt, 0, 1, 1, NULL, adr, fil, retour);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Read a CRLF line from a non-blocking socket (waits up to timeout per recv).
|
||||||
|
// Returns the line length (0 = empty), or -1 on timeout/EOF/error.
|
||||||
|
static int proxy_getline(T_SOC soc, char *s, int max, int timeout) {
|
||||||
|
int j = 0;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
unsigned char ch;
|
||||||
|
int n;
|
||||||
|
|
||||||
|
if (!check_readinput_t(soc, timeout))
|
||||||
|
return -1; // timed out waiting for data
|
||||||
|
n = (int) recv(soc, &ch, 1, 0);
|
||||||
|
if (n == 1) {
|
||||||
|
if (ch == 13) // CR
|
||||||
|
continue;
|
||||||
|
if (ch == 10) // LF: end of line
|
||||||
|
break;
|
||||||
|
if (j >= max - 1)
|
||||||
|
return -1; // line too long: bound the read against a hostile proxy
|
||||||
|
s[j++] = (char) ch;
|
||||||
|
} else if (n == 0) {
|
||||||
|
return -1; // connection closed
|
||||||
|
} else {
|
||||||
|
#ifdef _WIN32
|
||||||
|
if (WSAGetLastError() == WSAEWOULDBLOCK)
|
||||||
|
continue;
|
||||||
|
#else
|
||||||
|
if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
|
||||||
|
continue;
|
||||||
|
#endif
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s[j] = '\0';
|
||||||
|
return j;
|
||||||
|
}
|
||||||
|
|
||||||
|
int http_proxy_tunnel(httrackp *opt, htsblk *retour, const char *adr,
|
||||||
|
int timeout) {
|
||||||
|
const T_SOC soc = retour->soc;
|
||||||
|
const char *const host = jump_identification_const(adr); // host[:port]
|
||||||
|
const char *const portsep = jump_toport_const(adr); // ":port" or NULL
|
||||||
|
char BIGSTK authority[HTS_URLMAXSIZE * 2];
|
||||||
|
char BIGSTK req[HTS_URLMAXSIZE * 4 + 1100];
|
||||||
|
char line[1024];
|
||||||
|
int code;
|
||||||
|
|
||||||
|
if (soc == INVALID_SOCKET)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
// CONNECT needs an explicit host:port; default the https port
|
||||||
|
authority[0] = '\0';
|
||||||
|
if (portsep != NULL)
|
||||||
|
strlcatbuff(authority, host, sizeof(authority)); // already host:port
|
||||||
|
else
|
||||||
|
snprintf(authority, sizeof(authority), "%s:%d", host, 443);
|
||||||
|
|
||||||
|
// backstop: never let a stray CR/LF in the host smuggle a second line into
|
||||||
|
// the CONNECT request (the host is already sanitized upstream)
|
||||||
|
{
|
||||||
|
const char *c;
|
||||||
|
|
||||||
|
for (c = authority; *c != '\0'; c++) {
|
||||||
|
if ((unsigned char) *c < ' ') {
|
||||||
|
strcpybuff(retour->msg, "proxy CONNECT: invalid host");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
snprintf(req, sizeof(req), "CONNECT %s HTTP/1.0" H_CRLF "Host: %s" H_CRLF,
|
||||||
|
authority, authority);
|
||||||
|
|
||||||
|
// creds go on the CONNECT, not the tunneled origin request
|
||||||
|
if (link_has_authorization(retour->req.proxy.name)) {
|
||||||
|
const char *a = jump_identification_const(retour->req.proxy.name);
|
||||||
|
const char *astart = jump_protocol_const(retour->req.proxy.name);
|
||||||
|
char autorisation[1100];
|
||||||
|
char user_pass[256];
|
||||||
|
|
||||||
|
autorisation[0] = user_pass[0] = '\0';
|
||||||
|
strncatbuff(user_pass, astart, (int) (a - astart) - 1);
|
||||||
|
strcpybuff(user_pass, unescape_http(OPT_GET_BUFF(opt),
|
||||||
|
OPT_GET_BUFF_SIZE(opt), user_pass));
|
||||||
|
code64((unsigned char *) user_pass, (int) strlen(user_pass),
|
||||||
|
(unsigned char *) autorisation, 0);
|
||||||
|
strlcatbuff(req, "Proxy-Authorization: Basic ", sizeof(req));
|
||||||
|
strlcatbuff(req, autorisation, sizeof(req));
|
||||||
|
strlcatbuff(req, H_CRLF, sizeof(req));
|
||||||
|
}
|
||||||
|
strlcatbuff(req, H_CRLF, sizeof(req)); // end of request headers
|
||||||
|
|
||||||
|
// raw send: ssl is set, so sendc() would route to TLS
|
||||||
|
{
|
||||||
|
const char *p = req;
|
||||||
|
size_t remain = strlen(req);
|
||||||
|
int stalls = 0;
|
||||||
|
|
||||||
|
while (remain > 0) {
|
||||||
|
const int n = (int) send(soc, p, (int) remain, 0);
|
||||||
|
|
||||||
|
if (n > 0) {
|
||||||
|
p += n;
|
||||||
|
remain -= (size_t) n;
|
||||||
|
stalls = 0;
|
||||||
|
} else {
|
||||||
|
#ifdef _WIN32
|
||||||
|
const int wouldblock = (WSAGetLastError() == WSAEWOULDBLOCK);
|
||||||
|
#else
|
||||||
|
const int wouldblock =
|
||||||
|
(errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR);
|
||||||
|
#endif
|
||||||
|
// don't spin forever on a fatal error or an unwritable socket
|
||||||
|
if (!wouldblock || !check_writeinput_t(soc, timeout) ||
|
||||||
|
++stalls > 100) {
|
||||||
|
strcpybuff(retour->msg, "proxy CONNECT: write error");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// proxy status line: "HTTP/1.x <code> ..."
|
||||||
|
if (proxy_getline(soc, line, sizeof(line), timeout) < 0) {
|
||||||
|
strcpybuff(retour->msg, "proxy CONNECT: no response");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (sscanf(line, "HTTP/%*d.%*d %d", &code) < 1)
|
||||||
|
code = 0;
|
||||||
|
if (code < 200 || code >= 300) {
|
||||||
|
snprintf(retour->msg, sizeof(retour->msg), "proxy CONNECT refused: %s",
|
||||||
|
strnotempty(line) ? line : "(no status)");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// drain headers to the blank line; cap the count so a flooding proxy can't
|
||||||
|
// stall the crawl
|
||||||
|
{
|
||||||
|
int headers = 0;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
const int n = proxy_getline(soc, line, sizeof(line), timeout);
|
||||||
|
|
||||||
|
if (n < 0) {
|
||||||
|
strcpybuff(retour->msg, "proxy CONNECT: truncated response");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (n == 0)
|
||||||
|
break; // blank line: tunnel ready
|
||||||
|
if (++headers > 64) {
|
||||||
|
strcpybuff(retour->msg, "proxy CONNECT: too many response headers");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
// ouverture d'une liaison http, envoi d'une requète
|
// ouverture d'une liaison http, envoi d'une requète
|
||||||
// mode: 0 GET 1 HEAD [2 POST]
|
// mode: 0 GET 1 HEAD [2 POST]
|
||||||
// treat: traiter header?
|
// treat: traiter header?
|
||||||
@@ -680,14 +839,14 @@ T_SOC http_xfopen(httrackp * opt, int mode, int treat, int waitconnect,
|
|||||||
|
|
||||||
/* connexion */
|
/* connexion */
|
||||||
if (retour) {
|
if (retour) {
|
||||||
if ((!(retour->req.proxy.active))
|
/* no proxy, or proxy not usable here (local file) */
|
||||||
|| ((strcmp(adr, "file://") == 0)
|
if ((!(retour->req.proxy.active)) || (strcmp(adr, "file://") == 0)) {
|
||||||
|| (strncmp(adr, "https://", 8) == 0)
|
|
||||||
)
|
|
||||||
) { /* pas de proxy, ou non utilisable ici */
|
|
||||||
soc = newhttp(opt, adr, retour, -1, waitconnect);
|
soc = newhttp(opt, adr, retour, -1, waitconnect);
|
||||||
} else {
|
} else {
|
||||||
soc = newhttp(opt, retour->req.proxy.name, retour, retour->req.proxy.port, waitconnect); // ouvrir sur le proxy à la place
|
// to the proxy; https tunnels to the origin via CONNECT in back_wait
|
||||||
|
// (#85)
|
||||||
|
soc = newhttp(opt, retour->req.proxy.name, retour, retour->req.proxy.port,
|
||||||
|
waitconnect);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
soc = newhttp(opt, adr, NULL, -1, waitconnect);
|
soc = newhttp(opt, adr, NULL, -1, waitconnect);
|
||||||
@@ -874,6 +1033,50 @@ static void print_buffer(buff_struct*const str, const char *format, ...) {
|
|||||||
assertf(str->pos < str->capacity);
|
assertf(str->pos < str->capacity);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Append the request "Cookie:" header line for every stored cookie matching
|
||||||
|
domain/path. RFC 6265 form: bare "name=value" pairs joined by "; ", no
|
||||||
|
$Version/$Path attributes (those are RFC 2965 syntax that modern servers
|
||||||
|
reject, issue #151). Returns the number of cookies emitted. */
|
||||||
|
static int append_cookie_header(buff_struct *bstr, t_cookie *cookie,
|
||||||
|
const char *domain, const char *path) {
|
||||||
|
char buffer[8192];
|
||||||
|
char *b;
|
||||||
|
int cook = 0;
|
||||||
|
int max_cookies = 8;
|
||||||
|
|
||||||
|
if (cookie == NULL)
|
||||||
|
return 0;
|
||||||
|
b = cookie->data;
|
||||||
|
do {
|
||||||
|
b = cookie_find(b, "", domain, path); // next matching cookie
|
||||||
|
if (b != NULL) {
|
||||||
|
max_cookies--;
|
||||||
|
if (!cook) {
|
||||||
|
print_buffer(bstr, "Cookie: ");
|
||||||
|
cook = 1;
|
||||||
|
} else
|
||||||
|
print_buffer(bstr, "; ");
|
||||||
|
print_buffer(bstr, "%s", cookie_get(buffer, b, 5));
|
||||||
|
print_buffer(bstr, "=%s", cookie_get(buffer, b, 6));
|
||||||
|
b = cookie_nextfield(b);
|
||||||
|
}
|
||||||
|
} while (b != NULL && max_cookies > 0);
|
||||||
|
if (cook)
|
||||||
|
print_buffer(bstr, H_CRLF);
|
||||||
|
return cook;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Self-test entry for append_cookie_header(): build the request Cookie line
|
||||||
|
into dst (always NUL-terminated). Returns the number of cookies emitted. */
|
||||||
|
int http_cookie_header_selftest(t_cookie *cookie, const char *domain,
|
||||||
|
const char *path, char *dst, size_t dst_size) {
|
||||||
|
buff_struct bstr = {dst, dst_size, 0};
|
||||||
|
|
||||||
|
assertf(dst != NULL && dst_size > 0);
|
||||||
|
dst[0] = '\0';
|
||||||
|
return append_cookie_header(&bstr, cookie, domain, path);
|
||||||
|
}
|
||||||
|
|
||||||
// envoi d'une requète
|
// envoi d'une requète
|
||||||
int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
|
int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
|
||||||
const char *xsend, const char *adr, const char *fil,
|
const char *xsend, const char *adr, const char *fil,
|
||||||
@@ -999,8 +1202,8 @@ int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
|
|||||||
if (xsend)
|
if (xsend)
|
||||||
print_buffer(&bstr, "%s", xsend); // éventuelles autres lignes
|
print_buffer(&bstr, "%s", xsend); // éventuelles autres lignes
|
||||||
|
|
||||||
// tester proxy authentication
|
// for https, auth rides the CONNECT (the tunneled GET would leak it)
|
||||||
if (retour->req.proxy.active) {
|
if (retour->req.proxy.active && strncmp(adr, "https://", 8) != 0) {
|
||||||
if (link_has_authorization(retour->req.proxy.name)) { // et hop, authentification proxy!
|
if (link_has_authorization(retour->req.proxy.name)) { // et hop, authentification proxy!
|
||||||
const char *a = jump_identification_const(retour->req.proxy.name);
|
const char *a = jump_identification_const(retour->req.proxy.name);
|
||||||
const char *astart = jump_protocol_const(retour->req.proxy.name);
|
const char *astart = jump_protocol_const(retour->req.proxy.name);
|
||||||
@@ -1048,34 +1251,9 @@ int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
|
|||||||
search_tag + strlen(POSTTOK) + 1))));
|
search_tag + strlen(POSTTOK) + 1))));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// gestion cookies?
|
// send stored cookies matching this host/path
|
||||||
if (cookie) {
|
if (cookie) {
|
||||||
char buffer[8192];
|
append_cookie_header(&bstr, cookie, jump_identification_const(adr), fil);
|
||||||
char *b = cookie->data;
|
|
||||||
int cook = 0;
|
|
||||||
int max_cookies = 8;
|
|
||||||
|
|
||||||
do {
|
|
||||||
b = cookie_find(b, "", jump_identification_const(adr), fil); // prochain cookie satisfaisant aux conditions
|
|
||||||
if (b != NULL) {
|
|
||||||
max_cookies--;
|
|
||||||
if (!cook) {
|
|
||||||
print_buffer(&bstr, "Cookie: $Version=1; ");
|
|
||||||
cook = 1;
|
|
||||||
} else
|
|
||||||
print_buffer(&bstr, "; ");
|
|
||||||
print_buffer(&bstr, "%s", cookie_get(buffer, b, 5));
|
|
||||||
print_buffer(&bstr, "=%s", cookie_get(buffer, b, 6));
|
|
||||||
print_buffer(&bstr, "; $Path=%s", cookie_get(buffer, b, 2));
|
|
||||||
b = cookie_nextfield(b);
|
|
||||||
}
|
|
||||||
} while(b != NULL && max_cookies > 0);
|
|
||||||
if (cook) { // on a envoyé un (ou plusieurs) cookie?
|
|
||||||
print_buffer(&bstr, H_CRLF);
|
|
||||||
#if DEBUG_COOK
|
|
||||||
printf("Header:\n%s\n", bstr.buffer);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// gérer le keep-alive (garder socket)
|
// gérer le keep-alive (garder socket)
|
||||||
if (retour->req.http11 && !retour->req.nokeepalive) {
|
if (retour->req.http11 && !retour->req.nokeepalive) {
|
||||||
@@ -1808,6 +1986,24 @@ int check_readinput_t(T_SOC soc, int timeout) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// wait until the socket is writable, up to timeout seconds
|
||||||
|
int check_writeinput_t(T_SOC soc, int timeout) {
|
||||||
|
if (soc != INVALID_SOCKET) {
|
||||||
|
fd_set fds;
|
||||||
|
struct timeval tv;
|
||||||
|
const int isoc = (int) soc;
|
||||||
|
|
||||||
|
assertf(isoc == soc);
|
||||||
|
FD_ZERO(&fds);
|
||||||
|
FD_SET(isoc, &fds);
|
||||||
|
tv.tv_sec = timeout;
|
||||||
|
tv.tv_usec = 0;
|
||||||
|
select(isoc + 1, NULL, &fds, NULL, &tv);
|
||||||
|
return FD_ISSET(isoc, &fds) ? 1 : 0;
|
||||||
|
} else
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
// idem, sauf qu'ici on peut choisir la taille max de données à recevoir
|
// idem, sauf qu'ici on peut choisir la taille max de données à recevoir
|
||||||
// SI bufl==0 alors le buffer est censé être de 8kos, et on recoit par bloc de lignes
|
// SI bufl==0 alors le buffer est censé être de 8kos, et on recoit par bloc de lignes
|
||||||
// en éliminant les cr (ex: header), arrêt si double-lf
|
// en éliminant les cr (ex: header), arrêt si double-lf
|
||||||
@@ -5468,9 +5664,10 @@ HTSEXT_API httrackp *hts_create_opt(void) {
|
|||||||
"Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)");
|
"Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)");
|
||||||
StringCopy(opt->referer, "");
|
StringCopy(opt->referer, "");
|
||||||
StringCopy(opt->from, "");
|
StringCopy(opt->from, "");
|
||||||
opt->savename_83 = 0; // noms longs par défaut
|
opt->savename_83 = HTS_SAVENAME_83_LONG; // long names by default
|
||||||
opt->savename_type = 0; // avec structure originale
|
opt->savename_type = 0; // avec structure originale
|
||||||
opt->savename_delayed = 2; // hard delayed type (default)
|
opt->savename_delayed =
|
||||||
|
HTS_SAVENAME_DELAYED_HARD; // always delay the type check (default)
|
||||||
opt->delayed_cached = HTS_TRUE;
|
opt->delayed_cached = HTS_TRUE;
|
||||||
opt->mimehtml = HTS_FALSE;
|
opt->mimehtml = HTS_FALSE;
|
||||||
opt->parsejava = HTSPARSE_DEFAULT; // parser classes
|
opt->parsejava = HTSPARSE_DEFAULT; // parser classes
|
||||||
@@ -5495,7 +5692,7 @@ HTSEXT_API httrackp *hts_create_opt(void) {
|
|||||||
opt->parseall = HTS_TRUE;
|
opt->parseall = HTS_TRUE;
|
||||||
opt->parsedebug = HTS_FALSE;
|
opt->parsedebug = HTS_FALSE;
|
||||||
opt->norecatch = HTS_FALSE;
|
opt->norecatch = HTS_FALSE;
|
||||||
opt->verbosedisplay = 0; // pas d'animation texte
|
opt->verbosedisplay = HTS_VERBOSE_NONE; // no text animation
|
||||||
opt->sizehack = HTS_FALSE;
|
opt->sizehack = HTS_FALSE;
|
||||||
opt->urlhack = HTS_TRUE;
|
opt->urlhack = HTS_TRUE;
|
||||||
StringCopy(opt->footer, HTS_DEFAULT_FOOTER);
|
StringCopy(opt->footer, HTS_DEFAULT_FOOTER);
|
||||||
|
|||||||
16
src/htslib.h
16
src/htslib.h
@@ -182,6 +182,11 @@ int http_sendhead(httrackp * opt, t_cookie * cookie, int mode, const char *xsend
|
|||||||
const char *adr, const char *fil,
|
const char *adr, const char *fil,
|
||||||
const char *referer_adr, const char *referer_fil,
|
const char *referer_adr, const char *referer_fil,
|
||||||
htsblk * retour);
|
htsblk * retour);
|
||||||
|
/* Build the request "Cookie:" header line for stored cookies matching
|
||||||
|
domain/path into dst (NUL-terminated). Exposed for the -#Q self-test;
|
||||||
|
wraps the same logic http_sendhead() uses. Returns cookies emitted. */
|
||||||
|
int http_cookie_header_selftest(t_cookie *cookie, const char *domain,
|
||||||
|
const char *path, char *dst, size_t dst_size);
|
||||||
|
|
||||||
//int newhttp(char* iadr,char* err=NULL);
|
//int newhttp(char* iadr,char* err=NULL);
|
||||||
T_SOC newhttp(httrackp * opt, const char *iadr, htsblk * retour, int port,
|
T_SOC newhttp(httrackp * opt, const char *iadr, htsblk * retour, int port,
|
||||||
@@ -193,6 +198,17 @@ HTS_INLINE void deletesoc_r(htsblk * r);
|
|||||||
htsblk http_test(httrackp * opt, const char *adr, const char *fil, char *loc);
|
htsblk http_test(httrackp * opt, const char *adr, const char *fil, char *loc);
|
||||||
int check_readinput(htsblk * r);
|
int check_readinput(htsblk * r);
|
||||||
int check_readinput_t(T_SOC soc, int timeout);
|
int check_readinput_t(T_SOC soc, int timeout);
|
||||||
|
int check_writeinput_t(T_SOC soc, int timeout);
|
||||||
|
|
||||||
|
/* Open an HTTP CONNECT tunnel through the active proxy for an https request:
|
||||||
|
`retour->soc` must already be TCP-connected to the proxy, and `adr` is the
|
||||||
|
origin authority (url_adr, e.g. "https://host:port"). Sends the CONNECT
|
||||||
|
request (with Proxy-Authorization when the proxy carries credentials) and
|
||||||
|
reads the proxy's status line, so the caller's TLS handshake then runs
|
||||||
|
end-to-end with the origin. Blocks up to `timeout` seconds. Returns 1 on a
|
||||||
|
2xx tunnel, 0 on failure (retour->msg set unless the socket was already invalid; statuscode is left to the caller). */
|
||||||
|
int http_proxy_tunnel(httrackp *opt, htsblk *retour, const char *adr,
|
||||||
|
int timeout);
|
||||||
void treathead(t_cookie * cookie, const char *adr, const char *fil, htsblk * retour,
|
void treathead(t_cookie * cookie, const char *adr, const char *fil, htsblk * retour,
|
||||||
char *rcvd);
|
char *rcvd);
|
||||||
void treatfirstline(htsblk * retour, const char *rcvd);
|
void treatfirstline(htsblk * retour, const char *rcvd);
|
||||||
|
|||||||
@@ -184,10 +184,11 @@ int url_savename(lien_adrfilsave *const afs,
|
|||||||
|
|
||||||
/* 8-3 ? */
|
/* 8-3 ? */
|
||||||
switch (opt->savename_83) {
|
switch (opt->savename_83) {
|
||||||
case 1: // 8-3
|
case HTS_SAVENAME_83_DOS: // 8-3
|
||||||
max_char = 8;
|
max_char = 8;
|
||||||
break;
|
break;
|
||||||
case 2: // Level 2 File names may be up to 31 characters.
|
case HTS_SAVENAME_83_ISO9660: // Level 2 File names may be up to 31
|
||||||
|
// characters.
|
||||||
max_char = 31;
|
max_char = 31;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@@ -324,7 +325,7 @@ int url_savename(lien_adrfilsave *const afs,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* replace shtml to html.. */
|
/* replace shtml to html.. */
|
||||||
if (opt->savename_delayed == 2)
|
if (opt->savename_delayed == HTS_SAVENAME_DELAYED_HARD)
|
||||||
is_html = -1; /* ALWAYS delay type */
|
is_html = -1; /* ALWAYS delay type */
|
||||||
else
|
else
|
||||||
is_html = ishtml(opt, fil);
|
is_html = ishtml(opt, fil);
|
||||||
@@ -363,7 +364,9 @@ int url_savename(lien_adrfilsave *const afs,
|
|||||||
) {
|
) {
|
||||||
// tester type avec requète HEAD si on ne connait pas le type du fichier
|
// tester type avec requète HEAD si on ne connait pas le type du fichier
|
||||||
if (!((opt->check_type == 1) && (fil[strlen(fil) - 1] == '/'))) // slash doit être html?
|
if (!((opt->check_type == 1) && (fil[strlen(fil) - 1] == '/'))) // slash doit être html?
|
||||||
if (opt->savename_delayed == 2 || (ishtest = ishtml(opt, fil)) < 0) { // on ne sait pas si c'est un html ou un fichier..
|
if (opt->savename_delayed == HTS_SAVENAME_DELAYED_HARD ||
|
||||||
|
(ishtest = ishtml(opt, fil)) <
|
||||||
|
0) { // unsure whether it's html or a file
|
||||||
// lire dans le cache
|
// lire dans le cache
|
||||||
htsblk r = cache_read_including_broken(opt, cache, adr, fil); // test uniquement
|
htsblk r = cache_read_including_broken(opt, cache, adr, fil); // test uniquement
|
||||||
|
|
||||||
@@ -393,11 +396,12 @@ int url_savename(lien_adrfilsave *const afs,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
//
|
//
|
||||||
} else if (opt->savename_delayed != 2 && is_userknowntype(opt, fil)) { /* PATCH BY BRIAN SCHRÖDER.
|
} else if (opt->savename_delayed != HTS_SAVENAME_DELAYED_HARD &&
|
||||||
Lookup mimetype not only by extension,
|
is_userknowntype(opt, fil)) { /* PATCH BY BRIAN SCHRÖDER.
|
||||||
but also by filename */
|
Lookup mimetype not only by extension,
|
||||||
/* Note: "foo.cgi => text/html" means that foo.cgi shall have the text/html MIME file type,
|
but also by filename */
|
||||||
that is, ".html" */
|
/* Note: "foo.cgi => text/html" means that foo.cgi shall have the
|
||||||
|
text/html MIME file type, that is, ".html" */
|
||||||
char BIGSTK mime[1024];
|
char BIGSTK mime[1024];
|
||||||
|
|
||||||
mime[0] = ext[0] = '\0';
|
mime[0] = ext[0] = '\0';
|
||||||
@@ -408,9 +412,13 @@ int url_savename(lien_adrfilsave *const afs,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// note: if savename_delayed is enabled, the naming will be temporary (and slightly invalid!)
|
// note: if savename_delayed is enabled, the naming will be temporary
|
||||||
// note: if we are about to stop (opt->state.stop), back_add() will fail later
|
// (and slightly invalid!)
|
||||||
else if (opt->savename_delayed != 0 && !opt->state.stop) {
|
//
|
||||||
|
// note: if we are about to stop (opt->state.stop), back_add() will
|
||||||
|
// fail later
|
||||||
|
else if (opt->savename_delayed != HTS_SAVENAME_DELAYED_NONE &&
|
||||||
|
!opt->state.stop) {
|
||||||
// Check if the file is ready in backing. We basically take the same logic as later.
|
// Check if the file is ready in backing. We basically take the same logic as later.
|
||||||
// FIXME: we should cleanup and factorize this unholy mess
|
// FIXME: we should cleanup and factorize this unholy mess
|
||||||
if (headers != NULL && headers->status >= 0 && !is_redirect) {
|
if (headers != NULL && headers->status >= 0 && !is_redirect) {
|
||||||
@@ -698,7 +706,7 @@ int url_savename(lien_adrfilsave *const afs,
|
|||||||
}
|
}
|
||||||
// restaurer
|
// restaurer
|
||||||
opt->state._hts_in_html_parsing = hihp;
|
opt->state._hts_in_html_parsing = hihp;
|
||||||
} // caché?
|
} // caché?
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1190,7 +1198,8 @@ int url_savename(lien_adrfilsave *const afs,
|
|||||||
// Not used anymore unless non-delayed types.
|
// Not used anymore unless non-delayed types.
|
||||||
// de même en cas de manque d'extension on en place une de manière forcée..
|
// de même en cas de manque d'extension on en place une de manière forcée..
|
||||||
// cela évite les /chez/toto et les /chez/toto/index.html incompatibles
|
// cela évite les /chez/toto et les /chez/toto/index.html incompatibles
|
||||||
if (opt->savename_type != -1 && opt->savename_delayed != 2) {
|
if (opt->savename_type != -1 &&
|
||||||
|
opt->savename_delayed != HTS_SAVENAME_DELAYED_HARD) {
|
||||||
char *a = afs->save + strlen(afs->save) - 1;
|
char *a = afs->save + strlen(afs->save) - 1;
|
||||||
|
|
||||||
while((a > afs->save) && (*a != '.') && (*a != '/'))
|
while((a > afs->save) && (*a != '.') && (*a != '/'))
|
||||||
@@ -1236,31 +1245,21 @@ int url_savename(lien_adrfilsave *const afs,
|
|||||||
size_t i;
|
size_t i;
|
||||||
for(i = 0 ; afs->save[i] != '\0' ; i++) {
|
for(i = 0 ; afs->save[i] != '\0' ; i++) {
|
||||||
unsigned char c = (unsigned char) afs->save[i];
|
unsigned char c = (unsigned char) afs->save[i];
|
||||||
if (c < 32 // control
|
if (c < 32 // control
|
||||||
|| c == 127 // unwise
|
|| c == 127 // unwise
|
||||||
|| c == '~' // unix unwise
|
|| c == '~' // unix unwise
|
||||||
|| c == '\\' // windows separator
|
|| c == '\\' // windows separator
|
||||||
|| c == ':' // windows forbidden
|
|| c == ':' // windows forbidden
|
||||||
|| c == '*' // windows forbidden
|
|| c == '*' // windows forbidden
|
||||||
|| c == '?' // windows forbidden
|
|| c == '?' // windows forbidden
|
||||||
|| c == '\"' // windows forbidden
|
|| c == '\"' // windows forbidden
|
||||||
|| c == '<' // windows forbidden
|
|| c == '<' // windows forbidden
|
||||||
|| c == '>' // windows forbidden
|
|| c == '>' // windows forbidden
|
||||||
|| c == '|' // windows forbidden
|
|| c == '|' // windows forbidden
|
||||||
//|| c == '@' // ?
|
//|| c == '@' // ?
|
||||||
||
|
|| (opt->savename_83 == HTS_SAVENAME_83_ISO9660 // CDROM
|
||||||
(
|
&& (c == '-' || c == '=' || c == '+'))) {
|
||||||
opt->savename_83 == 2 // CDROM
|
afs->save[i] = '_';
|
||||||
&&
|
|
||||||
(
|
|
||||||
c == '-'
|
|
||||||
|| c == '='
|
|
||||||
|| c == '+'
|
|
||||||
)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
{
|
|
||||||
afs->save[i] = '_';
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1521,7 +1520,8 @@ int url_savename(lien_adrfilsave *const afs,
|
|||||||
char *a = afs->save + strlen(afs->save) - 1;
|
char *a = afs->save + strlen(afs->save) - 1;
|
||||||
char *b;
|
char *b;
|
||||||
int n = 2;
|
int n = 2;
|
||||||
char collisionSeparator = ((opt->savename_83 != 2) ? '-' : '_');
|
char collisionSeparator =
|
||||||
|
((opt->savename_83 != HTS_SAVENAME_83_ISO9660) ? '-' : '_');
|
||||||
|
|
||||||
tempo[0] = '\0';
|
tempo[0] = '\0';
|
||||||
|
|
||||||
|
|||||||
10
src/htsopt.h
10
src/htsopt.h
@@ -368,6 +368,13 @@ typedef enum hts_savename_delayed {
|
|||||||
HTS_SAVENAME_DELAYED_HARD = 2 /**< always delay the type check (default) */
|
HTS_SAVENAME_DELAYED_HARD = 2 /**< always delay the type check (default) */
|
||||||
} hts_savename_delayed;
|
} hts_savename_delayed;
|
||||||
|
|
||||||
|
/* Saved-name length layout (opt->savename_83). */
|
||||||
|
typedef enum hts_savename_83 {
|
||||||
|
HTS_SAVENAME_83_LONG = 0, /**< long file names (default) */
|
||||||
|
HTS_SAVENAME_83_DOS = 1, /**< DOS 8.3 names (ISO9660 level 1) */
|
||||||
|
HTS_SAVENAME_83_ISO9660 = 2 /**< ISO9660 level 2 names (up to 31 chars) */
|
||||||
|
} hts_savename_83;
|
||||||
|
|
||||||
/* Host-banning triggers (opt->hostcontrol bitmask). */
|
/* Host-banning triggers (opt->hostcontrol bitmask). */
|
||||||
typedef enum hts_hostcontrol {
|
typedef enum hts_hostcontrol {
|
||||||
HTS_HOSTCONTROL_BAN_TIMEOUT = 1 << 0, /**< ban a timing-out host */
|
HTS_HOSTCONTROL_BAN_TIMEOUT = 1 << 0, /**< ban a timing-out host */
|
||||||
@@ -430,7 +437,8 @@ struct httrackp {
|
|||||||
// int aff_progress; // progress bar
|
// int aff_progress; // progress bar
|
||||||
hts_boolean shell; /**< driven by a shell over stdin/stdout pipes */
|
hts_boolean shell; /**< driven by a shell over stdin/stdout pipes */
|
||||||
t_proxy proxy; /**< proxy configuration */
|
t_proxy proxy; /**< proxy configuration */
|
||||||
int savename_83; /**< force 8.3 (DOS) file names */
|
hts_savename_83
|
||||||
|
savename_83; /**< saved-name length layout (long/DOS/ISO9660) */
|
||||||
int savename_type; /**< saved-name layout (original tree, flat, ...) */
|
int savename_type; /**< saved-name layout (original tree, flat, ...) */
|
||||||
String
|
String
|
||||||
savename_userdef; /**< user-defined name template (e.g. %h%p/%n%q.%t) */
|
savename_userdef; /**< user-defined name template (e.g. %h%p/%n%q.%t) */
|
||||||
|
|||||||
@@ -4262,10 +4262,10 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
|
|||||||
char com[256];
|
char com[256];
|
||||||
|
|
||||||
linput(stdin, com, 200);
|
linput(stdin, com, 200);
|
||||||
if (opt->verbosedisplay == 2)
|
if (opt->verbosedisplay == HTS_VERBOSE_FULL)
|
||||||
opt->verbosedisplay = 1;
|
opt->verbosedisplay = HTS_VERBOSE_SIMPLE;
|
||||||
else
|
else
|
||||||
opt->verbosedisplay = 2;
|
opt->verbosedisplay = HTS_VERBOSE_FULL;
|
||||||
/* Info for wrappers */
|
/* Info for wrappers */
|
||||||
hts_log_print(opt, LOG_INFO, "engine: change-options");
|
hts_log_print(opt, LOG_INFO, "engine: change-options");
|
||||||
RUN_CALLBACK0(opt, chopt);
|
RUN_CALLBACK0(opt, chopt);
|
||||||
@@ -4375,7 +4375,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
|
|||||||
printf("%c\x0d", ("/-\\|")[roll]);
|
printf("%c\x0d", ("/-\\|")[roll]);
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
} else if (opt->verbosedisplay == 1) {
|
} else if (opt->verbosedisplay == HTS_VERBOSE_SIMPLE) {
|
||||||
if (b >= 0) {
|
if (b >= 0) {
|
||||||
if (back[b].r.statuscode == HTTP_OK)
|
if (back[b].r.statuscode == HTTP_OK)
|
||||||
printf("%d/%d: %s%s (" LLintP " bytes) - OK\33[K\r", ptr, opt->lien_tot,
|
printf("%d/%d: %s%s (" LLintP " bytes) - OK\33[K\r", ptr, opt->lien_tot,
|
||||||
@@ -4466,8 +4466,8 @@ int hts_wait_delayed(htsmoduleStruct * str, lien_adrfilsave *afs,
|
|||||||
char in_error_msg[32];
|
char in_error_msg[32];
|
||||||
|
|
||||||
// resolve unresolved type
|
// resolve unresolved type
|
||||||
if (opt->savename_delayed != 0 && *forbidden_url == 0 && IS_DELAYED_EXT(afs->save)
|
if (opt->savename_delayed != HTS_SAVENAME_DELAYED_NONE &&
|
||||||
&& !opt->state.stop) {
|
*forbidden_url == 0 && IS_DELAYED_EXT(afs->save) && !opt->state.stop) {
|
||||||
int loops;
|
int loops;
|
||||||
int continue_loop;
|
int continue_loop;
|
||||||
|
|
||||||
@@ -4851,7 +4851,7 @@ int hts_wait_delayed(htsmoduleStruct * str, lien_adrfilsave *afs,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // delayed type check ?
|
} // delayed type check ?
|
||||||
|
|
||||||
ENGINE_SAVE_CONTEXT_BASE();
|
ENGINE_SAVE_CONTEXT_BASE();
|
||||||
|
|
||||||
|
|||||||
@@ -288,7 +288,7 @@ static void __cdecl htsshow_uninit(t_hts_callbackarg * carg) {
|
|||||||
}
|
}
|
||||||
static int __cdecl htsshow_start(t_hts_callbackarg * carg, httrackp * opt) {
|
static int __cdecl htsshow_start(t_hts_callbackarg * carg, httrackp * opt) {
|
||||||
use_show = 0;
|
use_show = 0;
|
||||||
if (opt->verbosedisplay == 2) {
|
if (opt->verbosedisplay == HTS_VERBOSE_FULL) {
|
||||||
use_show = 1;
|
use_show = 1;
|
||||||
vt_clear();
|
vt_clear();
|
||||||
}
|
}
|
||||||
@@ -852,7 +852,7 @@ static void sig_doback(int blind) { // mettre en backing
|
|||||||
if (global_opt != NULL) {
|
if (global_opt != NULL) {
|
||||||
// suppress logging and asking lousy questions
|
// suppress logging and asking lousy questions
|
||||||
global_opt->quiet = 1;
|
global_opt->quiet = 1;
|
||||||
global_opt->verbosedisplay = 0;
|
global_opt->verbosedisplay = HTS_VERBOSE_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!blind)
|
if (!blind)
|
||||||
|
|||||||
15
tests/01_engine-cookies.test
Executable file
15
tests/01_engine-cookies.test
Executable file
@@ -0,0 +1,15 @@
|
|||||||
|
#!/bin/bash
#
# Regression guard for issue #151.
# The Cookie request header has to consist of bare RFC 6265 name=value pairs,
# without any $Version/$Path attributes. The check itself lives in the
# 'httrack -#Q' selftest; this script only validates its verdict.

set -eu

# '-#Q' needs a trailing token: without one httrack prints the usage screen.
out=$(httrack -#Q run)

# Compare against the exact success line, so that a usage screen (or any other
# unexpected output) can never be mistaken for a pass.
if [ "$out" != "cookie-header: OK" ]; then
  echo "expected 'cookie-header: OK', got: $out" >&2
  exit 1
fi
|
||||||
@@ -89,4 +89,37 @@ grep -q NEWCONTENT "$(find "$out" -path '*/a.html' -print -quit)" || {
|
|||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# --- 3. an empty quoted arg survives the doit.log round-trip (#106) ----------
|
||||||
|
# -%F "" (empty footer) records an empty "" token in doit.log; -r2 follows it so
|
||||||
|
# a "drop the empty token" bug shifts -r2 into -%F's slot (the reprise then sees
|
||||||
|
# -%F -r2 and panics "%F needs to be followed by ..."), making the bug visible
|
||||||
|
# rather than a harmless run off the end of argv.
|
||||||
|
out2="$tmp/out2"
|
||||||
|
rc=0
|
||||||
|
"$bin" "$url" -O "$out2" --quiet -n -%v0 -%F "" -r2 >/dev/null 2>&1 || rc=$?
|
||||||
|
test "$rc" -eq 0 || {
|
||||||
|
echo "FAIL: initial mirror with empty footer exited $rc"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
# precondition: the writer put the empty token on disk for the reader to reload.
|
||||||
|
grep -q ' -%F "" -r2' "$out2/hts-cache/doit.log" || {
|
||||||
|
echo "FAIL: empty footer not recorded as -%F \"\" -r2 in doit.log"
|
||||||
|
grep -- '-%F' "$out2/hts-cache/doit.log" || true
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
# no-url reprise: the reader rebuilds argv from doit.log and rewrites doit.log
|
||||||
|
# from it. The empty token surviving in the regenerated file proves the reader
|
||||||
|
# kept it (a drop/swallow would panic above or rewrite -%F without the "").
|
||||||
|
rc=0
|
||||||
|
"$bin" -O "$out2" --quiet >/dev/null 2>&1 || rc=$?
|
||||||
|
test "$rc" -eq 0 || {
|
||||||
|
echo "FAIL: empty-footer reprise exited $rc (empty token dropped from doit.log?)"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
grep -q ' -%F "" -r2' "$out2/hts-cache/doit.log" || {
|
||||||
|
echo "FAIL: empty footer did not survive the doit.log reload round-trip"
|
||||||
|
grep -- '-%F' "$out2/hts-cache/doit.log" || true
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
exit 0
|
exit 0
|
||||||
|
|||||||
136
tests/13_crawl_proxy_https.test
Normal file
136
tests/13_crawl_proxy_https.test
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Issue #85: an https crawl must go through the configured proxy (CONNECT
|
||||||
|
# tunnel), not bypass it and hit the origin directly. Fully local: a self-signed
|
||||||
|
# TLS origin plus a logging CONNECT proxy, so no network access is needed.
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
: "${top_srcdir:=..}"
|
||||||
|
|
||||||
|
if test "${HTTPS_SUPPORT:-}" == "no"; then
|
||||||
|
echo "no https support compiled, skipping"
|
||||||
|
exit 77
|
||||||
|
fi
|
||||||
|
if ! command -v python3 >/dev/null 2>&1 || ! command -v openssl >/dev/null 2>&1; then
|
||||||
|
echo "python3/openssl missing, skipping"
|
||||||
|
exit 77
|
||||||
|
fi
|
||||||
|
|
||||||
|
server="$top_srcdir/tests/proxy-https-server.py"
|
||||||
|
tmpdir=$(mktemp -d)
|
||||||
|
pids=
|
||||||
|
|
||||||
|
cleanup() {
  # EXIT handler: stop every helper process recorded in $pids (ignoring the
  # ones that are already gone), then remove the scratch directory.
  set -- $pids
  while [ "$#" -gt 0 ]; do
    kill "$1" 2>/dev/null || true
    shift
  done
  rm -rf "$tmpdir"
}
|
||||||
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
# self-signed cert for the local TLS origin (httrack does not verify certs)
|
||||||
|
openssl req -x509 -newkey rsa:2048 -keyout "$tmpdir/key.pem" \
|
||||||
|
-out "$tmpdir/cert.pem" -days 2 -nodes -subj "/CN=127.0.0.1" \
|
||||||
|
>/dev/null 2>&1
|
||||||
|
cat "$tmpdir/key.pem" "$tmpdir/cert.pem" >"$tmpdir/both.pem"
|
||||||
|
|
||||||
|
# start_server <logdir> <mode>: launches a proxy+origin pair, sets $origin_port
|
||||||
|
# and $proxy_port from its announced ephemeral ports.
|
||||||
|
start_server() {
  # Usage: start_server <logdir> <mode>
  # Launches tests/proxy-https-server.py in the background, records its pid in
  # $pids for cleanup, waits (10 s at most) for it to print "ready", then
  # exports the announced ports through $origin_port and $proxy_port.
  local dir="$1" mode="$2" ports srv_pid
  mkdir -p "$dir"
  ports="$dir/ports.txt"
  python3 "$server" "$tmpdir/both.pem" "$dir" "$mode" \
    >"$ports" 2>"$dir/server.err" &
  srv_pid=$!
  pids="$pids $srv_pid"
  for _ in $(seq 1 100); do
    grep -q "^ready" "$ports" 2>/dev/null && break
    # A server that has already exited will never announce itself: stop
    # polling right away instead of burning the whole 10 s budget.
    kill -0 "$srv_pid" 2>/dev/null || break
    sleep 0.1
  done
  grep -q "^ready" "$ports" 2>/dev/null || {
    echo "server ($mode) did not start" >&2
    cat "$dir/server.err" >&2
    exit 1
  }
  origin_port=$(awk '/^ORIGIN/{print $2}' "$ports")
  proxy_port=$(awk '/^PROXY/{print $2}' "$ports")
}
|
||||||
|
|
||||||
|
# Run httrack, but kill it after a deadline so a hang (e.g. a missing bound on
|
||||||
|
# the proxy response) surfaces as the kill code $HANG_RC instead of stalling the
|
||||||
|
# whole job. A portable stand-in for `timeout`, which macOS lacks.
|
||||||
|
HANG_RC=137 # 128 + SIGKILL
|
||||||
|
run_crawl() {
  # Usage: run_crawl <outdir> <proxy> <origin-port>
  # Crawls https://127.0.0.1:<origin-port>/ through <proxy> into <outdir>,
  # logging to <outdir>.log, and returns httrack's exit status. A watchdog
  # SIGKILLs httrack after 60 s, so a hang shows up as status 137.
  local out="$1" proxy="$2" port="$3"
  rm -rf "$out"
  httrack "https://127.0.0.1:${port}/" --proxy "$proxy" \
    -O "$out" -r1 -s0 --timeout=10 >"$out.log" 2>&1 &
  local pid=$!
  # Watchdog. The sleep runs as a background job of the subshell so the TERM
  # trap can reap it: killing only the subshell would orphan "sleep 60", which
  # would then linger after every crawl. Its output is sent to /dev/null so it
  # never holds the test's stdout/stderr open.
  (
    nap=
    trap '[ -z "$nap" ] || kill "$nap" 2>/dev/null; exit 0' TERM
    sleep 60 &
    nap=$!
    wait "$nap" && kill -9 "$pid" 2>/dev/null
  ) >/dev/null 2>&1 &
  local guard=$!
  local rc=0
  wait "$pid" 2>/dev/null || rc=$?
  kill "$guard" 2>/dev/null || true
  wait "$guard" 2>/dev/null || true
  return "$rc"
}
|
||||||
|
|
||||||
|
# Print "FAIL: <message>" on stderr, dump <logfile> (when given) for context,
# and abort the test.
fail() {
  echo "FAIL: $1" >&2
  if [ "$#" -ge 2 ]; then
    cat "$2" >&2
  fi
  exit 1
}

# --- working proxy ----------------------------------------------------------
ok="$tmpdir/ok"
start_server "$ok" ok

# 1. the page must be retrieved AND the proxy must have seen a CONNECT to the
#    origin
run_crawl "$ok/out" "127.0.0.1:${proxy_port}" "$origin_port"
if ! grep -rq "ORIGIN-PAGE-85" "$ok/out"; then
  fail "origin page not downloaded through proxy" "$ok/out.log"
fi
if ! grep -q "^CONNECT 127.0.0.1:${origin_port} " "$ok/proxy.log"; then
  fail "proxy never received a CONNECT (https bypassed the proxy)" "$ok/proxy.log"
fi
echo "OK: https tunneled through proxy via CONNECT"

# 2. authenticated proxy: the credentials travel on the CONNECT and must NEVER
#    reach the origin. Both logs are emptied first so only this crawl counts.
: >"$ok/proxy.log"
: >"$ok/origin-headers.log"
run_crawl "$ok/out2" "user:secret@127.0.0.1:${proxy_port}" "$origin_port"
if ! grep -rq "ORIGIN-PAGE-85" "$ok/out2"; then
  fail "origin page not downloaded through authenticated proxy"
fi
got=$(awk '/^AUTH Basic /{print $3}' "$ok/proxy.log" | head -n 1)
# base64("user:secret"), compared as a literal to stay portable ("base64 -d"
# differs between GNU and BSD)
if [ "$got" != "dXNlcjpzZWNyZXQ=" ]; then
  fail "Proxy-Authorization not carried on CONNECT (got '$got')" "$ok/proxy.log"
fi
if grep -qi "proxy-authorization" "$ok/origin-headers.log"; then
  fail "proxy credentials leaked to the origin through the tunnel" "$ok/origin-headers.log"
fi
echo "OK: proxy credentials carried on CONNECT, not leaked to origin"

# --- hostile proxy ----------------------------------------------------------
# A proxy answering 200 and then streaming headers forever must not hang the
# crawl: the client has to bound the response. run_crawl kills a hung httrack
# after 60s, so a missing bound shows up here as $HANG_RC.
flood="$tmpdir/flood"
start_server "$flood" flood
rc=0
run_crawl "$flood/out" "127.0.0.1:${proxy_port}" "$origin_port" || rc=$?
if [ "$rc" -eq "$HANG_RC" ]; then
  fail "crawl hung on a flooding proxy (bounded read missing)"
fi
if grep -rq "ORIGIN-PAGE-85" "$flood/out" 2>/dev/null; then
  fail "flooding proxy unexpectedly served the page"
fi
echo "OK: bounded proxy response, no hang on a flooding proxy"
|
||||||
@@ -2,6 +2,7 @@
|
|||||||
# explicitly: automake does not expand wildcards in EXTRA_DIST, so a glob would
|
# explicitly: automake does not expand wildcards in EXTRA_DIST, so a glob would
|
||||||
# silently drop it from the dist tarball and break "make distcheck".
|
# silently drop it from the dist tarball and break "make distcheck".
|
||||||
EXTRA_DIST = $(TESTS) crawl-test.sh run-all-tests.sh check-network.sh \
|
EXTRA_DIST = $(TESTS) crawl-test.sh run-all-tests.sh check-network.sh \
|
||||||
|
proxy-https-server.py \
|
||||||
fixtures/cache-golden/hts-cache/new.zip
|
fixtures/cache-golden/hts-cache/new.zip
|
||||||
|
|
||||||
TESTS_ENVIRONMENT =
|
TESTS_ENVIRONMENT =
|
||||||
@@ -24,6 +25,7 @@ TESTS = \
|
|||||||
01_engine-cache-golden.test \
|
01_engine-cache-golden.test \
|
||||||
01_engine-charset.test \
|
01_engine-charset.test \
|
||||||
01_engine-cmdline.test \
|
01_engine-cmdline.test \
|
||||||
|
01_engine-cookies.test \
|
||||||
01_engine-copyopt.test \
|
01_engine-copyopt.test \
|
||||||
01_engine-doitlog.test \
|
01_engine-doitlog.test \
|
||||||
01_engine-entities.test \
|
01_engine-entities.test \
|
||||||
@@ -43,6 +45,7 @@ TESTS = \
|
|||||||
11_crawl-international.test \
|
11_crawl-international.test \
|
||||||
11_crawl-longurl.test \
|
11_crawl-longurl.test \
|
||||||
11_crawl-parsing.test \
|
11_crawl-parsing.test \
|
||||||
12_crawl_https.test
|
12_crawl_https.test \
|
||||||
|
13_crawl_proxy_https.test
|
||||||
|
|
||||||
CLEANFILES = check-network_sh.cache
|
CLEANFILES = check-network_sh.cache
|
||||||
|
|||||||
151
tests/proxy-https-server.py
Normal file
151
tests/proxy-https-server.py
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Local CONNECT proxy + self-signed HTTPS origin for the issue #85 test.
|
||||||
|
|
||||||
|
Starts a TLS origin server and an HTTP proxy that honours CONNECT, on ephemeral
|
||||||
|
ports. Every request line the proxy receives (and any Proxy-Authorization) is
|
||||||
|
appended to the proxy log; every header the origin receives over the tunnel is
|
||||||
|
appended to the origin log. That lets the test assert both that an https crawl
|
||||||
|
tunneled through the proxy and that proxy credentials never leaked to the origin.
|
||||||
|
|
||||||
|
Proxy modes (argv[3], default "ok"):
|
||||||
|
ok - honour CONNECT and tunnel to the origin
|
||||||
|
flood - answer 200 then stream headers forever with no blank line, to exercise
|
||||||
|
the client's bound on the proxy response (must not hang the crawl)
|
||||||
|
|
||||||
|
Usage: proxy-https-server.py <cert.pem> <logdir> [mode]
|
||||||
|
Prints "ORIGIN <port>", "PROXY <port>", then "ready" (one per line) on stdout.
|
||||||
|
"""
|
||||||
|
import http.server
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import socketserver
|
||||||
|
import ssl
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
|
||||||
|
ORIGIN_BODY = b"<html><body>ORIGIN-PAGE-85</body></html>"
|
||||||
|
PROXY_LOG = "proxy.log"
|
||||||
|
ORIGIN_LOG = "origin-headers.log"
|
||||||
|
|
||||||
|
|
||||||
|
def make_origin(logdir):
    """Return the request handler class used by the TLS origin.

    The handler answers every GET with ORIGIN_BODY as text/html and appends
    the name (not the value) of each request header it received to ORIGIN_LOG
    inside ``logdir``, one name per line.
    """
    header_log = os.path.join(logdir, ORIGIN_LOG)

    class Origin(http.server.BaseHTTPRequestHandler):
        def log_message(self, *args):
            # Silence the base handler's per-request logging.
            pass

        def do_GET(self):
            received = "".join(name + "\n" for name in self.headers.keys())
            with open(header_log, "a") as sink:
                sink.write(received)
            self.send_response(200)
            for field, value in (
                ("Content-Type", "text/html"),
                ("Content-Length", str(len(ORIGIN_BODY))),
            ):
                self.send_header(field, value)
            self.end_headers()
            self.wfile.write(ORIGIN_BODY)

    return Origin
|
||||||
|
|
||||||
|
|
||||||
|
def start_origin(certfile, logdir):
    """Start the HTTPS origin on an ephemeral 127.0.0.1 port; return the port.

    ``certfile`` is passed to ``load_cert_chain`` as the only argument, so it
    has to hold both the certificate and its private key. The server runs in
    a daemon thread and logs through the handler built by ``make_origin``.
    """
    server = socketserver.TCPServer(("127.0.0.1", 0), make_origin(logdir))
    tls = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
    tls.load_cert_chain(certfile)
    # Wrap the listening socket itself: accepted connections are then TLS.
    server.socket = tls.wrap_socket(server.socket, server_side=True)
    listening_port = server.socket.getsockname()[1]
    worker = threading.Thread(target=server.serve_forever, daemon=True)
    worker.start()
    return listening_port
|
||||||
|
|
||||||
|
|
||||||
|
def pipe(src, dst):
    """Copy bytes from ``src`` to ``dst`` until ``src`` reaches end of stream.

    Socket errors end the copy silently. In every case both sockets are shut
    down in both directions afterwards, which also unblocks a pipe() running
    the opposite way on the same pair.
    """
    try:
        for chunk in iter(lambda: src.recv(65536), b""):
            dst.sendall(chunk)
    except OSError:
        pass
    finally:
        for endpoint in (src, dst):
            try:
                endpoint.shutdown(socket.SHUT_RDWR)
            except OSError:
                # Already closed or never connected: nothing left to shut down.
                pass
|
||||||
|
|
||||||
|
|
||||||
|
def handle_client(conn, logdir, mode):
    """Serve one proxy connection: log the request, then refuse, flood or tunnel.

    The request line, and the Proxy-Authorization value when one was sent, are
    appended to PROXY_LOG inside ``logdir``. Anything other than CONNECT gets
    a 501. In "flood" mode the client receives a 200 status line followed by
    an endless header stream. Otherwise the CONNECT target is dialled and
    bytes are relayed both ways until either side stops.

    A malformed target port is answered with 400 and an unreachable target
    with 502. Both sockets are closed on every path.
    """

    def refuse(status_line):
        # Best effort: the peer may already have gone away.
        try:
            conn.sendall(status_line + b"\r\n\r\n")
        except OSError:
            pass

    with conn, conn.makefile("rb") as rfile:
        request_line = rfile.readline().decode("latin-1").strip()
        auth = None
        while True:
            line = rfile.readline().decode("latin-1")
            if line in ("\r\n", "\n", ""):
                break
            key, _, value = line.partition(":")
            if key.strip().lower() == "proxy-authorization":
                auth = value.strip()
        with open(os.path.join(logdir, PROXY_LOG), "a") as handle:
            handle.write(request_line + "\n")
            if auth is not None:
                handle.write("AUTH " + auth + "\n")
        parts = request_line.split()
        if not (len(parts) >= 2 and parts[0] == "CONNECT"):
            refuse(b"HTTP/1.0 501 Not Implemented")
            return
        if mode == "flood":
            # 200, then an endless header stream with no terminating blank
            # line: the client must bound this and give up, not hang.
            try:
                conn.sendall(b"HTTP/1.0 200 Connection established\r\n")
                while True:
                    conn.sendall(b"X-Pad: 0123456789\r\n")
            except OSError:
                pass
            return
        # Split on the LAST colon so a bracketed IPv6 literal keeps its own
        # colons; a target without a port falls back to 443.
        target = parts[1]
        host, sep, port = target.rpartition(":")
        if not sep or target.endswith("]"):
            host, port = target, ""
        try:
            upstream = socket.create_connection(
                (host.strip("[]"), int(port or 443))
            )
        except (ValueError, OverflowError):
            # Non-numeric or out-of-range port in the CONNECT target.
            refuse(b"HTTP/1.0 400 Bad Request")
            return
        except OSError:
            refuse(b"HTTP/1.0 502 Bad Gateway")
            return
        with upstream:
            conn.sendall(b"HTTP/1.0 200 Connection established\r\n\r\n")
            uplink = threading.Thread(
                target=pipe, args=(conn, upstream), daemon=True
            )
            uplink.start()
            pipe(upstream, conn)
            # pipe() shut both sockets down, so the other direction ends too;
            # wait for it before the sockets are closed underneath it.
            uplink.join()
|
||||||
|
|
||||||
|
|
||||||
|
def start_proxy(logdir, mode):
    """Start the proxy listener on an ephemeral 127.0.0.1 port; return the port.

    Connections are accepted in a daemon thread, and each one is handed to
    ``handle_client`` (with ``logdir`` and ``mode``) in a daemon thread of its
    own.
    """
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind(("127.0.0.1", 0))
    listener.listen(16)

    def accept_loop():
        while True:
            client = listener.accept()[0]
            worker = threading.Thread(
                target=handle_client, args=(client, logdir, mode), daemon=True
            )
            worker.start()

    acceptor = threading.Thread(target=accept_loop, daemon=True)
    acceptor.start()
    return listener.getsockname()[1]
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Start the origin and the proxy, announce their ports, then block forever.

    Command line: <cert.pem> <logdir> [mode]; mode defaults to "ok". Both log
    files are created empty in <logdir> (which is created when missing)
    before the servers start, then "ORIGIN <port>", "PROXY <port>" and
    "ready" are printed, one per line.
    """
    if len(sys.argv) < 3:
        # Exit with a usage message rather than an IndexError traceback.
        sys.exit("usage: proxy-https-server.py <cert.pem> <logdir> [mode]")
    certfile, logdir = sys.argv[1], sys.argv[2]
    mode = sys.argv[3] if len(sys.argv) > 3 else "ok"
    os.makedirs(logdir, exist_ok=True)
    for name in (PROXY_LOG, ORIGIN_LOG):
        # Truncate/create so the caller can read the logs right after "ready".
        open(os.path.join(logdir, name), "w").close()
    origin_port = start_origin(certfile, logdir)
    proxy_port = start_proxy(logdir, mode)
    print("ORIGIN %d" % origin_port, flush=True)
    print("PROXY %d" % proxy_port, flush=True)
    print("ready", flush=True)
    # The servers run in daemon threads: park the main thread until killed.
    threading.Event().wait()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Reference in New Issue
Block a user