mirror of
https://github.com/xroche/httrack.git
synced 2026-06-19 16:53:18 +03:00
Compare commits
1 Commits
fix/proxy-
...
feature/lo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f9f27b924d |
5
.flake8
Normal file
5
.flake8
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
[flake8]
|
||||||
|
# Match black's formatting so the two tools don't fight.
|
||||||
|
max-line-length = 88
|
||||||
|
# E203/W503 conflict with black's slice and line-break style.
|
||||||
|
extend-ignore = E203, W503
|
||||||
15
tests/13_local-cookies.test
Executable file
15
tests/13_local-cookies.test
Executable file
@@ -0,0 +1,15 @@
|
|||||||
|
#!/bin/bash
#
# Offline replacement for the former online ut/cookies/*.php fixtures: crawl the
# three-page cookie chain served by the local test server.
#
#   entrance.php  sets the "cat" and "cake" cookies
#   second.php    verifies both, then sets "badger"
#   third.php     verifies all three
#
# The server answers 500 whenever a cookie is absent or has the wrong value.
# That shows up as an httrack error plus a missing mirrored file, so exactly
# 3 files and 0 errors means the cookie jar was replayed on every hop.

# Fall back to the parent directory when top_srcdir is unset or empty.
top_srcdir=${top_srcdir:-..}

launcher="$top_srcdir/tests/local-crawl.sh"

# Expectations audited by the launcher once the crawl is over.
expect=(
    --errors 0
    --files 3
    --found 'cookies/entrance.html'
    --found 'cookies/second.html'
    --found 'cookies/third.html'
)

# Last command: its exit status is the status of this test.
bash "$launcher" "${expect[@]}" httrack 'BASEURL/cookies/entrance.php'
|
||||||
18
tests/14_local-https.test
Executable file
18
tests/14_local-https.test
Executable file
@@ -0,0 +1,18 @@
|
|||||||
|
#!/bin/bash
#
# Offline HTTPS crawl: the local test server is started with --tls and the
# self-signed certificate shipped in tests/. httrack does not verify
# certificates (htslib.c: SSL_CTX_new with no SSL_CTX_set_verify), so the
# certificate is accepted as-is and the real TLS code path is exercised.
#
# basic.html points at link.html through four different query strings; each
# one is stored under its own hashed name, hence 1 + 4 = 5 files.

# Fall back to the parent directory when top_srcdir is unset or empty.
top_srcdir=${top_srcdir:-..}

# Exit status 77 reports the test as skipped.
if [[ "$HTTPS_SUPPORT" == "no" ]]; then
    echo "no https support compiled, skipping"
    exit 77
fi

launcher="$top_srcdir/tests/local-crawl.sh"

# Expectations audited by the launcher once the crawl is over.
expect=(
    --tls
    --errors 0
    --files 5
    --found 'simple/basic.html'
)

# Last command: its exit status is the status of this test.
bash "$launcher" "${expect[@]}" httrack 'BASEURL/simple/basic.html'
|
||||||
@@ -1,4 +1,7 @@
|
|||||||
EXTRA_DIST = $(TESTS) crawl-test.sh run-all-tests.sh check-network.sh
|
# Note: EXTRA_DIST globs are NOT expanded by automake; list fixtures explicitly.
|
||||||
|
EXTRA_DIST = $(TESTS) crawl-test.sh run-all-tests.sh check-network.sh \
|
||||||
|
local-crawl.sh local-server.py server.crt server.key \
|
||||||
|
server-root/simple/basic.html server-root/simple/link.html
|
||||||
|
|
||||||
TESTS_ENVIRONMENT =
|
TESTS_ENVIRONMENT =
|
||||||
TESTS_ENVIRONMENT += PATH=$(top_builddir)/src$(PATH_SEPARATOR)$$PATH
|
TESTS_ENVIRONMENT += PATH=$(top_builddir)/src$(PATH_SEPARATOR)$$PATH
|
||||||
@@ -35,6 +38,8 @@ TESTS = \
|
|||||||
11_crawl-international.test \
|
11_crawl-international.test \
|
||||||
11_crawl-longurl.test \
|
11_crawl-longurl.test \
|
||||||
11_crawl-parsing.test \
|
11_crawl-parsing.test \
|
||||||
12_crawl_https.test
|
12_crawl_https.test \
|
||||||
|
13_local-cookies.test \
|
||||||
|
14_local-https.test
|
||||||
|
|
||||||
CLEANFILES = check-network_sh.cache
|
CLEANFILES = check-network_sh.cache
|
||||||
|
|||||||
@@ -476,7 +476,12 @@ target_alias = @target_alias@
|
|||||||
top_build_prefix = @top_build_prefix@
|
top_build_prefix = @top_build_prefix@
|
||||||
top_builddir = @top_builddir@
|
top_builddir = @top_builddir@
|
||||||
top_srcdir = @top_srcdir@
|
top_srcdir = @top_srcdir@
|
||||||
EXTRA_DIST = $(TESTS) crawl-test.sh run-all-tests.sh check-network.sh
|
|
||||||
|
# Note: EXTRA_DIST globs are NOT expanded by automake; list fixtures explicitly.
|
||||||
|
EXTRA_DIST = $(TESTS) crawl-test.sh run-all-tests.sh check-network.sh \
|
||||||
|
local-crawl.sh local-server.py server.crt server.key \
|
||||||
|
server-root/simple/basic.html server-root/simple/link.html
|
||||||
|
|
||||||
# note: libtool should handle that
|
# note: libtool should handle that
|
||||||
### TESTS_ENVIRONMENT += $(SHLIBPATH_VAR)="$(top_builddir)/src/$(LT_CV_OBJDIR)$${$(SHLIBPATH_VAR):+$(PATH_SEPARATOR)}$$$(SHLIBPATH_VAR)"
|
### TESTS_ENVIRONMENT += $(SHLIBPATH_VAR)="$(top_builddir)/src/$(LT_CV_OBJDIR)$${$(SHLIBPATH_VAR):+$(PATH_SEPARATOR)}$$$(SHLIBPATH_VAR)"
|
||||||
TESTS_ENVIRONMENT = PATH=$(top_builddir)/src$(PATH_SEPARATOR)$$PATH \
|
TESTS_ENVIRONMENT = PATH=$(top_builddir)/src$(PATH_SEPARATOR)$$PATH \
|
||||||
@@ -509,7 +514,9 @@ TESTS = \
|
|||||||
11_crawl-international.test \
|
11_crawl-international.test \
|
||||||
11_crawl-longurl.test \
|
11_crawl-longurl.test \
|
||||||
11_crawl-parsing.test \
|
11_crawl-parsing.test \
|
||||||
12_crawl_https.test
|
12_crawl_https.test \
|
||||||
|
13_local-cookies.test \
|
||||||
|
14_local-https.test
|
||||||
|
|
||||||
CLEANFILES = check-network_sh.cache
|
CLEANFILES = check-network_sh.cache
|
||||||
all: all-am
|
all: all-am
|
||||||
|
|||||||
235
tests/local-crawl.sh
Executable file
235
tests/local-crawl.sh
Executable file
@@ -0,0 +1,235 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Launcher for httrack crawl tests against the local Python test server.
|
||||||
|
#
|
||||||
|
# Starts tests/local-server.py on an ephemeral port, discovers the port from
|
||||||
|
# the server's stdout, then runs httrack against http(s)://127.0.0.1:$PORT and
|
||||||
|
# audits the mirror. The server is always killed and the tmpdir removed on exit.
|
||||||
|
#
|
||||||
|
# The token BASEURL in any httrack argument is replaced with the discovered
|
||||||
|
# http(s)://127.0.0.1:$PORT base. --found/--directory paths are relative to the
|
||||||
|
# discovered host root (127.0.0.1_<port>/), since the random port leaks into
|
||||||
|
# the mirror directory name.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# bash local-crawl.sh [--tls] [--root DIR] \
|
||||||
|
# --errors N --files N --found PATH ... --directory PATH ... \
|
||||||
|
# httrack BASEURL/some/path [httrack-args...]
|
||||||
|
|
||||||
|
# Treat the expansion of an unset variable as an error.
set -u

# Absolute directory holding this script; the server, its certificate and the
# default docroot are all looked up next to it.
testdir=$(cd "$(dirname "$0")" && pwd)
server="${testdir}/local-server.py"
cert="${testdir}/server.crt"
key="${testdir}/server.key"
# LOCAL_SERVER_ROOT, when set and non-empty, replaces the default docroot.
root="${LOCAL_SERVER_ROOT:-${testdir}/server-root}"

# Run-time state, empty until the matching step fills it in:
#   tls        non-empty once --tls was requested
#   verbose    non-empty once --debug was requested
#   tmpdir     scratch directory of this run
#   serverpid  PID of the background test server
#   crawlpid   PID of the background httrack process
tls='' verbose='' tmpdir='' serverpid='' crawlpid=''
|
||||||
|
|
||||||
|
# Print all arguments, joined into one line and prefixed with "** ", on stderr.
# Always returns 0 so it can be used freely inside || / && chains.
warning() {
    printf '** %s\n' "$*" 1>&2
    return 0
}
|
||||||
|
# Report a fatal problem through warning() and terminate the script with
# exit status 1.
die() {
    warning "$*"
    exit 1
}
|
||||||
|
# Print all arguments on stderr, but only when $verbose is non-empty
# (set by --debug). Always returns 0.
debug() {
    if [[ -n "$verbose" ]]; then
        echo "$*" 1>&2
    fi
    return 0
}
|
||||||
|
# Start a progress line on stderr: "[<arguments>] .." followed by a tab and no
# newline, so that result() can finish the same line.
info() {
    printf "[%s] ..\t" "$*" 1>&2
}
|
||||||
|
# Print all arguments followed by a newline on stderr; used to complete a line
# opened by info().
result() {
    echo "$*" 1>&2
}
|
||||||
|
|
||||||
|
# Tear down everything this run started. Safe to call more than once: each
# PID variable is emptied after use, so a second call finds nothing to do.
#
# Globals read:    crawlpid, serverpid, tmpdir, nopurge
# Globals written: crawlpid, serverpid (both reset to empty)
function cleanup {
    if test -n "$crawlpid"; then
        kill -9 "$crawlpid" 2>/dev/null
        # Reap the crawler as well, so it has stopped writing into the tmpdir
        # before the directory is removed below.
        wait "$crawlpid" 2>/dev/null
        crawlpid=
    fi
    if test -n "$serverpid"; then
        kill "$serverpid" 2>/dev/null
        # Reap it so the port is released before we rm the tmpdir/log.
        wait "$serverpid" 2>/dev/null
        serverpid=
    fi
    if test -n "$tmpdir" && test -d "$tmpdir"; then
        # --no-purge keeps the mirror and logs around for inspection.
        # ${nopurge:-} keeps this safe under `set -u` even if cleanup runs
        # before nopurge has been assigned.
        test -n "${nopurge:-}" || rm -rf -- "$tmpdir"
    fi
}
|
||||||
|
|
||||||
|
# Compare an expected value with the value actually observed.
#
# Arguments: $1 - label shown in the progress line
#            $2 - expected value
#            $3 - actual value
# Prints "OK (<expected>)" when both strings are identical; otherwise prints
# both values and terminates the script with exit status 1.
assert_equals() {
    local label=$1 expected=$2 actual=$3

    info "$label"
    if [[ "$expected" == "$actual" ]]; then
        result "OK ($expected)"
        return
    fi
    result "expected '$expected', got '$actual'"
    exit 1
}
|
||||||
|
|
||||||
|
# Non-empty once --no-purge was given; cleanup then keeps the tmpdir.
nopurge=

# cleanup runs exactly once, from the EXIT trap. The signal traps only turn the
# signal into an exit with the conventional 128+signo status; calling cleanup
# directly from a signal trap would let the script resume afterwards with its
# server killed and its tmpdir removed.
trap cleanup EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 131' QUIT
trap 'exit 141' PIPE
trap 'exit 143' TERM

# python3 is required; mirror check-network.sh's skip-with-77 convention.
if ! command -v python3 >/dev/null; then
    echo "python3 not found; skipping local crawl tests"
    exit 77
fi

# Scratch directory for the server log, the httrack log and the mirror.
tmptopdir=${TMPDIR:-/tmp}
test -d "$tmptopdir" || mkdir -p "$tmptopdir" || die "no temporary directory; set TMPDIR"
tmpdir=$(mktemp -d "${tmptopdir}/httrack_local.XXXXXX") || die "could not create tmpdir"
|
||||||
|
|
||||||
|
# --- parse leading control flags --------------------------------------------
# Everything before the literal word "httrack" is addressed to this launcher.
# Audit checks are collected in order in the audit array and evaluated after
# the crawl. When the loop ends, pos is the index of the first httrack argument.
declare -a audit=()
scheme=http
pos=0
args=("$@")
nargs=$#
sawcmd=
while ((pos < nargs)); do
    opt=${args[pos]}
    case "$opt" in
    --debug) verbose=1 ;;
    --no-purge)
        nopurge=1
        audit+=("--no-purge")
        ;;
    --tls)
        tls=1
        scheme=https
        ;;
    --root)
        # Check explicitly: under `set -u` a missing value would otherwise
        # abort with an opaque "unbound variable" message.
        ((pos + 1 < nargs)) || die "option $opt requires an argument"
        pos=$((pos + 1))
        root="${args[pos]}"
        ;;
    --errors | --files)
        ((pos + 1 < nargs)) || die "option $opt requires an argument"
        pos=$((pos + 1))
        # These values are compared with counts taken from hts-log.txt.
        [[ "${args[pos]}" =~ ^[0-9]+$ ]] ||
            die "option $opt expects a number, got '${args[pos]}'"
        audit+=("$opt" "${args[pos]}")
        ;;
    --found | --not-found | --directory)
        ((pos + 1 < nargs)) || die "option $opt requires an argument"
        pos=$((pos + 1))
        audit+=("$opt" "${args[pos]}")
        ;;
    httrack)
        sawcmd=1
        pos=$((pos + 1))
        break
        ;;
    *) die "unrecognized option $opt" ;;
    esac
    pos=$((pos + 1))
done
# Without the separator there is nothing to crawl; fail before starting a
# server instead of running httrack with no URL.
test -n "$sawcmd" || die "missing 'httrack' separator before the crawl arguments"
|
||||||
|
|
||||||
|
# --- start the server --------------------------------------------------------
test -r "$server" || die "cannot read $server"
# stdout and stderr of the server both go to this file; the port
# announcement is read back from it.
serverlog="${tmpdir}/server.log"
serverargs=(--root "$root")
if test -n "$tls"; then
    serverargs+=(--tls --cert "$cert" --key "$key")
fi
debug "starting python3 $server ${serverargs[*]}"
python3 "$server" "${serverargs[@]}" >"$serverlog" 2>&1 &
serverpid=$!

# Wait for the "PORT <n>" line (server prints it once bound): up to 50 polls
# 0.1 s apart. The whole log is scanned, not just its first line, because
# stderr shares the file and a warning may be printed before the
# announcement. Only an all-digit port is accepted.
port=
for ((attempt = 0; attempt < 50; attempt++)); do
    if test -s "$serverlog"; then
        port=$(sed -n -e 's/^PORT \([0-9][0-9]*\)$/\1/p' "$serverlog" | head -n1)
        test -z "$port" || break
    fi
    kill -0 "$serverpid" 2>/dev/null || die "server exited early: $(cat "$serverlog")"
    sleep 0.1
done
test -n "$port" || die "could not discover server port: $(cat "$serverlog")"
debug "server listening on ${scheme}://127.0.0.1:${port}"

baseurl="${scheme}://127.0.0.1:${port}"
|
||||||
|
|
||||||
|
# --- substitute BASEURL in the remaining (httrack) args ----------------------
# pos still points at the first argument after the "httrack" separator. Every
# occurrence of the token BASEURL in each remaining argument is replaced with
# the discovered base URL; the results are collected in hts.
declare -a hts=()
for ((; pos < nargs; pos++)); do
    hts+=("${args[pos]//BASEURL/$baseurl}")
done
|
||||||
|
|
||||||
|
# --- run httrack -------------------------------------------------------------
# command -v is a shell builtin, unlike the external `which`, and matches the
# python3 lookup earlier in this script.
command -v httrack >/dev/null || die "could not find httrack"
ver=$(httrack -O /dev/null --version | sed -e 's/HTTrack version //')
test -n "$ver" || die "could not run httrack"

out="${tmpdir}/crawl"
mkdir "$out" || die "could not create $out"
# Localhost is fast; disable the rate/bandwidth safety limits but keep a
# max-time backstop so a hang cannot wedge the suite.
declare -a moreargs=(--quiet --max-time=120 --timeout=30 --disable-security-limits --robots=0)
log="${tmpdir}/log"
info "running httrack ${hts[*]}"
# Run in the background and record the PID so cleanup can kill the crawler
# if this script is interrupted while waiting.
httrack -O "$out" --user-agent="httrack $ver local ($(uname -omrs))" "${moreargs[@]}" "${hts[@]}" >"$log" 2>&1 &
crawlpid=$!
wait "$crawlpid"
crawlres=$?
crawlpid=
# httrack exits 0 even on hard connect/DNS errors, so this is a backstop only;
# the real guard is the audit below (--errors 0 plus the host-root existence check).
if test "$crawlres" -ne 0; then
    result "httrack exited $crawlres"
    cat "$log" >&2
    exit 1
fi
result "OK"
# Show the error lines from httrack's log, if any; informational only.
grep -iE "^[0-9:]*[[:space:]]Error:" "${out}/hts-log.txt" >&2
|
||||||
|
|
||||||
|
# --- discover the single host root (127.0.0.1_<port> or 127.0.0.1) -----------
# The port-suffixed directory is preferred; the bare address is the fallback.
# All --found/--not-found/--directory paths are resolved below hostroot.
hostroot=
if test -d "${out}/127.0.0.1_${port}"; then
    hostroot="${out}/127.0.0.1_${port}"
elif test -d "${out}/127.0.0.1"; then
    hostroot="${out}/127.0.0.1"
fi
test -n "$hostroot" || die "could not find host root under $out"
debug "host root: $hostroot"
|
||||||
|
|
||||||
|
# --- audit -------------------------------------------------------------------
# Replay the checks recorded during option parsing, in order. Every entry is an
# option followed by its value, except --no-purge, which stands alone and has
# nothing to verify here. The first failed check ends the script with status 1.
htslog="${out}/hts-log.txt"
i=0
while ((i < ${#audit[@]})); do
    check=${audit[i]}
    i=$((i + 1))
    case "$check" in
    --errors)
        # Count the "Error:" lines of httrack's log.
        assert_equals "checking errors" "${audit[i]}" \
            "$(grep -iEc "^[0-9:]*[[:space:]]Error:" "$htslog")"
        i=$((i + 1))
        ;;
    --files)
        # Take the number in front of "files written" from the summary line.
        nFiles=$(grep -E "^HTTrack Website Copier/[^ ]* mirror complete in " "$htslog" |
            sed -e 's/.*[[:space:]]\([^ ]*\)[[:space:]]files written.*/\1/g')
        assert_equals "checking files" "${audit[i]}" "$nFiles"
        i=$((i + 1))
        ;;
    --found)
        info "checking for ${audit[i]}"
        if ! test -f "${hostroot}/${audit[i]}"; then
            result "not found"
            exit 1
        fi
        result "OK"
        i=$((i + 1))
        ;;
    --not-found)
        info "checking absence of ${audit[i]}"
        if test -f "${hostroot}/${audit[i]}"; then
            result "present"
            exit 1
        fi
        result "OK"
        i=$((i + 1))
        ;;
    --directory)
        info "checking for dir ${audit[i]}"
        if ! test -d "${hostroot}/${audit[i]}"; then
            result "not found"
            exit 1
        fi
        result "OK"
        i=$((i + 1))
        ;;
    esac
done
|
||||||
182
tests/local-server.py
Executable file
182
tests/local-server.py
Executable file
@@ -0,0 +1,182 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Self-contained local web server for httrack's crawl tests.
|
||||||
|
|
||||||
|
Serves static fixtures from a docroot plus a handful of dynamic endpoints
|
||||||
|
(cookies, ...) so httrack can be exercised over loopback, deterministically and
|
||||||
|
offline, instead of crawling the live ut.httrack.com.
|
||||||
|
|
||||||
|
Binds to an ephemeral port (port 0) and prints the chosen port to stdout as
|
||||||
|
"PORT <n>\n" so a launcher can discover it. Pass --tls to wrap the socket with
|
||||||
|
the shipped self-signed test cert; httrack does not verify certs, so no CA
|
||||||
|
trust plumbing is needed.
|
||||||
|
|
||||||
|
stdlib only (http.server + ssl) -- no new build or runtime dependency.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer
|
||||||
|
from urllib.parse import quote, unquote, urlsplit
|
||||||
|
|
||||||
|
# Cookie chain replicated from the old ut/cookies/*.php fixtures.
|
||||||
|
COOKIE_PATH = "/cookies/"
|
||||||
|
COOKIES = {
|
||||||
|
"cat": "dog",
|
||||||
|
"cake": "is a lie!",
|
||||||
|
"badger": "mushroom, with 'ants'",
|
||||||
|
}
|
||||||
|
|
||||||
|
PAGE = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||||
|
\t"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
|
||||||
|
<head>
|
||||||
|
\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||||
|
\t<title>Sample test</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
{body}
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class Handler(SimpleHTTPRequestHandler):
    """Static file handler extended with a few dynamic test endpoints.

    Request paths listed in ROUTES are answered by the matching route_*
    method; every other GET/HEAD falls through to SimpleHTTPRequestHandler.
    """

    def log_message(self, fmt, *args):
        """Log requests only when LOCAL_SERVER_VERBOSE is set and non-empty.

        Quiet by default; the launcher captures httrack's own log anyway.
        """
        if not os.environ.get("LOCAL_SERVER_VERBOSE"):
            return
        super().log_message(fmt, *args)

    # --- helpers -----------------------------------------------------------

    def request_cookies(self):
        """Return the request's Cookie header as a {name: value} dict.

        Like PHP's $_COOKIE, values are url-decoded, which undoes the encoding
        set_cookie applies. Fragments without "=" are skipped; when a name is
        repeated the last occurrence wins.
        """
        header = self.headers.get("Cookie", "")
        fields = (
            item.strip().split("=", 1) for item in header.split(";") if "=" in item
        )
        return {name.strip(): unquote(value.strip()) for name, value in fields}

    def set_cookie(self, name, value):
        """Queue a Set-Cookie header for the response being built.

        The value is url-encoded, like PHP's setcookie(), so spaces, quotes
        and commas stay a single token that httrack can store and replay
        verbatim. The cookie is scoped to COOKIE_PATH.
        """
        encoded = quote(value)
        self._set_cookies.append(
            "{}={}; Path={}".format(name, encoded, COOKIE_PATH)
        )

    def send_html(self, body, status=200, extra_status=None):
        """Send `body` wrapped in the PAGE template as a UTF-8 HTML response.

        `status` and `extra_status` are passed to send_response() as the code
        and message of the status line. Every queued cookie is sent as its own
        Set-Cookie header. The payload is omitted for HEAD requests.
        """
        payload = PAGE.format(body=body).encode("utf-8")
        self.send_response(status, extra_status)
        headers = [
            ("Content-Type", "text/html; charset=utf-8"),
            ("Content-Length", str(len(payload))),
        ]
        headers.extend(("Set-Cookie", cookie) for cookie in self._set_cookies)
        for field, content in headers:
            self.send_header(field, content)
        self.end_headers()
        if self.command == "HEAD":
            return
        self.wfile.write(payload)

    def fail_cookie(self, what):
        """Answer 500, naming the offending cookie in the status line.

        The old PHPs answered 500 with the reason in the status line.
        """
        self.send_html("", status=500, extra_status=f"The {what} is missing or invalid")

    def _cookies_ok(self, *names):
        """Check the named request cookies, in order, against COOKIES.

        On the first missing or wrong value the failure response is sent and
        False is returned; True means every named cookie matched.
        """
        jar = self.request_cookies()
        for name in names:
            if jar.get(name) != COOKIES[name]:
                self.fail_cookie(name)
                return False
        return True

    # --- dynamic routes ----------------------------------------------------

    def route_entrance(self):
        """First hop: set "cat" and "cake", link to second.php."""
        for name in ("cat", "cake"):
            self.set_cookie(name, COOKIES[name])
        self.send_html('\tThis is a <a href="second.php">link</a>')

    def route_second(self):
        """Second hop: require "cat" and "cake", set "badger", link to third.php."""
        if not self._cookies_ok("cat", "cake"):
            return
        self.set_cookie("badger", COOKIES["badger"])
        self.send_html('\tThis is a <a href="third.php">link</a>')

    def route_third(self):
        """Last hop: require all three cookies."""
        if not self._cookies_ok("cat", "cake", "badger"):
            return
        self.send_html("\tThis is a test.")

    def route_robots(self):
        """Serve a robots.txt whose Disallow rule is empty."""
        payload = b"User-agent: *\nDisallow:\n"
        self.send_response(200)
        self.send_header("Content-Type", "text/plain")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        if self.command == "HEAD":
            return
        self.wfile.write(payload)

    # Request path -> plain function taking the handler instance.
    ROUTES = {
        "/cookies/entrance.php": route_entrance,
        "/cookies/second.php": route_second,
        "/cookies/third.php": route_third,
        "/robots.txt": route_robots,
    }

    # --- dispatch ----------------------------------------------------------

    def dispatch(self):
        """Run the dynamic route for the request path, if there is one.

        Only the path component is matched; the query string is ignored. The
        queue of pending cookies is reset on every call. Returns True when a
        route produced the response, False when the caller must serve the
        request itself.
        """
        self._set_cookies = []
        route = self.ROUTES.get(urlsplit(self.path).path)
        if route is None:
            return False
        route(self)
        return True

    def do_GET(self):
        """Serve GET: dynamic route first, static file otherwise."""
        if self.dispatch():
            return
        super().do_GET()

    def do_HEAD(self):
        """Serve HEAD: dynamic route first, static file otherwise."""
        if self.dispatch():
            return
        super().do_HEAD()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse the command line, bind the server and serve until interrupted.

    The server listens on an ephemeral port (port 0) of --bind and prints
    "PORT <n>" on stdout once bound, which is how the launcher finds it.
    With --tls the listening socket is wrapped using --cert and --key.

    Exits with a usage error (status 2) when --tls is given without --cert.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--root", required=True, help="docroot for static files")
    parser.add_argument("--bind", default="127.0.0.1", help="bind address")
    parser.add_argument("--tls", action="store_true", help="serve HTTPS")
    parser.add_argument("--cert", help="TLS certificate (PEM)")
    parser.add_argument("--key", help="TLS private key (PEM)")
    args = parser.parse_args()

    # Reject --tls without a certificate before any socket is bound;
    # load_cert_chain(None) would otherwise fail with a bare TypeError.
    # --key stays optional: load_cert_chain then takes the private key from
    # the certificate file.
    if args.tls and not args.cert:
        parser.error("--tls requires --cert")

    root = os.path.abspath(args.root)

    def factory(*a, **kw):
        # Bind the docroot here; the server only supplies the per-request
        # arguments when it instantiates the handler.
        return Handler(*a, directory=root, **kw)

    httpd = ThreadingHTTPServer((args.bind, 0), factory)
    try:
        if args.tls:
            import ssl

            ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            ctx.load_cert_chain(certfile=args.cert, keyfile=args.key)
            httpd.socket = ctx.wrap_socket(httpd.socket, server_side=True)

        port = httpd.socket.getsockname()[1]
        # The launcher reads this line to discover the ephemeral port.
        print(f"PORT {port}", flush=True)

        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            pass
    finally:
        # Close the listening socket on every exit path, including a failed
        # TLS setup.
        httpd.server_close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
18
tests/server-root/simple/basic.html
Normal file
18
tests/server-root/simple/basic.html
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
|
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" lang="fr">
|
||||||
|
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||||
|
<title>Sample test</title>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
|
||||||
|
This is a <a href="link.html?v=1">link</a>
|
||||||
|
This is a <a href='link.html?v=2'>link</a>
|
||||||
|
This is a <a href="./link.html?v=3">link</a>
|
||||||
|
This is a <a href=link.html?v=4>link</a>
|
||||||
|
|
||||||
|
</body>
|
||||||
3
tests/server-root/simple/link.html
Normal file
3
tests/server-root/simple/link.html
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
This is a link.
|
||||||
|
|
||||||
|
Go back to <a href="basic.html">home</a>.
|
||||||
21
tests/server.crt
Normal file
21
tests/server.crt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIIDbzCCAlegAwIBAgIUdWkDDomnY3WW95UqJ+UOASuR/i0wDQYJKoZIhvcNAQEL
|
||||||
|
BQAwODESMBAGA1UEAwwJMTI3LjAuMC4xMSIwIAYDVQQKDBlIVFRyYWNrIGxvY2Fs
|
||||||
|
IHRlc3Qgc2VydmVyMCAXDTI2MDYxNTE0NDQxMFoYDzIwNTYwNjA3MTQ0NDEwWjA4
|
||||||
|
MRIwEAYDVQQDDAkxMjcuMC4wLjExIjAgBgNVBAoMGUhUVHJhY2sgbG9jYWwgdGVz
|
||||||
|
dCBzZXJ2ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDx78mogNhT
|
||||||
|
noWwRa51NeGtapQ1PfTYLlIMUzuloFXOsR1/ozRkFucqHNftF22wf0gg4VQJSBSf
|
||||||
|
3rwj79vsnt3nyaD03bTAafpHXkd+IJxQowiG8TfOJF0R/Qg9g7DCE66R9agQpMJC
|
||||||
|
SGxIin9p/4ld4Hn6869d4hNq4fHxNf/qkj2cnf8DYxrldz2FGsi6yMed4tzz2Am4
|
||||||
|
ZbPgwep+fy843ZdYrVIms9vJluNa9E+6Vpw9FwdjzQ/IBBMLvGaC2pDkc95YelaE
|
||||||
|
nQrAlTO/0l5vjc8XuTQFlo3DbUg+WEld/pxvCqsd/q1mqjL0WbxtXl2zCwGzAoJx
|
||||||
|
rjVEPfA8QSbtAgMBAAGjbzBtMB0GA1UdDgQWBBTHE0KKW8REV4HxajzVsIBxz3iL
|
||||||
|
9zAfBgNVHSMEGDAWgBTHE0KKW8REV4HxajzVsIBxz3iL9zAPBgNVHRMBAf8EBTAD
|
||||||
|
AQH/MBoGA1UdEQQTMBGHBH8AAAGCCWxvY2FsaG9zdDANBgkqhkiG9w0BAQsFAAOC
|
||||||
|
AQEAYlTEftrwGJBXuPmtxhmtw2HO/VTC4TGnq67hH5H+ptwgZJuuxCQ5KW6flTyp
|
||||||
|
FTyMhha33WD4EBL3wqqJsWr9Y4BXqi4G0lRqXBcC1oIUa2VYIDMER7kaY1qTSqE8
|
||||||
|
ARpwdB2BhvngAzDLc+4Jt4jQMRGr8fHAwxpDBoIZ1knbyzYNP73Bajse6/8YtxUu
|
||||||
|
nB2BsldjZnLvyHvRxUpWp92OyQih4jYSrlN6olDFlKDg7++kMhkHtJQW9a1t54VN
|
||||||
|
0ZXrB1ZRuHUUvGBq26x71riTWor7HNOSQaGeCMQjZNQkh5tfshNygUGSZVXTEwhG
|
||||||
|
xSrOL7NqBt2+EkVwf7LjGzjmBw==
|
||||||
|
-----END CERTIFICATE-----
|
||||||
28
tests/server.key
Normal file
28
tests/server.key
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
-----BEGIN PRIVATE KEY-----
|
||||||
|
MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDx78mogNhTnoWw
|
||||||
|
Ra51NeGtapQ1PfTYLlIMUzuloFXOsR1/ozRkFucqHNftF22wf0gg4VQJSBSf3rwj
|
||||||
|
79vsnt3nyaD03bTAafpHXkd+IJxQowiG8TfOJF0R/Qg9g7DCE66R9agQpMJCSGxI
|
||||||
|
in9p/4ld4Hn6869d4hNq4fHxNf/qkj2cnf8DYxrldz2FGsi6yMed4tzz2Am4ZbPg
|
||||||
|
wep+fy843ZdYrVIms9vJluNa9E+6Vpw9FwdjzQ/IBBMLvGaC2pDkc95YelaEnQrA
|
||||||
|
lTO/0l5vjc8XuTQFlo3DbUg+WEld/pxvCqsd/q1mqjL0WbxtXl2zCwGzAoJxrjVE
|
||||||
|
PfA8QSbtAgMBAAECggEACgNK4klq1T3IpKdNoBY5yoE7CbUQZBNkBpSPRxHgBezj
|
||||||
|
SVFfgrZGnOySrIJSt4JHtuynG2Hl+0ku74HRep/ck+eOsh5W3mZvGvMLnGxhwR3u
|
||||||
|
Or99osTIgU0VQTkpC0SLQ16FCnih0uJycNIikdLR7uuya1tt1OyIBzK7XlNGIywT
|
||||||
|
p85zJc7/6TfTC9eM7lqh7JGR7KplBxSvgZL1pUr7y4rNpKms6uzOvPND79CcKnbU
|
||||||
|
BBA9Tu4qdOkoOljsZKkvh3pihxyG9X6d8QTZ/uX3pkvliwSFBc+Sz9EootA3/4r5
|
||||||
|
gVWpQ2t/AY7fY4hqzLIX/HivVaPj3cWk1G+SHm0XNQKBgQD5I9rijqFvV/p6FmUl
|
||||||
|
FbnjJFFHHgZLivlGxAC5vOyJNQQaqdeDzg7yMotNmQTggVGjT6sjdosQb3n+ctuk
|
||||||
|
EhQnZSU5VkNKv1+PTR35WrRkaECCaqz3Pv79pV9GVcX3it7UuYjNiOeSPqINWe+X
|
||||||
|
49JwnJFz+qQ1BchAwOis4zkENwKBgQD4mShDaYLOO97VpgZj4cGxHHWyEK9CRQvp
|
||||||
|
I7HxRmfaWS3JHwb88lOmALEU6pAj5cYJPAznv8BnUWcVHalZbkQ1JWYtUJRqj6OI
|
||||||
|
Ym7rw/nm4Ay5ijbdEism173dSk3IjOe+PdAlxzsOuVzYdBTqElmeQWtBzhY9aHvX
|
||||||
|
r+A02C2j+wKBgHHDo6Gsi57yR5gUPd9vSlCkNtEIrss0DJv5yHMIB+KnaNZcE+NF
|
||||||
|
5qFF30Jxyz5RDtxJ9tXcvaeln8lG3XDQKI/MqfDCqTuqo5ImHrfMaW8oA70JxS2p
|
||||||
|
gHqGVzkg1aMxsIrmpcdk6olnPExocvWivGdbtzeEjhMALu8Sp6y6nUCFAoGBAK5h
|
||||||
|
KLgYw/OMVaQCIMthaa+l6f0s7PMMYe1453H6VBD6qz4/8HPwO7LfG1gzrUYxADgs
|
||||||
|
ElVh0UHn/On383nS+i9Ze5Hfyyvwc+LQQURKJPrJQMPJavCptPE7NmiKnYNHK6vr
|
||||||
|
yh0l4oxShAklbCJBGvICq4zuVfVfXDeQnDIVTfaPAoGBAMCrZqYdOUhUu+aUqxZq
|
||||||
|
qO/TTQxrxftU63jGUg+o042TdgI4KWLn07wvHJ8/E2OqF35eXenvcuKbNLI1l72J
|
||||||
|
4cp+3cUv8iAXThTRYEztr5CS/wta4o4CNN8zfjn5dV9AI4Hmt4V7EaGWpBcViGbj
|
||||||
|
n0Mhag+dO8DHuenqi1yfMrAt
|
||||||
|
-----END PRIVATE KEY-----
|
||||||
Reference in New Issue
Block a user