mirror of
https://github.com/xroche/httrack.git
synced 2026-06-25 11:37:28 +03:00
Compare commits
1 Commits
master
...
fix/update
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
82d1de5d06 |
11
tests/21_local-intl-update.test
Normal file
11
tests/21_local-intl-update.test
Normal file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
#
# Regression test for issue #157: a dotless, accented URL that was named
# .html on the first crawl must keep that .html name across an update,
# rather than reverting to the extensionless name.

# Fall back to the parent directory when top_srcdir is unset or empty.
top_srcdir="${top_srcdir:-..}"

# Arguments handed to the shared local-crawl driver, one logical option per line.
crawl_args=(
    --errors 0
    --rerun
    --found 'intl/Instalação_CVS_no_Ubuntu.html'
    --not-found 'intl/Instalação_CVS_no_Ubuntu'
    httrack 'BASEURL/intl/index.html'
)

# The driver's exit status is the status of this test.
bash "$top_srcdir/tests/local-crawl.sh" "${crawl_args[@]}"
|
||||
@@ -60,6 +60,7 @@ TESTS = \
|
||||
17_local-empty-ct.test \
|
||||
18_local-update.test \
|
||||
19_local-connect-fallback.test \
|
||||
20_local-resume-loop.test
|
||||
20_local-resume-loop.test \
|
||||
21_local-intl-update.test
|
||||
|
||||
CLEANFILES = check-network_sh.cache
|
||||
|
||||
@@ -196,6 +196,15 @@ if test -n "$rerun"; then
|
||||
exit 1
|
||||
}
|
||||
result "OK (update)"
|
||||
# The update summary reports "files updated"; a fresh crawl never does. Assert
|
||||
# it so a regression that bypasses the cache (re-crawls fresh) can't pass.
|
||||
info "checking update used the cache"
|
||||
if grep -aqE "mirror complete in .*files updated" "${out}/hts-log.txt"; then
|
||||
result "OK"
|
||||
else
|
||||
result "update pass did not report cache activity"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# --- discover the single host root (127.0.0.1_<port> or 127.0.0.1) -----------
|
||||
|
||||
@@ -177,6 +177,17 @@ class Handler(SimpleHTTPRequestHandler):
|
||||
body, ctype = self.TYPE_MATRIX[path]
|
||||
self.send_raw(body, ctype)
|
||||
|
||||
# --- special chars in URLs across an update (issue #157) ---------------
|
||||
# A dotless, accented basename served as text/html (MediaWiki style). The
|
||||
# name the first crawl picks (.html) must survive the update pass.
|
||||
INTL_NAME = "Instalação_CVS_no_Ubuntu"
|
||||
|
||||
def route_intl_index(self):
    """Send the index page: one anchor whose href is the accented INTL_NAME."""
    # The href is emitted as-is (not percent-encoded), exactly as stored in
    # INTL_NAME.
    anchor = '\t<a href="{}">accented</a>\n'.format(self.INTL_NAME)
    self.send_html(anchor)
|
||||
|
||||
def route_intl_page(self):
    """Send the accented page itself: a fixed HTML body typed as text/html."""
    page = b"<html><body>accented page</body></html>\n"
    content_type = "text/html"
    self.send_raw(page, content_type)
|
||||
|
||||
# resume / 416 loop (#206): the first GET stalls after a prefix so the crawl
|
||||
# can be interrupted (partial + temp-ref); every later request is 416.
|
||||
RESUME_PREFIX = b"PARTIAL-" + b"x" * 4096 # flushed before the stall
|
||||
@@ -233,6 +244,8 @@ class Handler(SimpleHTTPRequestHandler):
|
||||
"/types/style.css": route_types,
|
||||
"/types/data.json": route_types,
|
||||
"/types/gen.php": route_types,
|
||||
"/intl/index.html": route_intl_index,
|
||||
"/intl/" + INTL_NAME: route_intl_page,
|
||||
"/resume/index.html": route_resume_index,
|
||||
"/resume/blob.txt": route_resume,
|
||||
}
|
||||
@@ -242,7 +255,8 @@ class Handler(SimpleHTTPRequestHandler):
|
||||
def dispatch(self):
|
||||
self._set_cookies = []
|
||||
path = urlsplit(self.path).path
|
||||
handler = self.ROUTES.get(path)
|
||||
# Match percent-encoded paths (accented #157 route) by their decoded form.
|
||||
handler = self.ROUTES.get(path) or self.ROUTES.get(unquote(path))
|
||||
if handler is not None:
|
||||
handler(self)
|
||||
return True
|
||||
|
||||
Reference in New Issue
Block a user