mirror of
https://github.com/xroche/httrack.git
synced 2026-06-20 00:58:47 +03:00
Compare commits
1 Commits
master
...
fix/css-ur
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
42acbe6c97 |
@@ -317,6 +317,27 @@ static int is_http_method(const char *s, size_t len) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Percent-encode '(' and ')' in a link emitted into an unquoted CSS url(...):
   a literal ')' closes the token early and the UA mis-parses the value (#163).
   The UA decodes %28/%29 back to the saved-on-disk name.

   s    : NUL-terminated string, rewritten in place.
   size : capacity of s in bytes, terminating NUL included.

   The expansion is done in place (no scratch buffer). If the escaped form
   does not fit in size bytes, the output stops at the last source character
   whose (possibly escaped) form still fits together with the NUL; a %xx
   triplet is never split. A string that already fits and contains no
   parenthesis is left unchanged. */
static void escape_url_parens(char *const s, const size_t size) {
  size_t src_len = 0;           /* source bytes that will be kept */
  size_t dst_len = 0;           /* length of their escaped form */
  size_t i, j;

  if (!s || size == 0)
    return;

  /* Pass 1: longest prefix whose escaped form, plus the NUL, fits in size.
     Only the bytes a character really needs are reserved (3 for a
     parenthesis, 1 otherwise), so plain strings are never shortened. */
  while (s[src_len] != '\0') {
    const size_t need = (s[src_len] == '(' || s[src_len] == ')') ? 3 : 1;

    /* dst_len < size always holds here, so the subtraction cannot wrap */
    if (need >= size - dst_len)
      break;
    dst_len += need;
    src_len++;
  }

  /* dst_len >= src_len, so this never overwrites a byte still to be read */
  s[dst_len] = '\0';
  if (dst_len == src_len)
    return;                     /* no parenthesis kept: nothing to expand */

  /* Pass 2: expand from the end. The write index j never drops below the
     read index i, so unread source bytes are never clobbered. */
  for (i = src_len, j = dst_len; i > 0;) {
    const char c = s[--i];

    if (c == '(' || c == ')') {
      j -= 3;
      s[j] = '%';
      s[j + 1] = '2';
      s[j + 2] = (c == '(') ? '8' : '9';
    } else {
      s[--j] = c;
    }
  }
}
|
||||
|
||||
/* Main parser */
|
||||
int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
char catbuff[CATBUFF_SIZE];
|
||||
@@ -3027,6 +3048,10 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
/* Never escape high-chars (we don't know the encoding!!) */
|
||||
inplace_escape_uri_utf(tempo, sizeof(tempo));
|
||||
|
||||
// unquoted CSS url(...): keep parens escaped (#163)
|
||||
if (ending_p == ')')
|
||||
escape_url_parens(tempo, sizeof(tempo));
|
||||
|
||||
//if (!no_esc_utf)
|
||||
// escape_uri(tempo); // escape with %xx
|
||||
//else {
|
||||
|
||||
@@ -264,4 +264,22 @@ grep -Fq 'window.open("winopen.gif")' "$saved7" ||
|
||||
! grep -Fq 'window.open("file://' "$saved7" ||
|
||||
! echo "FAIL #218: window.open URL left absolute (not rewritten)" || exit 1
|
||||
|
||||
# Parens inside an unquoted CSS url(...) (#163): the saved-on-disk name has
|
||||
# literal '(' ')' (the source %28/%29 decode when fetching), but a literal ')'
|
||||
# in the rewritten url() would close the token early and break the value, so
|
||||
# they must stay percent-encoded. Negative control: without the fix the output
|
||||
# is url(img%20(1).gif) and the grep for %281%29 fails (parens are RFC2396
|
||||
# "mark" chars, which the URI escaper leaves alone).
|
||||
# Fixture: a source directory with one stylesheet and the image it references.
site8="$tmp/cssparens"
|
||||
mkdir -p "$site8"
|
||||
# gif is a helper defined outside this hunk; presumably it writes a small GIF
# at the given path (TODO confirm). The on-disk name has a space and literal
# parentheses.
gif "$site8/img (1).gif"
|
||||
# %% is a literal % in a printf format, so the stylesheet contains
# url(img%20%281%29.gif): an unquoted url() with percent-encoded parentheses.
printf 'body { background-image: url(img%%20%%281%%29.gif); }\n' >"$site8/style.css"
|
||||
out8="$tmp/cssparens-out"
|
||||
# crawl is a helper defined outside this hunk; presumably it mirrors the first
# argument into the directory given as second argument (TODO confirm).
crawl "$site8/style.css" "$out8"
|
||||
# found is a helper defined outside this hunk; presumably it fails the script
# unless the named file exists somewhere under the output directory.
found "img (1).gif" "$out8"
|
||||
# Locate the mirrored stylesheet; -print -quit stops at the first match.
css8=$(find "$out8" -type f -path '*/file/*' -name style.css -print -quit)
|
||||
# Failure idiom: when the check fails, echo prints the message, the leading !
# turns that successful echo into a failure, and exit 1 then runs.
test -n "$css8" || ! echo "FAIL: saved style.css not found" || exit 1
|
||||
# The rewritten link must still carry %28 and %29 (fixed-string, quiet match).
grep -Fq 'url(img%20%281%29.gif)' "$css8" ||
|
||||
! echo "FAIL #163: parens in unquoted CSS url() not percent-encoded on rewrite" || exit 1
|
||||
|
||||
exit 0
|
||||
|
||||
Reference in New Issue
Block a user