mirror of
https://github.com/xroche/httrack.git
synced 2026-05-17 08:26:14 +03:00
Do not unescape '+' before the query string
Fixed issue 18
This commit is contained in:
@@ -206,6 +206,7 @@ int hts_unescapeEntities(const char *src, char *dest, const size_t max) {
|
||||
|
||||
int hts_unescapeUrl(const char *src, char *dest, const size_t max) {
|
||||
size_t i, j, lastI, lastJ, k, utfBufferJ, utfBufferSize;
|
||||
int seenQuery = 0;
|
||||
char utfBuffer[32];
|
||||
|
||||
assert(src != dest);
|
||||
@@ -218,7 +219,7 @@ int hts_unescapeUrl(const char *src, char *dest, const size_t max) {
|
||||
unsigned char cUtf = (unsigned char) c;
|
||||
|
||||
/* Replacement for ' ' */
|
||||
if (c == '+') {
|
||||
if (c == '+' && seenQuery) {
|
||||
c = cUtf = ' ';
|
||||
k = 0; /* cancel any sequence */
|
||||
}
|
||||
@@ -250,6 +251,9 @@ int hts_unescapeUrl(const char *src, char *dest, const size_t max) {
|
||||
/* ASCII (and not in %xx) */
|
||||
else if (cUtf < 0x80 && i != lastI + 1) {
|
||||
k = 0; /* cancel any sequence */
|
||||
if (!seenQuery && c == '?') {
|
||||
seenQuery = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* UTF-8 sequence in progress (either a raw or a %xx character) */
|
||||
|
||||
@@ -24,3 +24,13 @@ bash crawl-test.sh --errors 0 --files 3 \
|
||||
--found ut.httrack.com/parsing/fade.gif \
|
||||
--found ut.httrack.com/parsing/javascript.html \
|
||||
httrack http://ut.httrack.com/parsing/javascript.html
|
||||
|
||||
# handling of '+' before the query string (it must not be unescaped to a space there)
|
||||
bash crawl-test.sh --errors 0 --files 6 \
|
||||
--found ut.httrack.com/parsing/escaping.html \
|
||||
--found "ut.httrack.com/parsing/foo bar30f4.html" \
|
||||
--found "ut.httrack.com/parsing/foo bar5e1f.html" \
|
||||
--found "ut.httrack.com/parsing/foo+bar3860.html" \
|
||||
--found "ut.httrack.com/parsing/foo barae52.html" \
|
||||
--found "ut.httrack.com/parsing/foo bar7b30.html" \
|
||||
httrack http://ut.httrack.com/parsing/escaping.html
|
||||
|
||||
Reference in New Issue
Block a user