mirror of
https://github.com/xroche/httrack.git
synced 2026-06-28 04:57:49 +03:00
Compare commits
1 Commits
master
...
fix/filter
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0edf5e3cde |
@@ -76,7 +76,8 @@ int fa_strjoker(int type, char **filters, int nfil, const char *nom, LLint * siz
|
||||
}
|
||||
if (size)
|
||||
sz = *size;
|
||||
if (strjoker(nom, filters[i] + filteroffs, &sz, size_flag)) { // reconnu
|
||||
/* size unknown (scan time): no size pointer => size tests stay neutral */
|
||||
if (strjoker(nom, filters[i] + filteroffs, size ? &sz : NULL, size_flag)) {
|
||||
if (size)
|
||||
if (sz != *size)
|
||||
sizelimit = sz;
|
||||
|
||||
@@ -524,6 +524,30 @@ static int st_filter(httrackp *opt, int argc, char **argv) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Size-aware filter verdict via fa_strjoker: a negative <size> means the size
|
||||
is still unknown (scan time), so a size rule like -*.jpg*[<10] must stay
|
||||
neutral. */
|
||||
static int st_filtersize(httrackp *opt, int argc, char **argv) {
|
||||
LLint sz;
|
||||
int size_flag = 0, verdict, known;
|
||||
|
||||
(void) opt;
|
||||
if (argc < 3) {
|
||||
fprintf(stderr, "filtersize: needs <size> <string> <filter> [filter...]\n");
|
||||
return 1;
|
||||
}
|
||||
known = (argv[0][0] != '-'); /* "-1"/"-" => size unknown */
|
||||
sz = known ? (LLint) strtoll(argv[0], NULL, 10) : -1;
|
||||
verdict = fa_strjoker(0, &argv[2], argc - 2, argv[1], known ? &sz : NULL,
|
||||
known ? &size_flag : NULL, NULL);
|
||||
printf("verdict=%s size_flag=%d\n",
|
||||
verdict > 0 ? "allowed"
|
||||
: verdict < 0 ? "forbidden"
|
||||
: "unknown",
|
||||
size_flag);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int st_simplify(httrackp *opt, int argc, char **argv) {
|
||||
(void) opt;
|
||||
if (argc < 1) {
|
||||
@@ -1038,6 +1062,9 @@ static const struct selftest_entry {
|
||||
} selftests[] = {
|
||||
{"filter", "<pattern> <string>", "match a string against a wildcard filter",
|
||||
st_filter},
|
||||
{"filtersize", "<size> <string> <filter>...",
|
||||
"size-aware filter verdict (negative size = unknown/scan time)",
|
||||
st_filtersize},
|
||||
{"simplify", "<path>", "collapse ./ and ../ in a path", st_simplify},
|
||||
{"mime", "<filename>", "MIME type for a filename", st_mime},
|
||||
{"charset", "<charset> <string>",
|
||||
|
||||
@@ -71,3 +71,24 @@ nomatch '*[\[\]]' '[' # not matched, despite the docs
|
||||
match '*[\[\]]' ']' # only via the empty class-match + trailing ']'
|
||||
match '*[\[\]]' '[]' # one of {'[','\'} then the trailing ']'
|
||||
nomatch '*[\[\]]' '[]x'
|
||||
|
||||
# Size-based rules (-#test=filtersize <size> <string> <filter...>): a negative size
|
||||
# means the size is still unknown (scan time). A size exclusion must stay neutral
|
||||
# then, so the file is fetched and only cancelled once its size is known (#143).
|
||||
# fsize <expected-output> <size> <string> <filter...>
# Runs the filtersize self-test with the remaining arguments and aborts the
# script (exit 1) unless its stdout is exactly <expected-output>.
fsize() {
  local want="$1"
  shift
  # '=' is the portable string-equality operator of test(1); '==' is a
  # bash extension that other shells (e.g. dash) reject.
  test "$(httrack -O /dev/null -#test=filtersize "$@")" = "$want" || exit 1
}
|
||||
# The '-*' filter is quoted like the other filters so the shell never
# expands it against files in the current directory whose name starts with '-'.
fsize 'verdict=allowed size_flag=0' -1 foo.jpg '-*' '+*.jpg' '-*.jpg*[<10]'   # scan time: keep
fsize 'verdict=forbidden size_flag=1' 5 foo.jpg '-*' '+*.jpg' '-*.jpg*[<10]'  # <10KB: cancel
fsize 'verdict=allowed size_flag=1' 20 foo.jpg '-*' '+*.jpg' '-*.jpg*[<10]'   # >=10KB: keep
fsize 'verdict=forbidden size_flag=0' -1 foo.txt '-*' '+*.jpg' '-*.jpg*[<10]' # not a jpg
|
||||
|
||||
# [name]/[file]/[path] never span '?' mid-string; a trailing query is still
|
||||
# tolerated by the global '?' rule (same as plain *.aspx), not the class (#144).
|
||||
nomatch '*[path]/end' 'a?b/end'
|
||||
nomatch '*[file]end' 'foo?xend'
|
||||
nomatch '*[name]X' 'abc?X'
|
||||
match '*[file]' 'foo?x=1' # trailing query: tolerated, as for *.aspx
|
||||
match '*.aspx' 'page.aspx?y=2'
|
||||
|
||||
Reference in New Issue
Block a user