mirror of
https://github.com/xroche/httrack.git
synced 2026-06-28 13:07:35 +03:00
Mechanical pass: run shfmt -i 4 over the whole tracked shell tree (the test harness .test files, the regen generators, webhttrack, the CGI search helper, and the build/dist scripts) so they share one style. shfmt also normalised backticks to $(...) and $[..] to $((..)). No behaviour change: arithmetic is preserved exactly, non-ASCII bytes are untouched, and the full make check suite still passes. The tab indented .test files become 4-space indented, hence the wide diff. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Signed-off-by: Xavier Roche <roche@httrack.com>
74 lines
2.2 KiB
Bash
Executable File
74 lines
2.2 KiB
Bash
Executable File
#!/bin/bash
#
# Regression checks for the wildcard filter engine (strjoker), the core of the
# +/- include/exclude rules. The engine is exercised through
#   httrack -#0 <filter> <string>
# which prints "<string> does match <filter>" or
# "<string> does NOT match <filter>".

# Strict mode: abort on unhandled failures, unset variables and failing
# pipeline stages.
set -o errexit -o nounset -o pipefail
|
|
|
|
# Assert that a string is accepted by a wildcard filter.
#
# Runs `httrack -O /dev/null -#0 <filter> <string>` and compares its stdout
# with the exact success line "<string> does match <filter>".
# Arguments: $1 - filter pattern
#            $2 - string tested against the filter
# Outputs:   on mismatch, the expected and actual verdict lines on stderr
# Returns:   0 when the verdict matches; otherwise exits the script with 1
match() {
    local filter=$1 subject=$2
    local expected="$subject does match $filter"
    local actual
    # Only the printed verdict decides the outcome. `|| true` keeps errexit
    # from aborting on httrack's exit status before the mismatch is reported.
    actual=$(httrack -O /dev/null -#0 "$filter" "$subject") || true
    if [[ "$actual" != "$expected" ]]; then
        printf 'FAIL match: expected "%s", got "%s"\n' "$expected" "$actual" >&2
        exit 1
    fi
}
|
|
# Assert that a string is rejected by a wildcard filter.
#
# Runs `httrack -O /dev/null -#0 <filter> <string>` and compares its stdout
# with the exact rejection line "<string> does NOT match <filter>".
# Arguments: $1 - filter pattern
#            $2 - string tested against the filter
# Outputs:   on mismatch, the expected and actual verdict lines on stderr
# Returns:   0 when the verdict matches; otherwise exits the script with 1
nomatch() {
    local filter=$1 subject=$2
    local expected="$subject does NOT match $filter"
    local actual
    # Only the printed verdict decides the outcome. `|| true` keeps errexit
    # from aborting on httrack's exit status before the mismatch is reported.
    actual=$(httrack -O /dev/null -#0 "$filter" "$subject") || true
    if [[ "$actual" != "$expected" ]]; then
        printf 'FAIL nomatch: expected "%s", got "%s"\n' "$expected" "$actual" >&2
        exit 1
    fi
}
|
|
|
|
# A lone star accepts any string.
match '*' "anything/at/all"

# Prefix and suffix patterns.
prefix_filter='foo*'
match "$prefix_filter" "foobar"
nomatch "$prefix_filter" "xfoobar"
match '*.gif' "a/b/c.gif"

# Extension matching ignores case.
match '*.GIF' "a.gif"
|
|
|
|
# Character classes: an upper-case range rejects a lower-case letter, and a
# digit range rejects a letter.
upper_txt_filter='*[A-Z].txt'
match "$upper_txt_filter" "B.txt"
nomatch "$upper_txt_filter" "b.txt"
digit_filter='*[0-9]'
match "$digit_filter" "5"
nomatch "$digit_filter" "x"

# Comma-separated class: each listed range is active, the comma is not matched
# literally, and a character outside both ranges fails.
upper_or_digit_filter='*[A-Z,0-9]'
match "$upper_or_digit_filter" "Q"
match "$upper_or_digit_filter" "3"
nomatch "$upper_or_digit_filter" "a"
|
|
|
|
# Named groups: [file] does not cross a '/', [path] does.
file_filter='*[file].html'
match "$file_filter" "foo.html"
nomatch "$file_filter" "foo/bar.html"
match '*[path]x' "a/b/x"

# An empty class after the star means "nothing more after the star".
nomatch '*[]' "abc"

# Several stars in one filter.
match '*foo*bar' "foozbar"

# '?' is the query-string marker, not a single-character wildcard.
nomatch 'a?c' "abc"
|
|
|
|
# Inside a class, a backslash escapes the following metacharacter so that it
# is matched literally.
# Quirk: the decoder also puts the backslash itself into the set, so '\X'
# accepts both X and '\'. The assertions below pin that behavior.
backslash='\'

escaped_star='*[\*]'
match "$escaped_star" "*"
match "$escaped_star" "$backslash"
nomatch "$escaped_star" "a"

escaped_backslash='*[\\]'
match "$escaped_backslash" "$backslash"
nomatch "$escaped_backslash" "a"

escaped_open_bracket='*[\[]'
match "$escaped_open_bracket" "["
match "$escaped_open_bracket" "$backslash"
nomatch "$escaped_open_bracket" "a"
|
|
|
|
# A literal ']' can never be a class member, because the class parser stops at
# the first ']' whether or not it is escaped. '*[\[\]]' therefore does NOT mean
# "the [ or ] character" as the filter guide claims (GitHub #148): it parses as
# the class {'[','\'} followed by a trailing literal ']'. The assertions below
# record the current (buggy) behavior so that any future matcher fix shows up
# as a deliberate, visible change.
bracket_pair_filter='*[\[\]]'
nomatch "$bracket_pair_filter" "["  # rejected, contrary to the docs
match "$bracket_pair_filter" "]"    # empty class match, then the trailing ']'
match "$bracket_pair_filter" "[]"   # a member of {'[','\'}, then the trailing ']'
nomatch "$bracket_pair_filter" "[]x"
|