mirror of
https://github.com/xroche/httrack.git
synced 2026-06-28 21:17:57 +03:00
Mechanical pass: run shfmt -i 4 over the whole tracked shell tree (the test harness .test files, the regen generators, webhttrack, the CGI search helper, and the build/dist scripts) so they share one style. shfmt also normalised backticks to $(...) and $[..] to $((..)). No behaviour change: arithmetic is preserved exactly, non-ASCII bytes are untouched, and the full make check suite still passes. The tab indented .test files become 4-space indented, hence the wide diff. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Signed-off-by: Xavier Roche <roche@httrack.com>
52 lines
1.5 KiB
Bash
Executable File
52 lines
1.5 KiB
Bash
Executable File
#!/bin/bash
|
|
#
|
|
|
|
set -euo pipefail
|
|
|
|
# HTML entity unescaping (hts_unescapeEntitiesWithCharset).
|
|
# -#6 <string> prints the string with entities decoded (UTF-8 output).
|
|
# ent INPUT EXPECTED
#
# Runs `httrack -O /dev/null -#6 INPUT` and compares what it prints on stdout
# (trailing newlines stripped by the command substitution) with EXPECTED.
# Arguments: $1 - string handed to -#6
#            $2 - exact output the decoder is expected to print
# Exits the whole script with status 1, after a diagnostic on stderr, when
# httrack exits non-zero or when its output differs from EXPECTED.
ent() {
    local actual status=0

    # Assigned separately from `local` so httrack's own exit status is not
    # masked: a run that prints the right text but then dies must still fail.
    actual=$(httrack -O /dev/null -#6 "$1") || status=$?
    if ((status != 0)); then
        printf 'FAIL: httrack -#6 exited with status %d for input %q\n' \
            "$status" "$1" >&2
        exit 1
    fi

    # Quoted right-hand side: literal string comparison, no pattern matching.
    if [[ "$actual" != "$2" ]]; then
        # %q makes invisible differences (e.g. U+00A0 vs 0x20) visible.
        printf 'FAIL: httrack -#6 %q\n  expected: %q\n  actual:   %q\n' \
            "$1" "$2" "$actual" >&2
        exit 1
    fi
}
|
|
# crash probe: malformed input must exit cleanly, not abort.
|
|
# runs INPUT
#
# Crash probe: runs `httrack -O /dev/null -#6 INPUT` with stdout and stderr
# discarded and only checks that the process exits with status 0.
# Arguments: $1 - string handed to -#6
# Exits the whole script with status 1, after reporting the observed exit
# status on stderr, when httrack does not exit cleanly.
runs() {
    local status=0

    httrack -O /dev/null -#6 "$1" >/dev/null 2>&1 || status=$?
    if ((status != 0)); then
        # Keep the real status in the message; the script itself still exits 1.
        printf 'FAIL: httrack -#6 exited with status %d for input %q\n' \
            "$status" "$1" >&2
        exit 1
    fi
}
|
|
|
|
# NOTE(review): the entity references below were restored from a copy in which
# they had been HTML-decoded (input == expected); confirm the exact spellings
# against the upstream test file.

# named entities
ent '&amp;' '&'
ent '&lt;&gt;' '<>'
ent '&eacute;' 'é'

# numeric: decimal and hex
ent '&#65;&#66;' 'AB'
ent '&#x41;' 'A'
ent '&#xe9;' 'é'
|
|
|
|
# malformed numeric reference (decimal 'e9' has no digits) is left verbatim
ent '&#e9;' '&#e9;'

# U+0000 is not emitted; the reference is left verbatim
# NOTE(review): '&#0;' restored from a copy that showed U+FFFD on both sides;
# confirm the exact spelling against the upstream test file.
ent '&#0;' '&#0;'

# unknown entity is left verbatim
ent '&unknownentity;' '&unknownentity;'

# no entities: pass-through
ent 'plain text' 'plain text'
|
|
|
|
# NOTE(review): '&amp;amp;' and '&nbsp;' were restored from a copy in which
# they had been HTML-decoded; confirm against the upstream test file.

# decoding is a single pass: &amp;amp; -> &amp; (not &)
ent '&amp;amp;' '&amp;'

# KNOWN BUG: &nbsp; (U+00A0) decodes to a plain space (0x20), not C2 A0. The
# engine forces 160 -> 32 in htsencoding.c (FIXME hack). Locked here; if that
# hack is ever removed, update this to expect the C2 A0 byte sequence.
ent '&nbsp;' ' '
|
|
|
|
# NOTE(review): both inputs below were restored from a copy in which the
# entity references had been HTML-decoded; the exact digits and the choice of
# named vs numeric references must be confirmed against the upstream test file.

# overflowing numeric reference must not crash (value far above U+10FFFF)
runs '&#99999999999999999999;'

# original compound case. NOTE: the space after '&foo;' is the known bug
# above (U+00A0 -> 0x20), not a real space in the source.
ent '&foo;&nbsp;th&eacute;&amp;caf&eacute;&#e9;もののけ姫' '&foo; thé&café&#e9;もののけ姫'
|