mirror of
https://github.com/xroche/httrack.git
synced 2026-06-20 09:09:02 +03:00
Compare commits
4 Commits
build/drop
...
tests/stri
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a498745df3 | ||
|
|
f875590b90 | ||
|
|
c1a8c5ffa8 | ||
|
|
46a9f6db5d |
62
.github/workflows/ci.yml
vendored
62
.github/workflows/ci.yml
vendored
@@ -320,37 +320,17 @@ jobs:
|
||||
lint:
|
||||
name: lint (shellcheck, shfmt)
|
||||
runs-on: ubuntu-24.04
|
||||
env:
|
||||
SHFMT_VERSION: v3.8.0
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
# shfmt is a pinned release binary, so it never changes: cache it keyed on
|
||||
# the version. Same rationale as the git-clang-format driver below -- avoid
|
||||
# re-downloading an unchanging file from github.com on every run.
|
||||
- name: Cache shfmt binary
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/shfmt/shfmt
|
||||
key: shfmt-${{ env.SHFMT_VERSION }}-${{ runner.arch }}
|
||||
|
||||
- name: Install linters
|
||||
run: |
|
||||
set -euo pipefail
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends shellcheck
|
||||
# shfmt is not packaged in apt; fetch a pinned release binary (cold
|
||||
# cache only), retrying through transient errors.
|
||||
shfmt="$HOME/.cache/shfmt/shfmt"
|
||||
if [ ! -s "$shfmt" ]; then
|
||||
echo "shfmt cache MISS: fetching ${SHFMT_VERSION} from github.com"
|
||||
mkdir -p "$(dirname "$shfmt")"
|
||||
curl --retry 5 --retry-all-errors -fsSL -o "$shfmt" \
|
||||
"https://github.com/mvdan/sh/releases/download/${SHFMT_VERSION}/shfmt_${SHFMT_VERSION}_linux_$(dpkg --print-architecture)"
|
||||
else
|
||||
echo "shfmt cache HIT: using cached ${SHFMT_VERSION}"
|
||||
fi
|
||||
sudo install -m 0755 "$shfmt" /usr/local/bin/shfmt
|
||||
# noble ships shfmt 3.8.0 (universe), matching the pinned local dev
|
||||
# version; use it rather than fetching a release binary from github.com.
|
||||
sudo apt-get install -y --no-install-recommends shellcheck shfmt
|
||||
shfmt --version
|
||||
|
||||
# Lint the scripts we maintain; the legacy scripts are a separate cleanup.
|
||||
- name: shellcheck
|
||||
@@ -366,24 +346,11 @@ jobs:
|
||||
name: format (clang-format-19, changed lines)
|
||||
if: github.event_name == 'pull_request'
|
||||
runs-on: ubuntu-24.04
|
||||
env:
|
||||
# Single-source the tag so the cache key and the fetch URL can never drift.
|
||||
LLVM_TAG: llvmorg-19.1.7
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
# The git-clang-format driver is pinned to an immutable release tag, so the
|
||||
# fetched file never changes: cache it keyed on the tag. raw.githubusercontent.com
|
||||
# 429-rate-limits the shared runner egress IPs, and re-downloading an unchanging
|
||||
# file every run was the only thing that could (and did) hit that limit.
|
||||
- name: Cache git-clang-format driver
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/git-clang-format/git-clang-format
|
||||
key: git-clang-format-${{ env.LLVM_TAG }}
|
||||
|
||||
- name: Install clang-format 19 (pinned, from apt.llvm.org)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
@@ -394,17 +361,9 @@ jobs:
|
||||
| sudo tee /etc/apt/sources.list.d/llvm-19.list >/dev/null
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends clang-format-19
|
||||
# Cold cache only: fetch the driver, retrying through transient 429s.
|
||||
driver="$HOME/.cache/git-clang-format/git-clang-format"
|
||||
if [ ! -s "$driver" ]; then
|
||||
echo "git-clang-format cache MISS: fetching ${LLVM_TAG} from raw.githubusercontent.com"
|
||||
mkdir -p "$(dirname "$driver")"
|
||||
curl --retry 5 --retry-all-errors -fsSL -o "$driver" \
|
||||
"https://raw.githubusercontent.com/llvm/llvm-project/${LLVM_TAG}/clang/tools/clang-format/git-clang-format"
|
||||
else
|
||||
echo "git-clang-format cache HIT: using cached ${LLVM_TAG}"
|
||||
fi
|
||||
sudo install -m 0755 "$driver" /usr/local/bin/git-clang-format
|
||||
# The clang-format-19 package ships the git-clang-format driver;
|
||||
# expose it unsuffixed so "git clang-format" finds it.
|
||||
sudo ln -sf /usr/bin/git-clang-format-19 /usr/local/bin/git-clang-format
|
||||
clang-format-19 --version
|
||||
|
||||
- name: Check formatting of changed lines
|
||||
@@ -418,10 +377,9 @@ jobs:
|
||||
--diff --extensions c,h "$base")"
|
||||
rc=$?
|
||||
set -e
|
||||
# Classify by output first: a non-empty diff means "not clean",
|
||||
# regardless of the driver's exit convention (the release-tag driver
|
||||
# exits 0 and signals via stdout; some packaged drivers exit 1 on a
|
||||
# diff). A nonzero exit with clean output is a real checker error.
|
||||
# Classify by output, not exit code: a non-empty diff means "not
|
||||
# clean" (git-clang-format may exit 0 or 1 on a diff). A nonzero exit
|
||||
# with clean output is a real checker error.
|
||||
case "$diff" in
|
||||
"" | "no modified files to format" | *"did not modify any files"*)
|
||||
if [ "$rc" -ne 0 ]; then
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# check that httrack starts
|
||||
httrack --version >/dev/null
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# charset -> UTF-8 conversion (hts_convertStringToUTF8).
|
||||
# -#3 <charset> <string> prints the string re-decoded from <charset> as UTF-8.
|
||||
conv() {
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
# --headers) forms, and an over-cap value is refused cleanly rather than
|
||||
# overrunning a fixed scratch buffer.
|
||||
|
||||
set -u
|
||||
set -euo pipefail
|
||||
|
||||
tmp=$(mktemp -d "${TMPDIR:-/tmp}/httrack_cmdline.XXXXXX") || exit 1
|
||||
trap 'rm -rf "$tmp"' EXIT HUP INT QUIT PIPE TERM
|
||||
@@ -26,8 +26,8 @@ run() {
|
||||
shift
|
||||
rm -rf "$out"
|
||||
mkdir -p "$out"
|
||||
httrack "file://$tmp/index.html" -O "$out" --quiet -n "$@" >"$out/.log" 2>&1
|
||||
RC=$?
|
||||
RC=0
|
||||
httrack "file://$tmp/index.html" -O "$out" --quiet -n "$@" >"$out/.log" 2>&1 || RC=$?
|
||||
}
|
||||
|
||||
# crawl using exactly the given args as the only URL(s), no implicit primary URL;
|
||||
@@ -37,8 +37,8 @@ run_only() {
|
||||
shift
|
||||
rm -rf "$out"
|
||||
mkdir -p "$out"
|
||||
httrack -O "$out" --quiet -n "$@" >"$out/.log" 2>&1
|
||||
RC=$?
|
||||
RC=0
|
||||
httrack -O "$out" --quiet -n "$@" >"$out/.log" 2>&1 || RC=$?
|
||||
}
|
||||
|
||||
# assert the value was accepted: clean exit and the fixture was mirrored
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# HTML entity unescaping (hts_unescapeEntitiesWithCharset).
|
||||
# -#6 <string> prints the string with entities decoded (UTF-8 output).
|
||||
ent() {
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# wildcard filter engine (strjoker), the core of +/- include/exclude rules.
|
||||
# -#0 <filter> <string> prints "<string> does match <filter>" or "... does NOT match ...".
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# httrack internal hashtable autotest on 100K keys
|
||||
httrack -#7 100000
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# IDNA / punycode encode (-#4) and decode (-#5). This code has a CVE history,
|
||||
# so the edge cases below cover passthrough, round-trips, and malformed input.
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# MIME type guessing from extension (get_httptype / give_mimext).
|
||||
# -#2 <path> prints "<path> is '<mime>'" then "and its local type is '.<ext>'".
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
# Offline HTML parser tests: each section crawls a file:// fixture (no network)
|
||||
# and checks which assets the parser captured and how it rewrote the links.
|
||||
|
||||
set -u
|
||||
set -euo pipefail
|
||||
|
||||
tmp=$(mktemp -d "${TMPDIR:-/tmp}/httrack_parse.XXXXXX") || exit 1
|
||||
trap 'rm -rf "$tmp"' EXIT HUP INT QUIT PIPE TERM
|
||||
@@ -19,7 +19,9 @@ crawl() {
|
||||
local html="$1" out="$2"
|
||||
rm -rf "$out"
|
||||
mkdir -p "$out"
|
||||
httrack "file://$html" -O "$out" --quiet --near -n >"$out/.log" 2>&1
|
||||
# the crawl's own exit status is irrelevant here; the assertions below check
|
||||
# the mirrored files, so don't let set -e trip on a non-zero httrack exit
|
||||
httrack "file://$html" -O "$out" --quiet --near -n >"$out/.log" 2>&1 || true
|
||||
}
|
||||
|
||||
# assert a file with the given basename was saved somewhere under <out>
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# path simplify engine (fil_simplifie): collapses ./ and ../ segments.
|
||||
simp() {
|
||||
test "$(httrack -O /dev/null -#1 "$1")" == "simplified=$2" || exit 1
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# htssafe.h bounded string operations (driven by 'httrack -#8').
|
||||
|
||||
# Success path: every bounded op (strcpybuff/strcatbuff/strncatbuff/strlcpybuff)
|
||||
# must behave correctly. Like the other -# debug modes, a trailing token is
|
||||
# required (a bare '-#8' falls through to the usage screen).
|
||||
out=$(httrack -#8 run)
|
||||
test $? -eq 0 || exit 1
|
||||
rc=0
|
||||
out=$(httrack -#8 run) || rc=$?
|
||||
test "$rc" -eq 0 || exit 1
|
||||
test "$out" == "strsafe: OK" || exit 1
|
||||
|
||||
# Overflow path: an over-capacity write into a sized buffer must be caught by
|
||||
@@ -15,7 +18,8 @@ test "$out" == "strsafe: OK" || exit 1
|
||||
# Assert the htssafe abort signature specifically, so the test cannot pass for
|
||||
# an unrelated reason (e.g. the -#8 mode being gone and falling through to the
|
||||
# usage screen, which also exits non-zero).
|
||||
err=$(httrack -#8 overflow "this string is far too long for the buffer" 2>&1)
|
||||
# the bounded macro aborts (non-zero exit), so don't let set -e trip on it
|
||||
err=$(httrack -#8 overflow "this string is far too long for the buffer" 2>&1) || true
|
||||
case "$err" in
|
||||
*"strsafe: NOT aborted"*) echo "over-capacity write was NOT caught" >&2; exit 1 ;;
|
||||
*"overflow while copying"*) ;;
|
||||
@@ -26,7 +30,7 @@ esac
|
||||
# capacity (4 bytes into a 4-byte buffer), so this also pins the boundary: a
|
||||
# '<=' off-by-one in the capacity check would let it through (and print "NOT
|
||||
# aborted"). Match the specific htsbuff abort message, not just any assert.
|
||||
err=$(httrack -#8 overflow-buff "abcd" 2>&1)
|
||||
err=$(httrack -#8 overflow-buff "abcd" 2>&1) || true
|
||||
case "$err" in
|
||||
*"strsafe: NOT aborted"*) echo "htsbuff over-capacity write was NOT caught" >&2; exit 1 ;;
|
||||
*"htsbuff append overflow"*) ;;
|
||||
|
||||
@@ -8,6 +8,9 @@
|
||||
# POSIX /bin/sh on some platforms (e.g. macOS), so avoid bashisms (such as
|
||||
# process substitution) despite the #!/bin/bash above.
|
||||
|
||||
# pipefail is a bashism; keep to POSIX set flags ($(BASH) may be /bin/sh here).
|
||||
set -eu
|
||||
|
||||
: "${top_srcdir:=..}"
|
||||
|
||||
gen="$top_srcdir/man/makeman.sh"
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
# 2. change a source file, re-mirror -> the update must pick up the new content
|
||||
# (guards the update decision that reads the cached metadata).
|
||||
|
||||
set -eu
|
||||
set -euo pipefail
|
||||
|
||||
site=$(mktemp -d)
|
||||
out=$(mktemp -d)
|
||||
@@ -42,7 +42,7 @@ test "$(errors)" = 0 || {
|
||||
exit 1
|
||||
}
|
||||
for suffix in a.html sub/b.html; do
|
||||
find "$out" -path "*/$suffix" | grep -q . || {
|
||||
test -n "$(find "$out" -path "*/$suffix" -print -quit)" || {
|
||||
echo "missing $suffix after update" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
|
||||
bash crawl-test.sh --errors 0 --files 5 httrack http://ut.httrack.com/simple/basic.html
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
|
||||
bash crawl-test.sh --errors 0 --files 3 \
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
|
||||
# unicode tests
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
|
||||
# unicode tests
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
|
||||
# http://code.google.com/p/httrack/issues/detail?id=42&can=1
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
|
||||
# http://code.google.com/p/httrack/issues/detail?id=4&can=1
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
bash check-network.sh || ! echo "skipping online unit tests" || exit 77
|
||||
|
||||
if test "$HTTPS_SUPPORT" == "no"; then
|
||||
if test "${HTTPS_SUPPORT:-}" == "no"; then
|
||||
echo "no https support compiled, skipping"
|
||||
exit 77
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user