mirror of
https://github.com/xroche/httrack.git
synced 2026-06-16 23:33:18 +03:00
Compare commits
53 Commits
fix/webhtt
...
ci/cache-g
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
90847cf083 | ||
|
|
1611add5a9 | ||
|
|
92f4ea044b | ||
|
|
4344801983 | ||
|
|
25b9a53c89 | ||
|
|
5a716a0e30 | ||
|
|
4bc6855213 | ||
|
|
fe8bd59d19 | ||
|
|
83d813eb7f | ||
|
|
31eead95df | ||
|
|
1f29ed41db | ||
|
|
9db360e5fd | ||
|
|
88bfcff10c | ||
|
|
1df45fc231 | ||
|
|
3a0f5779dd | ||
|
|
46fd973e0b | ||
|
|
ddc39b7dc0 | ||
|
|
085937b305 | ||
|
|
594820d3eb | ||
|
|
36a9f5a827 | ||
|
|
20880c1a4d | ||
|
|
a6fc0e9dab | ||
|
|
f227135d16 | ||
|
|
223564eaca | ||
|
|
7db49a64b6 | ||
|
|
f1c04c10eb | ||
|
|
17fc54869d | ||
|
|
d2e43549d8 | ||
|
|
a9b16d96ea | ||
|
|
4ed828ff78 | ||
|
|
82ace34c4d | ||
|
|
3970eb3706 | ||
|
|
d3c41b31e8 | ||
|
|
f8367eeac7 | ||
|
|
9279a4b349 | ||
|
|
b52e8c4c0f | ||
|
|
665f51d1a0 | ||
|
|
e4e5d4699a | ||
|
|
a50691c0f8 | ||
|
|
5f96e86818 | ||
|
|
6002bc20ca | ||
|
|
bdbc741597 | ||
|
|
d0a1b957cd | ||
|
|
6c329744e7 | ||
|
|
1375ef97d7 | ||
|
|
13207a92fc | ||
|
|
d3eecbf211 | ||
|
|
7ec77156d0 | ||
|
|
3cd8197cc7 | ||
|
|
37f50bb925 | ||
|
|
d8d1eafcd1 | ||
|
|
80d0e90819 | ||
|
|
8dde8dc03c |
269
.github/workflows/ci.yml
vendored
269
.github/workflows/ci.yml
vendored
@@ -31,7 +31,7 @@ jobs:
|
||||
env:
|
||||
CC: ${{ matrix.cc }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
@@ -61,6 +61,169 @@ jobs:
|
||||
if: failure()
|
||||
run: cat tests/test-suite.log 2>/dev/null || true
|
||||
|
||||
# Portability: build and test on macOS (Darwin/clang) on a native runner --
|
||||
# no VM. The tree has no __APPLE__ branches, so Darwin exercises the
|
||||
# generic-Unix path on a second libc and kernel. brew's openssl@3 is keg-only,
|
||||
# so point configure at it; everything else is in the SDK or default paths.
|
||||
macos:
|
||||
name: build (macOS arm64, clang)
|
||||
runs-on: macos-14
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
set -euo pipefail
|
||||
brew install autoconf automake libtool autoconf-archive
|
||||
|
||||
- name: Configure
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ssl="$(brew --prefix openssl@3)"
|
||||
autoreconf -fi
|
||||
./configure CPPFLAGS="-I${ssl}/include" LDFLAGS="-L${ssl}/lib"
|
||||
|
||||
- name: Build
|
||||
run: make -j"$(sysctl -n hw.ncpu)"
|
||||
|
||||
- name: Test
|
||||
run: make check
|
||||
|
||||
- name: Print the test log on failure
|
||||
if: failure()
|
||||
run: cat tests/test-suite.log 2>/dev/null || true
|
||||
|
||||
# Portability/hardening: 32-bit (i386) build on the x86-64 runner via multilib
|
||||
# -- no extra hardware. Exercises the 32-bit size_t/pointer ABI, where size
|
||||
# and bounds math can truncate or wrap in ways 64-bit never reveals (the axis
|
||||
# the overflow-safe bounds work targets). --build (not --host) keeps configure
|
||||
# out of cross mode, so the i386 binary still runs the test suite here.
|
||||
linux-i386:
|
||||
name: build (linux i386, gcc -m32)
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Install build dependencies (multilib + 32-bit libs)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
sudo dpkg --add-architecture i386
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
build-essential gcc-multilib autoconf automake libtool \
|
||||
autoconf-archive zlib1g-dev:i386 libssl-dev:i386
|
||||
|
||||
- name: Configure
|
||||
run: |
|
||||
set -euo pipefail
|
||||
autoreconf -fi
|
||||
./configure --build=i686-pc-linux-gnu CC="gcc -m32"
|
||||
|
||||
- name: Build
|
||||
run: make -j"$(nproc)"
|
||||
|
||||
- name: Test
|
||||
run: make check
|
||||
|
||||
- name: Print the test log on failure
|
||||
if: failure()
|
||||
run: cat tests/test-suite.log 2>/dev/null || true
|
||||
|
||||
# Memory safety: build and run the suite under AddressSanitizer +
|
||||
# UndefinedBehaviorSanitizer. The offline engine self-tests drive the parsers
|
||||
# that chew on untrusted crawled input (charset, mime, HTML, entities, IDNA,
|
||||
# filters, cache) straight through the sanitizers, so a buffer overrun,
|
||||
# use-after-free, or signed overflow there fails the build instead of slipping
|
||||
# past a plain -O2 build. gcc's runtimes; one job is enough (the bug class is
|
||||
# arch-independent and the matrix already covers compile portability).
|
||||
sanitize:
|
||||
name: sanitize (ASan+UBSan, gcc)
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
set -euo pipefail
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
build-essential autoconf automake libtool autoconf-archive \
|
||||
zlib1g-dev libssl-dev
|
||||
|
||||
- name: Configure (sanitized)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
autoreconf -fi
|
||||
./configure CC=gcc \
|
||||
CFLAGS="-fsanitize=address,undefined -fno-sanitize-recover=all -g -O1 -fno-omit-frame-pointer" \
|
||||
LDFLAGS="-fsanitize=address,undefined"
|
||||
|
||||
- name: Build
|
||||
run: make -j"$(nproc)"
|
||||
|
||||
- name: Test (sanitized)
|
||||
# Leaks at exit are out of scope (the CLI frees little on the way out);
|
||||
# we want memory-safety errors, so turn leak detection off and make every
|
||||
# other finding abort the run.
|
||||
#
|
||||
# Poison fresh allocations with 0xCA and freed blocks with 0xCB (decimal
|
||||
# 202/203) so memory never reads back as accidental zeros: a missing-NUL
|
||||
# fread buffer then runs strlen off into the redzone instead of stopping
|
||||
# at a lucky zero. Distinct bytes tell the two apart in a dump (0xCA =
|
||||
# uninitialized, 0xCB = use-after-free). ASan caps its malloc fill at 4096
|
||||
# bytes by default, so max_malloc_fill_size lifts it to cover large cache
|
||||
# buffers; free_fill flags use-after-free reads.
|
||||
env:
|
||||
ASAN_OPTIONS: detect_leaks=0:abort_on_error=1:halt_on_error=1:strict_string_checks=1:malloc_fill_byte=202:max_malloc_fill_size=2147483647:free_fill_byte=203:max_free_fill_size=2147483647
|
||||
UBSAN_OPTIONS: print_stacktrace=1:halt_on_error=1
|
||||
run: make check
|
||||
|
||||
- name: Print the test log on failure
|
||||
if: failure()
|
||||
run: cat tests/test-suite.log 2>/dev/null || true
|
||||
|
||||
# Optional-dependency build: compile and test with HTTPS/OpenSSL disabled --
|
||||
# the configuration users on minimal systems build. libssl is not even
|
||||
# installed here so configure cannot silently re-enable it. The matrix above
|
||||
# always has libssl, so the HTS_USEOPENSSL-disabled code paths would otherwise never
|
||||
# be compiled and could rot unnoticed.
|
||||
no-ssl:
|
||||
name: build (no openssl, --disable-https)
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Install build dependencies (no libssl)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
build-essential autoconf automake libtool autoconf-archive zlib1g-dev
|
||||
|
||||
- name: Configure (https disabled)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
autoreconf -fi
|
||||
./configure --disable-https
|
||||
|
||||
- name: Build
|
||||
run: make -j"$(nproc)"
|
||||
|
||||
- name: Test
|
||||
run: make check
|
||||
|
||||
- name: Print the test log on failure
|
||||
if: failure()
|
||||
run: cat tests/test-suite.log 2>/dev/null || true
|
||||
|
||||
# Validate the Debian packaging via the same script maintainers release with.
|
||||
# One amd64/gcc run is enough: packaging (control/rules/manifest/lintian/quilt
|
||||
# source build) is arch- and compiler-independent, and the build matrix above
|
||||
@@ -69,7 +232,7 @@ jobs:
|
||||
name: deb package (lintian)
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
@@ -84,8 +247,44 @@ jobs:
|
||||
|
||||
# --unsigned: CI has no GPG key (also skips the release sig/checksums).
|
||||
# debuild builds every package, then lintian gates on errors.
|
||||
#
|
||||
# DEB_BUILD_OPTIONS trims work CI does not need (release builds via
|
||||
# mkdeb.sh are untouched): noautodbgsym drops the -dbgsym packages whose
|
||||
# LTO payloads are slow to compress and that CI never ships; parallel uses
|
||||
# every core. We let debuild run its test pass -- the only one now that
|
||||
# mkdeb no longer runs its own -- so CI exercises the packaged tests.
|
||||
- name: Build Debian packages
|
||||
run: bash tools/mkdeb.sh --unsigned --no-release-artifacts
|
||||
run: |
|
||||
export DEB_BUILD_OPTIONS="noautodbgsym parallel=$(nproc)"
|
||||
bash tools/mkdeb.sh --unsigned --no-release-artifacts
|
||||
|
||||
# Release-tarball integrity: `make distcheck` rolls the dist tarball, then
|
||||
# configures, builds and tests it out-of-tree from a read-only source tree and
|
||||
# checks nothing is left behind. Catches a file referenced in *_SOURCES or
|
||||
# EXTRA_DIST but missing from the tarball -- the same "ships broken to users"
|
||||
# class as a stale committed Makefile.in.
|
||||
distcheck:
|
||||
name: distcheck (release tarball)
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
set -euo pipefail
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
build-essential autoconf automake libtool autoconf-archive \
|
||||
zlib1g-dev libssl-dev
|
||||
|
||||
- name: distcheck
|
||||
run: |
|
||||
set -euo pipefail
|
||||
autoreconf -fi
|
||||
./configure
|
||||
make -j"$(nproc)" distcheck
|
||||
|
||||
dco:
|
||||
name: DCO sign-off
|
||||
@@ -93,7 +292,7 @@ jobs:
|
||||
if: github.event_name == 'pull_request'
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -121,20 +320,37 @@ jobs:
|
||||
lint:
|
||||
name: lint (shellcheck, shfmt)
|
||||
runs-on: ubuntu-24.04
|
||||
env:
|
||||
SHFMT_VERSION: v3.8.0
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
# shfmt is a pinned release binary, so it never changes: cache it keyed on
|
||||
# the version. Same rationale as the git-clang-format driver below -- avoid
|
||||
# re-downloading an unchanging file from github.com on every run.
|
||||
- name: Cache shfmt binary
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/shfmt/shfmt
|
||||
key: shfmt-${{ env.SHFMT_VERSION }}-${{ runner.arch }}
|
||||
|
||||
- name: Install linters
|
||||
env:
|
||||
SHFMT_VERSION: v3.8.0
|
||||
run: |
|
||||
set -euo pipefail
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends shellcheck
|
||||
# shfmt is not packaged in apt; fetch a pinned release binary.
|
||||
curl -fsSL -o /tmp/shfmt \
|
||||
"https://github.com/mvdan/sh/releases/download/${SHFMT_VERSION}/shfmt_${SHFMT_VERSION}_linux_$(dpkg --print-architecture)"
|
||||
sudo install -m 0755 /tmp/shfmt /usr/local/bin/shfmt
|
||||
# shfmt is not packaged in apt; fetch a pinned release binary (cold
|
||||
# cache only), retrying through transient errors.
|
||||
shfmt="$HOME/.cache/shfmt/shfmt"
|
||||
if [ ! -s "$shfmt" ]; then
|
||||
echo "shfmt cache MISS: fetching ${SHFMT_VERSION} from github.com"
|
||||
mkdir -p "$(dirname "$shfmt")"
|
||||
curl --retry 5 --retry-all-errors -fsSL -o "$shfmt" \
|
||||
"https://github.com/mvdan/sh/releases/download/${SHFMT_VERSION}/shfmt_${SHFMT_VERSION}_linux_$(dpkg --print-architecture)"
|
||||
else
|
||||
echo "shfmt cache HIT: using cached ${SHFMT_VERSION}"
|
||||
fi
|
||||
sudo install -m 0755 "$shfmt" /usr/local/bin/shfmt
|
||||
|
||||
# Lint the scripts we maintain; the legacy scripts are a separate cleanup.
|
||||
- name: shellcheck
|
||||
@@ -150,11 +366,24 @@ jobs:
|
||||
name: format (clang-format-19, changed lines)
|
||||
if: github.event_name == 'pull_request'
|
||||
runs-on: ubuntu-24.04
|
||||
env:
|
||||
# Single-source the tag so the cache key and the fetch URL can never drift.
|
||||
LLVM_TAG: llvmorg-19.1.7
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
# The git-clang-format driver is pinned to an immutable release tag, so the
|
||||
# fetched file never changes: cache it keyed on the tag. raw.githubusercontent.com
|
||||
# 429-rate-limits the shared runner egress IPs, and re-downloading an unchanging
|
||||
# file every run was the only thing that could (and did) hit that limit.
|
||||
- name: Cache git-clang-format driver
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/git-clang-format/git-clang-format
|
||||
key: git-clang-format-${{ env.LLVM_TAG }}
|
||||
|
||||
- name: Install clang-format 19 (pinned, from apt.llvm.org)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
@@ -165,11 +394,17 @@ jobs:
|
||||
| sudo tee /etc/apt/sources.list.d/llvm-19.list >/dev/null
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends clang-format-19
|
||||
# git-clang-format driver, pinned to an immutable release tag (not a
|
||||
# moving branch) since we curl and then execute it.
|
||||
sudo curl -fsSL -o /usr/local/bin/git-clang-format \
|
||||
https://raw.githubusercontent.com/llvm/llvm-project/llvmorg-19.1.7/clang/tools/clang-format/git-clang-format
|
||||
sudo chmod 0755 /usr/local/bin/git-clang-format
|
||||
# Cold cache only: fetch the driver, retrying through transient 429s.
|
||||
driver="$HOME/.cache/git-clang-format/git-clang-format"
|
||||
if [ ! -s "$driver" ]; then
|
||||
echo "git-clang-format cache MISS: fetching ${LLVM_TAG} from raw.githubusercontent.com"
|
||||
mkdir -p "$(dirname "$driver")"
|
||||
curl --retry 5 --retry-all-errors -fsSL -o "$driver" \
|
||||
"https://raw.githubusercontent.com/llvm/llvm-project/${LLVM_TAG}/clang/tools/clang-format/git-clang-format"
|
||||
else
|
||||
echo "git-clang-format cache HIT: using cached ${LLVM_TAG}"
|
||||
fi
|
||||
sudo install -m 0755 "$driver" /usr/local/bin/git-clang-format
|
||||
clang-format-19 --version
|
||||
|
||||
- name: Check formatting of changed lines
|
||||
|
||||
67
AGENTS.md
Normal file
67
AGENTS.md
Normal file
@@ -0,0 +1,67 @@
|
||||
# AGENTS.md — working in the HTTrack tree
|
||||
|
||||
Policy and PR etiquette live in [CONTRIBUTING.md](CONTRIBUTING.md). This file is
|
||||
the operational checklist: toolchain, invariants, and how to ship a change.
|
||||
|
||||
## Build & test
|
||||
- Fresh clone first: `git submodule update --init src/coucal`
|
||||
- `bash configure && make && make check`
|
||||
|
||||
## Hard invariants
|
||||
- **Toolchain edit** (`configure.ac`, any `Makefile.am`, `m4/`) → run
|
||||
`autoreconf -fi` and commit the regenerated tracked files. The repo ships the
|
||||
generated `configure`/`Makefile.in` so users build without autotools; CI does
|
||||
**not** catch staleness.
|
||||
- **Format only changed lines** with `git clang-format` (clang-format 19). Never
|
||||
reformat untouched code: the engine was formatted by an old tool and won't
|
||||
round-trip.
|
||||
- **Byte-safe edits.** Files with raw high bytes are ISO-8859-1 (French
|
||||
comments). Edit them byte-wise (`perl -0pi`, `sed`), not through a tool that
|
||||
re-encodes to UTF-8 and corrupts them.
|
||||
|
||||
## Security (HTTrack parses hostile input off the network)
|
||||
- Bounds-check every copy. Overflow-safe form: put the untrusted value alone,
|
||||
`untrusted < limit - controlled` — never `controlled + untrusted < limit`,
|
||||
which can wrap and pass.
|
||||
|
||||
## Code & prose
|
||||
- Be terse. Comment the why, in English; translate French comments you touch.
|
||||
- Strip AI tells from prose (em-dash overuse, rule-of-three, filler, vague
|
||||
attributions). Ref: Wikipedia "Signs of AI writing". Claude Code: `/humanizer`.
|
||||
- Behavior change → add a test. Fast path: a hidden `httrack -#N` debug
|
||||
subcommand (`htscoremain.c`) driven by a `tests/NN_*.test`, rather than a slow crawl.
|
||||
|
||||
## Review your change adversarially (strongly suggested)
|
||||
Before pushing, and when reviewing others, don't skim for bugs:
|
||||
- **One invariant at a time.** Name a property the diff must preserve (bounds
|
||||
hold, cache/wire format unchanged, no use-after-free, ABI stable), then
|
||||
construct inputs that would break it. "General correctness" is not a charter.
|
||||
- **Audit tests against the spec, not the code.** For each new test ask: "what
|
||||
buggy path would still pass this?" If you can build one, the test is
|
||||
confirmation-biased: assertions copied from observed output lock bugs in.
|
||||
- **Risk areas need runtime probes.** Touching hostile-input parsing, struct
|
||||
layout/ABI, cache/wire format, or a security path? A static or unit check
|
||||
isn't enough; exercise the wrong behavior at runtime. Claude Code:
|
||||
`/review-recipe`.
|
||||
|
||||
## Commits
|
||||
- **Sign-off is mandatory.** Every commit carries a `Signed-off-by` trailer:
|
||||
`git commit -s` (DCO, CI-enforced — unsigned commits are rejected).
|
||||
- **Co-Authored-By is mandatory for AI-assisted commits.** Carry a
|
||||
`Co-Authored-By:` trailer naming the assistant. Attribute there, never in a
|
||||
PR-body footer.
|
||||
- PRs land as a merge commit; every commit on the branch goes onto master, so
|
||||
keep each commit message clean and meaningful.
|
||||
|
||||
## PR descriptions
|
||||
- Plain concise prose; lead with what changed and why. No What/Why/How template.
|
||||
- Title names the problem, not the implementation.
|
||||
- Don't restate the diff — give what it can't show: motivation, context,
|
||||
tradeoffs, risk.
|
||||
- Length tracks the change: a typo is one sentence; a security fix earns a writeup.
|
||||
- Verify claims against the code before you write them; flag drift, don't repeat it.
|
||||
- Don't hard-wrap (GitHub reflows). No "Generated with Claude" footer. Run the
|
||||
prose through `/humanizer`.
|
||||
|
||||
## Toolchain
|
||||
C · clang-format-19 · autoreconf · shfmt + shellcheck (shell) · black + flake8 (Python)
|
||||
@@ -1,12 +1,15 @@
|
||||
# Contributing to HTTrack
|
||||
|
||||
HTTrack is small and old. Keep changes easy to review and safe to merge.
|
||||
HTTrack is small and old. Keep changes easy to review and safe to merge. Working
|
||||
with an AI assistant? The operational checklist is [AGENTS.md](AGENTS.md).
|
||||
|
||||
## Pull requests
|
||||
|
||||
- One change per PR. Small diffs merge fast.
|
||||
- PRs are squash-merged: the title and description become the commit message, so
|
||||
explain *why*.
|
||||
- PRs land as a merge commit, so the branch's commits go onto master as-is: keep
|
||||
each commit message clean and explain *why*.
|
||||
- Be terse in the PR title and description: name the problem, not the fix, don't
|
||||
restate the diff, and calibrate length to the change.
|
||||
- Add or update tests for engine changes (`tests/`), and keep CI green.
|
||||
|
||||
## Style
|
||||
@@ -30,6 +33,9 @@ Welcome, and nothing to disclose. Two rules:
|
||||
- **Own every line** as if you wrote it. Can't explain it in review? Not ready.
|
||||
- **Don't push your work onto reviewers.** A raw generated patch a maintainer has
|
||||
to vet from scratch will be closed.
|
||||
- **Attribution is mandatory.** AI-assisted commits must carry a
|
||||
`Co-Authored-By:` trailer naming the assistant, not a footer in the PR
|
||||
description.
|
||||
|
||||
The sign-off covers AI-assisted code too.
|
||||
|
||||
|
||||
@@ -257,6 +257,7 @@ LD = @LD@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
LDFLAGS_PIE = @LDFLAGS_PIE@
|
||||
LFS_FLAG = @LFS_FLAG@
|
||||
LIBC_FORCE_LINK = @LIBC_FORCE_LINK@
|
||||
LIBOBJS = @LIBOBJS@
|
||||
LIBS = @LIBS@
|
||||
LIBTOOL = @LIBTOOL@
|
||||
|
||||
52
configure
vendored
52
configure
vendored
@@ -695,6 +695,7 @@ HAVE_VISIBILITY
|
||||
CFLAG_VISIBILITY
|
||||
LDFLAGS_PIE
|
||||
CFLAGS_PIE
|
||||
LIBC_FORCE_LINK
|
||||
DEFAULT_LDFLAGS
|
||||
DEFAULT_CFLAGS
|
||||
VERSION_INFO
|
||||
@@ -3684,7 +3685,9 @@ fi
|
||||
|
||||
|
||||
|
||||
VERSION_INFO="2:49:0"
|
||||
# 3:0:0: htsblk layout changed (contenttype/charset/contentencoding widened to
|
||||
# 128), an incompatible ABI break, so bump current and reset revision/age.
|
||||
VERSION_INFO="3:0:0"
|
||||
|
||||
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5
|
||||
printf %s "checking whether to enable maintainer-specific portions of Makefiles... " >&6; }
|
||||
@@ -15871,6 +15874,53 @@ esac
|
||||
fi
|
||||
|
||||
|
||||
# Force libc back into DT_NEEDED for libraries that reach it only through
|
||||
# libhttrack (libhtsjava, the libtest callbacks), but only with a GNU-style
|
||||
# linker; Apple ld rejects these flags and links libSystem unconditionally.
|
||||
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the linker accepts -Wl,--push-state,--no-as-needed,-lc,--pop-state" >&5
|
||||
printf %s "checking whether the linker accepts -Wl,--push-state,--no-as-needed,-lc,--pop-state... " >&6; }
|
||||
if test ${ax_cv_check_ldflags___Wl___push_state___no_as_needed__lc___pop_state+y}
|
||||
then :
|
||||
printf %s "(cached) " >&6
|
||||
else case e in #(
|
||||
e)
|
||||
ax_check_save_flags=$LDFLAGS
|
||||
LDFLAGS="$LDFLAGS -Wl,--push-state,--no-as-needed,-lc,--pop-state"
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_c_try_link "$LINENO"
|
||||
then :
|
||||
ax_cv_check_ldflags___Wl___push_state___no_as_needed__lc___pop_state=yes
|
||||
else case e in #(
|
||||
e) ax_cv_check_ldflags___Wl___push_state___no_as_needed__lc___pop_state=no ;;
|
||||
esac
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext conftest.beam \
|
||||
conftest$ac_exeext conftest.$ac_ext
|
||||
LDFLAGS=$ax_check_save_flags ;;
|
||||
esac
|
||||
fi
|
||||
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_ldflags___Wl___push_state___no_as_needed__lc___pop_state" >&5
|
||||
printf "%s\n" "$ax_cv_check_ldflags___Wl___push_state___no_as_needed__lc___pop_state" >&6; }
|
||||
if test "x$ax_cv_check_ldflags___Wl___push_state___no_as_needed__lc___pop_state" = xyes
|
||||
then :
|
||||
LIBC_FORCE_LINK="-Wl,--push-state,--no-as-needed,-lc,--pop-state"
|
||||
else case e in #(
|
||||
e) : ;;
|
||||
esac
|
||||
fi
|
||||
|
||||
|
||||
|
||||
### PIE
|
||||
CFLAGS_PIE=""
|
||||
LDFLAGS_PIE=""
|
||||
|
||||
11
configure.ac
11
configure.ac
@@ -29,7 +29,9 @@ AC_CONFIG_SRCDIR(src/httrack.c)
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
AC_CONFIG_HEADERS(config.h)
|
||||
AM_INIT_AUTOMAKE([subdir-objects])
|
||||
VERSION_INFO="2:49:0"
|
||||
# 3:0:0: htsblk layout changed (contenttype/charset/contentencoding widened to
|
||||
# 128), an incompatible ABI break, so bump current and reset revision/age.
|
||||
VERSION_INFO="3:0:0"
|
||||
AM_MAINTAINER_MODE
|
||||
AC_USE_SYSTEM_EXTENSIONS
|
||||
|
||||
@@ -91,6 +93,13 @@ AX_CHECK_LINK_FLAG([-Wl,--no-undefined], [DEFAULT_LDFLAGS="$DEFAULT_LDFLAGS -Wl,
|
||||
AX_CHECK_LINK_FLAG([-Wl,-z,relro,-z,now], [DEFAULT_LDFLAGS="$DEFAULT_LDFLAGS -Wl,-z,relro,-z,now"])
|
||||
AX_CHECK_LINK_FLAG([-Wl,-z,noexecstack], [DEFAULT_LDFLAGS="$DEFAULT_LDFLAGS -Wl,-z,noexecstack"])
|
||||
|
||||
# Force libc back into DT_NEEDED for libraries that reach it only through
|
||||
# libhttrack (libhtsjava, the libtest callbacks), but only with a GNU-style
|
||||
# linker; Apple ld rejects these flags and links libSystem unconditionally.
|
||||
AX_CHECK_LINK_FLAG([-Wl,--push-state,--no-as-needed,-lc,--pop-state],
|
||||
[LIBC_FORCE_LINK="-Wl,--push-state,--no-as-needed,-lc,--pop-state"])
|
||||
AC_SUBST([LIBC_FORCE_LINK])
|
||||
|
||||
### PIE
|
||||
CFLAGS_PIE=""
|
||||
LDFLAGS_PIE=""
|
||||
|
||||
1
debian/webhttrack.files
vendored
1
debian/webhttrack.files
vendored
@@ -4,3 +4,4 @@ usr/share/man/man1/webhttrack.1
|
||||
usr/share/man/man1/htsserver.1
|
||||
usr/share/applications/WebHTTrack-Websites.desktop
|
||||
usr/share/applications/WebHTTrack.desktop
|
||||
usr/share/metainfo/com.httrack.WebHTTrack.metainfo.xml
|
||||
|
||||
@@ -12,6 +12,7 @@ WebIcon16x16dir = $(datadir)/icons/hicolor/16x16/apps
|
||||
WebIcon32x32dir = $(datadir)/icons/hicolor/32x32/apps
|
||||
WebIcon48x48dir = $(datadir)/icons/hicolor/48x48/apps
|
||||
VFolderEntrydir = $(prefix)/share/applications
|
||||
MetaInfodir = $(datadir)/metainfo
|
||||
|
||||
# Wildcards are globbed against $(srcdir): a bare "*.html" is resolved against
|
||||
# the build dir and stays unexpanded (breaking "make") in an out-of-tree build.
|
||||
@@ -33,11 +34,12 @@ WebIcon16x16_DATA = $(srcdir)/server/div/16x16/*.png
|
||||
WebIcon32x32_DATA = $(srcdir)/server/div/32x32/*.png
|
||||
WebIcon48x48_DATA = $(srcdir)/server/div/48x48/*.png
|
||||
VFolderEntry_DATA = $(srcdir)/server/div/*.desktop
|
||||
MetaInfo_DATA = $(srcdir)/server/div/*.metainfo.xml
|
||||
|
||||
EXTRA_DIST = $(HelpHtml_DATA) $(HelpHtmlimg_DATA) $(HelpHtmlimages_DATA) \
|
||||
$(HelpHtmldiv_DATA) $(WebHtml_DATA) $(WebHtmlimages_DATA) \
|
||||
$(WebPixmap_DATA) $(WebIcon16x16_DATA) $(WebIcon32x32_DATA) $(WebIcon48x48_DATA) \
|
||||
$(VFolderEntry_DATA) \
|
||||
$(VFolderEntry_DATA) $(MetaInfo_DATA) \
|
||||
httrack.css
|
||||
|
||||
install-data-hook:
|
||||
|
||||
@@ -152,14 +152,15 @@ am__uninstall_files_from_dir = { \
|
||||
am__installdirs = "$(DESTDIR)$(HelpHtmldir)" \
|
||||
"$(DESTDIR)$(HelpHtmlTxtdir)" "$(DESTDIR)$(HelpHtmldivdir)" \
|
||||
"$(DESTDIR)$(HelpHtmlimagesdir)" "$(DESTDIR)$(HelpHtmlimgdir)" \
|
||||
"$(DESTDIR)$(HelpHtmlrootdir)" "$(DESTDIR)$(VFolderEntrydir)" \
|
||||
"$(DESTDIR)$(WebHtmldir)" "$(DESTDIR)$(WebHtmlimagesdir)" \
|
||||
"$(DESTDIR)$(WebIcon16x16dir)" "$(DESTDIR)$(WebIcon32x32dir)" \
|
||||
"$(DESTDIR)$(WebIcon48x48dir)" "$(DESTDIR)$(WebPixmapdir)"
|
||||
"$(DESTDIR)$(HelpHtmlrootdir)" "$(DESTDIR)$(MetaInfodir)" \
|
||||
"$(DESTDIR)$(VFolderEntrydir)" "$(DESTDIR)$(WebHtmldir)" \
|
||||
"$(DESTDIR)$(WebHtmlimagesdir)" "$(DESTDIR)$(WebIcon16x16dir)" \
|
||||
"$(DESTDIR)$(WebIcon32x32dir)" "$(DESTDIR)$(WebIcon48x48dir)" \
|
||||
"$(DESTDIR)$(WebPixmapdir)"
|
||||
DATA = $(HelpHtml_DATA) $(HelpHtmlTxt_DATA) $(HelpHtmldiv_DATA) \
|
||||
$(HelpHtmlimages_DATA) $(HelpHtmlimg_DATA) \
|
||||
$(HelpHtmlroot_DATA) $(VFolderEntry_DATA) $(WebHtml_DATA) \
|
||||
$(WebHtmlimages_DATA) $(WebIcon16x16_DATA) \
|
||||
$(HelpHtmlroot_DATA) $(MetaInfo_DATA) $(VFolderEntry_DATA) \
|
||||
$(WebHtml_DATA) $(WebHtmlimages_DATA) $(WebIcon16x16_DATA) \
|
||||
$(WebIcon32x32_DATA) $(WebIcon48x48_DATA) $(WebPixmap_DATA)
|
||||
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
|
||||
am__DIST_COMMON = $(srcdir)/Makefile.in
|
||||
@@ -212,6 +213,7 @@ LD = @LD@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
LDFLAGS_PIE = @LDFLAGS_PIE@
|
||||
LFS_FLAG = @LFS_FLAG@
|
||||
LIBC_FORCE_LINK = @LIBC_FORCE_LINK@
|
||||
LIBOBJS = @LIBOBJS@
|
||||
LIBS = @LIBS@
|
||||
LIBTOOL = @LIBTOOL@
|
||||
@@ -320,6 +322,7 @@ WebIcon16x16dir = $(datadir)/icons/hicolor/16x16/apps
|
||||
WebIcon32x32dir = $(datadir)/icons/hicolor/32x32/apps
|
||||
WebIcon48x48dir = $(datadir)/icons/hicolor/48x48/apps
|
||||
VFolderEntrydir = $(prefix)/share/applications
|
||||
MetaInfodir = $(datadir)/metainfo
|
||||
|
||||
# Wildcards are globbed against $(srcdir): a bare "*.html" is resolved against
|
||||
# the build dir and stays unexpanded (breaking "make") in an out-of-tree build.
|
||||
@@ -341,10 +344,11 @@ WebIcon16x16_DATA = $(srcdir)/server/div/16x16/*.png
|
||||
WebIcon32x32_DATA = $(srcdir)/server/div/32x32/*.png
|
||||
WebIcon48x48_DATA = $(srcdir)/server/div/48x48/*.png
|
||||
VFolderEntry_DATA = $(srcdir)/server/div/*.desktop
|
||||
MetaInfo_DATA = $(srcdir)/server/div/*.metainfo.xml
|
||||
EXTRA_DIST = $(HelpHtml_DATA) $(HelpHtmlimg_DATA) $(HelpHtmlimages_DATA) \
|
||||
$(HelpHtmldiv_DATA) $(WebHtml_DATA) $(WebHtmlimages_DATA) \
|
||||
$(WebPixmap_DATA) $(WebIcon16x16_DATA) $(WebIcon32x32_DATA) $(WebIcon48x48_DATA) \
|
||||
$(VFolderEntry_DATA) \
|
||||
$(VFolderEntry_DATA) $(MetaInfo_DATA) \
|
||||
httrack.css
|
||||
|
||||
all: all-am
|
||||
@@ -511,6 +515,27 @@ uninstall-HelpHtmlrootDATA:
|
||||
@list='$(HelpHtmlroot_DATA)'; test -n "$(HelpHtmlrootdir)" || list=; \
|
||||
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
|
||||
dir='$(DESTDIR)$(HelpHtmlrootdir)'; $(am__uninstall_files_from_dir)
|
||||
install-MetaInfoDATA: $(MetaInfo_DATA)
|
||||
@$(NORMAL_INSTALL)
|
||||
@list='$(MetaInfo_DATA)'; test -n "$(MetaInfodir)" || list=; \
|
||||
if test -n "$$list"; then \
|
||||
echo " $(MKDIR_P) '$(DESTDIR)$(MetaInfodir)'"; \
|
||||
$(MKDIR_P) "$(DESTDIR)$(MetaInfodir)" || exit 1; \
|
||||
fi; \
|
||||
for p in $$list; do \
|
||||
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
|
||||
echo "$$d$$p"; \
|
||||
done | $(am__base_list) | \
|
||||
while read files; do \
|
||||
echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(MetaInfodir)'"; \
|
||||
$(INSTALL_DATA) $$files "$(DESTDIR)$(MetaInfodir)" || exit $$?; \
|
||||
done
|
||||
|
||||
uninstall-MetaInfoDATA:
|
||||
@$(NORMAL_UNINSTALL)
|
||||
@list='$(MetaInfo_DATA)'; test -n "$(MetaInfodir)" || list=; \
|
||||
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
|
||||
dir='$(DESTDIR)$(MetaInfodir)'; $(am__uninstall_files_from_dir)
|
||||
install-VFolderEntryDATA: $(VFolderEntry_DATA)
|
||||
@$(NORMAL_INSTALL)
|
||||
@list='$(VFolderEntry_DATA)'; test -n "$(VFolderEntrydir)" || list=; \
|
||||
@@ -701,7 +726,7 @@ check-am: all-am
|
||||
check: check-am
|
||||
all-am: Makefile $(DATA)
|
||||
installdirs:
|
||||
for dir in "$(DESTDIR)$(HelpHtmldir)" "$(DESTDIR)$(HelpHtmlTxtdir)" "$(DESTDIR)$(HelpHtmldivdir)" "$(DESTDIR)$(HelpHtmlimagesdir)" "$(DESTDIR)$(HelpHtmlimgdir)" "$(DESTDIR)$(HelpHtmlrootdir)" "$(DESTDIR)$(VFolderEntrydir)" "$(DESTDIR)$(WebHtmldir)" "$(DESTDIR)$(WebHtmlimagesdir)" "$(DESTDIR)$(WebIcon16x16dir)" "$(DESTDIR)$(WebIcon32x32dir)" "$(DESTDIR)$(WebIcon48x48dir)" "$(DESTDIR)$(WebPixmapdir)"; do \
|
||||
for dir in "$(DESTDIR)$(HelpHtmldir)" "$(DESTDIR)$(HelpHtmlTxtdir)" "$(DESTDIR)$(HelpHtmldivdir)" "$(DESTDIR)$(HelpHtmlimagesdir)" "$(DESTDIR)$(HelpHtmlimgdir)" "$(DESTDIR)$(HelpHtmlrootdir)" "$(DESTDIR)$(MetaInfodir)" "$(DESTDIR)$(VFolderEntrydir)" "$(DESTDIR)$(WebHtmldir)" "$(DESTDIR)$(WebHtmlimagesdir)" "$(DESTDIR)$(WebIcon16x16dir)" "$(DESTDIR)$(WebIcon32x32dir)" "$(DESTDIR)$(WebIcon48x48dir)" "$(DESTDIR)$(WebPixmapdir)"; do \
|
||||
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
|
||||
done
|
||||
install: install-am
|
||||
@@ -757,10 +782,10 @@ info-am:
|
||||
install-data-am: install-HelpHtmlDATA install-HelpHtmlTxtDATA \
|
||||
install-HelpHtmldivDATA install-HelpHtmlimagesDATA \
|
||||
install-HelpHtmlimgDATA install-HelpHtmlrootDATA \
|
||||
install-VFolderEntryDATA install-WebHtmlDATA \
|
||||
install-WebHtmlimagesDATA install-WebIcon16x16DATA \
|
||||
install-WebIcon32x32DATA install-WebIcon48x48DATA \
|
||||
install-WebPixmapDATA
|
||||
install-MetaInfoDATA install-VFolderEntryDATA \
|
||||
install-WebHtmlDATA install-WebHtmlimagesDATA \
|
||||
install-WebIcon16x16DATA install-WebIcon32x32DATA \
|
||||
install-WebIcon48x48DATA install-WebPixmapDATA
|
||||
@$(NORMAL_INSTALL)
|
||||
$(MAKE) $(AM_MAKEFLAGS) install-data-hook
|
||||
install-dvi: install-dvi-am
|
||||
@@ -808,10 +833,10 @@ ps-am:
|
||||
uninstall-am: uninstall-HelpHtmlDATA uninstall-HelpHtmlTxtDATA \
|
||||
uninstall-HelpHtmldivDATA uninstall-HelpHtmlimagesDATA \
|
||||
uninstall-HelpHtmlimgDATA uninstall-HelpHtmlrootDATA \
|
||||
uninstall-VFolderEntryDATA uninstall-WebHtmlDATA \
|
||||
uninstall-WebHtmlimagesDATA uninstall-WebIcon16x16DATA \
|
||||
uninstall-WebIcon32x32DATA uninstall-WebIcon48x48DATA \
|
||||
uninstall-WebPixmapDATA
|
||||
uninstall-MetaInfoDATA uninstall-VFolderEntryDATA \
|
||||
uninstall-WebHtmlDATA uninstall-WebHtmlimagesDATA \
|
||||
uninstall-WebIcon16x16DATA uninstall-WebIcon32x32DATA \
|
||||
uninstall-WebIcon48x48DATA uninstall-WebPixmapDATA
|
||||
|
||||
.MAKE: install-am install-data-am install-strip
|
||||
|
||||
@@ -821,20 +846,21 @@ uninstall-am: uninstall-HelpHtmlDATA uninstall-HelpHtmlTxtDATA \
|
||||
install install-HelpHtmlDATA install-HelpHtmlTxtDATA \
|
||||
install-HelpHtmldivDATA install-HelpHtmlimagesDATA \
|
||||
install-HelpHtmlimgDATA install-HelpHtmlrootDATA \
|
||||
install-VFolderEntryDATA install-WebHtmlDATA \
|
||||
install-WebHtmlimagesDATA install-WebIcon16x16DATA \
|
||||
install-WebIcon32x32DATA install-WebIcon48x48DATA \
|
||||
install-WebPixmapDATA install-am install-data install-data-am \
|
||||
install-data-hook install-dvi install-dvi-am install-exec \
|
||||
install-exec-am install-html install-html-am install-info \
|
||||
install-info-am install-man install-pdf install-pdf-am \
|
||||
install-ps install-ps-am install-strip installcheck \
|
||||
installcheck-am installdirs maintainer-clean \
|
||||
maintainer-clean-generic mostlyclean mostlyclean-generic \
|
||||
mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \
|
||||
uninstall-HelpHtmlDATA uninstall-HelpHtmlTxtDATA \
|
||||
uninstall-HelpHtmldivDATA uninstall-HelpHtmlimagesDATA \
|
||||
uninstall-HelpHtmlimgDATA uninstall-HelpHtmlrootDATA \
|
||||
install-MetaInfoDATA install-VFolderEntryDATA \
|
||||
install-WebHtmlDATA install-WebHtmlimagesDATA \
|
||||
install-WebIcon16x16DATA install-WebIcon32x32DATA \
|
||||
install-WebIcon48x48DATA install-WebPixmapDATA install-am \
|
||||
install-data install-data-am install-data-hook install-dvi \
|
||||
install-dvi-am install-exec install-exec-am install-html \
|
||||
install-html-am install-info install-info-am install-man \
|
||||
install-pdf install-pdf-am install-ps install-ps-am \
|
||||
install-strip installcheck installcheck-am installdirs \
|
||||
maintainer-clean maintainer-clean-generic mostlyclean \
|
||||
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
|
||||
tags-am uninstall uninstall-HelpHtmlDATA \
|
||||
uninstall-HelpHtmlTxtDATA uninstall-HelpHtmldivDATA \
|
||||
uninstall-HelpHtmlimagesDATA uninstall-HelpHtmlimgDATA \
|
||||
uninstall-HelpHtmlrootDATA uninstall-MetaInfoDATA \
|
||||
uninstall-VFolderEntryDATA uninstall-WebHtmlDATA \
|
||||
uninstall-WebHtmlimagesDATA uninstall-WebIcon16x16DATA \
|
||||
uninstall-WebIcon32x32DATA uninstall-WebIcon48x48DATA \
|
||||
|
||||
@@ -8,3 +8,6 @@ Comment=Browse Websites Mirrored by WebHTTrack
|
||||
Keywords=browse mirrored;
|
||||
Exec=webhttrack browse
|
||||
Icon=httrack
|
||||
# Helper launcher for WebHTTrack's browse mode, not a standalone app: keep it
|
||||
# out of software-center catalogs so it doesn't duplicate the main entry.
|
||||
X-AppStream-Ignore=true
|
||||
|
||||
55
html/server/div/com.httrack.WebHTTrack.metainfo.xml
Normal file
55
html/server/div/com.httrack.WebHTTrack.metainfo.xml
Normal file
@@ -0,0 +1,55 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- Copyright 2026 Xavier Roche <roche@httrack.com> -->
|
||||
<component type="desktop-application">
|
||||
<id>com.httrack.WebHTTrack</id>
|
||||
<metadata_license>FSFAP</metadata_license>
|
||||
<project_license>GPL-3.0-or-later</project_license>
|
||||
<name>WebHTTrack Website Copier</name>
|
||||
<summary>Copy websites to your computer for offline browsing</summary>
|
||||
<description>
|
||||
<p>
|
||||
WebHTTrack is the web interface to HTTrack, an offline browser utility.
|
||||
It downloads a website from the Internet to a local directory, fetching
|
||||
the HTML, images, and other files and rebuilding the site's link
|
||||
structure so you can browse it offline.
|
||||
</p>
|
||||
<p>
|
||||
A step-by-step web interface guides you through choosing the addresses
|
||||
to mirror and the options to apply. Mirrors can be updated in place and
|
||||
interrupted downloads resumed.
|
||||
</p>
|
||||
<p>Typical uses include:</p>
|
||||
<ul>
|
||||
<li>Keeping an offline copy of a website for reading without a connection</li>
|
||||
<li>Archiving or preserving sites and capturing them for later reference</li>
|
||||
<li>Updating an existing local mirror without downloading it again</li>
|
||||
</ul>
|
||||
</description>
|
||||
<launchable type="desktop-id">WebHTTrack.desktop</launchable>
|
||||
<icon type="stock">httrack</icon>
|
||||
<categories>
|
||||
<category>Network</category>
|
||||
</categories>
|
||||
<keywords>
|
||||
<keyword>offline browser</keyword>
|
||||
<keyword>website copier</keyword>
|
||||
<keyword>mirror</keyword>
|
||||
<keyword>crawl</keyword>
|
||||
<keyword>archiving</keyword>
|
||||
</keywords>
|
||||
<url type="homepage">https://www.httrack.com/</url>
|
||||
<url type="bugtracker">https://github.com/xroche/httrack/issues</url>
|
||||
<developer id="com.httrack">
|
||||
<name>Xavier Roche</name>
|
||||
</developer>
|
||||
<screenshots>
|
||||
<screenshot type="default">
|
||||
<caption>Choosing the addresses and options for a new mirror</caption>
|
||||
<image>https://www.httrack.com/html/images/screenshot_01b.jpg</image>
|
||||
</screenshot>
|
||||
</screenshots>
|
||||
<content_rating type="oars-1.1"/>
|
||||
<releases>
|
||||
<release version="3.49.8" date="2026-06-07"/>
|
||||
</releases>
|
||||
</component>
|
||||
@@ -202,6 +202,7 @@ LD = @LD@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
LDFLAGS_PIE = @LDFLAGS_PIE@
|
||||
LFS_FLAG = @LFS_FLAG@
|
||||
LIBC_FORCE_LINK = @LIBC_FORCE_LINK@
|
||||
LIBOBJS = @LIBOBJS@
|
||||
LIBS = @LIBS@
|
||||
LIBTOOL = @LIBTOOL@
|
||||
|
||||
@@ -20,11 +20,12 @@ AM_CPPFLAGS += -I$(top_srcdir)/src
|
||||
|
||||
# The callback examples reference libc only through libhttrack, so the direct
|
||||
# libc edge gets dropped from DT_NEEDED (library-not-linked-against-libc).
|
||||
# Force libc to be recorded as a dependency.
|
||||
# Force libc back; configure gates the flag since only a GNU-style linker
|
||||
# accepts it (LIBC_FORCE_LINK is empty on e.g. macOS).
|
||||
AM_LDFLAGS = \
|
||||
@DEFAULT_LDFLAGS@ \
|
||||
-L../src \
|
||||
-Wl,--push-state,--no-as-needed,-lc,--pop-state
|
||||
@LIBC_FORCE_LINK@
|
||||
|
||||
# Examples
|
||||
libbaselinks_la_SOURCES = callbacks-example-baselinks.c
|
||||
|
||||
@@ -344,6 +344,7 @@ LD = @LD@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
LDFLAGS_PIE = @LDFLAGS_PIE@
|
||||
LFS_FLAG = @LFS_FLAG@
|
||||
LIBC_FORCE_LINK = @LIBC_FORCE_LINK@
|
||||
LIBOBJS = @LIBOBJS@
|
||||
LIBS = @LIBS@
|
||||
LIBTOOL = @LIBTOOL@
|
||||
@@ -453,11 +454,12 @@ AM_CPPFLAGS = @DEFAULT_CFLAGS@ @THREADS_CFLAGS@ @V6_FLAG@ @LFS_FLAG@ \
|
||||
|
||||
# The callback examples reference libc only through libhttrack, so the direct
|
||||
# libc edge gets dropped from DT_NEEDED (library-not-linked-against-libc).
|
||||
# Force libc to be recorded as a dependency.
|
||||
# Force libc back; configure gates the flag since only a GNU-style linker
|
||||
# accepts it (LIBC_FORCE_LINK is empty on e.g. macOS).
|
||||
AM_LDFLAGS = \
|
||||
@DEFAULT_LDFLAGS@ \
|
||||
-L../src \
|
||||
-Wl,--push-state,--no-as-needed,-lc,--pop-state
|
||||
@LIBC_FORCE_LINK@
|
||||
|
||||
|
||||
# Examples
|
||||
|
||||
@@ -173,6 +173,7 @@ LD = @LD@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
LDFLAGS_PIE = @LDFLAGS_PIE@
|
||||
LFS_FLAG = @LFS_FLAG@
|
||||
LIBC_FORCE_LINK = @LIBC_FORCE_LINK@
|
||||
LIBOBJS = @LIBOBJS@
|
||||
LIBS = @LIBS@
|
||||
LIBTOOL = @LIBTOOL@
|
||||
|
||||
@@ -203,6 +203,7 @@ LD = @LD@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
LDFLAGS_PIE = @LDFLAGS_PIE@
|
||||
LFS_FLAG = @LFS_FLAG@
|
||||
LIBC_FORCE_LINK = @LIBC_FORCE_LINK@
|
||||
LIBOBJS = @LIBOBJS@
|
||||
LIBS = @LIBS@
|
||||
LIBTOOL = @LIBTOOL@
|
||||
|
||||
@@ -86,8 +86,9 @@ libhtsjava_la_SOURCES = htsjava.c htsjava.h
|
||||
libhtsjava_la_LIBADD = $(THREADS_LIBS) $(DL_LIBS) libhttrack.la
|
||||
# This thin JNI wrapper reaches libc only through libhttrack, so the direct
|
||||
# libc edge is dropped from DT_NEEDED (library-not-linked-against-libc). Force
|
||||
# libc to be recorded as a dependency.
|
||||
libhtsjava_la_LDFLAGS = $(AM_LDFLAGS) -version-info $(VERSION_INFO) -Wl,--push-state,--no-as-needed,-lc,--pop-state
|
||||
# libc back as a dependency; configure gates the flag since only a GNU-style
|
||||
# linker accepts it (LIBC_FORCE_LINK is empty on e.g. macOS).
|
||||
libhtsjava_la_LDFLAGS = $(AM_LDFLAGS) -version-info $(VERSION_INFO) $(LIBC_FORCE_LINK)
|
||||
|
||||
EXTRA_DIST = httrack.h webhttrack \
|
||||
coucal/murmurhash3.h.diff \
|
||||
@@ -113,5 +114,12 @@ EXTRA_DIST = httrack.h webhttrack \
|
||||
proxy/proxytrack.h \
|
||||
proxy/store.h \
|
||||
proxy/proxytrack.vcproj \
|
||||
coucal/* \
|
||||
*.dsw *.dsp *.vcproj
|
||||
coucal/LICENSE \
|
||||
coucal/Makefile \
|
||||
coucal/README.md \
|
||||
coucal/sample.c \
|
||||
coucal/tests.c \
|
||||
htsjava.vcproj \
|
||||
httrack.dsp httrack.dsw httrack.vcproj \
|
||||
libhttrack.dsp libhttrack.dsw libhttrack.vcproj \
|
||||
webhttrack.dsp webhttrack.dsw webhttrack.vcproj
|
||||
|
||||
@@ -361,6 +361,7 @@ LD = @LD@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
LDFLAGS_PIE = @LDFLAGS_PIE@
|
||||
LFS_FLAG = @LFS_FLAG@
|
||||
LIBC_FORCE_LINK = @LIBC_FORCE_LINK@
|
||||
LIBOBJS = @LIBOBJS@
|
||||
LIBS = @LIBS@
|
||||
LIBTOOL = @LIBTOOL@
|
||||
@@ -537,8 +538,9 @@ libhtsjava_la_SOURCES = htsjava.c htsjava.h
|
||||
libhtsjava_la_LIBADD = $(THREADS_LIBS) $(DL_LIBS) libhttrack.la
|
||||
# This thin JNI wrapper reaches libc only through libhttrack, so the direct
|
||||
# libc edge is dropped from DT_NEEDED (library-not-linked-against-libc). Force
|
||||
# libc to be recorded as a dependency.
|
||||
libhtsjava_la_LDFLAGS = $(AM_LDFLAGS) -version-info $(VERSION_INFO) -Wl,--push-state,--no-as-needed,-lc,--pop-state
|
||||
# libc back as a dependency; configure gates the flag since only a GNU-style
|
||||
# linker accepts it (LIBC_FORCE_LINK is empty on e.g. macOS).
|
||||
libhtsjava_la_LDFLAGS = $(AM_LDFLAGS) -version-info $(VERSION_INFO) $(LIBC_FORCE_LINK)
|
||||
EXTRA_DIST = httrack.h webhttrack \
|
||||
coucal/murmurhash3.h.diff \
|
||||
coucal/murmurhash3.h.orig \
|
||||
@@ -563,8 +565,15 @@ EXTRA_DIST = httrack.h webhttrack \
|
||||
proxy/proxytrack.h \
|
||||
proxy/store.h \
|
||||
proxy/proxytrack.vcproj \
|
||||
coucal/* \
|
||||
*.dsw *.dsp *.vcproj
|
||||
coucal/LICENSE \
|
||||
coucal/Makefile \
|
||||
coucal/README.md \
|
||||
coucal/sample.c \
|
||||
coucal/tests.c \
|
||||
htsjava.vcproj \
|
||||
httrack.dsp httrack.dsw httrack.vcproj \
|
||||
libhttrack.dsp libhttrack.dsw libhttrack.vcproj \
|
||||
webhttrack.dsp webhttrack.dsw webhttrack.vcproj
|
||||
|
||||
all: all-am
|
||||
|
||||
|
||||
Submodule src/coucal updated: 73ada07555...fadf29bd2a
@@ -41,19 +41,24 @@ Please visit our Website: http://www.httrack.com
|
||||
|
||||
#define _NOT_NULL(a) ( (a!=NULL) ? (a) : "" )
|
||||
|
||||
// COPY OF cmdl_ins in htsmain.c
|
||||
// Insert a command in the argc/argv
|
||||
#define cmdl_ins(token,argc,argv,buff,ptr) \
|
||||
{ \
|
||||
int i; \
|
||||
for(i=argc;i>0;i--)\
|
||||
argv[i]=argv[i-1];\
|
||||
} \
|
||||
argv[0]=(buff+ptr); \
|
||||
strcpybuff(argv[0],token); \
|
||||
ptr += (int) (strlen(argv[0])+1); \
|
||||
// COPY OF cmdl_ins in htscoremain.c
|
||||
/* Bytes left in x_argvblk from offset ptr. The offset can in principle outrun
|
||||
the block (alias/doit.log expansion), so the copy aborts cleanly instead of
|
||||
the subtraction wrapping to a huge unbounded size. */
|
||||
#define cmdl_room(bufsize, ptr) \
|
||||
((ptr) < (size_t) (bufsize) ? (size_t) (bufsize) - (ptr) : 0)
|
||||
// Insert a command in the argc/argv (buff has total capacity bufsize)
|
||||
#define cmdl_ins(token, argc, argv, buff, bufsize, ptr) \
|
||||
{ \
|
||||
int i; \
|
||||
for (i = argc; i > 0; i--) \
|
||||
argv[i] = argv[i - 1]; \
|
||||
} \
|
||||
argv[0] = (buff + ptr); \
|
||||
strlcpybuff(argv[0], token, cmdl_room(bufsize, ptr)); \
|
||||
ptr += (int) (strlen(argv[0]) + 1); \
|
||||
argc++
|
||||
// END OF COPY OF cmdl_ins in htsmain.c
|
||||
// END OF COPY OF cmdl_ins in htscoremain.c
|
||||
|
||||
/*
|
||||
Aliases for command-line and config file definitions
|
||||
@@ -468,7 +473,7 @@ const char *optalias_help(const char *token) {
|
||||
*/
|
||||
/* Note: NOT utf-8 */
|
||||
int optinclude_file(const char *name, int *argc, char **argv, char *x_argvblk,
|
||||
int *x_ptr) {
|
||||
size_t x_argvblk_size, int *x_ptr) {
|
||||
FILE *fp;
|
||||
|
||||
fp = fopen(name, "rb");
|
||||
@@ -542,14 +547,15 @@ int optinclude_file(const char *name, int *argc, char **argv, char *x_argvblk,
|
||||
/* temporary argc: Number of parameters after minus insert_after_argc */
|
||||
insert_after_argc = (*argc) - insert_after;
|
||||
cmdl_ins((tmp_argv[2]), insert_after_argc, (argv + insert_after),
|
||||
x_argvblk, (*x_ptr));
|
||||
x_argvblk, x_argvblk_size, (*x_ptr));
|
||||
*argc = insert_after_argc + insert_after;
|
||||
insert_after++;
|
||||
/* Second one */
|
||||
if (return_argc > 1) {
|
||||
insert_after_argc = (*argc) - insert_after;
|
||||
cmdl_ins((tmp_argv[3]), insert_after_argc,
|
||||
(argv + insert_after), x_argvblk, (*x_ptr));
|
||||
(argv + insert_after), x_argvblk, x_argvblk_size,
|
||||
(*x_ptr));
|
||||
*argc = insert_after_argc + insert_after;
|
||||
insert_after++;
|
||||
}
|
||||
|
||||
@@ -45,7 +45,7 @@ int optalias_find(const char *token);
|
||||
const char *optalias_help(const char *token);
|
||||
int optreal_find(const char *token);
|
||||
int optinclude_file(const char *name, int *argc, char **argv, char *x_argvblk,
|
||||
int *x_ptr);
|
||||
size_t x_argvblk_size, int *x_ptr);
|
||||
const char *optreal_value(int p);
|
||||
const char *optalias_value(int p);
|
||||
const char *opttype_value(int p);
|
||||
|
||||
@@ -3584,8 +3584,9 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
|
||||
back[i].r.is_file = 1;
|
||||
back[i].r.totalsize = back[i].r.size =
|
||||
fsize_utf8(back[i].url_sav);
|
||||
get_httptype(opt, back[i].r.contenttype,
|
||||
back[i].url_sav, 1);
|
||||
get_httptype_sized(opt, back[i].r.contenttype,
|
||||
sizeof(back[i].r.contenttype),
|
||||
back[i].url_sav, 1);
|
||||
hts_log_print(opt, LOG_DEBUG,
|
||||
"Not-modified status without cache guessed: %s%s",
|
||||
back[i].url_adr, back[i].url_fil);
|
||||
|
||||
@@ -939,7 +939,7 @@ static htsblk cache_readex_new(httrackp * opt, cache_back * cache,
|
||||
FILE *const fp = FOPEN(fconv(catbuff, sizeof(catbuff), previous_save), "rb");
|
||||
|
||||
if (fp != NULL) {
|
||||
r.adr = (char *) malloct((int) r.size + 4);
|
||||
r.adr = (char *) malloct((int) r.size + 1);
|
||||
if (r.adr != NULL) {
|
||||
if (r.size > 0
|
||||
&& fread(r.adr, 1, (int) r.size, fp) != r.size) {
|
||||
@@ -948,7 +948,8 @@ static htsblk cache_readex_new(httrackp * opt, cache_back * cache,
|
||||
r.statuscode = STATUSCODE_INVALID;
|
||||
sprintf(r.msg, "Read error in cache disk data: %s",
|
||||
strerror(last_errno));
|
||||
}
|
||||
} else if (r.size >= 0)
|
||||
*(r.adr + r.size) = '\0';
|
||||
} else {
|
||||
r.statuscode = STATUSCODE_INVALID;
|
||||
strcpybuff(r.msg,
|
||||
@@ -965,7 +966,7 @@ static htsblk cache_readex_new(httrackp * opt, cache_back * cache,
|
||||
// Data in cache.
|
||||
else {
|
||||
// lire fichier (d'un coup)
|
||||
r.adr = (char *) malloct((int) r.size + 4);
|
||||
r.adr = (char *) malloct((int) r.size + 1);
|
||||
if (r.adr != NULL) {
|
||||
if (unzReadCurrentFile((unzFile) cache->zipInput, r.adr, (int) r.size) != r.size) { // erreur
|
||||
freet(r.adr);
|
||||
@@ -1245,13 +1246,14 @@ static htsblk cache_readex_old(httrackp * opt, cache_back * cache,
|
||||
FILE *fp = FOPEN(fconv(catbuff, sizeof(catbuff), return_save), "rb");
|
||||
|
||||
if (fp != NULL) {
|
||||
r.adr = (char *) malloct((size_t) r.size + 4);
|
||||
r.adr = (char *) malloct((size_t) r.size + 1);
|
||||
if (r.adr != NULL) {
|
||||
if (r.size > 0
|
||||
&& fread(r.adr, 1, (size_t) r.size, fp) != r.size) {
|
||||
r.statuscode = STATUSCODE_INVALID;
|
||||
strcpybuff(r.msg, "Read error in cache disk data");
|
||||
}
|
||||
} else if (r.size >= 0)
|
||||
*(r.adr + r.size) = '\0';
|
||||
} else {
|
||||
r.statuscode = STATUSCODE_INVALID;
|
||||
strcpybuff(r.msg,
|
||||
@@ -1266,7 +1268,7 @@ static htsblk cache_readex_old(httrackp * opt, cache_back * cache,
|
||||
}
|
||||
} else {
|
||||
// lire fichier (d'un coup)
|
||||
r.adr = (char *) malloct((size_t) r.size + 4);
|
||||
r.adr = (char *) malloct((size_t) r.size + 1);
|
||||
if (r.adr != NULL) {
|
||||
if (fread(r.adr, 1, (size_t) r.size, cache->olddat) != r.size) { // erreur
|
||||
freet(r.adr);
|
||||
@@ -1369,10 +1371,11 @@ int cache_readdata(cache_back * cache, const char *str1, const char *str2,
|
||||
|
||||
cache_rint(cache->olddat, &len);
|
||||
if (len > 0) {
|
||||
char *mem_buff = (char *) malloct(len + 4); /* Plus byte 0 */
|
||||
char *mem_buff = (char *) malloct(len + 1); /* trailing \0 */
|
||||
|
||||
if (mem_buff) {
|
||||
if (fread(mem_buff, 1, len, cache->olddat) == len) { // lire tout (y compris statuscode etc)*/
|
||||
mem_buff[len] = '\0';
|
||||
*inbuff = mem_buff;
|
||||
*inlen = len;
|
||||
return 1;
|
||||
|
||||
@@ -182,6 +182,16 @@ static int check_entry(httrackp *opt, cache_back *cache, const char *adr,
|
||||
fail++;
|
||||
}
|
||||
|
||||
/* The loaded body must be NUL-terminated at [size]: cache_readex's strlen()
|
||||
consumers (htscore.c:1046, htscache.c) rely on it, and a missing
|
||||
terminator is a heap over-read. The buffer is malloc(size + slack), so
|
||||
reading [size] is in bounds. */
|
||||
if (r.adr != NULL && r.adr[r.size] != '\0') {
|
||||
fprintf(stderr, "cache-selftest: %s%s: body not NUL-terminated at [size]\n",
|
||||
adr, fil);
|
||||
fail++;
|
||||
}
|
||||
|
||||
#undef CHECK_STR
|
||||
|
||||
if (r.adr != NULL) {
|
||||
@@ -208,6 +218,107 @@ static void gen_body(char *buf, size_t len, int kind) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Exercise the disk-fallback read path: a record stored with X-In-Cache: 0
|
||||
keeps its body on disk (not in the ZIP), and cache_readex must load it from
|
||||
there. The one-shot crawl tests never re-read such a body into memory, so
|
||||
this path otherwise has no runtime coverage. We store the header with
|
||||
all_in_cache=0 and a non-hypertext content-type (-> X-In-Cache: 0), create
|
||||
the body at the exact fconv()-resolved path the reader uses, then read it
|
||||
back and assert it round-trips and is NUL-terminated. */
|
||||
static int disk_fallback_selftest(httrackp *opt) {
|
||||
int fail = 0;
|
||||
cache_back cache;
|
||||
htsblk r;
|
||||
char catbuff[HTS_URLMAXSIZE * 2];
|
||||
char *path;
|
||||
char *locbuf;
|
||||
FILE *fp;
|
||||
const char *const adr = "example.com";
|
||||
const char *const fil = "/blob.bin";
|
||||
char save[HTS_URLMAXSIZE * 2];
|
||||
/* no embedded NUL: were the read to leave this un-terminated, a later
|
||||
strlen() would run off the end (the bug this guards) */
|
||||
static const char body[] = "BINARY-on-disk-body-0123456789-no-trailing-nul";
|
||||
const size_t body_len = sizeof(body) - 1;
|
||||
|
||||
/* X-Save must start with path_html_utf8 so the reader resolves it verbatim
|
||||
(otherwise it re-roots it as a pre-3.40 relative path); then the body we
|
||||
create at fconv(save) is exactly where cache_readex looks for it. */
|
||||
fconcat(save, sizeof(save), StringBuff(opt->path_html_utf8),
|
||||
"example.com/blob.bin");
|
||||
|
||||
/* write only the header (X-In-Cache: 0); the body stays on disk */
|
||||
selftest_open_for_write(&cache, opt);
|
||||
{
|
||||
htsblk w;
|
||||
char locw[4];
|
||||
char *bodycopy = malloct(body_len);
|
||||
|
||||
hts_init_htsblk(&w);
|
||||
w.statuscode = 200;
|
||||
w.size = (LLint) body_len;
|
||||
strcpybuff(w.msg, "OK");
|
||||
strcpybuff(w.contenttype, "application/octet-stream");
|
||||
locw[0] = '\0';
|
||||
w.location = locw;
|
||||
w.is_write = 0;
|
||||
memcpy(bodycopy, body, body_len);
|
||||
w.adr = bodycopy;
|
||||
cache_add(opt, &cache, &w, adr, fil, save, 0 /* all_in_cache */, NULL);
|
||||
freet(bodycopy);
|
||||
}
|
||||
selftest_close(&cache);
|
||||
|
||||
/* create the on-disk body where the reader will look for it */
|
||||
path = fconv(catbuff, sizeof(catbuff), save);
|
||||
(void) structcheck(path);
|
||||
fp = FOPEN(path, "wb");
|
||||
if (fp == NULL) {
|
||||
fprintf(stderr, "cache-selftest: disk-fallback: cannot create '%s'\n",
|
||||
path);
|
||||
return 1;
|
||||
}
|
||||
if (fwrite(body, 1, body_len, fp) != body_len) {
|
||||
fprintf(stderr, "cache-selftest: disk-fallback: short write to '%s'\n",
|
||||
path);
|
||||
fail++;
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
/* read it back: takes the X-In-Cache: 0 disk-fallback branch */
|
||||
selftest_open_for_read(&cache, opt);
|
||||
locbuf = malloct(HTS_URLMAXSIZE * 2);
|
||||
locbuf[0] = '\0';
|
||||
r = cache_readex(opt, &cache, adr, fil, "", locbuf, NULL, 1);
|
||||
if (r.statuscode != 200) {
|
||||
fprintf(stderr,
|
||||
"cache-selftest: disk-fallback: statuscode %d, expected 200"
|
||||
" (path not taken or read failed)\n",
|
||||
r.statuscode);
|
||||
fail++;
|
||||
}
|
||||
if (r.size != (LLint) body_len) {
|
||||
fprintf(stderr,
|
||||
"cache-selftest: disk-fallback: size " LLintP ", expected %d\n",
|
||||
(LLint) r.size, (int) body_len);
|
||||
fail++;
|
||||
} else if (r.adr == NULL || memcmp(r.adr, body, body_len) != 0) {
|
||||
fprintf(stderr, "cache-selftest: disk-fallback: body mismatch\n");
|
||||
fail++;
|
||||
}
|
||||
/* the loaded body must be NUL-terminated at [size] */
|
||||
if (r.adr != NULL && r.adr[r.size] != '\0') {
|
||||
fprintf(stderr, "cache-selftest: disk-fallback: body not NUL-terminated\n");
|
||||
fail++;
|
||||
}
|
||||
if (r.adr != NULL) {
|
||||
freet(r.adr);
|
||||
}
|
||||
freet(locbuf);
|
||||
selftest_close(&cache);
|
||||
return fail;
|
||||
}
|
||||
|
||||
int cache_selftests(httrackp *opt, const char *dir) {
|
||||
int failures = 0;
|
||||
cache_back cache;
|
||||
@@ -257,6 +368,10 @@ int cache_selftests(httrackp *opt, const char *dir) {
|
||||
strcatbuff(base, "/");
|
||||
}
|
||||
StringCopy(opt->path_log, base);
|
||||
/* the disk-fallback pass resolves on-disk body paths through fconv(), which
|
||||
is rooted at path_html; keep it inside the test directory too */
|
||||
StringCopy(opt->path_html, base);
|
||||
StringCopy(opt->path_html_utf8, base);
|
||||
}
|
||||
opt->cache = 1;
|
||||
|
||||
@@ -366,6 +481,9 @@ int cache_selftests(httrackp *opt, const char *dir) {
|
||||
"", body_updated, strlen(body_updated));
|
||||
selftest_close(&cache);
|
||||
|
||||
/* pass 5: the disk-fallback read path (X-In-Cache: 0, body on disk) */
|
||||
failures += disk_fallback_selftest(opt);
|
||||
|
||||
for (i = 0; i < large_count; i++) {
|
||||
freet(large_body[i]);
|
||||
}
|
||||
|
||||
@@ -633,13 +633,12 @@ int httpmirror(char *url1, httrackp * opt) {
|
||||
// c'est plus propre et plus logique que d'entrer à la main les liens dans la pile
|
||||
// on bénéficie ainsi des vérifications et des tests du robot pour les liens "primaires"
|
||||
primary = (char *) malloct(primary_len);
|
||||
if (primary) {
|
||||
primary[0] = '\0';
|
||||
} else {
|
||||
if (!primary) {
|
||||
printf("PANIC! : Not enough memory [%d]\n", __LINE__);
|
||||
XH_extuninit;
|
||||
return 0;
|
||||
}
|
||||
htsbuff primarybuff = htsbuff_ptr(primary, primary_len);
|
||||
|
||||
while(*a) {
|
||||
int i;
|
||||
@@ -687,11 +686,11 @@ int httpmirror(char *url1, httrackp * opt) {
|
||||
strcatbuff(tempo, "*"); // ajouter un *
|
||||
}
|
||||
}
|
||||
if (type)
|
||||
strcpybuff(filters[filptr], "+");
|
||||
else
|
||||
strcpybuff(filters[filptr], "-");
|
||||
strcatbuff(filters[filptr], tempo);
|
||||
{
|
||||
htsbuff fb = htsbuff_ptr(filters[filptr], HTS_URLMAXSIZE * 2);
|
||||
htsbuff_cpy(&fb, type ? "+" : "-");
|
||||
htsbuff_cat(&fb, tempo);
|
||||
}
|
||||
filptr++;
|
||||
|
||||
/* sanity check */
|
||||
@@ -726,12 +725,10 @@ int httpmirror(char *url1, httrackp * opt) {
|
||||
}
|
||||
url[i++] = '\0';
|
||||
|
||||
//strcatbuff(primary,"<PRIMARY=\"");
|
||||
if (strstr(url, ":/") == NULL)
|
||||
strcatbuff(primary, "http://");
|
||||
strcatbuff(primary, url);
|
||||
//strcatbuff(primary,"\">");
|
||||
strcatbuff(primary, "\n");
|
||||
htsbuff_cat(&primarybuff, "http://");
|
||||
htsbuff_cat(&primarybuff, url);
|
||||
htsbuff_cat(&primarybuff, "\n");
|
||||
}
|
||||
} // while
|
||||
|
||||
@@ -762,7 +759,6 @@ int httpmirror(char *url1, httrackp * opt) {
|
||||
int filelist_ptr = 0;
|
||||
int n = 0;
|
||||
char BIGSTK line[HTS_URLMAXSIZE * 2];
|
||||
char *primary_ptr = primary + strlen(primary);
|
||||
|
||||
while(filelist_ptr < filelist_sz) {
|
||||
int count =
|
||||
@@ -771,13 +767,10 @@ int httpmirror(char *url1, httrackp * opt) {
|
||||
if (count && line[0]) {
|
||||
n++;
|
||||
if (strstr(line, ":/") == NULL) {
|
||||
strcpybuff(primary_ptr, "http://");
|
||||
primary_ptr += strlen(primary_ptr);
|
||||
htsbuff_cat(&primarybuff, "http://");
|
||||
}
|
||||
strcpybuff(primary_ptr, line);
|
||||
primary_ptr += strlen(primary_ptr);
|
||||
strcpybuff(primary_ptr, "\n");
|
||||
primary_ptr += 1;
|
||||
htsbuff_cat(&primarybuff, line);
|
||||
htsbuff_cat(&primarybuff, "\n");
|
||||
}
|
||||
}
|
||||
// fclose(fp);
|
||||
@@ -1741,7 +1734,7 @@ int httpmirror(char *url1, httrackp * opt) {
|
||||
{
|
||||
char buff[256];
|
||||
|
||||
guess_httptype(opt, buff, urlfil());
|
||||
guess_httptype_sized(opt, buff, sizeof(buff), urlfil());
|
||||
if (strcmp(buff, "image/gif") == 0)
|
||||
create_gif_warning = 1;
|
||||
}
|
||||
@@ -2193,16 +2186,19 @@ int httpmirror(char *url1, httrackp * opt) {
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log),
|
||||
"hts-cache/new.lst"), "rb");
|
||||
if (new_lst != NULL && sz != (size_t) -1) {
|
||||
char *adr = (char *) malloct(sz);
|
||||
/* +1 for the NUL below: new.lst is read raw, and the strstr()
|
||||
that follows needs a terminated C string. */
|
||||
char *adr = (char *) malloct(sz + 1);
|
||||
|
||||
if (adr) {
|
||||
if (fread(adr, 1, sz, new_lst) == sz) {
|
||||
adr[sz] = '\0';
|
||||
char line[1100];
|
||||
int purge = 0;
|
||||
|
||||
while(!feof(old_lst)) {
|
||||
linput(old_lst, line, 1000);
|
||||
if (!strstr(adr, line)) { // fichier non trouvé dans le nouveau?
|
||||
if (!strstr(adr, line)) { // not found in the new list?
|
||||
char BIGSTK file[HTS_URLMAXSIZE * 2];
|
||||
|
||||
strcpybuff(file, StringBuff(opt->path_html));
|
||||
@@ -2450,9 +2446,10 @@ void host_ban(httrackp * opt, int ptr,
|
||||
// interdire host
|
||||
assertf((*_FILTERS_PTR) < opt->maxfilter);
|
||||
if (*_FILTERS_PTR < opt->maxfilter) {
|
||||
strcpybuff(_FILTERS[*_FILTERS_PTR], "-");
|
||||
strcatbuff(_FILTERS[*_FILTERS_PTR], host);
|
||||
strcatbuff(_FILTERS[*_FILTERS_PTR], "/*"); // host/ * interdit
|
||||
htsbuff fb = htsbuff_ptr(_FILTERS[*_FILTERS_PTR], HTS_URLMAXSIZE * 2);
|
||||
htsbuff_cpy(&fb, "-");
|
||||
htsbuff_cat(&fb, host);
|
||||
htsbuff_cat(&fb, "/*"); // forbid host/*
|
||||
(*_FILTERS_PTR)++;
|
||||
}
|
||||
// oups
|
||||
@@ -3153,7 +3150,7 @@ static void postprocess_file(httrackp * opt, const char *save, const char *adr,
|
||||
/* CID */
|
||||
make_content_id(adr, fil, cid, sizeof(cid));
|
||||
|
||||
guess_httptype(opt, mimebuff, save);
|
||||
guess_httptype_sized(opt, mimebuff, sizeof(mimebuff), save);
|
||||
fprintf(opt->state.mimefp, "--%s\r\n",
|
||||
StringBuff(opt->state.mimemid));
|
||||
/*if (first)
|
||||
@@ -3515,7 +3512,7 @@ char *next_token(char *p, int flag) {
|
||||
p--;
|
||||
do {
|
||||
p++;
|
||||
if (flag && (*p == '\\')) { // sauter \x ou \"
|
||||
if (flag && (*p == '\\')) { // skip \x or \"
|
||||
if (quote) {
|
||||
char c = '\0';
|
||||
|
||||
@@ -3524,20 +3521,14 @@ char *next_token(char *p, int flag) {
|
||||
else if (*(p + 1) == '"')
|
||||
c = '"';
|
||||
if (c) {
|
||||
char BIGSTK tempo[8192];
|
||||
|
||||
tempo[0] = c;
|
||||
tempo[1] = '\0';
|
||||
strcatbuff(tempo, p + 2);
|
||||
strcpybuff(p, tempo);
|
||||
/* unescape the 2 chars to one, shifting left in place */
|
||||
*p = c;
|
||||
memmove(p + 1, p + 2, strlen(p + 2) + 1);
|
||||
}
|
||||
}
|
||||
} else if (*p == 34) { // guillemets (de fin)
|
||||
char BIGSTK tempo[8192];
|
||||
|
||||
tempo[0] = '\0';
|
||||
strcatbuff(tempo, p + 1);
|
||||
strcpybuff(p, tempo); /* wipe "" */
|
||||
} else if (*p == 34) { // closing quote
|
||||
/* drop the quote, shifting the rest left in place */
|
||||
memmove(p, p + 1, strlen(p + 1) + 1);
|
||||
p--;
|
||||
/* */
|
||||
quote = !quote;
|
||||
@@ -3871,13 +3862,14 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
|
||||
opt->savename_83 = b;
|
||||
if (r != -1 && !forbidden_url) {
|
||||
if (savename()) {
|
||||
if (lienrelatif(tempo, afs.save, savename()) == 0) {
|
||||
if (lienrelatif(tempo, sizeof(tempo), afs.save, savename()) ==
|
||||
0) {
|
||||
hts_log_print(opt, LOG_DEBUG,
|
||||
"(module): relative link at %s build with %s and %s: %s",
|
||||
afs.af.adr, afs.save, savename(), tempo);
|
||||
if (str->localLink
|
||||
&& str->localLinkSize > (int) strlen(tempo) + 1) {
|
||||
strcpybuff(str->localLink, tempo);
|
||||
strlcpybuff(str->localLink, tempo, str->localLinkSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3889,11 +3881,11 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
|
||||
lien);
|
||||
if (str->localLink
|
||||
&& str->localLinkSize > (int) (strlen(afs.af.adr) + strlen(afs.af.fil) + 8)) {
|
||||
str->localLink[0] = '\0';
|
||||
htsbuff lb = htsbuff_ptr(str->localLink, str->localLinkSize);
|
||||
if (!link_has_authority(afs.af.adr))
|
||||
strcpybuff(str->localLink, "http://");
|
||||
strcatbuff(str->localLink, afs.af.adr);
|
||||
strcatbuff(str->localLink, afs.af.fil);
|
||||
htsbuff_cat(&lb, "http://");
|
||||
htsbuff_cat(&lb, afs.af.adr);
|
||||
htsbuff_cat(&lb, afs.af.fil);
|
||||
}
|
||||
r = -1;
|
||||
}
|
||||
|
||||
@@ -69,23 +69,29 @@ Please visit our Website: http://www.httrack.com
|
||||
/* Resolver */
|
||||
extern int IPV6_resolver;
|
||||
|
||||
// Add a command in the argc/argv
|
||||
#define cmdl_add(token,argc,argv,buff,ptr) \
|
||||
argv[argc]=(buff+ptr); \
|
||||
strcpybuff(argv[argc],token); \
|
||||
ptr += (int) (strlen(argv[argc])+2); \
|
||||
/* Remaining room in the argv block; 0 once it is exhausted (alias expansion or
|
||||
doit.log insertion can outrun the +32768 slack), so the copy aborts cleanly
|
||||
instead of the subtraction wrapping to a huge unbounded size. */
|
||||
#define cmdl_room(bufsize, ptr) \
|
||||
((ptr) < (size_t) (bufsize) ? (size_t) (bufsize) - (ptr) : 0)
|
||||
|
||||
// Add a command in the argc/argv (buff has total capacity bufsize)
|
||||
#define cmdl_add(token, argc, argv, buff, bufsize, ptr) \
|
||||
argv[argc] = (buff + ptr); \
|
||||
strlcpybuff(argv[argc], token, cmdl_room(bufsize, ptr)); \
|
||||
ptr += (int) (strlen(argv[argc]) + 2); \
|
||||
argc++
|
||||
|
||||
// Insert a command in the argc/argv
|
||||
#define cmdl_ins(token,argc,argv,buff,ptr) \
|
||||
{ \
|
||||
int i; \
|
||||
for(i=argc;i>0;i--)\
|
||||
argv[i]=argv[i-1];\
|
||||
} \
|
||||
argv[0]=(buff+ptr); \
|
||||
strcpybuff(argv[0],token); \
|
||||
ptr += (int) (strlen(argv[0])+2); \
|
||||
// Insert a command in the argc/argv (buff has total capacity bufsize)
|
||||
#define cmdl_ins(token, argc, argv, buff, bufsize, ptr) \
|
||||
{ \
|
||||
int i; \
|
||||
for (i = argc; i > 0; i--) \
|
||||
argv[i] = argv[i - 1]; \
|
||||
} \
|
||||
argv[0] = (buff + ptr); \
|
||||
strlcpybuff(argv[0], token, cmdl_room(bufsize, ptr)); \
|
||||
ptr += (int) (strlen(argv[0]) + 2); \
|
||||
argc++
|
||||
|
||||
#define htsmain_free() do { \
|
||||
@@ -236,6 +242,245 @@ static void basic_selftests(void) {
|
||||
}
|
||||
freet(slots);
|
||||
}
|
||||
// next_token(): in-place token scanner. Strips surrounding quotes, unescapes
|
||||
// \" and \\ when flag is set, and returns the token terminator (the space, or
|
||||
// NULL at end of string). The unquote/unescape rewrites the string in place
|
||||
// by shifting left, so the result is always shorter -- regression for that
|
||||
// compaction.
|
||||
{
|
||||
char tok[64];
|
||||
|
||||
// plain token: unchanged, returns a pointer AT the separating space (exact
|
||||
// position, not just any space -- a strchr-style impl would land elsewhere
|
||||
// once quotes shift the content)
|
||||
strcpybuff(tok, "abc def");
|
||||
{
|
||||
char *const end = next_token(tok, 0);
|
||||
assertf(end == tok + 3 && *end == ' ' && strcmp(tok, "abc def") == 0);
|
||||
}
|
||||
// surrounding quotes stripped, returns the (post-shift) trailing space
|
||||
strcpybuff(tok, "\"ab\" cd");
|
||||
{
|
||||
char *const end = next_token(tok, 1);
|
||||
assertf(end == tok + 2 && *end == ' ' && strcmp(tok, "ab cd") == 0);
|
||||
}
|
||||
// a space inside quotes does not end the token; end of string returns NULL
|
||||
strcpybuff(tok, "\"a b\"c");
|
||||
{
|
||||
char *const end = next_token(tok, 1);
|
||||
assertf(end == NULL && strcmp(tok, "a bc") == 0);
|
||||
}
|
||||
// \" and \\ are unescaped to literal " and \ in place
|
||||
strcpybuff(tok, "\"a\\\"b\\\\c\"");
|
||||
{
|
||||
char *const end = next_token(tok, 1);
|
||||
assertf(end == NULL && strcmp(tok, "a\"b\\c") == 0);
|
||||
}
|
||||
// unterminated quote: the opening quote is dropped, the rest survives, and
|
||||
// the scan runs to the NUL (returns NULL)
|
||||
strcpybuff(tok, "\"ab");
|
||||
{
|
||||
char *const end = next_token(tok, 1);
|
||||
assertf(end == NULL && strcmp(tok, "ab") == 0);
|
||||
}
|
||||
// trailing lone backslash in a quote: *(p+1) is the NUL, not an escape, so
|
||||
// the backslash is kept intact (and there is no over-read past the NUL)
|
||||
strcpybuff(tok, "\"a\\");
|
||||
{
|
||||
char *const end = next_token(tok, 1);
|
||||
assertf(end == NULL && strcmp(tok, "a\\") == 0);
|
||||
}
|
||||
}
|
||||
// fil_normalized(): canonicalizes a URL path. Query arguments are sorted
|
||||
// alphabetically (by the text after each '?'/'&') and the query is rebuilt
|
||||
// through a bounded builder; outside the query, "//" collapses to "/".
|
||||
// Regression for that builder.
|
||||
{
|
||||
char norm[256];
|
||||
|
||||
assertf(strcmp(fil_normalized("/p?b=2&a=1&c=3", norm), "/p?a=1&b=2&c=3") ==
|
||||
0);
|
||||
assertf(strcmp(fil_normalized("/a//b", norm), "/a/b") == 0);
|
||||
// "//" is collapsed only before the query; inside the query it is kept
|
||||
assertf(strcmp(fil_normalized("/a//b?x=c//d", norm), "/a/b?x=c//d") == 0);
|
||||
}
|
||||
// give_mimext(): mime type -> file extension, bounded into the caller buffer.
|
||||
// Returns 1 when an extension was written, 0 otherwise.
|
||||
{
|
||||
char ext[16];
|
||||
|
||||
assertf(give_mimext(ext, sizeof(ext), "image/gif") == 1);
|
||||
assertf(strcmp(ext, "gif") == 0);
|
||||
assertf(give_mimext(ext, sizeof(ext), "text/html") == 1);
|
||||
assertf(strcmp(ext, "html") == 0);
|
||||
assertf(give_mimext(ext, sizeof(ext), "no/such-mime-type") == 0);
|
||||
assertf(ext[0] == '\0');
|
||||
}
|
||||
// convtolower(): lower-cases into the caller buffer (bounded by its size).
|
||||
{
|
||||
char low[64];
|
||||
|
||||
assertf(strcmp(convtolower(low, sizeof(low), "ABC/Def.HTML"),
|
||||
"abc/def.html") == 0);
|
||||
}
|
||||
// cut_path(): splits a path into directory (with trailing '/') and basename,
|
||||
// each bounded by its buffer size.
|
||||
{
|
||||
char path[256];
|
||||
char pname[256];
|
||||
|
||||
{
|
||||
char full[] = "/dir/sub/file.html";
|
||||
|
||||
cut_path(full, path, sizeof(path), pname, sizeof(pname));
|
||||
assertf(strcmp(path, "/dir/sub/") == 0);
|
||||
assertf(strcmp(pname, "file.html") == 0);
|
||||
}
|
||||
{ // a trailing slash is trimmed before the split
|
||||
char full[] = "/dir/sub/";
|
||||
|
||||
cut_path(full, path, sizeof(path), pname, sizeof(pname));
|
||||
assertf(strcmp(path, "/dir/") == 0);
|
||||
assertf(strcmp(pname, "sub") == 0);
|
||||
}
|
||||
{ // a path of length <= 1 yields empty results
|
||||
char full[] = "/";
|
||||
|
||||
cut_path(full, path, sizeof(path), pname, sizeof(pname));
|
||||
assertf(path[0] == '\0' && pname[0] == '\0');
|
||||
}
|
||||
}
|
||||
// get_httptype_sized(): a long MIME type (Office OOXML reaches 73 chars) is
|
||||
// written whole into a contenttype-sized buffer; returns 1 on a match, 0 when
|
||||
// flag==0 and nothing matched. Regression for the old contenttype[64]
|
||||
// overflow.
|
||||
{
|
||||
httrackp *opt = hts_create_opt();
|
||||
htsblk r; // write into the real struct field, not a stand-in
|
||||
|
||||
assertf(opt != NULL);
|
||||
// a long MIME (Office OOXML reaches 73 chars) must fit htsblk.contenttype
|
||||
// whole: a [64] field would make this bounded copy abort.
|
||||
assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype),
|
||||
"deck.pptx", 0) == 1);
|
||||
assertf(strcmp(r.contenttype,
|
||||
"application/vnd.openxmlformats-officedocument."
|
||||
"presentationml.presentation") == 0);
|
||||
assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype),
|
||||
"x.gif", 0) == 1);
|
||||
assertf(strcmp(r.contenttype, "image/gif") == 0);
|
||||
// no extension and flag==0: nothing written, returns 0
|
||||
assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype),
|
||||
"noextfile", 0) == 0);
|
||||
assertf(r.contenttype[0] == '\0');
|
||||
// no extension and flag==1: octet-stream fallback, returns 1
|
||||
assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype),
|
||||
"noextfile", 1) == 1);
|
||||
assertf(strcmp(r.contenttype, "application/octet-stream") == 0);
|
||||
// a user --assume rule with an empty value matches but writes nothing:
|
||||
// get_userhttptype returns 1 with the buffer empty, so get_httptype_sized
|
||||
// must still report 0 (callers test the return like the old
|
||||
// strnotempty(s)).
|
||||
StringCopy(opt->mimedefs, "\ncgi=\n");
|
||||
assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype),
|
||||
"/x.cgi", 0) == 0);
|
||||
assertf(r.contenttype[0] == '\0');
|
||||
StringCopy(opt->mimedefs, "\ncgi=text/html\n");
|
||||
assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype),
|
||||
"/x.cgi", 0) == 1);
|
||||
assertf(strcmp(r.contenttype, "text/html") == 0);
|
||||
hts_free_opt(opt);
|
||||
}
|
||||
// adr_normalized_sized(): bounded host normalization (passthrough when
|
||||
// already normal).
|
||||
{
|
||||
char n[HTS_URLMAXSIZE];
|
||||
|
||||
assertf(strcmp(adr_normalized_sized("example.com", n, sizeof(n)),
|
||||
"example.com") == 0);
|
||||
}
|
||||
// standard_name(): builds "<name><md5?>.<ext>" into a bounded buffer. The md5
|
||||
// is appended (4 chars) only when the URL has a query string (see url_md5),
|
||||
// so test both; pin the structure (name + ext, lengths), not the md5 chars.
|
||||
{
|
||||
char b[HTS_URLMAXSIZE * 2];
|
||||
const char *nom = "index.html"; // name part
|
||||
const char *dot = nom + 5; // points at ".html"
|
||||
size_t len;
|
||||
|
||||
// no query -> no md5: "index" + ".html"
|
||||
standard_name(b, sizeof(b), dot, nom, "http://example.com/index.html", 0);
|
||||
assertf(strcmp(b, "index.html") == 0);
|
||||
// query -> 4 md5 chars between name and ext: "index" + md5(4) + ".html"
|
||||
standard_name(b, sizeof(b), dot, nom, "http://example.com/index.html?v=1",
|
||||
0);
|
||||
len = strlen(b);
|
||||
assertf(len == 5 + 4 + 5);
|
||||
assertf(strncmp(b, "index", 5) == 0);
|
||||
assertf(strcmp(b + len - 5, ".html") == 0);
|
||||
// short names: name kept (<=8), the extension is clamped to 3 -> ".htm"
|
||||
standard_name(b, sizeof(b), dot, nom, "http://example.com/index.html?v=1",
|
||||
1);
|
||||
len = strlen(b);
|
||||
assertf(len == 5 + 4 + 4);
|
||||
assertf(strcmp(b + len - 4, ".htm") == 0);
|
||||
// short names with a >8-char name: the name is clamped to 8 ("indexpag")
|
||||
{
|
||||
const char *lnom = "indexpage.html";
|
||||
const char *ldot = lnom + 9; // points at ".html"
|
||||
|
||||
standard_name(b, sizeof(b), ldot, lnom,
|
||||
"http://example.com/indexpage.html?v=1", 1);
|
||||
len = strlen(b);
|
||||
assertf(len == 8 + 4 + 4);
|
||||
assertf(strncmp(b, "indexpag", 8) == 0);
|
||||
assertf(strcmp(b + len - 4, ".htm") == 0);
|
||||
}
|
||||
}
|
||||
// longfile_to_83(): single-name 8-3 (mode 1) / ISO9660 (mode 2) conversion;
|
||||
// uppercases, clamps the name (8 / 31) and the extension (3). It rewrites
|
||||
// 'save' in place, so pass a mutable array.
|
||||
{
|
||||
char n83[256];
|
||||
|
||||
{
|
||||
char save[] = "longfilename.html";
|
||||
|
||||
longfile_to_83(1, n83, sizeof(n83), save); // 8-3: name->8, ext->3
|
||||
assertf(strcmp(n83, "LONGFILE.HTM") == 0);
|
||||
}
|
||||
{
|
||||
char save[] = "longfilename.html";
|
||||
|
||||
longfile_to_83(2, n83, sizeof(n83), save); // ISO9660: name->31, ext->3
|
||||
assertf(strcmp(n83, "LONGFILENAME.HTM") == 0);
|
||||
}
|
||||
{ // sanitization: leading '.'->'_', interior dots
|
||||
char save[] = ".a b.c.d e"; // collapse to '_', spaces/specials -> '_'
|
||||
// (only the last dot stays as the separator)
|
||||
longfile_to_83(1, n83, sizeof(n83), save);
|
||||
assertf(strcmp(n83, "_A_B_C.D_E") == 0);
|
||||
}
|
||||
}
|
||||
// long_to_83(): per-segment 8-3 conversion of a whole path.
|
||||
{
|
||||
char n83[HTS_URLMAXSIZE * 2];
|
||||
char save[] = "dir/longfilename.html";
|
||||
|
||||
long_to_83(1, n83, sizeof(n83), save);
|
||||
assertf(strcmp(n83, "DIR/LONGFILE.HTM") == 0);
|
||||
}
|
||||
// lienrelatif(): relative path from the directory of curr_fil to link.
|
||||
{
|
||||
char s[HTS_URLMAXSIZE * 2];
|
||||
|
||||
// same directory -> just the basename
|
||||
assertf(lienrelatif(s, sizeof(s), "dir/page.html", "dir/index.html") == 0);
|
||||
assertf(strcmp(s, "page.html") == 0);
|
||||
// link one level up -> a "../" prefix
|
||||
assertf(lienrelatif(s, sizeof(s), "a.html", "dir/index.html") == 0);
|
||||
assertf(strcmp(s, "../a.html") == 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* Self-tests for the htssafe.h bounded string ops (driven by httrack -#8).
|
||||
@@ -353,6 +598,7 @@ HTSEXT_API int hts_main2(int argc, char **argv, httrackp * opt) {
|
||||
static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
char **x_argv = NULL; // Patch pour argv et argc: en cas de récupération de ligne de commande
|
||||
char *x_argvblk = NULL; // (reprise ou update)
|
||||
size_t x_argvblk_size = 0; // total capacity of x_argvblk
|
||||
int x_ptr = 0; // offset
|
||||
|
||||
//
|
||||
@@ -430,7 +676,8 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
*a = ' ';
|
||||
/* equivalent to "empty parameter" */
|
||||
if ((strcmp(argv[na], HTS_NOPARAM) == 0) || (strcmp(argv[na], HTS_NOPARAM2) == 0)) // (none)
|
||||
strcpybuff(argv[na], "\"\"");
|
||||
/* replacing "(none)"/"\"(none)\"" with "\"\"" always fits in place */
|
||||
strlcpybuff(argv[na], "\"\"", strlen(argv[na]) + 1);
|
||||
if (strncmp(argv[na], "-&", 2) == 0)
|
||||
argv[na][1] = '%';
|
||||
}
|
||||
@@ -452,6 +699,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
htsmain_free();
|
||||
return -1;
|
||||
}
|
||||
x_argvblk_size = (size_t) (current_size + 32768);
|
||||
x_argvblk[0] = '\0';
|
||||
x_ptr = 0;
|
||||
|
||||
@@ -473,7 +721,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
//
|
||||
argv_url = 0; /* pour comptage */
|
||||
//
|
||||
cmdl_add(argv[0], x_argc, x_argv, x_argvblk, x_ptr);
|
||||
cmdl_add(argv[0], x_argc, x_argv, x_argvblk, x_argvblk_size, x_ptr);
|
||||
na = 1; /* commencer après nom_prg */
|
||||
while(na < argc) {
|
||||
int result = 1;
|
||||
@@ -494,9 +742,10 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
}
|
||||
|
||||
/* Copier */
|
||||
cmdl_add(tmp_argv[0], x_argc, x_argv, x_argvblk, x_ptr);
|
||||
cmdl_add(tmp_argv[0], x_argc, x_argv, x_argvblk, x_argvblk_size, x_ptr);
|
||||
if (tmp_argc > 1) {
|
||||
cmdl_add(tmp_argv[1], x_argc, x_argv, x_argvblk, x_ptr);
|
||||
cmdl_add(tmp_argv[1], x_argc, x_argv, x_argvblk, x_argvblk_size,
|
||||
x_ptr);
|
||||
}
|
||||
|
||||
/* Compter URLs et détecter -i,-q.. */
|
||||
@@ -568,7 +817,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
char BIGSTK tempo[HTS_CDLMAXSIZE];
|
||||
|
||||
strcpybuff(tempo, argv[na] + 1);
|
||||
if (tempo[strlen(tempo) - 1] != '"') {
|
||||
if (tempo[0] == '\0' || tempo[strlen(tempo) - 1] != '"') {
|
||||
char BIGSTK s[HTS_CDLMAXSIZE];
|
||||
|
||||
sprintf(s, "Missing quote in %s", argv[na]);
|
||||
@@ -577,7 +826,9 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
return -1;
|
||||
}
|
||||
tempo[strlen(tempo) - 1] = '\0';
|
||||
strcpybuff(argv[na], tempo);
|
||||
/* tempo is argv[na] minus its surrounding quotes, so it fits in place
|
||||
*/
|
||||
strlcpybuff(argv[na], tempo, strlen(argv[na]) + 1);
|
||||
}
|
||||
|
||||
if (cmdl_opt(argv[na])) { // option
|
||||
@@ -678,18 +929,19 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log),
|
||||
"hts-cache/doit.log"))) || (argv_url > 0)) {
|
||||
if (!optinclude_file
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log), HTS_HTTRACKRC),
|
||||
&argc, argv, x_argvblk, &x_ptr))
|
||||
if (!optinclude_file(HTS_HTTRACKRC, &argc, argv, x_argvblk, &x_ptr)) {
|
||||
if (!optinclude_file
|
||||
(fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
hts_gethome(), "/" HTS_HTTRACKRC),
|
||||
&argc, argv, x_argvblk, &x_ptr)) {
|
||||
if (!optinclude_file(
|
||||
fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_log), HTS_HTTRACKRC),
|
||||
&argc, argv, x_argvblk, x_argvblk_size, &x_ptr))
|
||||
if (!optinclude_file(HTS_HTTRACKRC, &argc, argv, x_argvblk,
|
||||
x_argvblk_size, &x_ptr)) {
|
||||
if (!optinclude_file(
|
||||
fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
hts_gethome(), "/" HTS_HTTRACKRC),
|
||||
&argc, argv, x_argvblk, x_argvblk_size, &x_ptr)) {
|
||||
#ifdef HTS_HTTRACKCNF
|
||||
optinclude_file(HTS_HTTRACKCNF, &argc, argv, x_argvblk, &x_ptr);
|
||||
optinclude_file(HTS_HTTRACKCNF, &argc, argv, x_argvblk,
|
||||
x_argvblk_size, &x_ptr);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -742,7 +994,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
if (strnotempty(lastp)) {
|
||||
insert_after_argc = argc - insert_after;
|
||||
cmdl_ins(lastp, insert_after_argc, (argv + insert_after), x_argvblk,
|
||||
x_ptr);
|
||||
x_argvblk_size, x_ptr);
|
||||
argc = insert_after_argc + insert_after;
|
||||
insert_after++;
|
||||
}
|
||||
@@ -862,7 +1114,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
if (argv[i][0] == '-') {
|
||||
if (argv[i][1] == '-') { // --xxx
|
||||
if ((strfield2(argv[i] + 2, "clean")) || (strfield2(argv[i] + 2, "tide"))) { // nettoyer
|
||||
strcpybuff(argv[i] + 1, "");
|
||||
argv[i][1] = '\0';
|
||||
if (fexist
|
||||
(fconcat
|
||||
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), StringBuff(opt->path_log), "hts-log.txt")))
|
||||
@@ -971,7 +1223,8 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
//
|
||||
} else if (strfield2(argv[i] + 2, "catchurl")) { // capture d'URL via proxy temporaire!
|
||||
argv_url = 1; // forcer a passer les parametres
|
||||
strcpybuff(argv[i] + 1, "#P");
|
||||
/* argv[i] is "--catchurl"; "#P" fits after its first char */
|
||||
strlcpybuff(argv[i] + 1, "#P", strlen(argv[i] + 1) + 1);
|
||||
//
|
||||
} else if (strfield2(argv[i] + 2, "updatehttrack")) {
|
||||
#ifdef _WIN32
|
||||
@@ -1299,7 +1552,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
char BIGSTK tempo[HTS_CDLMAXSIZE + 256];
|
||||
|
||||
strcpybuff(tempo, argv[na] + 1);
|
||||
if (tempo[strlen(tempo) - 1] != '"') {
|
||||
if (tempo[0] == '\0' || tempo[strlen(tempo) - 1] != '"') {
|
||||
char s[HTS_CDLMAXSIZE + 256];
|
||||
|
||||
sprintf(s, "Missing quote in %s", argv[na]);
|
||||
@@ -1308,7 +1561,9 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
return -1;
|
||||
}
|
||||
tempo[strlen(tempo) - 1] = '\0';
|
||||
strcpybuff(argv[na], tempo);
|
||||
/* tempo is argv[na] minus its surrounding quotes, so it fits in place
|
||||
*/
|
||||
strlcpybuff(argv[na], tempo, strlen(argv[na]) + 1);
|
||||
}
|
||||
|
||||
if (cmdl_opt(argv[na])) { // option
|
||||
@@ -2549,15 +2804,12 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
// initialiser mimedefs
|
||||
//get_userhttptype(opt,1,opt->mimedefs,NULL);
|
||||
// check
|
||||
mime[0] = '\0';
|
||||
get_httptype(opt, mime, argv[na + 1], 0);
|
||||
if (mime[0] != '\0') {
|
||||
if (get_httptype_sized(opt, mime, sizeof(mime), argv[na + 1],
|
||||
0)) {
|
||||
char ext[256];
|
||||
|
||||
printf("%s is '%s'\n", argv[na + 1], mime);
|
||||
ext[0] = '\0';
|
||||
give_mimext(ext, mime);
|
||||
if (ext[0]) {
|
||||
if (give_mimext(ext, sizeof(ext), mime)) {
|
||||
printf("and its local type is '.%s'\n", ext);
|
||||
}
|
||||
} else {
|
||||
@@ -2970,7 +3222,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
if (urlSize < HTS_URLMAXSIZE) {
|
||||
ensureUrlCapacity(url, url_sz, capa);
|
||||
if (strnotempty(url))
|
||||
strcatbuff(url, " "); // espace de séparation
|
||||
strlcatbuff(url, " ", url_sz); // separator space
|
||||
append_escape_spc_url(unescape_http_unharm(catbuff, sizeof(catbuff), argv[na], 1), url, url_sz);
|
||||
}
|
||||
} // if argv=- etc.
|
||||
|
||||
@@ -145,8 +145,13 @@ int hts_unescapeEntitiesWithCharset(const char *src, char *dest, const size_t ma
|
||||
if (!hex) {
|
||||
if (src[i] >= '0' && src[i] <= '9') {
|
||||
const int h = src[i] - '0';
|
||||
uc *= 10;
|
||||
uc += h;
|
||||
/* Guard before multiplying: a codepoint past the Unicode max
|
||||
(0x10FFFF) is invalid anyway, so stop rather than overflow uc. */
|
||||
if (uc > (0x10FFFF - h) / 10) {
|
||||
ampStart = (size_t) -1;
|
||||
} else {
|
||||
uc = uc * 10 + h;
|
||||
}
|
||||
} else {
|
||||
/* abandon */
|
||||
ampStart = (size_t) -1;
|
||||
@@ -156,8 +161,11 @@ int hts_unescapeEntitiesWithCharset(const char *src, char *dest, const size_t ma
|
||||
else {
|
||||
const int h = get_hex_value(src[i]);
|
||||
if (h != -1) {
|
||||
uc *= 16;
|
||||
uc += h;
|
||||
if (uc > (0x10FFFF - h) / 16) {
|
||||
ampStart = (size_t) -1;
|
||||
} else {
|
||||
uc = uc * 16 + h;
|
||||
}
|
||||
} else {
|
||||
/* abandon */
|
||||
ampStart = (size_t) -1;
|
||||
|
||||
@@ -197,10 +197,13 @@ Please visit our Website: http://www.httrack.com
|
||||
|
||||
#endif
|
||||
|
||||
/* Taille max d'une URL */
|
||||
/* Max URL length */
|
||||
#define HTS_URLMAXSIZE 1024
|
||||
/* Taille max ligne de commande (>=HTS_URLMAXSIZE*2) */
|
||||
/* Max command-line length (>=HTS_URLMAXSIZE*2) */
|
||||
#define HTS_CDLMAXSIZE 1024
|
||||
/* MIME-type buffer contract (htsblk.contenttype/charset/contentencoding); holds
|
||||
the longest registered MIME type, the Office OOXML ones reaching 73 chars */
|
||||
#define HTS_MIMETYPE_SIZE 128
|
||||
|
||||
/* Copyright (C) 1998 Xavier Roche and other contributors */
|
||||
#define HTTRACK_AFF_AUTHORS "[XR&CO'2014]"
|
||||
@@ -250,6 +253,22 @@ Please visit our Website: http://www.httrack.com
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Mark a function deprecated, with a message pointing at the replacement.
|
||||
* Placed before the declaration so both the GCC/Clang attribute and the MSVC
|
||||
* __declspec sit in a position both accept. Degrades to nothing elsewhere.
|
||||
*/
|
||||
#if defined(__GNUC__) && \
|
||||
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
|
||||
#define HTS_DEPRECATED(msg) __attribute__((deprecated(msg)))
|
||||
#elif defined(__GNUC__)
|
||||
#define HTS_DEPRECATED(msg) __attribute__((deprecated))
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
|
||||
#define HTS_DEPRECATED(msg) __declspec(deprecated(msg))
|
||||
#else
|
||||
#define HTS_DEPRECATED(msg)
|
||||
#endif
|
||||
|
||||
#ifndef HTS_LONGLONG
|
||||
#ifdef HTS_NO_64_BIT
|
||||
#define HTS_LONGLONG 0
|
||||
|
||||
@@ -76,7 +76,7 @@ static coucal_key key_duphandler(void *arg, coucal_key_const name) {
|
||||
/* Key sav hashes are using case-insensitive version */
|
||||
static coucal_hashkeys key_sav_hashes(void *arg, coucal_key_const key) {
|
||||
hash_struct *const hash = (hash_struct*) arg;
|
||||
convtolower(hash->catbuff, (const char*) key);
|
||||
convtolower(hash->catbuff, sizeof(hash->catbuff), (const char *) key);
|
||||
return coucal_hash_string(hash->catbuff);
|
||||
}
|
||||
|
||||
|
||||
@@ -334,7 +334,7 @@ void index_finish(const char *indexpath, int mode) {
|
||||
if (fp_tmpproject) {
|
||||
tab = (char **) malloct(sizeof(char *) * (hts_primindex_size + 2));
|
||||
if (tab) {
|
||||
blk = malloct(size + 4);
|
||||
blk = malloct(size + 1);
|
||||
if (blk) {
|
||||
fseek(fp_tmpproject, 0, SEEK_SET);
|
||||
if ((INTsys) fread(blk, 1, size, fp_tmpproject) == size) {
|
||||
@@ -343,6 +343,7 @@ void index_finish(const char *indexpath, int mode) {
|
||||
int i;
|
||||
FILE *fp;
|
||||
|
||||
blk[size] = '\0';
|
||||
while((b = strchr(a, '\n')) && (index < hts_primindex_size)) {
|
||||
tab[index++] = a;
|
||||
*b = '\0';
|
||||
|
||||
@@ -472,9 +472,8 @@ static int tris(httrackp * opt, char *buffer) {
|
||||
{
|
||||
char type[256];
|
||||
|
||||
type[0] = '\0';
|
||||
get_httptype(opt, type, buffer, 0);
|
||||
if (strnotempty(type)) // type reconnu!
|
||||
if (get_httptype_sized(opt, type, sizeof(type), buffer,
|
||||
0)) // recognized type
|
||||
return 1;
|
||||
// ajout RX 05/2001
|
||||
else if (is_dyntype(get_ext(catbuff, sizeof(catbuff), buffer))) // asp,cgi...
|
||||
|
||||
127
src/htslib.c
127
src/htslib.c
@@ -754,7 +754,8 @@ T_SOC http_xfopen(httrackp * opt, int mode, int treat, int waitconnect,
|
||||
if (soc != INVALID_SOCKET) {
|
||||
retour->statuscode = HTTP_OK; // OK
|
||||
strcpybuff(retour->msg, "OK");
|
||||
guess_httptype(opt, retour->contenttype, fil);
|
||||
guess_httptype_sized(opt, retour->contenttype,
|
||||
sizeof(retour->contenttype), fil);
|
||||
} else if (strnotempty(retour->msg) == 0)
|
||||
strcpybuff(retour->msg, "Unable to open local file");
|
||||
return soc; // renvoyer
|
||||
@@ -1530,8 +1531,9 @@ void treathead(t_cookie * cookie, const char *adr, const char *fil, htsblk * ret
|
||||
if (retour->location) {
|
||||
while(is_realspace(*(rcvd + p)))
|
||||
p++; // sauter espaces
|
||||
if ((int) strlen(rcvd + p) < HTS_URLMAXSIZE) // pas trop long?
|
||||
strcpybuff(retour->location, rcvd + p);
|
||||
if ((int) strlen(rcvd + p) < HTS_URLMAXSIZE) // not too long?
|
||||
/* location aliases location_buffer[HTS_URLMAXSIZE * 2] */
|
||||
strlcpybuff(retour->location, rcvd + p, HTS_URLMAXSIZE * 2);
|
||||
else // erreur.. ignorer
|
||||
retour->location[0] = '\0';
|
||||
}
|
||||
@@ -3444,16 +3446,17 @@ HTSEXT_API char *fil_normalized(const char *source, char *dest) {
|
||||
/* Replace query by sorted query */
|
||||
copyBuff = malloct(qLen + 1);
|
||||
assertf(copyBuff != NULL);
|
||||
copyBuff[0] = '\0';
|
||||
for(i = 0; i < ampargs; i++) {
|
||||
if (i == 0)
|
||||
strcatbuff(copyBuff, "?");
|
||||
else
|
||||
strcatbuff(copyBuff, "&");
|
||||
strcatbuff(copyBuff, amps[i] + 1);
|
||||
{
|
||||
htsbuff cb = htsbuff_ptr(copyBuff, qLen + 1);
|
||||
|
||||
for (i = 0; i < ampargs; i++) {
|
||||
htsbuff_cat(&cb, i == 0 ? "?" : "&");
|
||||
htsbuff_cat(&cb, amps[i] + 1);
|
||||
}
|
||||
assertf(cb.len == qLen);
|
||||
}
|
||||
assertf(strlen(copyBuff) == qLen);
|
||||
strcpybuff(query, copyBuff);
|
||||
/* query points into dest where the original qLen-byte query was */
|
||||
strlcpybuff(query, copyBuff, qLen + 1);
|
||||
|
||||
/* Cleanup */
|
||||
freet(amps);
|
||||
@@ -3464,12 +3467,19 @@ HTSEXT_API char *fil_normalized(const char *source, char *dest) {
|
||||
}
|
||||
|
||||
#define endwith(a) ( (len >= (sizeof(a)-1)) ? ( strncmp(dest, a+len-(sizeof(a)-1), sizeof(a)-1) == 0 ) : 0 );
|
||||
HTSEXT_API char *adr_normalized(const char *source, char *dest) {
|
||||
HTSEXT_API char *adr_normalized_sized(const char *source, char *dest,
|
||||
size_t destsize) {
|
||||
/* not yet too aggressive (no com<->net<->org checkings) */
|
||||
strcpybuff(dest, jump_normalized_const(source));
|
||||
strlcpybuff(dest, jump_normalized_const(source), destsize);
|
||||
return dest;
|
||||
}
|
||||
|
||||
// deprecated variant; kept for ABI compatibility. Bounds to the implicit
|
||||
// contract the old callers relied on (an HTS_URLMAXSIZE*2 URL buffer).
|
||||
HTSEXT_API char *adr_normalized(const char *source, char *dest) {
|
||||
return adr_normalized_sized(source, dest, HTS_URLMAXSIZE * 2);
|
||||
}
|
||||
|
||||
#undef endwith
|
||||
|
||||
// find port (:80) or NULL if not found
|
||||
@@ -3894,9 +3904,9 @@ HTSEXT_API size_t escape_for_html_print_full(const char *const s, char *const de
|
||||
|
||||
#undef ADD_CHAR
|
||||
|
||||
// conversion minuscules, avec buffer
|
||||
char *convtolower(char *catbuff, const char *a) {
|
||||
strcpybuff(catbuff, a);
|
||||
// lower-case conversion into caller buffer (capacity catbuffsize)
|
||||
char *convtolower(char *catbuff, size_t catbuffsize, const char *a) {
|
||||
strlcpybuff(catbuff, a, catbuffsize);
|
||||
hts_lowcase(catbuff); // lower case
|
||||
return catbuff;
|
||||
}
|
||||
@@ -3919,22 +3929,34 @@ void hts_replace(char *s, char from, char to) {
|
||||
}
|
||||
}
|
||||
|
||||
// deviner type d'un fichier local..
|
||||
// ex: fil="toto.gif" -> s="image/gif"
|
||||
void guess_httptype(httrackp * opt, char *s, const char *fil) {
|
||||
get_httptype(opt, s, fil, 1);
|
||||
// guess a local file's mime type (e.g. fil="toto.gif" -> s="image/gif")
|
||||
// returns 1 if a type was written to s, 0 otherwise
|
||||
int guess_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
||||
const char *fil) {
|
||||
return get_httptype_sized(opt, s, ssize, fil, 1);
|
||||
}
|
||||
|
||||
// idem
|
||||
// flag: 1 si toujours renvoyer un type
|
||||
HTSEXT_API void get_httptype(httrackp * opt, char *s, const char *fil, int flag) {
|
||||
// userdef overrides get_httptype
|
||||
// deprecated variant; kept for ABI compatibility. Bounds to the implicit
|
||||
// contract the old callers relied on (a contenttype-sized buffer).
|
||||
void guess_httptype(httrackp * opt, char *s, const char *fil) {
|
||||
(void) get_httptype_sized(opt, s, HTS_MIMETYPE_SIZE, fil, 1);
|
||||
}
|
||||
|
||||
// write the mime type for fil into s (capacity ssize)
|
||||
// flag: 1 to always return a type (the "application/..." / octet-stream
|
||||
// fallback) returns 1 if a type was written to s, 0 otherwise
|
||||
HTSEXT_API int get_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
||||
const char *fil, int flag) {
|
||||
// userdef overrides get_httptype (a rule with an empty value, e.g. "--assume
|
||||
// cgi=", matches but writes nothing: report it as "no type" like the old
|
||||
// code, whose callers tested strnotempty(s))
|
||||
if (get_userhttptype(opt, s, fil)) {
|
||||
return;
|
||||
return s[0] != '\0';
|
||||
}
|
||||
// regular tests
|
||||
if (ishtml(opt, fil) == 1) {
|
||||
strcpybuff(s, "text/html");
|
||||
strlcpybuff(s, "text/html", ssize);
|
||||
return 1;
|
||||
} else {
|
||||
/* Check html -> text/html */
|
||||
const char *a = fil + strlen(fil) - 1;
|
||||
@@ -3947,21 +3969,33 @@ HTSEXT_API void get_httptype(httrackp * opt, char *s, const char *fil, int flag)
|
||||
a++;
|
||||
while(strnotempty(hts_mime[j][1])) {
|
||||
if (strfield2(hts_mime[j][1], a)) {
|
||||
if (hts_mime[j][0][0] != '*') { // Une correspondance existe
|
||||
strcpybuff(s, hts_mime[j][0]);
|
||||
return;
|
||||
if (hts_mime[j][0][0] != '*') { // a match exists
|
||||
strlcpybuff(s, hts_mime[j][0], ssize);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
j++;
|
||||
}
|
||||
|
||||
if (flag)
|
||||
sprintf(s, "application/%s", a);
|
||||
if (flag) {
|
||||
snprintf(s, ssize, "application/%s", a);
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
if (flag)
|
||||
strcpybuff(s, "application/octet-stream");
|
||||
if (flag) {
|
||||
strlcpybuff(s, "application/octet-stream", ssize);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// deprecated variant; kept for ABI compatibility. Bounds to the implicit
|
||||
// contract the old callers relied on (a contenttype-sized buffer).
|
||||
HTSEXT_API void get_httptype(httrackp *opt, char *s, const char *fil,
|
||||
int flag) {
|
||||
(void) get_httptype_sized(opt, s, HTS_MIMETYPE_SIZE, fil, flag);
|
||||
}
|
||||
|
||||
// get type of fil (php)
|
||||
@@ -4071,17 +4105,17 @@ int get_userhttptype(httrackp * opt, char *s, const char *fil) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// renvoyer extesion d'un type mime..
|
||||
// ex: "image/gif" -> gif
|
||||
void give_mimext(char *s, const char *st) {
|
||||
// give the file extension for a mime type (e.g. "image/gif" -> "gif")
|
||||
// returns 1 if an extension was found (and written to s), 0 otherwise
|
||||
int give_mimext(char *s, size_t ssize, const char *st) {
|
||||
int ok = 0;
|
||||
int j = 0;
|
||||
|
||||
s[0] = '\0';
|
||||
while((!ok) && (strnotempty(hts_mime[j][1]))) {
|
||||
if (strfield2(hts_mime[j][0], st)) {
|
||||
if (hts_mime[j][1][0] != '*') { // Une correspondance existe
|
||||
strcpybuff(s, hts_mime[j][1]);
|
||||
if (hts_mime[j][1][0] != '*') { // a match exists
|
||||
strlcpybuff(s, hts_mime[j][1], ssize);
|
||||
ok = 1;
|
||||
}
|
||||
}
|
||||
@@ -4102,12 +4136,13 @@ void give_mimext(char *s, const char *st) {
|
||||
if (a) {
|
||||
if ((int) strlen(a) >= 1) {
|
||||
if ((int) strlen(a) <= 4) {
|
||||
strcpybuff(s, a);
|
||||
strlcpybuff(s, a, ssize);
|
||||
ok = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
// extension connue?..
|
||||
@@ -4205,9 +4240,8 @@ int may_bogus_multiple(httrackp * opt, const char *mime, const char *filename) {
|
||||
if (strfield2(hts_mime_bogus_multiple[j], mime)) { /* found mime type in suspicious list */
|
||||
char ext[64];
|
||||
|
||||
ext[0] = '\0';
|
||||
give_mimext(ext, mime);
|
||||
if (ext[0] != 0) { /* we have an extension for that */
|
||||
if (give_mimext(ext, sizeof(ext),
|
||||
mime)) { /* we have an extension for that */
|
||||
const size_t ext_size = strlen(ext);
|
||||
const char *file = strrchr(filename, '/'); /* fetch terminal filename */
|
||||
|
||||
@@ -4930,7 +4964,8 @@ void hts_freeall(void) {
|
||||
|
||||
// cut path and project name
|
||||
// patch also initial path
|
||||
void cut_path(char *fullpath, char *path, char *pname) {
|
||||
void cut_path(char *fullpath, char *path, size_t path_size, char *pname,
|
||||
size_t pname_size) {
|
||||
path[0] = pname[0] = '\0';
|
||||
if (strnotempty(fullpath)) {
|
||||
if ((fullpath[strlen(fullpath) - 1] == '/')
|
||||
@@ -4946,8 +4981,8 @@ void cut_path(char *fullpath, char *path, char *pname) {
|
||||
a--;
|
||||
if (*a == '/')
|
||||
a++;
|
||||
strcpybuff(pname, a);
|
||||
strncatbuff(path, fullpath, (int) (a - fullpath));
|
||||
strlcpybuff(pname, a, pname_size);
|
||||
strlncatbuff(path, fullpath, path_size, (size_t) (a - fullpath));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
16
src/htslib.h
16
src/htslib.h
@@ -252,7 +252,7 @@ int ishtml_ext(const char *a);
|
||||
int ishttperror(int err);
|
||||
|
||||
int get_userhttptype(httrackp * opt, char *s, const char *fil);
|
||||
void give_mimext(char *s, const char *st);
|
||||
int give_mimext(char *s, size_t ssize, const char *st);
|
||||
|
||||
int may_bogus_multiple(httrackp * opt, const char *mime, const char *filename);
|
||||
int may_unknown2(httrackp * opt, const char *mime, const char *filename);
|
||||
@@ -264,7 +264,7 @@ void code64(unsigned char *a, int size_a, unsigned char *b, int crlf);
|
||||
|
||||
#define copychar(catbuff,a) concat(catbuff,(a),NULL)
|
||||
|
||||
char *convtolower(char *catbuff, const char *a);
|
||||
char *convtolower(char *catbuff, size_t catbuffsize, const char *a);
|
||||
void hts_lowcase(char *s);
|
||||
void hts_replace(char *s, char from, char to);
|
||||
int multipleStringMatch(const char *s, const char *match);
|
||||
@@ -276,7 +276,8 @@ void fprintfio(FILE * fp, const char *buff, const char *prefix);
|
||||
int sig_ignore_flag(int setflag); // flag ignore
|
||||
#endif
|
||||
|
||||
void cut_path(char *fullpath, char *path, char *pname);
|
||||
void cut_path(char *fullpath, char *path, size_t path_size, char *pname,
|
||||
size_t pname_size);
|
||||
int fexist(const char *s);
|
||||
int fexist_utf8(const char *s);
|
||||
|
||||
@@ -499,7 +500,8 @@ HTS_STATIC int is_hypertext_mime(httrackp * opt, const char *mime,
|
||||
char guessed[256];
|
||||
|
||||
guessed[0] = '\0';
|
||||
guess_httptype(opt, guessed, file);
|
||||
if (!guess_httptype_sized(opt, guessed, sizeof(guessed), file))
|
||||
return 0;
|
||||
return is_hypertext_mime__(guessed);
|
||||
}
|
||||
return 0;
|
||||
@@ -514,7 +516,8 @@ HTS_STATIC int may_be_hypertext_mime(httrackp * opt, const char *mime,
|
||||
char guessed[256];
|
||||
|
||||
guessed[0] = '\0';
|
||||
guess_httptype(opt, guessed, file);
|
||||
if (!guess_httptype_sized(opt, guessed, sizeof(guessed), file))
|
||||
return 0;
|
||||
return may_be_hypertext_mime__(guessed);
|
||||
}
|
||||
return 0;
|
||||
@@ -529,7 +532,8 @@ HTS_STATIC int compare_mime(httrackp * opt, const char *mime, const char *file,
|
||||
char guessed[256];
|
||||
|
||||
guessed[0] = '\0';
|
||||
guess_httptype(opt, guessed, file);
|
||||
if (!guess_httptype_sized(opt, guessed, sizeof(guessed), file))
|
||||
return 0;
|
||||
return strfield2(guessed, reference);
|
||||
}
|
||||
return 0;
|
||||
|
||||
@@ -51,12 +51,13 @@ Please visit our Website: http://www.httrack.com
|
||||
url_savename_addstr(afs->save, buff);\
|
||||
}
|
||||
|
||||
#define ADD_STANDARD_NAME(shortname) \
|
||||
{ /* ajout nom */\
|
||||
char BIGSTK buff[HTS_URLMAXSIZE*2];\
|
||||
standard_name(buff,dot_pos,nom_pos,fil_complete,(shortname));\
|
||||
url_savename_addstr(afs->save, buff);\
|
||||
}
|
||||
#define ADD_STANDARD_NAME(shortname) \
|
||||
{ /* add name */ \
|
||||
char BIGSTK buff[HTS_URLMAXSIZE * 2]; \
|
||||
standard_name(buff, sizeof(buff), dot_pos, nom_pos, fil_complete, \
|
||||
(shortname)); \
|
||||
url_savename_addstr(afs->save, buff); \
|
||||
}
|
||||
|
||||
/* Avoid stupid DOS system folders/file such as 'nul' */
|
||||
/* Based on linux/fs/umsdos/mangle.c */
|
||||
@@ -200,7 +201,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
// foo.com/bar//foobar -> foo.com/bar/foobar
|
||||
if (opt->urlhack) {
|
||||
// copy of adr (without protocol), used for lookups (see urlhack)
|
||||
normadr = adr_normalized(adr, normadr_);
|
||||
normadr = adr_normalized_sized(adr, normadr_, sizeof(normadr_));
|
||||
normfil = fil_normalized(fil_complete, normfil_);
|
||||
} else {
|
||||
if (link_has_authority(adr_complete)) { // https or other protocols : in "http/" subfolder
|
||||
@@ -344,8 +345,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
mime[0] = ext[0] = '\0';
|
||||
get_userhttptype(opt, mime, fil);
|
||||
if (strnotempty(mime)) {
|
||||
give_mimext(ext, mime);
|
||||
if (strnotempty(ext)) {
|
||||
if (give_mimext(ext, sizeof(ext), mime)) {
|
||||
ext_chg = 1;
|
||||
}
|
||||
}
|
||||
@@ -378,8 +378,8 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
ext_chg = 2; /* change filename */
|
||||
strcpybuff(ext, r.cdispo);
|
||||
} else if (!may_unknown2(opt, r.contenttype, fil)) { // on peut patcher à priori?
|
||||
give_mimext(s, r.contenttype); // obtenir extension
|
||||
if (strnotempty(s) > 0) { // on a reconnu l'extension
|
||||
if (give_mimext(s, sizeof(s),
|
||||
r.contenttype)) { // recognized extension
|
||||
ext_chg = 1;
|
||||
strcpybuff(ext, s);
|
||||
}
|
||||
@@ -403,8 +403,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
mime[0] = ext[0] = '\0';
|
||||
get_userhttptype(opt, mime, fil);
|
||||
if (strnotempty(mime)) {
|
||||
give_mimext(ext, mime);
|
||||
if (strnotempty(ext)) {
|
||||
if (give_mimext(ext, sizeof(ext), mime)) {
|
||||
ext_chg = 1;
|
||||
}
|
||||
}
|
||||
@@ -420,9 +419,9 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
strcpybuff(ext, headers->r.cdispo);
|
||||
} else if (!may_unknown2(opt, headers->r.contenttype, headers->url_fil)) { // on peut patcher à priori? (pas interdit ou pas de type)
|
||||
char s[16];
|
||||
s[0] = '\0';
|
||||
give_mimext(s, headers->r.contenttype); // obtenir extension
|
||||
if (strnotempty(s) > 0) { // on a reconnu l'extension
|
||||
if (give_mimext(
|
||||
s, sizeof(s),
|
||||
headers->r.contenttype)) { // recognized extension
|
||||
ext_chg = 1;
|
||||
strcpybuff(ext, s);
|
||||
}
|
||||
@@ -431,13 +430,14 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
else if (mime_type != NULL) {
|
||||
ext[0] = '\0';
|
||||
if (*mime_type) {
|
||||
give_mimext(ext, mime_type);
|
||||
give_mimext(ext, sizeof(ext), mime_type);
|
||||
}
|
||||
if (strnotempty(ext)) {
|
||||
char mime_from_file[128];
|
||||
|
||||
mime_from_file[0] = 0;
|
||||
get_httptype(opt, mime_from_file, fil, 1);
|
||||
get_httptype_sized(opt, mime_from_file, sizeof(mime_from_file),
|
||||
fil, 1);
|
||||
if (!strnotempty(mime_from_file) || strcasecmp(mime_type, mime_from_file) != 0) { /* different mime for this type */
|
||||
/* type change not forbidden (or no extension at all) */
|
||||
if (!may_unknown2(opt, mime_type, fil)) {
|
||||
@@ -646,8 +646,9 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
ext_chg = 2; /* change filename */
|
||||
strcpybuff(ext, back[b].r.cdispo);
|
||||
} else if (!may_unknown2(opt, back[b].r.contenttype, back[b].url_fil)) { // on peut patcher à priori? (pas interdit ou pas de type)
|
||||
give_mimext(s, back[b].r.contenttype); // obtenir extension
|
||||
if (strnotempty(s) > 0) { // on a reconnu l'extension
|
||||
if (give_mimext(
|
||||
s, sizeof(s),
|
||||
back[b].r.contenttype)) { // recognized extension
|
||||
ext_chg = 1;
|
||||
strcpybuff(ext, s);
|
||||
}
|
||||
@@ -924,7 +925,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
|
||||
pth[0] = n83[0] = '\0';
|
||||
strncatbuff(pth, fil, (int) (nom_pos - fil) - 1);
|
||||
long_to_83(opt->savename_83, n83, pth);
|
||||
long_to_83(opt->savename_83, n83, sizeof(n83), pth);
|
||||
htsbuff_cat(&sb, n83);
|
||||
}
|
||||
}
|
||||
@@ -1306,7 +1307,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (opt->savename_83) {
|
||||
char BIGSTK n83[HTS_URLMAXSIZE * 2];
|
||||
|
||||
long_to_83(opt->savename_83, n83, afs->save);
|
||||
long_to_83(opt->savename_83, n83, sizeof(n83), afs->save);
|
||||
strcpybuff(afs->save, n83);
|
||||
}
|
||||
// enforce stricter ISO9660 compliance (bug reported by Steffo Carlsson)
|
||||
@@ -1377,7 +1378,9 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (lastDot == NULL) {
|
||||
strcatbuff(afs->save, "." DELAYED_EXT);
|
||||
} else if (!IS_DELAYED_EXT(afs->save)) {
|
||||
strcatbuff(lastDot, "." DELAYED_EXT);
|
||||
/* lastDot points within afs->save; bound by the remaining capacity */
|
||||
strlcatbuff(lastDot, "." DELAYED_EXT,
|
||||
sizeof(afs->save) - (size_t) (lastDot - afs->save));
|
||||
}
|
||||
}
|
||||
// enforce 260-character path limit before inserting destination path
|
||||
@@ -1582,41 +1585,41 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* nom avec md5 urilisé partout */
|
||||
void standard_name(char *b, const char *dot_pos, const char *nom_pos, const char *fil,
|
||||
int short_ver) {
|
||||
/* md5-based name used everywhere; builds into b (capacity bsize) */
|
||||
void standard_name(char *b, size_t bsize, const char *dot_pos,
|
||||
const char *nom_pos, const char *fil, int short_ver) {
|
||||
char md5[32 + 2];
|
||||
htsbuff bb = htsbuff_ptr(b, bsize);
|
||||
|
||||
b[0] = '\0';
|
||||
/* Nom */
|
||||
/* Name */
|
||||
if (dot_pos) {
|
||||
if (!short_ver) // Noms longs
|
||||
strncatbuff(b, nom_pos, (dot_pos - nom_pos));
|
||||
if (!short_ver) // long names
|
||||
htsbuff_catn(&bb, nom_pos, (size_t) (dot_pos - nom_pos));
|
||||
else
|
||||
strncatbuff(b, nom_pos, min(dot_pos - nom_pos, 8));
|
||||
htsbuff_catn(&bb, nom_pos, (size_t) min(dot_pos - nom_pos, 8));
|
||||
} else {
|
||||
if (!short_ver) // Noms longs
|
||||
strcatbuff(b, nom_pos);
|
||||
if (!short_ver) // long names
|
||||
htsbuff_cat(&bb, nom_pos);
|
||||
else
|
||||
strncatbuff(b, nom_pos, 8);
|
||||
htsbuff_catn(&bb, nom_pos, 8);
|
||||
}
|
||||
/* MD5 - 16 bits */
|
||||
strncatbuff(b, url_md5(md5, fil), 4);
|
||||
htsbuff_catn(&bb, url_md5(md5, fil), 4);
|
||||
/* Ext */
|
||||
if (dot_pos) {
|
||||
strcatbuff(b, ".");
|
||||
if (!short_ver) // Noms longs
|
||||
strcatbuff(b, dot_pos + 1);
|
||||
htsbuff_catc(&bb, '.');
|
||||
if (!short_ver) // long names
|
||||
htsbuff_cat(&bb, dot_pos + 1);
|
||||
else
|
||||
strncatbuff(b, dot_pos + 1, 3);
|
||||
htsbuff_catn(&bb, dot_pos + 1, 3);
|
||||
}
|
||||
// Allow extensionless
|
||||
#ifdef DO_NOT_ALLOW_EXTENSIONLESS
|
||||
else {
|
||||
if (!short_ver) // Noms longs
|
||||
strcatbuff(b, DEFAULT_EXT);
|
||||
if (!short_ver) // long names
|
||||
htsbuff_cat(&bb, DEFAULT_EXT);
|
||||
else
|
||||
strcatbuff(b, DEFAULT_EXT_SHORT);
|
||||
htsbuff_cat(&bb, DEFAULT_EXT_SHORT);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -96,8 +96,8 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
httrackp * opt, struct_back * sback, cache_back * cache,
|
||||
hash_struct * hash, int ptr, int numero_passe,
|
||||
const lien_back * headers);
|
||||
void standard_name(char *b, const char *dot_pos, const char *nom_pos,
|
||||
const char *fil_complete,
|
||||
void standard_name(char *b, size_t bsize, const char *dot_pos,
|
||||
const char *nom_pos, const char *fil_complete,
|
||||
int short_ver);
|
||||
void url_savename_addstr(char *d, const char *s);
|
||||
char *url_md5(char *digest_buffer, const char *fil_complete);
|
||||
|
||||
@@ -499,9 +499,9 @@ struct htsblk {
|
||||
FILE *out; // écriture directe sur disque (si is_write=1)
|
||||
LLint size; // taille fichier
|
||||
char msg[80]; // message éventuel si échec ("\0"=non précisé)
|
||||
char contenttype[64]; // content-type ("text/html" par exemple)
|
||||
char charset[64]; // charset ("iso-8859-1" par exemple)
|
||||
char contentencoding[64]; // content-encoding ("gzip" par exemple)
|
||||
char contenttype[HTS_MIMETYPE_SIZE]; // content-type (e.g. "text/html")
|
||||
char charset[HTS_MIMETYPE_SIZE]; // charset (e.g. "iso-8859-1")
|
||||
char contentencoding[HTS_MIMETYPE_SIZE]; // content-encoding (e.g. "gzip")
|
||||
char *location; // on copie dedans éventuellement la véritable 'location'
|
||||
LLint totalsize; // taille totale à télécharger (-1=inconnue)
|
||||
short int is_file; // ce n'est pas une socket mais un descripteur de fichier si 1
|
||||
|
||||
@@ -610,20 +610,22 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
b = strchr(a, '<'); // prochain tag
|
||||
}
|
||||
}
|
||||
if (lienrelatif
|
||||
(tempo, heap(ptr)->sav,
|
||||
concat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_html_utf8),
|
||||
"index.html")) == 0) {
|
||||
if (lienrelatif(tempo, sizeof(tempo), heap(ptr)->sav,
|
||||
concat(OPT_GET_BUFF(opt),
|
||||
OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_html_utf8),
|
||||
"index.html")) == 0) {
|
||||
detect_title = 1; // ok détecté pour cette page!
|
||||
makeindex_links++; // un de plus
|
||||
strcpybuff(makeindex_firstlink, tempo);
|
||||
strlcpybuff(makeindex_firstlink, tempo,
|
||||
HTS_URLMAXSIZE * 2);
|
||||
//
|
||||
|
||||
/* Hack */
|
||||
if (opt->mimehtml) {
|
||||
strcpybuff(makeindex_firstlink,
|
||||
"cid:primary/primary");
|
||||
strlcpybuff(makeindex_firstlink,
|
||||
"cid:primary/primary",
|
||||
HTS_URLMAXSIZE * 2);
|
||||
}
|
||||
|
||||
if ((b == a) || (a == NULL) || (b == NULL)) { // pas de titre
|
||||
@@ -1649,8 +1651,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
}
|
||||
// Prendre si extension reconnue
|
||||
if (!url_ok) {
|
||||
get_httptype(opt, type, tempo, 0);
|
||||
if (strnotempty(type)) // type reconnu!
|
||||
if (get_httptype_sized(opt, type,
|
||||
sizeof(type), tempo,
|
||||
0)) // recognized type
|
||||
url_ok = 1;
|
||||
else if (is_dyntype(get_ext(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), tempo))) // reconnu php,cgi,asp..
|
||||
url_ok = 1;
|
||||
@@ -2318,12 +2321,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
switch (p_type) {
|
||||
case 2:{
|
||||
//if (*lien!='/') strcatbuff(base,"/");
|
||||
strcpybuff(base, lien);
|
||||
strlcpybuff(base, lien, HTS_URLMAXSIZE * 2);
|
||||
}
|
||||
break; // base
|
||||
case -2:{
|
||||
//if (*lien!='/') strcatbuff(codebase,"/");
|
||||
strcpybuff(codebase, lien);
|
||||
strlcpybuff(codebase, lien, HTS_URLMAXSIZE * 2);
|
||||
}
|
||||
break; // base
|
||||
}
|
||||
@@ -2719,7 +2722,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
|
||||
strcpybuff(save, StringBuff(opt->path_html_utf8));
|
||||
strcatbuff(save, cat_name);
|
||||
if (lienrelatif(tempo, save, relativesavename()) == 0) {
|
||||
if (lienrelatif(tempo, sizeof(tempo), save,
|
||||
relativesavename()) == 0) {
|
||||
/* Never escape high-chars (we don't know the encoding!!) */
|
||||
inplace_escape_uri_utf(tempo, sizeof(tempo)); // escape with %xx
|
||||
//if (!no_esc_utf)
|
||||
@@ -2949,7 +2953,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
tempo[0] = '\0';
|
||||
// calculer le lien relatif
|
||||
|
||||
if (lienrelatif(tempo, afs.save, relativesavename()) == 0) {
|
||||
if (lienrelatif(tempo, sizeof(tempo), afs.save,
|
||||
relativesavename()) == 0) {
|
||||
if (!in_media) { // In media (such as real audio): don't patch
|
||||
/* Never escape high-chars (we don't know the encoding!!) */
|
||||
inplace_escape_uri_utf(tempo, sizeof(tempo));
|
||||
@@ -3416,8 +3421,17 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
if (RUN_CALLBACK4(opt, postprocess, &cAddr, &cSize, urladr(), urlfil()) == 1) {
|
||||
hts_log_print(opt, LOG_DEBUG,
|
||||
"engine: postprocess-html: callback modified data, applying %d bytes", cSize);
|
||||
TypedArraySize(output_buffer) = 0;
|
||||
TypedArrayAppend(output_buffer, cAddr, cSize);
|
||||
/* The callback either edits output_buffer in place (cAddr
|
||||
unchanged) or hands back its own buffer (cAddr changed). Only
|
||||
the latter needs a copy: re-appending output_buffer onto itself
|
||||
would read freed memory, as the append's realloc can relocate
|
||||
the block out from under cAddr. */
|
||||
if (cAddr != TypedArrayElts(output_buffer)) {
|
||||
TypedArraySize(output_buffer) = 0;
|
||||
TypedArrayAppend(output_buffer, cAddr, cSize);
|
||||
} else {
|
||||
TypedArraySize(output_buffer) = (size_t) cSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3498,9 +3512,9 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
|
||||
char BIGSTK pn_adr[HTS_URLMAXSIZE * 2], pn_fil[HTS_URLMAXSIZE * 2];
|
||||
|
||||
n_adr[0] = n_fil[0] = '\0';
|
||||
(void) adr_normalized(moved->adr, n_adr);
|
||||
(void) adr_normalized_sized(moved->adr, n_adr, sizeof(n_adr));
|
||||
(void) fil_normalized(moved->fil, n_fil);
|
||||
(void) adr_normalized(urladr(), pn_adr);
|
||||
(void) adr_normalized_sized(urladr(), pn_adr, sizeof(pn_adr));
|
||||
(void) fil_normalized(urlfil(), pn_fil);
|
||||
if (strcasecmp(n_adr, pn_adr) == 0
|
||||
&& strcasecmp(n_fil, pn_fil) == 0) {
|
||||
@@ -4385,7 +4399,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
|
||||
memcpy(r, &(back[b].r), sizeof(htsblk));
|
||||
r->location = stre->loc_; // ne PAS copier location!! adresse, pas de buffer
|
||||
if (back[b].r.location)
|
||||
strcpybuff(r->location, back[b].r.location);
|
||||
strlcpybuff(r->location, back[b].r.location, HTS_URLMAXSIZE * 2);
|
||||
back[b].r.adr = NULL; // ne pas faire de desalloc ensuite
|
||||
|
||||
// libérer emplacement backing
|
||||
|
||||
@@ -237,6 +237,15 @@ static char *strncatbuff_ptr_(char *dest, const char *src, size_t n) {
|
||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), (size_t) -1, \
|
||||
"overflow while appending '" #B "' to '"#A"'", __FILE__, __LINE__)
|
||||
|
||||
/**
|
||||
* Append at most "N" characters of "B" to "A", "A" having a maximum capacity
|
||||
* of "S".
|
||||
*/
|
||||
#define strlncatbuff(A, B, S, N) \
|
||||
strncat_safe_(A, S, B, HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), \
|
||||
N, "overflow while appending '" #B "' to '" #A "'", __FILE__, \
|
||||
__LINE__)
|
||||
|
||||
/**
|
||||
* Copy characters of "B" to "A", "A" having a maximum capacity of "S".
|
||||
*/
|
||||
|
||||
@@ -274,7 +274,9 @@ int ident_url_relatif(const char *lien, const char *origin_adr,
|
||||
char *const idna = hts_convertStringUTF8ToIDNA(a, strlen(a));
|
||||
if (idna != NULL) {
|
||||
if (strlen(idna) < HTS_URLMAXSIZE) {
|
||||
strcpybuff(a, idna);
|
||||
/* a points within adrfil->adr; bound by the remaining capacity */
|
||||
strlcpybuff(a, idna,
|
||||
sizeof(adrfil->adr) - (size_t) (a - adrfil->adr));
|
||||
}
|
||||
free(idna);
|
||||
}
|
||||
@@ -286,7 +288,7 @@ int ident_url_relatif(const char *lien, const char *origin_adr,
|
||||
|
||||
// créer dans s, à partir du chemin courant curr_fil, le lien vers link (absolu)
|
||||
// un ident_url_relatif a déja été fait avant, pour que link ne soit pas un chemin relatif
|
||||
int lienrelatif(char *s, const char *link, const char *curr_fil) {
|
||||
int lienrelatif(char *s, size_t ssize, const char *link, const char *curr_fil) {
|
||||
char BIGSTK _curr[HTS_URLMAXSIZE * 2];
|
||||
char BIGSTK newcurr_fil[HTS_URLMAXSIZE * 2], newlink[HTS_URLMAXSIZE * 2];
|
||||
char *curr;
|
||||
@@ -314,9 +316,9 @@ int lienrelatif(char *s, const char *link, const char *curr_fil) {
|
||||
}
|
||||
}
|
||||
|
||||
// recopier uniquement le chemin courant
|
||||
// copy only the current path
|
||||
curr = _curr;
|
||||
strcpybuff(curr, curr_fil);
|
||||
strlcpybuff(curr, curr_fil, sizeof(_curr));
|
||||
if ((a = strchr(curr, '?')) == NULL) // couper au ? (params)
|
||||
a = curr + strlen(curr) - 1; // pas de params: aller à la fin
|
||||
while((*a != '/') && (a > curr))
|
||||
@@ -359,14 +361,14 @@ int lienrelatif(char *s, const char *link, const char *curr_fil) {
|
||||
a++;
|
||||
while(*a)
|
||||
if (*(a++) == '/')
|
||||
strcatbuff(s, "../");
|
||||
strlcatbuff(s, "../", ssize);
|
||||
//if (strlen(s)==0) strcatbuff(s,"/");
|
||||
|
||||
if (slash)
|
||||
strcatbuff(s, "/"); // garder absolu!!
|
||||
strlcatbuff(s, "/", ssize); // keep it absolute!
|
||||
|
||||
// on est dans le répertoire de départ, copier
|
||||
strcatbuff(s, link + ((*link == '/') ? 1 : 0));
|
||||
// we are in the starting directory, copy
|
||||
strlcatbuff(s, link + ((*link == '/') ? 1 : 0), ssize);
|
||||
|
||||
/* Security check */
|
||||
if (strlen(s) >= HTS_URLMAXSIZE)
|
||||
@@ -410,7 +412,7 @@ int link_has_authorization(const char *lien) {
|
||||
}
|
||||
|
||||
// conversion chemin de fichier/dossier vers 8-3 ou ISO9660
|
||||
void long_to_83(int mode, char *n83, char *save) {
|
||||
void long_to_83(int mode, char *n83, size_t n83size, char *save) {
|
||||
n83[0] = '\0';
|
||||
|
||||
while(*save) {
|
||||
@@ -425,19 +427,19 @@ void long_to_83(int mode, char *n83, char *save) {
|
||||
}
|
||||
fnl[j] = '\0';
|
||||
// conversion
|
||||
longfile_to_83(mode, fn83, fnl);
|
||||
strcatbuff(n83, fn83);
|
||||
longfile_to_83(mode, fn83, sizeof(fn83), fnl);
|
||||
strlcatbuff(n83, fn83, n83size);
|
||||
|
||||
save += i;
|
||||
if (*save == '/') {
|
||||
strcatbuff(n83, "/");
|
||||
strlcatbuff(n83, "/", n83size);
|
||||
save++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// conversion nom de fichier/dossier isolé vers 8-3 ou ISO9660
|
||||
void longfile_to_83(int mode, char *n83, char *save) {
|
||||
void longfile_to_83(int mode, char *n83, size_t n83size, char *save) {
|
||||
int j = 0, max = 0;
|
||||
int i = 0;
|
||||
char nom[256];
|
||||
@@ -526,10 +528,10 @@ void longfile_to_83(int mode, char *n83, char *save) {
|
||||
}
|
||||
// corriger vers 8-3
|
||||
n83[0] = '\0';
|
||||
strncatbuff(n83, nom, max);
|
||||
strlncatbuff(n83, nom, n83size, max);
|
||||
if (strnotempty(ext)) {
|
||||
strcatbuff(n83, ".");
|
||||
strncatbuff(n83, ext, 3);
|
||||
strlcatbuff(n83, ".", n83size);
|
||||
strlncatbuff(n83, ext, n83size, 3);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -61,11 +61,11 @@ typedef struct lien_adrfilsave lien_adrfilsave;
|
||||
int ident_url_relatif(const char *lien, const char *origin_adr,
|
||||
const char *origin_fil,
|
||||
lien_adrfil* const adrfil);
|
||||
int lienrelatif(char *s, const char *link, const char *curr);
|
||||
int lienrelatif(char *s, size_t ssize, const char *link, const char *curr);
|
||||
int link_has_authority(const char *lien);
|
||||
int link_has_authorization(const char *lien);
|
||||
void long_to_83(int mode, char *n83, char *save);
|
||||
void longfile_to_83(int mode, char *n83, char *save);
|
||||
void long_to_83(int mode, char *n83, size_t n83size, char *save);
|
||||
void longfile_to_83(int mode, char *n83, size_t n83size, char *save);
|
||||
HTS_INLINE int __rech_tageq(const char *adr, const char *s);
|
||||
HTS_INLINE int __rech_tageqbegdigits(const char *adr, const char *s);
|
||||
HTS_INLINE int rech_tageq_all(const char *adr, const char *s);
|
||||
|
||||
@@ -223,8 +223,9 @@ static int hts_acceptlink_(httrackp * opt, int ptr,
|
||||
// note (up/down): on calcule à partir du lien primaire, ET du lien précédent.
|
||||
// ex: si on descend 2 fois on peut remonter 1 fois
|
||||
|
||||
if (lienrelatif(tempo, fil, heap(heap(ptr)->premier)->fil) == 0) {
|
||||
if (lienrelatif(tempo2, fil, heap(ptr)->fil) == 0) {
|
||||
if (lienrelatif(tempo, sizeof(tempo), fil,
|
||||
heap(heap(ptr)->premier)->fil) == 0) {
|
||||
if (lienrelatif(tempo2, sizeof(tempo2), fil, heap(ptr)->fil) == 0) {
|
||||
hts_log_print(opt, LOG_DEBUG,
|
||||
"build relative links to test: %s %s (with %s and %s)",
|
||||
tempo, tempo2, heap(heap(ptr)->premier)->fil,
|
||||
@@ -326,8 +327,9 @@ static int hts_acceptlink_(httrackp * opt, int ptr,
|
||||
char BIGSTK tempo[HTS_URLMAXSIZE * 2];
|
||||
char BIGSTK tempo2[HTS_URLMAXSIZE * 2];
|
||||
|
||||
if (lienrelatif(tempo, fil, heap(heap(ptr)->premier)->fil) == 0) {
|
||||
if (lienrelatif(tempo2, fil, heap(ptr)->fil) == 0) {
|
||||
if (lienrelatif(tempo, sizeof(tempo), fil,
|
||||
heap(heap(ptr)->premier)->fil) == 0) {
|
||||
if (lienrelatif(tempo2, sizeof(tempo2), fil, heap(ptr)->fil) == 0) {
|
||||
} else {
|
||||
hts_log_print(opt, LOG_ERROR,
|
||||
"Error building relative link %s and %s", fil,
|
||||
@@ -336,7 +338,6 @@ static int hts_acceptlink_(httrackp * opt, int ptr,
|
||||
} else {
|
||||
hts_log_print(opt, LOG_ERROR, "Error building relative link %s and %s",
|
||||
fil, heap(heap(ptr)->premier)->fil);
|
||||
|
||||
}
|
||||
} // fin tester interdiction de monter
|
||||
|
||||
|
||||
@@ -207,6 +207,9 @@ HTSEXT_API const char *jump_normalized_const(const char *);
|
||||
HTSEXT_API char *jump_toport(char *);
|
||||
HTSEXT_API const char *jump_toport_const(const char *);
|
||||
HTSEXT_API char *fil_normalized(const char *source, char *dest);
|
||||
HTSEXT_API char *adr_normalized_sized(const char *source, char *dest,
|
||||
size_t destsize);
|
||||
HTS_DEPRECATED("use adr_normalized_sized(source, dest, destsize)")
|
||||
HTSEXT_API char *adr_normalized(const char *source, char *dest);
|
||||
HTSEXT_API const char *hts_rootdir(char *file);
|
||||
|
||||
@@ -244,6 +247,9 @@ HTSEXT_API char *unescape_http_unharm(char *const catbuff, const size_t size, co
|
||||
HTSEXT_API char *antislash_unescaped(char *catbuff, const char *s);
|
||||
|
||||
HTSEXT_API void escape_remove_control(char *s);
|
||||
HTSEXT_API int get_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
||||
const char *fil, int flag);
|
||||
HTS_DEPRECATED("use get_httptype_sized(opt, s, ssize, fil, flag)")
|
||||
HTSEXT_API void get_httptype(httrackp * opt, char *s, const char *fil,
|
||||
int flag);
|
||||
HTSEXT_API int is_knowntype(httrackp * opt, const char *fil);
|
||||
@@ -251,6 +257,9 @@ HTSEXT_API int is_userknowntype(httrackp * opt, const char *fil);
|
||||
HTSEXT_API int is_dyntype(const char *fil);
|
||||
HTSEXT_API const char *get_ext(char *catbuff, size_t size, const char *fil);
|
||||
HTSEXT_API int may_unknown(httrackp * opt, const char *st);
|
||||
HTSEXT_API int guess_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
||||
const char *fil);
|
||||
HTS_DEPRECATED("use guess_httptype_sized(opt, s, ssize, fil)")
|
||||
HTSEXT_API void guess_httptype(httrackp * opt, char *s, const char *fil);
|
||||
|
||||
/* Ugly string tools */
|
||||
|
||||
@@ -1162,7 +1162,7 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char *url,
|
||||
FILE *fp = fopen(file_convert(catbuff, sizeof(catbuff), previous_save), "rb");
|
||||
|
||||
if (fp != NULL) {
|
||||
r->adr = (char *) malloc(r->size + 4);
|
||||
r->adr = (char *) malloc(r->size + 1);
|
||||
if (r->adr != NULL) {
|
||||
if (r->size > 0
|
||||
&& fread(r->adr, 1, r->size, fp) != r->size) {
|
||||
@@ -1172,6 +1172,7 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char *url,
|
||||
sprintf(r->msg, "Read error in cache disk data: %s",
|
||||
strerror(last_errno));
|
||||
}
|
||||
r->adr[r->size] = '\0';
|
||||
} else {
|
||||
r->statuscode = STATUSCODE_INVALID;
|
||||
strcpy(r->msg,
|
||||
|
||||
@@ -202,6 +202,7 @@ LD = @LD@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
LDFLAGS_PIE = @LDFLAGS_PIE@
|
||||
LFS_FLAG = @LFS_FLAG@
|
||||
LIBC_FORCE_LINK = @LIBC_FORCE_LINK@
|
||||
LIBOBJS = @LIBOBJS@
|
||||
LIBS = @LIBS@
|
||||
LIBTOOL = @LIBTOOL@
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Keep this POSIX-portable: the harness runs it via $(BASH), which is a plain
|
||||
# POSIX /bin/sh on some platforms (e.g. macOS), so avoid bashisms and GNU-only
|
||||
# tool flags despite the #!/bin/bash above.
|
||||
|
||||
# Cache create/read/update logic (driven by 'httrack -#A <dir>').
|
||||
#
|
||||
@@ -38,9 +41,12 @@ test -e "$dir/hts-cache/new.zip" || {
|
||||
# Sanity-check the cache footprint: the few-thousand-entry pass is expected to
|
||||
# weigh ~1-2 MB. Fail if it balloons well past that (e.g. a per-entry overhead
|
||||
# regression or runaway growth), so the cache size stays bounded.
|
||||
ceiling=$((4 * 1024 * 1024))
|
||||
bytes=$(du -sb "$dir/hts-cache" | cut -f1)
|
||||
test "$bytes" -le "$ceiling" || {
|
||||
echo "cache footprint $bytes bytes exceeds ${ceiling} ceiling" >&2
|
||||
# du -sk (1024-byte units) is portable; GNU's -b (apparent bytes) is rejected
|
||||
# by BSD/macOS du. Block-allocated size is an upper bound on apparent size,
|
||||
# which is all a ceiling check needs.
|
||||
ceiling=$((4 * 1024)) # KiB
|
||||
kbytes=$(du -sk "$dir/hts-cache" | cut -f1)
|
||||
test "$kbytes" -le "$ceiling" || {
|
||||
echo "cache footprint ${kbytes} KiB exceeds ${ceiling} KiB ceiling" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
@@ -30,6 +30,17 @@ run() {
|
||||
RC=$?
|
||||
}
|
||||
|
||||
# crawl using exactly the given args as the only URL(s), no implicit primary URL;
|
||||
# leaves the exit status in RC
|
||||
run_only() {
|
||||
local out="$1"
|
||||
shift
|
||||
rm -rf "$out"
|
||||
mkdir -p "$out"
|
||||
httrack -O "$out" --quiet -n "$@" >"$out/.log" 2>&1
|
||||
RC=$?
|
||||
}
|
||||
|
||||
# assert the value was accepted: clean exit and the fixture was mirrored
|
||||
accepted() {
|
||||
{ test "$RC" -eq 0 && test -n "$(find "$1" -type f -path '*/index.html' -print -quit)"; } ||
|
||||
@@ -68,4 +79,15 @@ refused "#152: over-cap -F not refused cleanly"
|
||||
run "$tmp/ov-l" --user-agent "$over"
|
||||
refused "#152: over-cap --user-agent not refused cleanly"
|
||||
|
||||
# Quote handling on the sole URL (run_only, so the quoted arg is the only URL and
|
||||
# can't be masked by an implicit one). A fully "-quoted URL has its surrounding
|
||||
# quotes stripped in place and is mirrored; a dangling opening quote, and a lone
|
||||
# quote (empty after the opening "), are refused cleanly and never crash.
|
||||
run_only "$tmp/q-ok" "\"file://$tmp/index.html\""
|
||||
accepted "$tmp/q-ok" "quoted URL not stripped/mirrored"
|
||||
run_only "$tmp/q-bad" '"foo'
|
||||
refused "dangling-quote argument not refused cleanly"
|
||||
run_only "$tmp/q-lone" '"'
|
||||
refused "lone-quote argument not refused cleanly"
|
||||
|
||||
exit 0
|
||||
|
||||
91
tests/01_engine-rcfile.test
Executable file
91
tests/01_engine-rcfile.test
Executable file
@@ -0,0 +1,91 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
# Config-file alias loading (no network). A .httrackrc in the working directory
|
||||
# is read by optinclude_file(), whose cmdl_ins macro inserts each alias-expanded
|
||||
# token into the x_argvblk block. That macro used to copy with an unbounded
|
||||
# strcpy on a bare char*; it is now bounded (strlcpybuff + cmdl_room over the
|
||||
# block capacity). Two properties are checked:
|
||||
# 1. The bound does not truncate: a long user-agent alias reaches doit.log
|
||||
# intact. user-agent expands to two tokens (-F <value>), so it exercises
|
||||
# both cmdl_ins insertions.
|
||||
# 2. The bound holds under exhaustion: a pathological .httrackrc whose alias
|
||||
# expansions overflow the block aborts cleanly through the htssafe bounds
|
||||
# check (a message naming htsalias.c) instead of overrunning the heap. The
|
||||
# unbounded version segfaulted here.
|
||||
|
||||
# set -e with the intentional-nonzero httrack runs guarded explicitly (the
|
||||
# crawls below are expected to fail/abort and their status is inspected by hand).
|
||||
set -euo pipefail
|
||||
|
||||
# Resolve httrack to an absolute path before we cd: PATH may hold a build-relative
|
||||
# entry that would not resolve from the temp directory.
|
||||
bin=$(command -v httrack) || {
|
||||
echo "FAIL: httrack not found on PATH"
|
||||
exit 1
|
||||
}
|
||||
case "$bin" in
|
||||
/*) ;;
|
||||
*) bin="$(cd "$(dirname "$bin")" && pwd)/$(basename "$bin")" ;;
|
||||
esac
|
||||
|
||||
tmp=$(mktemp -d "${TMPDIR:-/tmp}/httrack_rcfile.XXXXXX") || exit 1
|
||||
trap 'rm -rf "$tmp"' EXIT HUP INT QUIT PIPE TERM
|
||||
|
||||
# --- 1. alias token survives the bound intact -------------------------------
|
||||
d1="$tmp/intact"
|
||||
mkdir -p "$d1"
|
||||
echo '<html><body>hello</body></html>' >"$d1/index.html"
|
||||
|
||||
# optinclude_file() lowercases each config line, so the marker is lowercase to
|
||||
# survive the comparison verbatim.
|
||||
marker='zzz_rcfile_marker_0123456789_abcdefghijklmnopqrstuvwxyz_intact'
|
||||
printf 'user-agent=%s\n' "$marker" >"$d1/.httrackrc"
|
||||
|
||||
# Run with no -O so the working-directory .httrackrc is loaded (an -O path makes
|
||||
# the engine skip the rc files). Output lands in the temp dir. Guard the run so a
|
||||
# nonzero exit is captured for the assertion instead of tripping set -e.
|
||||
rc=0
|
||||
(cd "$d1" && "$bin" "file://$d1/index.html" --quiet -n >.log 2>&1) || rc=$?
|
||||
|
||||
test "$rc" -eq 0 || {
|
||||
echo "FAIL: rc-file crawl exited $rc"
|
||||
exit 1
|
||||
}
|
||||
test -f "$d1/hts-cache/doit.log" || {
|
||||
echo "FAIL: doit.log not written (rc file not processed)"
|
||||
exit 1
|
||||
}
|
||||
# A truncated copy would cut the token; require the full -F value.
|
||||
grep -q -- "-F $marker" "$d1/hts-cache/doit.log" || {
|
||||
echo "FAIL: user-agent alias missing or truncated in doit.log"
|
||||
head -1 "$d1/hts-cache/doit.log"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# --- 2. block exhaustion aborts through the bound, not the heap -------------
|
||||
d2="$tmp/exhaust"
|
||||
mkdir -p "$d2"
|
||||
echo '<html><body>hi</body></html>' >"$d2/index.html"
|
||||
|
||||
# Each line inserts ~two tokens of ~200 bytes; 400 lines overflow the block's
|
||||
# fixed slack (current_size + 32768) many times over, deterministically.
|
||||
val=$(printf 'a%.0s' $(seq 1 200))
|
||||
for _ in $(seq 1 400); do
|
||||
printf 'user-agent=%s\n' "$val"
|
||||
done >"$d2/.httrackrc"
|
||||
|
||||
# The process aborts (httrack turns the fatal signal into exit 134 either way),
|
||||
# so the exit code does not distinguish the bounded abort from a heap overflow;
|
||||
# the stderr diagnostic does. The htssafe bounds check names the offending file.
|
||||
# Expected to fail, so the nonzero exit is swallowed; only the log is inspected.
|
||||
(cd "$d2" && "$bin" "file://$d2/index.html" --quiet -n >.log 2>&1) || true
|
||||
|
||||
grep -Eq "overflow while copying.*htsalias\.c" "$d2/.log" || {
|
||||
echo "FAIL: exhausted rc file did not abort through the htsalias.c bound"
|
||||
echo "(an unbounded copy would overrun the heap here)"
|
||||
tail -3 "$d2/.log"
|
||||
exit 1
|
||||
}
|
||||
|
||||
exit 0
|
||||
@@ -3,6 +3,10 @@
|
||||
# The committed man/httrack.1 must match what man/makeman.sh produces from the
|
||||
# current "httrack --help" output. This catches a --help change that was not
|
||||
# followed by "make -C man regen-man".
|
||||
#
|
||||
# Keep this POSIX-portable: the harness runs it via $(BASH), which is a plain
|
||||
# POSIX /bin/sh on some platforms (e.g. macOS), so avoid bashisms (such as
|
||||
# process substitution) despite the #!/bin/bash above.
|
||||
|
||||
: "${top_srcdir:=..}"
|
||||
|
||||
@@ -20,7 +24,9 @@ command -v httrack >/dev/null 2>&1 || {
|
||||
}
|
||||
|
||||
tmp=$(mktemp) || exit 1
|
||||
trap 'rm -f "$tmp"' EXIT
|
||||
committed_clean=$(mktemp) || exit 1
|
||||
generated_clean=$(mktemp) || exit 1
|
||||
trap 'rm -f "$tmp" "$committed_clean" "$generated_clean"' EXIT
|
||||
|
||||
README="$top_srcdir/README" bash "$gen" httrack >"$tmp" 2>/dev/null || {
|
||||
echo "makeman.sh failed" >&2
|
||||
@@ -28,12 +34,15 @@ README="$top_srcdir/README" bash "$gen" httrack >"$tmp" 2>/dev/null || {
|
||||
}
|
||||
|
||||
# Ignore the two intentionally date-dependent lines (page date, copyright year).
|
||||
# Temp files, not process substitution, so this works under a POSIX /bin/sh.
|
||||
strip_volatile() { grep -vE '^\.TH httrack |^Copyright \(C\) 1998-'; }
|
||||
strip_volatile <"$committed" >"$committed_clean"
|
||||
strip_volatile <"$tmp" >"$generated_clean"
|
||||
|
||||
if diff <(strip_volatile <"$committed") <(strip_volatile <"$tmp") >/dev/null; then
|
||||
if diff "$committed_clean" "$generated_clean" >/dev/null; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "man/httrack.1 is out of date. Regenerate with: make -C man regen-man" >&2
|
||||
diff <(strip_volatile <"$committed") <(strip_volatile <"$tmp") | head -40 >&2
|
||||
diff "$committed_clean" "$generated_clean" | head -40 >&2
|
||||
exit 1
|
||||
|
||||
@@ -25,6 +25,7 @@ TESTS = \
|
||||
01_engine-idna.test \
|
||||
01_engine-mime.test \
|
||||
01_engine-parse.test \
|
||||
01_engine-rcfile.test \
|
||||
01_engine-simplify.test \
|
||||
01_engine-strsafe.test \
|
||||
02_manpage-regen.test \
|
||||
|
||||
@@ -380,6 +380,7 @@ LD = @LD@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
LDFLAGS_PIE = @LDFLAGS_PIE@
|
||||
LFS_FLAG = @LFS_FLAG@
|
||||
LIBC_FORCE_LINK = @LIBC_FORCE_LINK@
|
||||
LIBOBJS = @LIBOBJS@
|
||||
LIBS = @LIBS@
|
||||
LIBTOOL = @LIBTOOL@
|
||||
@@ -498,6 +499,7 @@ TESTS = \
|
||||
01_engine-idna.test \
|
||||
01_engine-mime.test \
|
||||
01_engine-parse.test \
|
||||
01_engine-rcfile.test \
|
||||
01_engine-simplify.test \
|
||||
01_engine-strsafe.test \
|
||||
02_manpage-regen.test \
|
||||
|
||||
@@ -118,7 +118,10 @@ main() {
|
||||
git -C "$repo/src/coucal" archive --format=tar --prefix=src/coucal/ HEAD |
|
||||
tar -x -C "$export_dir"
|
||||
|
||||
# Refresh build system and man page, then build and validate the tarball.
|
||||
# Refresh build system and man page, then build the tarball. We build here
|
||||
# only because regen-man needs the compiled binaries; the test suite is not
|
||||
# run in this pass. debuild (below) runs the full suite once, with the online
|
||||
# tests enabled, so a check here would just be a slower, offline-only repeat.
|
||||
info "regenerating build system and man page"
|
||||
(
|
||||
cd "$export_dir"
|
||||
@@ -126,8 +129,6 @@ main() {
|
||||
./configure --quiet
|
||||
make -s -j"$(nproc)"
|
||||
make -s -C man regen-man
|
||||
info "running test suite"
|
||||
make -s check
|
||||
# Build the tarball from a clean tree so no object files leak into it.
|
||||
make -s clean
|
||||
make -s dist
|
||||
|
||||
Reference in New Issue
Block a user