mirror of
https://github.com/xroche/httrack.git
synced 2026-06-14 22:33:54 +03:00
Compare commits
13 Commits
cleanup/ht
...
cleanup/in
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
348a7d8cb2 | ||
|
|
5f81741ac5 | ||
|
|
0cf14c4e88 | ||
|
|
29a07ff487 | ||
|
|
f987083f14 | ||
|
|
eb565f0bd8 | ||
|
|
71398d510e | ||
|
|
75fc040f06 | ||
|
|
c4ef18f5a5 | ||
|
|
d76dad47f7 | ||
|
|
055e17b057 | ||
|
|
d7bb97d697 | ||
|
|
ca810ef7e3 |
27
.clang-format
Normal file
27
.clang-format
Normal file
@@ -0,0 +1,27 @@
|
||||
# clang-format 19 config for the HTTrack C engine.
|
||||
#
|
||||
# IMPORTANT: this is applied to TOUCHED LINES ONLY (via git-clang-format / the
|
||||
# CI format check). The engine was originally formatted by GNU indent / by hand
|
||||
# and does NOT round-trip through clang-format, so a whole-tree reformat is
|
||||
# intentionally never done. Format the lines you change; leave the rest.
|
||||
#
|
||||
# Reverse-engineered from src/*.c: 2-space indent, no tabs, 80 columns, pointers
|
||||
# bound to the name (char *x), attached braces, un-indented case labels, and a
|
||||
# space after C-style casts ((int) x). Most of that is LLVM's defaults; the
|
||||
# lines below are the deliberate deviations.
|
||||
|
||||
BasedOnStyle: LLVM
|
||||
|
||||
# Engine specifics / deviations from LLVM:
|
||||
SpaceAfterCStyleCast: true # "(int) x", overwhelmingly dominant (542 vs 7)
|
||||
SortIncludes: false # C include order can be significant; never reorder
|
||||
IncludeBlocks: Preserve # do not merge/reflow include groups
|
||||
|
||||
# Stated explicitly for robustness against base-style drift (these match LLVM):
|
||||
IndentWidth: 2
|
||||
UseTab: Never
|
||||
ColumnLimit: 80
|
||||
PointerAlignment: Right
|
||||
IndentCaseLabels: false
|
||||
SpaceBeforeParens: ControlStatements
|
||||
AllowShortIfStatementsOnASingleLine: Never
|
||||
35
.githooks/README.md
Normal file
35
.githooks/README.md
Normal file
@@ -0,0 +1,35 @@
|
||||
# Git hooks
|
||||
|
||||
Versioned hooks for this repo. Enable them once per clone:
|
||||
|
||||
```sh
|
||||
git config core.hooksPath .githooks
|
||||
```
|
||||
|
||||
## pre-commit: auto-format changed C lines
|
||||
|
||||
Runs `git-clang-format` (clang-format 19, using the repo `.clang-format`) on the
|
||||
**staged lines only** and re-stages the result, so every commit is
|
||||
clang-format-clean and the CI `format` check passes. It never reformats the
|
||||
whole tree, only the lines you changed.
|
||||
|
||||
- Disable for a single commit: `HTTRACK_NO_AUTOFORMAT=1 git commit ...`
|
||||
- If clang-format 19 isn't installed, the hook warns and skips (CI still
|
||||
enforces). Install it with your distro's `clang-format-19`, or from
|
||||
apt.llvm.org.
|
||||
- If a file has *both* staged and unstaged changes, the hook does not
|
||||
auto-mutate it (that would commit the unstaged part); it instead reports
|
||||
whether its staged lines need formatting and asks you to stage/stash the rest.
|
||||
|
||||
### noexec working trees
|
||||
|
||||
Git executes the hook directly, so if your working tree is on a `noexec` mount
|
||||
git cannot run `.githooks/pre-commit`. Point `core.hooksPath` at a copy on an
|
||||
exec filesystem instead:
|
||||
|
||||
```sh
|
||||
mkdir -p ~/.httrack-hooks && cp .githooks/pre-commit ~/.httrack-hooks/
|
||||
chmod +x ~/.httrack-hooks/pre-commit
|
||||
git config core.hooksPath ~/.httrack-hooks
|
||||
```
|
||||
71
.githooks/pre-commit
Executable file
71
.githooks/pre-commit
Executable file
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env bash
#
# Auto-format the staged C lines with clang-format (touched lines only), then
# re-stage them, so commits stay clang-format-clean and CI's format check passes.
#
# Enable once per clone: git config core.hooksPath .githooks
# Skip for one commit: HTTRACK_NO_AUTOFORMAT=1 git commit ...
#
# Matches the CI gate (.clang-format, clang-format 19). It only ever touches the
# lines a commit changes; it never reformats the whole tree.
#
# Exit status: 0 when the commit may proceed (formatted, already clean, or
# skipped), 1 when a partially-staged file has staged lines that need formatting.

set -euo pipefail

[ "${HTTRACK_NO_AUTOFORMAT:-}" = "1" ] && exit 0

# Staged C/H files (added/copied/modified/renamed). The list is read
# NUL-delimited (-z): without -z git C-quotes unusual path names
# (core.quotePath), and the quoted form would not match when handed back to
# "git diff" / "git add" below. A plain read loop is used rather than mapfile so
# the hook also runs on bash 3.2, which has no mapfile builtin.
files=()
while IFS= read -r -d '' f; do
    files+=("$f")
done < <(git diff --cached --name-only -z --diff-filter=ACMR -- '*.c' '*.h')
[ "${#files[@]}" -eq 0 ] && exit 0

# Locate clang-format 19 and the git driver; if absent, skip (CI is the backstop).
# The binary is accepted only when its --version output reports major version 19.
cf=""
for c in clang-format-19 clang-format; do
    if command -v "$c" >/dev/null 2>&1; then
        case "$("$c" --version)" in *"version 19."*)
            cf="$c"
            break
            ;;
        esac
    fi
done
gcf=""
for g in git-clang-format-19 git-clang-format; do
    command -v "$g" >/dev/null 2>&1 && {
        gcf="$g"
        break
    }
done
if [ -z "$cf" ] || [ -z "$gcf" ]; then
    echo "pre-commit: clang-format 19 not found; skipping auto-format (CI still checks)." >&2
    exit 0
fi

# Files that are staged AND also have unstaged changes: re-staging them would
# pull in the unstaged work, so don't auto-mutate. Check instead and let the
# author resolve it.
partial=()
for f in "${files[@]}"; do
    if ! git diff --quiet -- "$f"; then partial+=("$f"); fi
done

if [ "${#partial[@]}" -ne 0 ]; then
    # Check-only run (--diff). The driver's exit status is ignored on purpose:
    # the verdict is taken from its output, not from its exit convention.
    d="$("$gcf" --binary "$cf" --style=file --staged --diff --extensions c,h || true)"
    case "$d" in
    "" | "no modified files to format" | *"did not modify any files"*)
        exit 0
        ;; # staged lines already clean
    *)
        echo "pre-commit: these files have both staged and unstaged changes, so" >&2
        echo "auto-format was skipped to avoid committing unstaged work:" >&2
        printf ' %s\n' "${partial[@]}" >&2
        echo "Their staged lines need formatting. Stage the rest (or stash it)," >&2
        echo "or run: $gcf --binary $cf --staged" >&2
        exit 1
        ;;
    esac
fi

# Clean-staged files: format the staged lines in the working tree, then re-stage.
# Best effort: a driver failure must not block the commit (CI still enforces).
"$gcf" --binary "$cf" --style=file --staged --extensions c,h >/dev/null || true
git add -- "${files[@]}"
exit 0
|
||||
62
.github/workflows/ci.yml
vendored
62
.github/workflows/ci.yml
vendored
@@ -81,7 +81,65 @@ jobs:
|
||||
|
||||
# Lint the scripts we maintain; the legacy scripts are a separate cleanup.
|
||||
- name: shellcheck
|
||||
run: shellcheck man/makeman.sh tools/mkdeb.sh tests/*.test tests/check-network.sh
|
||||
run: shellcheck man/makeman.sh tools/mkdeb.sh .githooks/pre-commit tests/*.test tests/check-network.sh
|
||||
|
||||
- name: shfmt
|
||||
run: shfmt -d -i 4 man/makeman.sh tools/mkdeb.sh
|
||||
run: shfmt -d -i 4 man/makeman.sh tools/mkdeb.sh .githooks/pre-commit
|
||||
|
||||
# Check clang-format on CHANGED LINES ONLY. The engine predates clang-format
|
||||
# (it was shaped by an old Visual Studio formatter) and does not round-trip,
|
||||
# so we never reformat the whole tree -- only the lines a PR touches.
|
||||
format:
|
||||
name: format (clang-format-19, changed lines)
|
||||
if: github.event_name == 'pull_request'
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Install clang-format 19 (pinned, from apt.llvm.org)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# ubuntu-24.04's native clang-format is 18; pin 19 to match local dev.
|
||||
wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key \
|
||||
| sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc >/dev/null
|
||||
echo "deb http://apt.llvm.org/noble/ llvm-toolchain-noble-19 main" \
|
||||
| sudo tee /etc/apt/sources.list.d/llvm-19.list >/dev/null
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends clang-format-19
|
||||
# git-clang-format driver, pinned to an immutable release tag (not a
|
||||
# moving branch) since we curl and then execute it.
|
||||
sudo curl -fsSL -o /usr/local/bin/git-clang-format \
|
||||
https://raw.githubusercontent.com/llvm/llvm-project/llvmorg-19.1.7/clang/tools/clang-format/git-clang-format
|
||||
sudo chmod 0755 /usr/local/bin/git-clang-format
|
||||
clang-format-19 --version
|
||||
|
||||
- name: Check formatting of changed lines
|
||||
run: |
|
||||
set -euo pipefail
|
||||
git fetch --no-tags origin \
|
||||
"+refs/heads/${{ github.base_ref }}:refs/remotes/origin/${{ github.base_ref }}"
|
||||
base="origin/${{ github.base_ref }}"
|
||||
set +e
|
||||
diff="$(git clang-format --binary clang-format-19 --style=file \
|
||||
--diff --extensions c,h "$base")"
|
||||
rc=$?
|
||||
set -e
|
||||
# Classify by output first: a non-empty diff means "not clean",
|
||||
# regardless of the driver's exit convention (the release-tag driver
|
||||
# exits 0 and signals via stdout; some packaged drivers exit 1 on a
|
||||
# diff). A nonzero exit with clean output is a real checker error.
|
||||
case "$diff" in
|
||||
"" | "no modified files to format" | *"did not modify any files"*)
|
||||
if [ "$rc" -ne 0 ]; then
|
||||
echo "::error::git clang-format failed (exit $rc): checker error."
|
||||
exit 1
|
||||
fi
|
||||
echo "Formatting OK: changed C lines are clang-format-clean." ;;
|
||||
*)
|
||||
echo "$diff"
|
||||
echo "::error::Changed C lines are not clang-format-clean."
|
||||
echo "Fix locally with: git clang-format --binary clang-format-19 $base"
|
||||
exit 1 ;;
|
||||
esac
|
||||
|
||||
@@ -193,6 +193,41 @@ static int string_safety_selftests(void) {
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
/* htsbuff: bounded builder over a fixed array (append, truncating append,
|
||||
reset, and length tracking) */
|
||||
{
|
||||
char dst[8];
|
||||
htsbuff b = htsbuff_array(dst);
|
||||
|
||||
htsbuff_cat(&b, "ab");
|
||||
htsbuff_cat(&b, "cd");
|
||||
if (strcmp(htsbuff_str(&b), "abcd") != 0 || b.len != 4)
|
||||
return 1;
|
||||
|
||||
htsbuff_catn(&b, "efghij", 2); /* append at most 2 */
|
||||
if (strcmp(htsbuff_str(&b), "abcdef") != 0)
|
||||
return 1;
|
||||
|
||||
htsbuff_cpy(&b, "xyz"); /* reset */
|
||||
if (strcmp(htsbuff_str(&b), "xyz") != 0 || b.len != 3)
|
||||
return 1;
|
||||
|
||||
htsbuff_catc(&b, '!'); /* single character */
|
||||
if (strcmp(htsbuff_str(&b), "xyz!") != 0 || b.len != 4)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* boundary: filling to exactly cap-1 must succeed (one more aborts, which the
|
||||
-#8 overflow-buff mode checks) */
|
||||
{
|
||||
char d2[4];
|
||||
htsbuff c = htsbuff_array(d2);
|
||||
|
||||
htsbuff_cat(&c, "abc");
|
||||
if (strcmp(htsbuff_str(&c), "abc") != 0 || c.len != 3)
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2494,16 +2529,23 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
return 0;
|
||||
break;
|
||||
case '8': /* string-safety selftest: httrack -#8 [overflow <bigstr>] */
|
||||
if (na + 1 < argc && strcmp(argv[na + 1], "overflow") == 0) {
|
||||
/* Deliberately exceed a sized buffer: the bounded macro must
|
||||
if (na + 1 < argc
|
||||
&& strncmp(argv[na + 1], "overflow", 8) == 0) {
|
||||
/* Deliberately exceed a sized buffer: the bounded op must
|
||||
abort. The source comes from argv so its length is opaque
|
||||
to the compiler (no static -Wstringop-overflow, genuine
|
||||
runtime check). */
|
||||
runtime check). "overflow-buff" exercises htsbuff. */
|
||||
char small[4];
|
||||
const char *const src =
|
||||
(na + 2 < argc) ? argv[na + 2] : "overflowing";
|
||||
|
||||
strcpybuff(small, src);
|
||||
if (strcmp(argv[na + 1], "overflow-buff") == 0) {
|
||||
htsbuff b = htsbuff_array(small);
|
||||
|
||||
htsbuff_cat(&b, src);
|
||||
} else {
|
||||
strcpybuff(small, src);
|
||||
}
|
||||
printf("strsafe: NOT aborted\n"); /* must be unreachable */
|
||||
htsmain_free();
|
||||
return 1;
|
||||
|
||||
147
src/htslib.c
147
src/htslib.c
@@ -1660,138 +1660,107 @@ void treathead(t_cookie * cookie, const char *adr, const char *fil, htsblk * ret
|
||||
}
|
||||
}
|
||||
|
||||
// transforme le message statuscode en chaîne
|
||||
HTSEXT_API void infostatuscode(char *msg, int statuscode) {
|
||||
// HTTP status code -> reason phrase (per RFC), or NULL if unknown.
|
||||
HTSEXT_API const char *infostatuscode_const(int statuscode) {
|
||||
// Plain switch dispatch; every phrase returned is a static string literal.
|
||||
switch (statuscode) {
|
||||
// Erreurs HTTP, selon RFC
|
||||
case 100:
|
||||
strcpybuff(msg, "Continue");
|
||||
break;
|
||||
return "Continue";
|
||||
case 101:
|
||||
strcpybuff(msg, "Switching Protocols");
|
||||
break;
|
||||
return "Switching Protocols";
|
||||
case 200:
|
||||
strcpybuff(msg, "OK");
|
||||
break;
|
||||
return "OK";
|
||||
case 201:
|
||||
strcpybuff(msg, "Created");
|
||||
break;
|
||||
return "Created";
|
||||
case 202:
|
||||
strcpybuff(msg, "Accepted");
|
||||
break;
|
||||
return "Accepted";
|
||||
case 203:
|
||||
strcpybuff(msg, "Non-Authoritative Information");
|
||||
break;
|
||||
return "Non-Authoritative Information";
|
||||
case 204:
|
||||
strcpybuff(msg, "No Content");
|
||||
break;
|
||||
return "No Content";
|
||||
case 205:
|
||||
strcpybuff(msg, "Reset Content");
|
||||
break;
|
||||
return "Reset Content";
|
||||
case 206:
|
||||
strcpybuff(msg, "Partial Content");
|
||||
break;
|
||||
return "Partial Content";
|
||||
case 300:
|
||||
strcpybuff(msg, "Multiple Choices");
|
||||
break;
|
||||
return "Multiple Choices";
|
||||
case 301:
|
||||
strcpybuff(msg, "Moved Permanently");
|
||||
break;
|
||||
return "Moved Permanently";
|
||||
case 302:
|
||||
strcpybuff(msg, "Moved Temporarily");
|
||||
break;
|
||||
return "Moved Temporarily";
|
||||
case 303:
|
||||
strcpybuff(msg, "See Other");
|
||||
break;
|
||||
return "See Other";
|
||||
case 304:
|
||||
strcpybuff(msg, "Not Modified");
|
||||
break;
|
||||
return "Not Modified";
|
||||
case 305:
|
||||
strcpybuff(msg, "Use Proxy");
|
||||
break;
|
||||
return "Use Proxy";
|
||||
case 306:
|
||||
strcpybuff(msg, "Undefined 306 error");
|
||||
break;
|
||||
return "Undefined 306 error";
|
||||
case 307:
|
||||
strcpybuff(msg, "Temporary Redirect");
|
||||
break;
|
||||
return "Temporary Redirect";
|
||||
case 400:
|
||||
strcpybuff(msg, "Bad Request");
|
||||
break;
|
||||
return "Bad Request";
|
||||
case 401:
|
||||
strcpybuff(msg, "Unauthorized");
|
||||
break;
|
||||
return "Unauthorized";
|
||||
case 402:
|
||||
strcpybuff(msg, "Payment Required");
|
||||
break;
|
||||
return "Payment Required";
|
||||
case 403:
|
||||
strcpybuff(msg, "Forbidden");
|
||||
break;
|
||||
return "Forbidden";
|
||||
case 404:
|
||||
strcpybuff(msg, "Not Found");
|
||||
break;
|
||||
return "Not Found";
|
||||
case 405:
|
||||
strcpybuff(msg, "Method Not Allowed");
|
||||
break;
|
||||
return "Method Not Allowed";
|
||||
case 406:
|
||||
strcpybuff(msg, "Not Acceptable");
|
||||
break;
|
||||
return "Not Acceptable";
|
||||
case 407:
|
||||
strcpybuff(msg, "Proxy Authentication Required");
|
||||
break;
|
||||
return "Proxy Authentication Required";
|
||||
case 408:
|
||||
strcpybuff(msg, "Request Time-out");
|
||||
break;
|
||||
return "Request Time-out";
|
||||
case 409:
|
||||
strcpybuff(msg, "Conflict");
|
||||
break;
|
||||
return "Conflict";
|
||||
case 410:
|
||||
strcpybuff(msg, "Gone");
|
||||
break;
|
||||
return "Gone";
|
||||
case 411:
|
||||
strcpybuff(msg, "Length Required");
|
||||
break;
|
||||
return "Length Required";
|
||||
case 412:
|
||||
strcpybuff(msg, "Precondition Failed");
|
||||
break;
|
||||
return "Precondition Failed";
|
||||
case 413:
|
||||
strcpybuff(msg, "Request Entity Too Large");
|
||||
break;
|
||||
return "Request Entity Too Large";
|
||||
case 414:
|
||||
strcpybuff(msg, "Request-URI Too Large");
|
||||
break;
|
||||
return "Request-URI Too Large";
|
||||
case 415:
|
||||
strcpybuff(msg, "Unsupported Media Type");
|
||||
break;
|
||||
return "Unsupported Media Type";
|
||||
case 416:
|
||||
strcpybuff(msg, "Requested Range Not Satisfiable");
|
||||
break;
|
||||
return "Requested Range Not Satisfiable";
|
||||
case 417:
|
||||
strcpybuff(msg, "Expectation Failed");
|
||||
break;
|
||||
return "Expectation Failed";
|
||||
case 500:
|
||||
strcpybuff(msg, "Internal Server Error");
|
||||
break;
|
||||
return "Internal Server Error";
|
||||
case 501:
|
||||
strcpybuff(msg, "Not Implemented");
|
||||
break;
|
||||
return "Not Implemented";
|
||||
case 502:
|
||||
strcpybuff(msg, "Bad Gateway");
|
||||
break;
|
||||
return "Bad Gateway";
|
||||
case 503:
|
||||
strcpybuff(msg, "Service Unavailable");
|
||||
break;
|
||||
return "Service Unavailable";
|
||||
case 504:
|
||||
strcpybuff(msg, "Gateway Time-out");
|
||||
break;
|
||||
return "Gateway Time-out";
|
||||
case 505:
|
||||
strcpybuff(msg, "HTTP Version Not Supported");
|
||||
break;
|
||||
//
|
||||
return "HTTP Version Not Supported";
|
||||
default:
|
||||
if (strnotempty(msg) == 0)
|
||||
strcpybuff(msg, "Unknown error");
|
||||
break;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Write the status code's reason phrase into msg. For an unknown code, keep any
|
||||
// caller-provided message, otherwise fall back to a default. Callers provide a
|
||||
// buffer of at least 64 bytes (the longest reason phrase is 31).
|
||||
HTSEXT_API void infostatuscode(char *msg, int statuscode) {
|
||||
const char *const text = infostatuscode_const(statuscode);
|
||||
|
||||
if (text != NULL) {
|
||||
strlcpybuff(msg, text, 64);
|
||||
} else if (strnotempty(msg) == 0) {
|
||||
strlcpybuff(msg, "Unknown error", 64);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
186
src/htsname.c
186
src/htsname.c
@@ -767,7 +767,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
// ajouter nom du site éventuellement en premier
|
||||
if (opt->savename_type == -1) { // utiliser savename_userdef! (%h%p/%n%q.%t)
|
||||
const char *a = StringBuff(opt->savename_userdef);
|
||||
char *b = afs->save;
|
||||
htsbuff sb = htsbuff_array(afs->save);
|
||||
|
||||
/*char *nom_pos=NULL,*dot_pos=NULL; // Position nom et point */
|
||||
char tok;
|
||||
@@ -787,17 +787,16 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
}
|
||||
*/
|
||||
|
||||
// Construire nom
|
||||
while((*a) && (((int) (b - afs->save)) < HTS_URLMAXSIZE)) { // parser, et pas trop long..
|
||||
// build the name
|
||||
while ((*a) && (sb.len < HTS_URLMAXSIZE)) { // parse, but not too long
|
||||
if (*a == '%') {
|
||||
int short_ver = 0;
|
||||
|
||||
a++;
|
||||
if (*a == 's') {
|
||||
if (*a == 's') { // '%s...' selects the short (8.3) form
|
||||
short_ver = 1;
|
||||
a++;
|
||||
}
|
||||
*b = '\0';
|
||||
switch (tok = *a++) {
|
||||
case '[': // %[param:prefix_if_not_empty:suffix_if_not_empty:empty_replacement:notfound_replacement]
|
||||
if (strchr(a, ']')) {
|
||||
@@ -834,8 +833,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
}
|
||||
if (cp) {
|
||||
c = cp + strlen(name[0]); /* jumps "param=" */
|
||||
strcpybuff(b, name[1]); /* prefix */
|
||||
b += strlen(b);
|
||||
htsbuff_cat(&sb, name[1]); /* prefix */
|
||||
if (*c != '\0' && *c != '&') {
|
||||
char *d = name[0];
|
||||
|
||||
@@ -846,110 +844,90 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
*d = '\0';
|
||||
d = unescape_http(catbuff, sizeof(catbuff), name[0]);
|
||||
if (d && *d) {
|
||||
strcpybuff(b, d); /* value */
|
||||
b += strlen(b);
|
||||
htsbuff_cat(&sb, d); /* value */
|
||||
} else {
|
||||
strcpybuff(b, name[3]); /* empty replacement if any */
|
||||
b += strlen(b);
|
||||
htsbuff_cat(&sb, name[3]); /* empty replacement if any */
|
||||
}
|
||||
} else {
|
||||
strcpybuff(b, name[3]); /* empty replacement if any */
|
||||
b += strlen(b);
|
||||
htsbuff_cat(&sb, name[3]); /* empty replacement if any */
|
||||
}
|
||||
strcpybuff(b, name[2]); /* suffix */
|
||||
b += strlen(b);
|
||||
htsbuff_cat(&sb, name[2]); /* suffix */
|
||||
} else {
|
||||
strcpybuff(b, name[4]); /* not found replacement if any */
|
||||
b += strlen(b);
|
||||
htsbuff_cat(&sb, name[4]); /* not found replacement if any */
|
||||
}
|
||||
} else {
|
||||
strcpybuff(b, name[4]); /* not found replacement if any */
|
||||
b += strlen(b);
|
||||
htsbuff_cat(&sb, name[4]); /* not found replacement if any */
|
||||
}
|
||||
}
|
||||
break;
|
||||
case '%':
|
||||
*b++ = '%';
|
||||
htsbuff_catc(&sb, '%');
|
||||
break;
|
||||
case 'n': // nom sans ext
|
||||
*b = '\0';
|
||||
case 'n': // name without extension
|
||||
if (dot_pos) {
|
||||
if (!short_ver) // Noms longs
|
||||
strncatbuff(b, nom_pos, (int) (dot_pos - nom_pos));
|
||||
if (!short_ver)
|
||||
htsbuff_catn(&sb, nom_pos, (int) (dot_pos - nom_pos));
|
||||
else
|
||||
strncatbuff(b, nom_pos, min((int) (dot_pos - nom_pos), 8));
|
||||
htsbuff_catn(&sb, nom_pos, min((int) (dot_pos - nom_pos), 8));
|
||||
} else {
|
||||
if (!short_ver) // Noms longs
|
||||
strcpybuff(b, nom_pos);
|
||||
if (!short_ver)
|
||||
htsbuff_cat(&sb, nom_pos);
|
||||
else
|
||||
strncatbuff(b, nom_pos, 8);
|
||||
htsbuff_catn(&sb, nom_pos, 8);
|
||||
}
|
||||
b += strlen(b); // pointer à la fin
|
||||
break;
|
||||
case 'N': // nom avec ext
|
||||
// RECOPIE NOM + EXT
|
||||
*b = '\0';
|
||||
case 'N': // name with extension
|
||||
if (dot_pos) {
|
||||
if (!short_ver) // Noms longs
|
||||
strncatbuff(b, nom_pos, (int) (dot_pos - nom_pos));
|
||||
if (!short_ver)
|
||||
htsbuff_catn(&sb, nom_pos, (int) (dot_pos - nom_pos));
|
||||
else
|
||||
strncatbuff(b, nom_pos, min((int) (dot_pos - nom_pos), 8));
|
||||
htsbuff_catn(&sb, nom_pos, min((int) (dot_pos - nom_pos), 8));
|
||||
} else {
|
||||
if (!short_ver) // Noms longs
|
||||
strcpybuff(b, nom_pos);
|
||||
if (!short_ver)
|
||||
htsbuff_cat(&sb, nom_pos);
|
||||
else
|
||||
strncatbuff(b, nom_pos, 8);
|
||||
htsbuff_catn(&sb, nom_pos, 8);
|
||||
}
|
||||
b += strlen(b); // pointer à la fin
|
||||
*b = '.';
|
||||
++b;
|
||||
// RECOPIE NOM + EXT
|
||||
*b = '\0';
|
||||
htsbuff_catc(&sb, '.');
|
||||
if (dot_pos) {
|
||||
if (!short_ver) // Noms longs
|
||||
strcpybuff(b, dot_pos + 1);
|
||||
if (!short_ver)
|
||||
htsbuff_cat(&sb, dot_pos + 1);
|
||||
else
|
||||
strncatbuff(b, dot_pos + 1, 3);
|
||||
htsbuff_catn(&sb, dot_pos + 1, 3);
|
||||
} else {
|
||||
if (!short_ver) // Noms longs
|
||||
strcpybuff(b, DEFAULT_EXT + 1); // pas de..
|
||||
if (!short_ver)
|
||||
htsbuff_cat(&sb, DEFAULT_EXT + 1); // skip the leading dot
|
||||
else
|
||||
strcpybuff(b, DEFAULT_EXT_SHORT + 1); // pas de..
|
||||
htsbuff_cat(&sb, DEFAULT_EXT_SHORT + 1); // skip the leading dot
|
||||
}
|
||||
b += strlen(b); // pointer à la fin
|
||||
//
|
||||
break;
|
||||
case 't': // ext
|
||||
*b = '\0';
|
||||
case 't': // extension
|
||||
if (dot_pos) {
|
||||
if (!short_ver) // Noms longs
|
||||
strcpybuff(b, dot_pos + 1);
|
||||
if (!short_ver)
|
||||
htsbuff_cat(&sb, dot_pos + 1);
|
||||
else
|
||||
strncatbuff(b, dot_pos + 1, 3);
|
||||
htsbuff_catn(&sb, dot_pos + 1, 3);
|
||||
} else {
|
||||
if (!short_ver) // Noms longs
|
||||
strcpybuff(b, DEFAULT_EXT + 1); // pas de..
|
||||
if (!short_ver)
|
||||
htsbuff_cat(&sb, DEFAULT_EXT + 1); // skip the leading dot
|
||||
else
|
||||
strcpybuff(b, DEFAULT_EXT_SHORT + 1); // pas de..
|
||||
htsbuff_cat(&sb, DEFAULT_EXT_SHORT + 1); // skip the leading dot
|
||||
}
|
||||
b += strlen(b); // pointer à la fin
|
||||
break;
|
||||
case 'p': // path sans dernier /
|
||||
*b = '\0';
|
||||
if (nom_pos != fil + 1) { // pas: /index.html (chemin nul)
|
||||
if (!short_ver) { // Noms longs
|
||||
strncatbuff(b, fil, (int) (nom_pos - fil) - 1);
|
||||
case 'p': // path without trailing /
|
||||
if (nom_pos !=
|
||||
fil + 1) { // skip when the path is empty (e.g. /index.html)
|
||||
if (!short_ver) {
|
||||
htsbuff_catn(&sb, fil, (int) (nom_pos - fil) - 1);
|
||||
} else {
|
||||
char BIGSTK pth[HTS_URLMAXSIZE * 2], n83[HTS_URLMAXSIZE * 2];
|
||||
|
||||
pth[0] = n83[0] = '\0';
|
||||
//
|
||||
strncatbuff(pth, fil, (int) (nom_pos - fil) - 1);
|
||||
long_to_83(opt->savename_83, n83, pth);
|
||||
strcpybuff(b, n83);
|
||||
htsbuff_cat(&sb, n83);
|
||||
}
|
||||
}
|
||||
b += strlen(b); // pointer à la fin
|
||||
break;
|
||||
case 'h': // host (IDNA decoded if suitable)
|
||||
// IDNA / RFC 3492 (Punycode) handling for HTTP(s)
|
||||
@@ -957,62 +935,50 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
DECLARE_ADR(final_adr);
|
||||
|
||||
/* Copy address */
|
||||
*b = '\0';
|
||||
if (!short_ver)
|
||||
strcpybuff(b, final_adr);
|
||||
htsbuff_cat(&sb, final_adr);
|
||||
else
|
||||
strcpybuff(b, final_adr);
|
||||
htsbuff_cat(&sb, final_adr);
|
||||
|
||||
/* release */
|
||||
RELEASE_ADR();
|
||||
}
|
||||
b += strlen(b); // pointer à la fin
|
||||
break;
|
||||
case 'H': // host, raw (old mode)
|
||||
*b = '\0';
|
||||
case 'H': // host, raw (old mode)
|
||||
if (protocol == PROTOCOL_FILE) {
|
||||
if (!short_ver) // Noms longs
|
||||
strcpybuff(b, "localhost");
|
||||
if (!short_ver)
|
||||
htsbuff_cat(&sb, "localhost");
|
||||
else
|
||||
strcpybuff(b, "local");
|
||||
htsbuff_cat(&sb, "local");
|
||||
} else {
|
||||
if (!short_ver) // Noms longs
|
||||
strcpybuff(b, print_adr);
|
||||
if (!short_ver)
|
||||
htsbuff_cat(&sb, print_adr);
|
||||
else
|
||||
strncatbuff(b, print_adr, 8);
|
||||
htsbuff_catn(&sb, print_adr, 8);
|
||||
}
|
||||
b += strlen(b); // pointer à la fin
|
||||
break;
|
||||
case 'M': /* host/address?query MD5 (128-bits) */
|
||||
*b = '\0';
|
||||
{
|
||||
char digest[32 + 2];
|
||||
char BIGSTK buff[HTS_URLMAXSIZE * 2];
|
||||
case 'M': /* host/address?query MD5 (128-bits) */
|
||||
{
|
||||
char digest[32 + 2];
|
||||
char BIGSTK buff[HTS_URLMAXSIZE * 2];
|
||||
|
||||
digest[0] = buff[0] = '\0';
|
||||
strcpybuff(buff, adr);
|
||||
strcatbuff(buff, fil_complete);
|
||||
domd5mem(buff, strlen(buff), digest, 1);
|
||||
strcpybuff(b, digest);
|
||||
}
|
||||
b += strlen(b); // pointer à la fin
|
||||
break;
|
||||
digest[0] = buff[0] = '\0';
|
||||
strcpybuff(buff, adr);
|
||||
strcatbuff(buff, fil_complete);
|
||||
domd5mem(buff, strlen(buff), digest, 1);
|
||||
htsbuff_cat(&sb, digest);
|
||||
} break;
|
||||
case 'Q':
|
||||
case 'q': /* query MD5 (128-bits/16-bits)
|
||||
GENERATED ONLY IF query string exists! */
|
||||
{
|
||||
char md5[32 + 2];
|
||||
case 'q': /* query MD5 (128-bits/16-bits)
|
||||
GENERATED ONLY IF query string exists! */
|
||||
{
|
||||
char md5[32 + 2];
|
||||
|
||||
*b = '\0';
|
||||
strncatbuff(b, url_md5(md5, fil_complete), (tok == 'Q') ? 32 : 4);
|
||||
b += strlen(b); // pointer à la fin
|
||||
}
|
||||
break;
|
||||
htsbuff_catn(&sb, url_md5(md5, fil_complete), (tok == 'Q') ? 32 : 4);
|
||||
} break;
|
||||
case 'r':
|
||||
case 'R': // protocol
|
||||
*b = '\0';
|
||||
strcatbuff(b, protocol_str[protocol]);
|
||||
b += strlen(b); // pointer à la fin
|
||||
htsbuff_cat(&sb, protocol_str[protocol]);
|
||||
break;
|
||||
|
||||
/* Patch by Juan Fco Rodriguez to get the full query string */
|
||||
@@ -1021,19 +987,17 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
char *d = strchr(fil_complete, '?');
|
||||
|
||||
if (d != NULL) {
|
||||
strcatbuff(b, d);
|
||||
b += strlen(b);
|
||||
htsbuff_cat(&sb, d);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
}
|
||||
} else
|
||||
*b++ = *a++;
|
||||
htsbuff_catc(&sb, *a++);
|
||||
}
|
||||
*b++ = '\0';
|
||||
//
|
||||
// Types prédéfinis
|
||||
// predefined types
|
||||
//
|
||||
|
||||
}
|
||||
|
||||
@@ -287,6 +287,88 @@ static HTS_INLINE HTS_UNUSED char* strcpy_safe_(char *const dest, const size_t s
|
||||
return strncat_safe_(dest, sizeof_dest, source, sizeof_source, (size_t) -1, exp, file, line);
|
||||
}
|
||||
|
||||
/**
|
||||
* htsbuff: a non-owning bounded string builder over a fixed buffer.
|
||||
*
|
||||
* Companion to the strcpybuff()/strcatbuff() macros for the common case of a
|
||||
* cursor walking a buffer of known capacity (building a name into a fixed
|
||||
* array, assembling a status line, etc.). It tracks the write position, bounds
|
||||
* every write against the real capacity, and aborts on overflow (same contract
|
||||
* as the *_safe_ helpers), so the error-prone manual "p += strlen(p)" dance
|
||||
* goes away.
|
||||
*
|
||||
* Build one from an in-scope array with htsbuff_array() (capacity via sizeof,
|
||||
* so pass an array, not a pointer), or from a pointer of known capacity with
|
||||
* htsbuff_ptr(). The buffer is kept NUL-terminated; htsbuff_str() returns it.
|
||||
*/
|
||||
typedef struct {
|
||||
char *buf; /* backing buffer (kept NUL-terminated) */
|
||||
size_t cap; /* total capacity of buf, including the NUL */
|
||||
size_t len; /* current length, excluding the NUL */
|
||||
} htsbuff;
|
||||
|
||||
static HTS_INLINE HTS_UNUSED htsbuff htsbuff_ptr_(char *buf, size_t cap) {
|
||||
htsbuff b;
|
||||
b.buf = buf;
|
||||
b.cap = cap;
|
||||
b.len = 0;
|
||||
assertf(cap != 0);
|
||||
buf[0] = '\0';
|
||||
return b;
|
||||
}
|
||||
|
||||
/**
 * Builder over the in-scope array ARR (capacity = sizeof(ARR)).
 * On GCC/Clang this rejects a non-array (e.g. a char* pointer), whose sizeof
 * would be the pointer size and silently wrong; use htsbuff_ptr() for pointers.
 * On other compilers there is no such guard, so pass only true arrays there.
 */
#if (defined(__GNUC__) && !defined(__cplusplus))
/* 0 for an array, a -1 array-size compile error for a pointer.
   Spelled __typeof__ rather than typeof: the plain keyword is not recognised
   in strict ISO modes (-std=c99 / -std=c11), where __GNUC__ is still defined,
   so the alternate keyword keeps this header usable from such code. */
#define htsbuff_must_be_array_(A)                                              \
  (sizeof(char[1 - 2 * !!__builtin_types_compatible_p(__typeof__(A),           \
                                                      __typeof__(&(A)[0]))]) - \
   1)
#define htsbuff_array(ARR)                                                     \
  htsbuff_ptr_((ARR), sizeof(ARR) + htsbuff_must_be_array_(ARR))
#else
#define htsbuff_array(ARR) htsbuff_ptr_((ARR), sizeof(ARR))
#endif

/** Builder over pointer P of known capacity N (N includes the NUL). */
#define htsbuff_ptr(P, N) htsbuff_ptr_((P), (N))
|
||||
|
||||
/** Append at most n characters of s (stopping at its NUL). Aborts on overflow. */
|
||||
static HTS_INLINE HTS_UNUSED void htsbuff_catn(htsbuff *b, const char *s, size_t n) {
|
||||
const size_t add = strnlen(s, n);
|
||||
/* Overflow-safe: keep the (potentially huge) 'add' alone on one side. The
|
||||
maintained invariant len < cap makes 'cap - len' >= 1 (no underflow), so
|
||||
'add < cap - len' cannot wrap the way 'len + add < cap' could. */
|
||||
assertf__(add < b->cap - b->len, "htsbuff append overflow", __FILE__, __LINE__);
|
||||
memcpy(b->buf + b->len, s, add);
|
||||
b->len += add;
|
||||
b->buf[b->len] = '\0';
|
||||
}
|
||||
|
||||
/** Append s. Aborts on overflow. */
|
||||
static HTS_INLINE HTS_UNUSED void htsbuff_cat(htsbuff *b, const char *s) {
|
||||
htsbuff_catn(b, s, (size_t) -1);
|
||||
}
|
||||
|
||||
/** Append a single character (including '\0' as data). Aborts on overflow. */
|
||||
static HTS_INLINE HTS_UNUSED void htsbuff_catc(htsbuff *b, char c) {
|
||||
assertf__(1 < b->cap - b->len, "htsbuff append overflow", __FILE__, __LINE__);
|
||||
b->buf[b->len++] = c;
|
||||
b->buf[b->len] = '\0';
|
||||
}
|
||||
|
||||
/** Reset content to s. Aborts on overflow. */
|
||||
static HTS_INLINE HTS_UNUSED void htsbuff_cpy(htsbuff *b, const char *s) {
|
||||
b->len = 0;
|
||||
htsbuff_catn(b, s, (size_t) -1);
|
||||
}
|
||||
|
||||
/** Current NUL-terminated content. */
|
||||
static HTS_INLINE HTS_UNUSED const char *htsbuff_str(const htsbuff *b) {
|
||||
return b->buf;
|
||||
}
|
||||
|
||||
#define malloct(A) malloc(A)
|
||||
#define calloct(A,B) calloc((A), (B))
|
||||
#define freet(A) do { if ((A) != NULL) { free(A); (A) = NULL; } } while(0)
|
||||
|
||||
@@ -193,6 +193,7 @@ HTSEXT_API int structcheck(const char *path);
|
||||
HTSEXT_API int structcheck_utf8(const char *path);
|
||||
HTSEXT_API int dir_exists(const char *path);
|
||||
HTSEXT_API void infostatuscode(char *msg, int statuscode);
|
||||
HTSEXT_API const char *infostatuscode_const(int statuscode);
|
||||
HTSEXT_API TStamp mtime_local(void);
|
||||
HTSEXT_API void qsec2str(char *st, TStamp t);
|
||||
HTSEXT_API char *int2char(strc_int2bytes2 * strc, int n);
|
||||
|
||||
@@ -99,17 +99,25 @@ grep -Eq 'srcset="j\.gif 2x"' "$saved" ||
|
||||
! grep -Eq 'srcset="[^"]*file://' "$saved" ||
|
||||
! echo "FAIL: a file:// URL survived inside a rewritten srcset attribute" || exit 1
|
||||
|
||||
# xlink:href (#298) and inline background-image (#237): detected and rewritten
|
||||
# to local; no-detect attributes (title, alt, ...) left untouched. Asserted by
|
||||
# rewrite (deterministic), not download. data-* (#201/#203) is omitted: its
|
||||
# detection is currently nondeterministic and can't be locked yet.
|
||||
# xlink:href (#298) and CSS background-image (#237): detected and rewritten to
|
||||
# local. background-image is covered in both an external <style> block and an
|
||||
# inline style attribute, with the URL unquoted, double-quoted and single-quoted
|
||||
# (the quote style is preserved on rewrite). No-detect attributes (title, alt,
|
||||
# ...) are left untouched. Asserted by rewrite (deterministic), not download.
|
||||
# data-* (#201/#203) is omitted: its detection is currently nondeterministic and
|
||||
# can't be locked yet.
|
||||
site2="$tmp/attrs"
|
||||
mkdir -p "$site2"
|
||||
for f in xl ibg tt; do gif "$site2/$f.gif"; done
|
||||
for f in xl ibg ibgs cex cexd cexs tt; do gif "$site2/$f.gif"; done
|
||||
cat >"$site2/index.html" <<EOF
|
||||
<html><body>
|
||||
<html><head><style>
|
||||
.a { background-image: url(file://$site2/cex.gif); }
|
||||
.b { background-image: url("file://$site2/cexd.gif"); }
|
||||
.c { background-image: url('file://$site2/cexs.gif'); }
|
||||
</style></head><body>
|
||||
<a xlink:href="file://$site2/xl.gif">xlink:href (#298)</a>
|
||||
<div style="background-image:url(file://$site2/ibg.gif)"></div>
|
||||
<div style="background-image:url('file://$site2/ibgs.gif')"></div>
|
||||
<span title="file://$site2/tt.gif">excluded attribute</span>
|
||||
</body></html>
|
||||
EOF
|
||||
@@ -121,8 +129,24 @@ test -n "$saved2" || ! echo "FAIL: saved attrs page not found" || exit 1
|
||||
# detected attributes: the absolute URL is rewritten to a local link
|
||||
grep -Eq 'xlink:href="xl\.gif"' "$saved2" ||
|
||||
! echo "FAIL #298: xlink:href not detected/rewritten" || exit 1
|
||||
|
||||
# #237 external <style> block, each quoting form, quote style preserved
|
||||
grep -Eq 'url\(cex\.gif\)' "$saved2" ||
|
||||
! echo "FAIL #237: unquoted background-image in <style> not rewritten" || exit 1
|
||||
grep -Eq 'url\("cexd\.gif"\)' "$saved2" ||
|
||||
! echo "FAIL #237: double-quoted background-image in <style> not rewritten" || exit 1
|
||||
grep -Eq "url\('cexs\.gif'\)" "$saved2" ||
|
||||
! echo "FAIL #237: single-quoted background-image in <style> not rewritten" || exit 1
|
||||
|
||||
# #237 inline style attribute, unquoted and single-quoted url()
|
||||
grep -Eq 'style="background-image:url\(ibg\.gif\)"' "$saved2" ||
|
||||
! echo "FAIL #237: inline background-image url() not detected/rewritten" || exit 1
|
||||
! echo "FAIL #237: inline unquoted background-image not rewritten" || exit 1
|
||||
grep -Eq "style=\"background-image:url\('ibgs\.gif'\)\"" "$saved2" ||
|
||||
! echo "FAIL #237: inline single-quoted background-image not rewritten" || exit 1
|
||||
|
||||
# no file:// URL survived inside any rewritten background-image
|
||||
! grep -Eq 'background-image:[^;"]*file://' "$saved2" ||
|
||||
! echo "FAIL #237: a file:// URL survived inside a rewritten background-image" || exit 1
|
||||
|
||||
# excluded attribute: title is on the no-detect list, so its value is left as-is
|
||||
grep -q 'title="file://' "$saved2" ||
|
||||
|
||||
@@ -21,3 +21,14 @@ case "$err" in
|
||||
*"overflow while copying"*) ;;
|
||||
*) echo "expected htssafe overflow abort, got: $err" >&2; exit 1 ;;
|
||||
esac
|
||||
|
||||
# The htsbuff builder must give the same guarantee. The source string is
# exactly the buffer capacity (4 bytes into a 4-byte buffer), which pins the
# boundary: with a '<=' off-by-one in the capacity check the write would be
# accepted and "NOT aborted" would be printed. Only the specific htsbuff abort
# message counts as a pass; any other assertion output is a failure.
err=$(httrack -#8 overflow-buff "abcd" 2>&1)
case "$err" in
*"strsafe: NOT aborted"*)
  # The write went through: the overflow check did not fire.
  echo "htsbuff over-capacity write was NOT caught" >&2
  exit 1
  ;;
*"htsbuff append overflow"*)
  # Expected abort message: pass.
  ;;
*)
  # Something else happened (different abort, crash, unexpected output).
  echo "expected htsbuff overflow abort, got: $err" >&2
  exit 1
  ;;
esac
|
||||
|
||||
Reference in New Issue
Block a user