mirror of
https://github.com/xroche/httrack.git
synced 2026-06-24 19:17:31 +03:00
Compare commits
3 Commits
dns-coucal
...
fix-delaye
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6da794fdb6 | ||
|
|
594cf0da39 | ||
|
|
3845cd1fb3 |
32
.github/workflows/ci.yml
vendored
32
.github/workflows/ci.yml
vendored
@@ -232,30 +232,42 @@ jobs:
|
||||
deb:
|
||||
name: deb package (lintian)
|
||||
runs-on: ubuntu-24.04
|
||||
# Build and gate inside Debian sid, the upload target. A Debian dpkg-deb
|
||||
# produces archive-legal xz members (an Ubuntu host defaults to zstd, which
|
||||
# the archive's lintian rejects), and sid's lintian carries the same
|
||||
# data-driven checks (embedded-lib fingerprints and the like) the buildds and
|
||||
# UDD apply -- so issues surface here instead of after upload.
|
||||
container: debian:sid
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Install packaging toolchain
|
||||
run: |
|
||||
set -euo pipefail
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
apt-get update
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates git \
|
||||
build-essential autoconf automake libtool autoconf-archive \
|
||||
zlib1g-dev libssl-dev \
|
||||
debhelper devscripts lintian fakeroot
|
||||
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
# --unsigned: CI has no GPG key (also skips the release sig/checksums).
|
||||
# debuild builds every package, then lintian gates on errors.
|
||||
# mkdeb builds every package then runs the lintian gate (--fail-on=error,
|
||||
# warning); debuild runs the packaged test pass.
|
||||
#
|
||||
# DEB_BUILD_OPTIONS trims work CI does not need (release builds via
|
||||
# mkdeb.sh are untouched): noautodbgsym drops the -dbgsym packages whose
|
||||
# LTO payloads are slow to compress and that CI never ships; parallel uses
|
||||
# every core. We let debuild run its test pass -- the only one now that
|
||||
# mkdeb no longer runs its own -- so CI exercises the packaged tests.
|
||||
- name: Build Debian packages
|
||||
# every core.
|
||||
- name: Build and lint Debian packages
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# The workspace volume is owned by the host runner uid, but the
|
||||
# container runs as root, so mkdeb's git calls (superproject and the
|
||||
# coucal submodule) trip "dubious ownership"; mark them all safe.
|
||||
git config --global --add safe.directory "*"
|
||||
export DEB_BUILD_OPTIONS="noautodbgsym parallel=$(nproc)"
|
||||
bash tools/mkdeb.sh --unsigned --no-release-artifacts
|
||||
|
||||
|
||||
5
debian/libhttrack3.lintian-overrides
vendored
5
debian/libhttrack3.lintian-overrides
vendored
@@ -1,3 +1,8 @@
|
||||
# The shared libraries ship without a versioned symbols control file (ABI is
|
||||
# tracked via the SONAME plus a >= upstream-version dependency, see debian/rules).
|
||||
libhttrack3: no-symbols-control-file usr/lib/*
|
||||
|
||||
# Bundled, locally patched minizip (src/minizip): it adds a zipFlush() API the
|
||||
# system libminizip lacks (htscache.c flushes the cache .zip so an interrupted
|
||||
# crawl leaves a valid archive), plus Android/old-zlib portability fixes.
|
||||
libhttrack3: embedded-library *libminizip*
|
||||
|
||||
3
debian/proxytrack.lintian-overrides
vendored
Normal file
3
debian/proxytrack.lintian-overrides
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
# Statically linked against httrack's bundled, patched minizip (see src/minizip
|
||||
# and libhttrack3's override): the zipFlush() API is absent from the system one.
|
||||
proxytrack: embedded-library *libminizip*
|
||||
@@ -2468,6 +2468,44 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
htsmain_free();
|
||||
return err;
|
||||
} break;
|
||||
case 'N': { // url_savename name resolution: httrack -#N <fil>
|
||||
// <content-type>
|
||||
if (na + 2 < argc) {
|
||||
lien_adrfilsave afs;
|
||||
cache_back cache;
|
||||
struct_back *sback;
|
||||
hash_struct hash;
|
||||
lien_back headers;
|
||||
|
||||
memset(&afs, 0, sizeof(afs));
|
||||
strcpybuff(afs.af.adr, "www.example.com");
|
||||
strcpybuff(afs.af.fil, argv[na + 1]);
|
||||
|
||||
memset(&cache, 0, sizeof(cache));
|
||||
cache.hashtable = (void *) coucal_new(0);
|
||||
|
||||
sback = back_new(opt, opt->maxsoc * 32 + 1024);
|
||||
hash_init(opt, &hash, opt->urlhack);
|
||||
|
||||
memset(&headers, 0, sizeof(headers));
|
||||
headers.status = 0;
|
||||
headers.r.statuscode = HTTP_OK;
|
||||
strcpybuff(headers.r.contenttype, argv[na + 2]);
|
||||
strcpybuff(headers.url_fil, argv[na + 1]);
|
||||
|
||||
url_savename(&afs, NULL, NULL, NULL, opt, sback, &cache,
|
||||
&hash, 0, 0, &headers);
|
||||
printf("savename: %s\n", afs.save);
|
||||
htsmain_free();
|
||||
return 0;
|
||||
} else {
|
||||
fprintf(
|
||||
stderr,
|
||||
"Option #N requires <fil> <content-type> arguments\n");
|
||||
htsmain_free();
|
||||
return 1;
|
||||
}
|
||||
} break;
|
||||
case 'C': // list cache files : httrack -#C '*spid*.gif' will attempt to find the matching file
|
||||
{
|
||||
int hasFilter = 0;
|
||||
|
||||
@@ -760,9 +760,9 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
strcatbuff(fil, DEFAULT_HTML); // nommer page par défaut (à priori ici html depuis un proxy http)
|
||||
}
|
||||
}
|
||||
// Changer extension?
|
||||
// par exemple, php3 sera sauvé en html, cgi en html ou gif, xbm etc.. selon les cas
|
||||
if (ext_chg && !opt->no_type_change) { // changer ext
|
||||
// Change the extension? e.g. php3 saved as html, cgi as html or gif/xbm
|
||||
// depending on the resolved type.
|
||||
if (ext_chg && !opt->no_type_change) {
|
||||
char *a = fil + strlen(fil) - 1;
|
||||
|
||||
if ((opt->debug > 1) && (opt->log != NULL)) {
|
||||
@@ -774,11 +774,18 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
adr_complete, fil_complete, ext);
|
||||
}
|
||||
if (ext_chg == 1) {
|
||||
// Cut the old extension only when it is the new one or a recognized one;
|
||||
// an unknown trailing ".token" (e.g. /article-1.884291, #115) is part of
|
||||
// the name, not an extension to replace.
|
||||
const char *const old_ext = get_ext(catbuff, sizeof(catbuff), fil);
|
||||
const int known_ext = strfield2(old_ext, ext) || is_knowntype(opt, fil) ||
|
||||
is_dyntype(old_ext) || ishtml_ext(old_ext) != -1;
|
||||
|
||||
while((a > fil) && (*a != '.') && (*a != '/'))
|
||||
a--;
|
||||
if (*a == '.')
|
||||
*a = '\0'; // couper
|
||||
strcatbuff(fil, "."); // recopier point
|
||||
if (*a == '.' && known_ext)
|
||||
*a = '\0'; // cut
|
||||
strcatbuff(fil, "."); // re-add the dot
|
||||
} else {
|
||||
while((a > fil) && (*a != '/'))
|
||||
a--;
|
||||
@@ -786,7 +793,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
a++;
|
||||
*a = '\0';
|
||||
}
|
||||
strcatbuff(fil, ext); // copier ext/nom
|
||||
strcatbuff(fil, ext); // append ext/name
|
||||
}
|
||||
// Rechercher premier / et dernier .
|
||||
{
|
||||
|
||||
35
tests/01_engine-savename.test
Executable file
35
tests/01_engine-savename.test
Executable file
@@ -0,0 +1,35 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Local save-name extension resolution (url_savename via -#N <fil> <content-type>).
|
||||
# Asserts on the basename of "savename: <path>".
|
||||
|
||||
# name <fil> <content-type> <expected-basename>
#
# Runs `httrack -#N <fil> <content-type>`, extracts the path printed on the
# "savename: <path>" line, and fails the whole test (non-zero exit) unless the
# basename of that path equals <expected-basename>.
name() {
  # Declared separately from the assignment: `local out="$(...)"` would return
  # the status of `local` and hide a failing pipeline.
  local out rc

  # Under `set -e` with pipefail, a failing httrack/sed would otherwise abort
  # the script right here without saying which case broke; report it first,
  # then exit with the pipeline's own status.
  out="$(httrack -O /dev/null -#N "$1" "$2" | sed -n 's/^savename: //p')" || {
    rc=$?
    echo "FAIL: '$1' '$2' -> savename lookup failed with status $rc (want '$3')"
    exit "$rc"
  }

  # Compare only the last path component of the resolved save name.
  test "${out##*/}" == "$3" || {
    echo "FAIL: '$1' '$2' -> '$out' (want '$3')"
    exit 1
  }
}
|
||||
|
||||
# #115: an unknown trailing ".token" is part of the name, keep it and append the type.
|
||||
name '/article-1.884291' 'text/html' 'article-1.884291.html'
|
||||
name '/news/story-12345.987654' 'text/html' 'story-12345.987654.html'
|
||||
|
||||
# Recognized extensions still collapse to the resolved type.
|
||||
name '/page.php' 'text/html' 'page.html'
|
||||
name '/page.asp' 'text/html' 'page.html'
|
||||
name '/foo' 'text/html' 'foo.html'
|
||||
|
||||
# Soft-404 (#267/#408): a binary URL served as HTML is named .html.
|
||||
name '/x.pdf' 'text/html' 'x.html'
|
||||
name '/x.gif' 'text/html' 'x.html'
|
||||
|
||||
# Type agrees with the extension: keep it, no churn, no double extension.
|
||||
name '/x.pdf' 'application/pdf' 'x.pdf'
|
||||
name '/x.jpg' 'image/jpeg' 'x.jpg'
|
||||
name '/x.html' 'text/html' 'x.html'
|
||||
name '/x.js' 'application/x-javascript' 'x.js'
|
||||
name '/types/data.json' 'application/json' 'data.json'
|
||||
@@ -40,6 +40,7 @@ TESTS = \
|
||||
01_engine-parse.test \
|
||||
01_engine-rcfile.test \
|
||||
01_engine-relative.test \
|
||||
01_engine-savename.test \
|
||||
01_engine-simplify.test \
|
||||
01_engine-strsafe.test \
|
||||
02_manpage-regen.test \
|
||||
|
||||
Reference in New Issue
Block a user