mirror of
https://github.com/xroche/httrack.git
synced 2026-06-25 03:27:22 +03:00
#409 distinguished "the server declared text/html" from "no Content-Type, defaulted to text/html" with a new htsblk.contenttype_given flag, so a binary-looking URL that really serves HTML is saved .html while a typeless response keeps its URL extension. That worked on a fresh crawl but had two costs: the flag was never persisted, so on --update the cache read it as unset and the names reverted (report.html became report.pdf again, and the two passes disagreed); and it was an installed-struct ABI break (soname 4, libhttrack4). Replace the flag with a sentinel: when no Content-Type is received, store "unknown/unknown" as the type instead of text/html. The sentinel is treated as html for every type test (added to is_html_mime_type), so parsing, storage and filtering of a typeless response are unchanged; only the naming code (wire_patches_ext) reads it as "no declared type" and keeps the URL extension. Because the type string rides the cache, an update reads the same sentinel and names consistently -- the revert is fixed at the source. The sentinel never reaches a consumer as a real type: a single helper, hts_effective_mime(), maps it back to text/html wherever a stored type is derived (give_mimext) or emitted/persisted -- the httrack stdout serve, the ProxyTrack live serve, and the ProxyTrack .arc export (both the replayed response header and the index record). The .arc export was caught by an adversarial spill audit; without the map a typeless page archived via proxytrack would carry Content-Type: unknown/unknown. Since the sentinel makes contenttype_given unnecessary, #409's ABI break is undone: the field is removed, soname returns to 3, and the Debian package reverts libhttrack4 -> libhttrack3. soname 4 was never released (Debian NEW carries libhttrack3), so this re-aligns master with the archive rather than flip-flopping anything downstream. 
Tests: 18_local-update re-mirrors and asserts the names survive the update pass; 15_local-types gains a notype.html negative control; 17_local-empty-ct stays green. Full make check: 27 pass, 0 fail. One accepted behavior change: a mime filter matching exactly text/html no longer matches a typeless response (its type is the sentinel, html-ish but not literally text/html); the response is still parsed and crawled as html. Signed-off-by: Xavier Roche <roche@httrack.com> Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
145 lines
4.2 KiB
Makefile
Executable File
145 lines
4.2 KiB
Makefile
Executable File
#!/usr/bin/make -f
|
|
|
|
# Uncomment this to turn on verbose mode.
|
|
#export DH_VERBOSE=1
|
|
|
|
# Define DEB_HOST_ARCH, DEB_HOST_MULTIARCH and friends even when this file is
# run directly instead of through dpkg-buildpackage.
include /usr/share/dpkg/architecture.mk

# *** Patch for s390, mips ..
# It seems that htscore.c can not compile on several archs, due to compiler
# capacity limits. These lines shall be removed when gcc is upgraded.
# See discussions on 'Assembler messages: Branch out of range'
# Addition (04/2004): gcc-3.3 on arm miscompiles htscoremain.c with -O3 (..)
#
# $(filter) compares whole words and yields nothing for an empty architecture,
# so the workaround is only enabled on the architectures listed here.
# NOTE(review): CFLAGS is not exported from this file -- confirm that this
# define actually reaches the compiler.
ifneq (,$(filter $(DEB_HOST_ARCH),s390 mips mipsel m68k arm))
CFLAGS += -DNOSTRDEBUG
endif
|
|
|
|
# DEB_BUILD_OPTIONS flags
ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
# Unoptimised build requested: add extra debug information.
CFLAGS += -g3
else
# dpkg-buildflags reads DEB_CFLAGS_MAINT_APPEND from the environment, so the
# variable has to be exported to have any effect. It is left unset for noopt
# builds so that the appended -O3 does not cancel the requested -O0.
export DEB_CFLAGS_MAINT_APPEND = -O3
endif

# Strip installed programs unless nostrip was requested.
ifeq (,$(findstring nostrip,$(DEB_BUILD_OPTIONS)))
INSTALL_PROGRAM += -s
endif
|
|
|
|
# Run the test suite with VERBOSE=1 set in its environment,
# as suggested by Simon McVittie
# (https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=744380)
# NOTE(review): override_dh_* targets are a dh(1) sequencer convention and no
# rule visible in this file invokes this one -- confirm it is still reachable.
override_dh_auto_test:
	VERBOSE=1 dh_auto_test
|
|
|
|
# Convenience name for the configuration step, tracked by a stamp file.
configure: configure-stamp

# Regenerate the autotools files and configure the source tree, then record
# completion in the stamp so the step is not repeated.
configure-stamp:
	dh_testdir

	dh_autoreconf
# note: dpkg-buildflags to be removed when compat=9
	dh_auto_configure -- --enable-online-unit-tests=auto $(shell dpkg-buildflags --export=configure)

	touch $@
|
|
|
|
# Build everything: architecture-dependent and architecture-independent parts.
build: build-arch build-indep

# Nothing has to be built for the architecture-independent part.
build-indep:

# The architecture-dependent build is tracked by a stamp file.
build-arch: build-stamp

# Compile the configured tree and run its test suite, then record completion.
build-stamp: configure-stamp
	dh_testdir
	dh_auto_build
# VERBOSE=1 makes the test suite output visible in the build log
# (https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=744380)
	VERBOSE=1 dh_auto_test
	touch $@
|
|
|
|
# Return the tree to its pristine state: drop the stamp files first so the
# next build reconfigures, then let debhelper undo the build, the autoreconf
# run and its own leftovers.
clean:
	dh_testdir
	dh_testroot
	rm -f build-stamp configure-stamp
	dh_auto_clean
	dh_autoreconf_clean
	dh_clean
|
|
|
|
# Staging tree of the main package and the directories rearranged below.
# Recursively expanded on purpose, so DEB_HOST_MULTIARCH is read at use time.
pkg_root = $(CURDIR)/debian/httrack
pkg_lib = $(pkg_root)/usr/lib/$(DEB_HOST_MULTIARCH)
pkg_share = $(pkg_root)/usr/share/httrack
pkg_doc = $(pkg_root)/usr/share/doc/httrack

# Install the built tree into debian/httrack, rearrange documentation and
# libraries, then split the result into the individual binary packages.
install: build
	dh_testdir
	dh_testroot
	dh_prep
	dh_installdirs
	dh_auto_install --destdir=$(pkg_root)

# place html files (doc, webhttrack files)

# remove symlink created by html/Makefile's install-data-hook
	rm $(pkg_share)/html
# and put the data back in its place
	mv $(pkg_doc) $(pkg_share)/html
# create symlink on the other side
	mkdir $(pkg_doc)
	ln -s ../../httrack/html $(pkg_doc)/html
# and put the outer file (httrack-doc.html) back in the doc directory
	mv $(pkg_share)/html/httrack-doc.html $(pkg_doc)/httrack-doc.html

# remove *.la (https://wiki.debian.org/ReleaseGoals/LAFileRemoval)
	rm -f $(pkg_lib)/*.la
	rm -f $(pkg_lib)/httrack/*.la
# remove *.a unless we do not have *.so files
# see BUG #744594
	if ls $(pkg_lib)/*.so >/dev/null 2>&1 ; then \
		rm -f $(pkg_lib)/*.a ; \
	fi
	if ls $(pkg_lib)/httrack/*.so >/dev/null 2>&1 ; then \
		rm -f $(pkg_lib)/httrack/*.a ; \
	fi
# Move the httrack/lib* files into httrack/libtest. They are parked in
# usr/share/httrack/libtest first: the lib* glob would also match the libtest
# directory itself if it already existed next to them.
	mv $(pkg_lib)/httrack/lib* $(pkg_share)/libtest/
	mkdir -p $(pkg_lib)/httrack/libtest
	mv $(pkg_share)/libtest/lib* $(pkg_lib)/httrack/libtest
	ln -s /usr/share/httrack/libtest/readme.txt \
		$(pkg_lib)/httrack/libtest/readme.txt

	dh_lintian

	dh_movefiles --sourcedir=debian/httrack

# remove empty dirs; NUL-separated names keep unusual directory names intact
# and -r skips rmdir when there is nothing to process
	find debian -type d -print0 | xargs -0 -r rmdir -p --ignore-fail-on-non-empty
|
|
|
|
# Build architecture-independent files here.
# Every debhelper command is restricted to the arch-independent packages (-i);
# history.txt is installed as the upstream changelog.
binary-indep: build install
	dh_testdir -i
	dh_testroot -i
	dh_installdocs -i
	dh_installchangelogs -i history.txt
	dh_link -i
	dh_compress -i
	dh_fixperms -i
	dh_installdeb -i
	dh_gencontrol -i
	dh_md5sums -i
	dh_builddeb -i
|
|
|
|
# Build architecture-dependent files here.
# Every debhelper command is restricted to the arch-dependent packages (-a);
# history.txt is installed as the upstream changelog.
binary-arch: build install
	dh_testdir -a
	dh_testroot -a
	dh_installdocs -a
	dh_installmenu -a
	dh_installchangelogs -a history.txt
	dh_link -a
	dh_strip -a
	dh_compress -a
	dh_fixperms -a
# the libtest directory is excluded from shlibs generation
	dh_makeshlibs -a -X/usr/lib/$(DEB_HOST_MULTIARCH)/httrack/libtest --version-info
	dh_installdeb -a
# we depend on the current version (ABI may change)
	dh_shlibdeps -a -ldebian/libhttrack3/usr/lib/$(DEB_HOST_MULTIARCH)
	dh_gencontrol -a
	dh_md5sums -a
	dh_builddeb -a
|
|
|
|
# Build every binary package.
binary: binary-indep binary-arch

# None of these names is a file produced by its rule; declaring them phony
# keeps a stray file of the same name from making the target look up to date.
.PHONY: build build-arch build-indep clean binary-indep binary-arch binary install configure
|