tests: run 24_local-resume-overlap under set -e

Follow the golden rule for shell scripts: start with set -e so that a failure in any command other than the last cannot be masked. Guard the backgrounded-crawl kill/wait spots with || true so the expected SIGTERM exit doesn't abort the run.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Xavier Roche <roche@httrack.com>
Harden #198 fix: verify the truncate, assert the test hit the resume path
2026-06-29 21:45:24 +03:00 · 2026-06-26 09:37:15 +02:00 · 2026-06-26 09:26:10 +02:00 · 2026-06-26 09:11:31 +02:00
27 changed files with 58 additions and 831 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -188,51 +188,6 @@ jobs:
        if: failure()
        run: cat tests/test-suite.log 2>/dev/null || true

-  # MemorySanitizer catches reads of uninitialized memory (#143's stack-garbage
-  # size filter) that ASan/UBSan miss. It flags any byte an uninstrumented lib
-  # wrote, so the job stays in our own code: offline self-tests only, no openssl
-  # (--disable-https), no zlib cache tests, static (the runtime is not in .so's).
-  msan:
-    name: msan (MemorySanitizer, clang)
-    runs-on: ubuntu-24.04
-    steps:
-      - uses: actions/checkout@v6
-        with:
-          submodules: recursive
-
-      - name: Install build dependencies
-        run: |
-          set -euo pipefail
-          sudo apt-get update
-          sudo apt-get install -y --no-install-recommends \
-            build-essential clang autoconf automake libtool autoconf-archive \
-            zlib1g-dev
-
-      - name: Configure (MSan, static, no https)
-        run: |
-          set -euo pipefail
-          autoreconf -fi
-          ./configure CC=clang \
-            CFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-sanitize-recover=all -g -O1 -fno-omit-frame-pointer" \
-            LDFLAGS="-fsanitize=memory" \
-            --disable-https --disable-shared --enable-static
-
-      - name: Build
-        run: make -j"$(nproc)"
-
-      - name: Test (offline self-tests under MSan)
-        env:
-          MSAN_OPTIONS: abort_on_error=1:halt_on_error=1
-        run: |
-          set -euo pipefail
-          # Engine self-tests only; the cache trio pulls in uninstrumented zlib.
-          tests="$(cd tests && ls 01_engine-*.test | grep -v -- '-cache' | tr '\n' ' ')"
-          make check TESTS="$tests"
-
-      - name: Print the test log on failure
-        if: failure()
-        run: cat tests/test-suite.log 2>/dev/null || true
-
  # Optional-dependency build: compile and test with HTTPS/OpenSSL disabled --
  # the configuration users on minimal systems build, and one libssl is not even
  # installed here so configure cannot silently re-enable it. The matrix above
--- a/man/httrack.1
+++ b/man/httrack.1
@@ -3,7 +3,7 @@
 .\"
 .\" This file is generated by man/makeman.sh; do not edit by hand.
 .\" SPDX-License-Identifier: GPL-3.0-or-later
-.TH httrack 1 "27 June 2026" "httrack website copier"
+.TH httrack 1 "26 June 2026" "httrack website copier"
 .SH NAME
 httrack \- offline browser : copy websites to a local directory
 .SH SYNOPSIS
@@ -43,7 +43,6 @@ httrack \- offline browser : copy websites to a local directory
 [ \fB\-x, \-\-replace\-external\fR ]
 [ \fB\-%x, \-\-disable\-passwords\fR ]
 [ \fB\-%q, \-\-include\-query\-string\fR ]
-[ \fB\-%g, \-\-strip\-query\fR ]
 [ \fB\-o, \-\-generate\-errors\fR ]
 [ \fB\-X, \-\-purge\-old[=N]\fR ]
 [ \fB\-%p, \-\-preserve\fR ]
@@ -199,8 +198,6 @@ replace external html links by error pages (\-\-replace\-external)
 do not include any password for external password protected websites (%x0 include) (\-\-disable\-passwords)
 .IP \-%q
 *include query string for local files (useless, for information purpose only) (%q0 don't include) (\-\-include\-query\-string)
-.IP \-%g
-strip query keys for dedup ([host/pattern=]key1,key2,...) (\-\-strip\-query <param>)
 .IP \-o
 *generate output html file in case of error (404..) (o0 don't generate) (\-\-generate\-errors)
 .IP \-X
@@ -228,8 +225,6 @@ tolerant requests (accept bogus responses on some servers, but not standard!) (\
 update hacks: various hacks to limit re\-transfers when updating (identical size, bogus response..) (\-\-updatehack)
 .IP \-%u
 url hacks: various hacks to limit duplicate URLs (strip //, www.foo.com==foo.com..) (\-\-urlhack)
-.br
-opt out of one url\-hack part: \-\-keep\-www\-prefix (www.foo.com<>foo.com), \-\-keep\-double\-slashes (//), \-\-keep\-query\-order (?b&a)
 .IP \-%A
 assume that a type (cgi,asp..) is always linked with a mime type (\-%A php3,cgi=text/html;dat,bin=application/x\-zip) (\-\-assume <param>)
 .br
--- a/src/htsalias.c
+++ b/src/htsalias.c
@@ -60,9 +60,6 @@ Please visit our Website: http://www.httrack.com
  param1 : this option must be alone, and needs one distinct parameter (-P <path>)
  param0 : this option must be alone, but the parameter should be put together (+*.gif)
 */
-/* clang-format off: hand-aligned table; clang-format reflows the whole
-   initializer (2->4 space) on any edit, churning every untouched row. */
-/* clang-format off */
 const char *hts_optalias[][4] = {
  /*   {"","","",""}, */
  {"path", "-O", "param1", "output path"},
@@ -110,8 +107,6 @@ const char *hts_optalias[][4] = {
  {"disable-passwords", "-%x", "single", ""}, {"disable-password", "-%x",
                                               "single", ""},
  {"include-query-string", "-%q", "single", ""},
-  {"strip-query", "-%g", "param1",
-   "strip [host/pattern=]key1,key2,... from URLs"},
  {"generate-errors", "-o", "single", ""},
  {"do-not-generate-errors", "-o0", "single", ""},
  {"purge-old", "-X", "param", ""},
@@ -128,9 +123,6 @@ const char *hts_optalias[][4] = {
  {"tolerant", "-%B", "single", ""},
  {"updatehack", "-%s", "single", ""}, {"sizehack", "-%s", "single", ""},
  {"urlhack", "-%u", "single", ""},
-  {"keep-www-prefix", "-%j", "single", ""},
-  {"keep-double-slashes", "-%o", "single", ""},
-  {"keep-query-order", "-%y", "single", ""},
  {"user-agent", "-F", "param1", "user-agent identity"},
  {"referer", "-%R", "param1", "default referer URL"},
  {"from", "-%E", "param1", "from email address"},
@@ -249,7 +241,6 @@ const char *hts_optalias[][4] = {

  {"", "", "", ""}
 };
-/* clang-format on */

 /* 
  Check for alias in command-line 
--- a/src/htsback.c
+++ b/src/htsback.c
@@ -3766,27 +3766,7 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
                    }
 #endif
 /********** **************************** ********** */
-                  }
-                  // MIME type excluded by a -mime: filter: abort, don't fetch
-                  // the body (#58)
-                  else if (HTTP_IS_OK(back[i].r.statuscode) &&
-                           !back[i].testmode &&
-                           strnotempty(back[i].r.contenttype) &&
-                           hts_acceptmime(opt, 0, back[i].url_adr,
-                                          back[i].url_fil,
-                                          back[i].r.contenttype) == 1) {
-                    deletehttp(&back[i].r);
-                    back[i].r.soc = INVALID_SOCKET;
-                    back[i].status = STATUS_READY;
-                    back_set_finished(sback, i);
-                    back[i].r.statuscode = STATUSCODE_EXCLUDED;
-                    strcpybuff(back[i].r.msg, "Excluded by MIME type filter");
-                    hts_log_print(
-                        opt, LOG_NOTICE,
-                        "File excluded by MIME type filter (%s): %s%s",
-                        back[i].r.contenttype, back[i].url_adr,
-                        back[i].url_fil);
-                  } else { // il faut aller le chercher
+                  } else {      // il faut aller le chercher

                    // effacer buffer (requète)
                    if (!noFreebuff) {
@@ -4005,6 +3985,7 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,

                      }
                    }
+
                  }

                  /*} */
--- a/src/htsbasenet.h
+++ b/src/htsbasenet.h
@@ -146,8 +146,7 @@ typedef enum BackStatusCode {
  STATUSCODE_NON_FATAL = -5,
  STATUSCODE_SSL_HANDSHAKE = -6,
  STATUSCODE_TOO_BIG = -7,
-  STATUSCODE_TEST_OK = -10,
-  STATUSCODE_EXCLUDED = -11 /* aborted: MIME excluded by a -mime: filter */
+  STATUSCODE_TEST_OK = -10
 } BackStatusCode;

 /** HTTrack status ('status' member of of 'lien_back') **/
--- a/src/htscore.c
+++ b/src/htscore.c
@@ -736,39 +736,26 @@ int httpmirror(char *url1, httrackp * opt) {
    /* OPTIMIZED for fast load */
    if (StringNotEmpty(opt->filelist)) {
      char *filelist_buff = NULL;
-      size_t filelist_sz = 0;
-      const char *filelist_err = NULL; /* failure reason, NULL on success */
-      const off_t fs = fsize(StringBuff(opt->filelist));
+      const size_t filelist_sz = off_t_to_size_t(fsize(StringBuff(opt->filelist)));

-      if (fs < 0) {
-        /* fsize() hides the cause; redo stat() for a precise errno (#49) */
-        struct stat st;
-        filelist_err = stat(StringBuff(opt->filelist), &st) != 0
-                           ? strerror(errno)
-                           : "not a regular file";
-      } else if ((filelist_sz = off_t_to_size_t(fs)) == (size_t) -1) {
-        filelist_err = "file too large";
-        filelist_sz = 0;
-      } else {
+      if (filelist_sz != (size_t) -1) {
        FILE *fp = fopen(StringBuff(opt->filelist), "rb");

-        if (fp == NULL) {
-          filelist_err = strerror(errno);
-        } else {
+        if (fp) {
          filelist_buff = malloct(filelist_sz + 1);
-          if (filelist_buff == NULL) {
-            filelist_err = "out of memory";
-          } else if (fread(filelist_buff, 1, filelist_sz, fp) != filelist_sz) {
-            freet(filelist_buff);
-            filelist_err = "read error";
-          } else {
-            filelist_buff[filelist_sz] = '\0';
+          if (filelist_buff) {
+            if (fread(filelist_buff, 1, filelist_sz, fp) != filelist_sz) {
+              freet(filelist_buff);
+              filelist_buff = NULL;
+            } else {
+              *(filelist_buff + filelist_sz) = '\0';
+            }
          }
          fclose(fp);
        }
      }

-      if (filelist_buff != NULL) {
+      if (filelist_buff) {
        int filelist_ptr = 0;
        int n = 0;
        char BIGSTK line[HTS_URLMAXSIZE * 2];
@@ -793,8 +780,8 @@ int httpmirror(char *url1, httrackp * opt) {
        // Free buffer
        freet(filelist_buff);
      } else {
-        hts_log_print(opt, LOG_ERROR, "Could not include URL list \"%s\": %s",
-                      StringBuff(opt->filelist), filelist_err);
+        hts_log_print(opt, LOG_ERROR, "Could not include URL list: %s",
+                      StringBuff(opt->filelist));
      }
    }

@@ -3739,9 +3726,6 @@ HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to) {
  if (StringNotEmpty(from->user_agent))
    StringCopyS(to->user_agent, from->user_agent);

-  if (StringNotEmpty(from->strip_query))
-    StringCopyS(to->strip_query, from->strip_query);
-
  if (from->retry > -1)
    to->retry = from->retry;

--- a/src/htscore.h
+++ b/src/htscore.h
@@ -234,12 +234,8 @@ struct hash_struct {
  coucal adrfil;
  /* former address+path -> link index (renamed/moved entries) */
  coucal former_adrfil;
-  /* effective urlhack sub-flags: www.==host / // collapse / query-arg sort */
-  hts_boolean norm_host;
-  hts_boolean norm_slash;
-  hts_boolean norm_query;
-  /* query-strip keys (not owned); set from opt->strip_query at hash_init */
-  const char *strip_query;
+  /* scratch buffers reused across lookups (not reentrant) */
+  int normalized;
  char normfil[HTS_URLMAXSIZE * 2];
  char normfil2[HTS_URLMAXSIZE * 2];
  char catbuff[CATBUFF_SIZE];
@@ -368,22 +364,6 @@ int fspc(httrackp * opt, FILE * fp, const char *type);

 char *next_token(char *p, int flag);

-/* Like fil_normalized(), but first drops query keys in STRIP (comma-separated,
-   "*" = all); STRIP NULL/empty behaves exactly like fil_normalized(). */
-char *fil_normalized_filtered(const char *source, char *dest,
-                              const char *strip);
-
-/* As fil_normalized_filtered(), but DO_SLASH/DO_QUERY gate the // collapse and
-   the query-argument sort independently (the urlhack sub-flags). */
-char *fil_normalized_filtered_ex(const char *source, char *dest,
-                                 const char *strip, int do_slash, int do_query);
-
-/* For URL ADR/FIL, return (in DEST) the comma keylist to strip from the
-   '\n'-separated "[pattern=]keys" RULES (patterns matched on host/path via
-   strjoker, last wins); NULL if none match. Feeds fil_normalized_filtered(). */
-const char *hts_query_strip_keys(const char *rules, const char *adr,
-                                 const char *fil, char *dest, size_t destsize);
-
 /* Read a whole file into a freshly malloc'd, NUL-terminated buffer; the caller
   owns it and must release it with freet(). Return NULL on missing/unreadable
   file (readfile_or substitutes defaultdata instead). The byte content is NOT
--- a/src/htscoremain.c
+++ b/src/htscoremain.c
@@ -1570,30 +1570,6 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
                  com++;
                }
                break;          // url hack
-              case 'j':
-                opt->no_www_dedup =
-                    HTS_TRUE; // --keep-www-prefix: keep www.X != X
-                if (*(com + 1) == '0') {
-                  opt->no_www_dedup = HTS_FALSE;
-                  com++;
-                }
-                break;
-              case 'o':
-                opt->no_slash_dedup =
-                    HTS_TRUE; // --keep-double-slashes: keep //
-                if (*(com + 1) == '0') {
-                  opt->no_slash_dedup = HTS_FALSE;
-                  com++;
-                }
-                break;
-              case 'y':
-                opt->no_query_dedup =
-                    HTS_TRUE; // --keep-query-order: keep ?b&a order
-                if (*(com + 1) == '0') {
-                  opt->no_query_dedup = HTS_FALSE;
-                  com++;
-                }
-                break;
              case 'v':
                opt->verbosedisplay = HTS_VERBOSE_FULL;
                if (isdigit((unsigned char) *(com + 1))) {
@@ -1961,21 +1937,6 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
                }
                break;

-              case 'g': // strip-query: accumulate "[pattern=]keys" entries
-                if ((na + 1 >= argc) || (argv[na + 1][0] == '-')) {
-                  HTS_PANIC_PRINTF("Option strip-query needs a blank space and "
-                                   "[host/pattern=]key1,key2,...");
-                  printf("Example: --strip-query "
-                         "\"www.example.com/*=utm_source,sid\"\n");
-                  htsmain_free();
-                  return -1;
-                } else {
-                  na++;
-                  if (StringNotEmpty(opt->strip_query))
-                    StringCat(opt->strip_query, "\n");
-                  StringCat(opt->strip_query, argv[na]);
-                }
-                break;
              case 't':        /* do not change type (ending) of filenames according to the MIME type */
                opt->no_type_change = 1;
                if (*(com+1)=='0') { opt->no_type_change = 0; com++; }
--- a/src/htsfilters.c
+++ b/src/htsfilters.c
@@ -76,8 +76,7 @@ int fa_strjoker(int type, char **filters, int nfil, const char *nom, LLint * siz
    }
    if (size)
      sz = *size;
-    /* size unknown (scan time): no size pointer => size tests stay neutral */
-    if (strjoker(nom, filters[i] + filteroffs, size ? &sz : NULL, size_flag)) {
+    if (strjoker(nom, filters[i] + filteroffs, &sz, size_flag)) {       // reconnu
      if (size)
        if (sz != *size)
          sizelimit = sz;
--- a/src/htshash.c
+++ b/src/htshash.c
@@ -106,10 +106,10 @@ static coucal_hashkeys key_adrfil_hashes_generic(void *arg,
  const lien_url*const lien = (const lien_url*) value;
  const char *const adr = !former ? lien->adr : lien->former_adr;
  const char *const fil = !former ? lien->fil : lien->former_fil;
-  const char *const adr_norm =
-      adr != NULL ? (hash->norm_host ? jump_normalized_const(adr)
-                                     : jump_identification_const(adr))
-                  : NULL;
+  const char *const adr_norm = adr != NULL ? 
+    ( hash->normalized  ? jump_normalized_const(adr)
+                        : jump_identification_const(adr) )
+    : NULL;

  // copy address
  assertf(adr_norm != NULL);
@@ -117,18 +117,10 @@ static coucal_hashkeys key_adrfil_hashes_generic(void *arg,

  // copy link
  assertf(fil != NULL);
-  {
-    /* resolve the per-URL strip keys; strip applies even when urlhack is off */
-    char BIGSTK keybuf[HTS_URLMAXSIZE];
-    const char *const keys = hts_query_strip_keys(hash->strip_query, adr, fil,
-                                                  keybuf, sizeof(keybuf));
-
-    if (hash->norm_slash || hash->norm_query || keys != NULL) {
-      fil_normalized_filtered_ex(fil, &hash->normfil[strlen(hash->normfil)],
-                                 keys, hash->norm_slash, hash->norm_query);
-    } else {
-      strcpy(&hash->normfil[strlen(hash->normfil)], fil);
-    }
+  if (hash->normalized) {
+    fil_normalized(fil, &hash->normfil[strlen(hash->normfil)]);
+  } else {
+    strcpy(&hash->normfil[strlen(hash->normfil)], fil);
  }

  // hash
@@ -140,7 +132,8 @@ static int key_adrfil_equals_generic(void *arg,
                                     coucal_key_const a_,
                                     coucal_key_const b_, 
                                     const int former) {
-  hash_struct *const hash = (hash_struct *) arg;
+  hash_struct *const hash = (hash_struct*) arg;
+  const int normalized = hash->normalized;
  const lien_url*const a = (const lien_url*) a_;
  const lien_url*const b = (const lien_url*) b_;
  const char *const a_adr = !former ? a->adr : a->former_adr;
@@ -157,10 +150,10 @@ static int key_adrfil_equals_generic(void *arg,
  assertf(b_fil != NULL);

  // skip scheme and authentication to the domain (possibly without www.)
-  ja = hash->norm_host ? jump_normalized_const(a_adr)
-                       : jump_identification_const(a_adr);
-  jb = hash->norm_host ? jump_normalized_const(b_adr)
-                       : jump_identification_const(b_adr);
+  ja = normalized
+    ? jump_normalized_const(a_adr) : jump_identification_const(a_adr);
+  jb = normalized
+    ? jump_normalized_const(b_adr) : jump_identification_const(b_adr);
  assertf(ja != NULL);
  assertf(jb != NULL);
  if (strcasecmp(ja, jb) != 0) {
@@ -168,23 +161,12 @@ static int key_adrfil_equals_generic(void *arg,
  }

  // now compare pathes
-  {
-    char BIGSTK ka[HTS_URLMAXSIZE], kb[HTS_URLMAXSIZE];
-    const char *const keysa =
-        hts_query_strip_keys(hash->strip_query, a_adr, a_fil, ka, sizeof(ka));
-    const char *const keysb =
-        hts_query_strip_keys(hash->strip_query, b_adr, b_fil, kb, sizeof(kb));
-
-    if (hash->norm_slash || hash->norm_query || keysa != NULL ||
-        keysb != NULL) {
-      fil_normalized_filtered_ex(a_fil, hash->normfil, keysa, hash->norm_slash,
-                                 hash->norm_query);
-      fil_normalized_filtered_ex(b_fil, hash->normfil2, keysb, hash->norm_slash,
-                                 hash->norm_query);
-      return strcmp(hash->normfil, hash->normfil2) == 0;
-    } else {
-      return strcmp(a_fil, b_fil) == 0;
-    }
+  if (normalized) {
+    fil_normalized(a_fil, hash->normfil);
+    fil_normalized(b_fil, hash->normfil2);
+    return strcmp(hash->normfil, hash->normfil2) == 0;
+  } else {
+    return strcmp(a_fil, b_fil) == 0;
  }
 }

@@ -240,17 +222,11 @@ static int key_former_adrfil_equals(void *arg,
  return key_adrfil_equals_generic(arg, a, b, 1);
 }

-void hash_init(httrackp *opt, hash_struct *hash, hts_boolean normalized) {
+void hash_init(httrackp *opt, hash_struct * hash, int normalized) {
  hash->sav = coucal_new(0);
  hash->adrfil = coucal_new(0);
  hash->former_adrfil = coucal_new(0);
-  /* urlhack is the umbrella; per-feature negatives opt out of each part */
-  hash->norm_host = normalized && !opt->no_www_dedup;
-  hash->norm_slash = normalized && !opt->no_slash_dedup;
-  hash->norm_query = normalized && !opt->no_query_dedup;
-  /* snapshot the query-strip list (not owned; valid for the hash lifetime) */
-  hash->strip_query =
-      StringNotEmpty(opt->strip_query) ? StringBuff(opt->strip_query) : NULL;
+  hash->normalized = normalized;

  hts_set_hash_handler(hash->sav, opt);
  hts_set_hash_handler(hash->adrfil, opt);
@@ -306,26 +282,6 @@ void hash_free(hash_struct *hash) {
  }
 }

-/* Test helper: do the two URLs dedupe to the same key under opt's urlhack
-   flags? Exercises the live hash compare (norm_host/slash/query resolution). */
-hts_boolean hash_url_equals(httrackp *opt, const char *adra, const char *fila,
-                            const char *adrb, const char *filb) {
-  hash_struct hash;
-  lien_url la, lb;
-  hts_boolean eq;
-
-  memset(&la, 0, sizeof(la));
-  memset(&lb, 0, sizeof(lb));
-  la.adr = key_duphandler(NULL, adra);
-  la.fil = key_duphandler(NULL, fila);
-  lb.adr = key_duphandler(NULL, adrb);
-  lb.fil = key_duphandler(NULL, filb);
-  hash_init(opt, &hash, opt->urlhack);
-  eq = key_adrfil_equals(&hash, &la, &lb);
-  hash_free(&hash);
-  return eq;
-}
-
 // retour: position ou -1 si non trouvé
 int hash_read(const hash_struct * hash, const char *nom1, const char *nom2,
              hash_struct_type type) {
--- a/src/htshash.h
+++ b/src/htshash.h
@@ -51,12 +51,8 @@ typedef enum hash_struct_type {
 } hash_struct_type;

 // tables de hachage
-void hash_init(httrackp *opt, hash_struct *hash, hts_boolean normalized);
+void hash_init(httrackp *opt, hash_struct *hash, int normalized);
 void hash_free(hash_struct *hash);
-/* Test helper: HTS_TRUE if the two URLs dedupe together under opt's urlhack
-   flags. */
-hts_boolean hash_url_equals(httrackp *opt, const char *adra, const char *fila,
-                            const char *adrb, const char *filb);
 int hash_read(const hash_struct * hash, const char *nom1, const char *nom2,
              hash_struct_type type);
 void hash_write(hash_struct * hash, size_t lpos);
--- a/src/htshelp.c
+++ b/src/htshelp.c
@@ -563,7 +563,6 @@ void help(const char *app, int more) {
    (" %x  do not include any password for external password protected websites (%x0 include)");
  infomsg
    (" %q *include query string for local files (useless, for information purpose only) (%q0 don't include)");
-  infomsg(" %g  strip query keys for dedup ([host/pattern=]key1,key2,...)");
  infomsg
    ("  o *generate output html file in case of error (404..) (o0 don't generate)");
  infomsg("  X *purge old files after update (X0 keep delete)");
@@ -588,9 +587,6 @@ void help(const char *app, int more) {
    (" %s  update hacks: various hacks to limit re-transfers when updating (identical size, bogus response..)");
  infomsg
    (" %u  url hacks: various hacks to limit duplicate URLs (strip //, www.foo.com==foo.com..)");
-  infomsg("     opt out of one url-hack part: --keep-www-prefix "
-          "(www.foo.com<>foo.com), --keep-double-slashes (//), "
-          "--keep-query-order (?b&a)");
  infomsg
    (" %A  assume that a type (cgi,asp..) is always linked with a mime type (-%A php3,cgi=text/html;dat,bin=application/x-zip)");
  infomsg("     shortcut: '--assume standard' is equivalent to -%A "
--- a/src/htslib.c
+++ b/src/htslib.c
@@ -3610,10 +3610,7 @@ static int sortNormFnc(const void *a_, const void *b_) {
  return strcmp(*a + 1, *b + 1);
 }

-/* Path normalizer core: optionally collapse redundant '//' (DO_SLASH) and/or
-   sort query arguments (DO_QUERY) so equivalent URLs dedupe. */
-static char *fil_normalized_ex(const char *source, char *dest, int do_slash,
-                               int do_query) {
+HTSEXT_API char *fil_normalized(const char *source, char *dest) {
  char lastc = 0;
  int gotquery = 0;
  int ampargs = 0;
@@ -3623,8 +3620,8 @@ static char *fil_normalized_ex(const char *source, char *dest, int do_slash,
  for(i = j = 0; source[i] != '\0'; i++) {
    if (!gotquery && source[i] == '?')
      gotquery = ampargs = 1;
-    if (do_slash && !gotquery && lastc == '/' && source[i] == '/') {
-      // foo//bar -> foo/bar
+    if ((!gotquery && lastc == '/' && source[i] == '/') // foo//bar -> foo/bar
+      ) {
    } else {
      if (gotquery && source[i] == '&') {
        ampargs++;
@@ -3636,7 +3633,7 @@ static char *fil_normalized_ex(const char *source, char *dest, int do_slash,
  dest[j++] = '\0';

  /* Sort arguments (&foo=1&bar=2 == &bar=2&foo=1) */
-  if (do_query && ampargs > 1) {
+  if (ampargs > 1) {
    char **amps = malloct(ampargs * sizeof(char *));
    char *copyBuff = NULL;
    size_t qLen = 0;
@@ -3684,153 +3681,6 @@ static char *fil_normalized_ex(const char *source, char *dest, int do_slash,
  return dest;
 }

-HTSEXT_API char *fil_normalized(const char *source, char *dest) {
-  return fil_normalized_ex(source, dest, 1, 1);
-}
-
-/* Is query key ARG[0..keylen) in the comma-separated STRIP list? "*" = all;
-   case-sensitive, space-trimmed tokens. */
-static int hts_query_key_stripped(const char *arg, size_t keylen,
-                                  const char *strip) {
-  const char *p = strip;
-
-  while (*p != '\0') {
-    const char *start = p;
-    size_t toklen;
-
-    while (*p != '\0' && *p != ',')
-      p++;
-    toklen = (size_t) (p - start);
-    while (toklen > 0 && *start == ' ') {
-      start++;
-      toklen--;
-    }
-    while (toklen > 0 && start[toklen - 1] == ' ')
-      toklen--;
-    if (toklen == 1 && start[0] == '*')
-      return 1;
-    if (toklen == keylen && strncmp(start, arg, keylen) == 0)
-      return 1;
-    if (*p == ',')
-      p++;
-  }
-  return 0;
-}
-
-/* see htscore.h */
-char *fil_normalized_filtered_ex(const char *source, char *dest,
-                                 const char *strip, int do_slash,
-                                 int do_query) {
-  const char *query;
-  char BIGSTK tmp[HTS_URLMAXSIZE * 2];
-  htsbuff cb;
-  int wrote = 0;
-
-  /* No strip list, or no query: plain normalization. */
-  if (strip == NULL || *strip == '\0' ||
-      (query = strchr(source, '?')) == NULL) {
-    return fil_normalized_ex(source, dest, do_slash, do_query);
-  }
-
-  /* Copy the path, re-emit kept query args, let fil_normalized() sort. Walk
-     every field incl. empty/trailing ("a&","?&&") so the result is a fixpoint
-     (the read re-normalizes it; a dropped empty arg would miss dedup). */
-  cb = htsbuff_ptr(tmp, sizeof(tmp));
-  htsbuff_catn(&cb, source, (size_t) (query - source));
-  for (query++;;) {
-    const char *const arg = query;
-    const char *eq = NULL;
-    size_t keylen, arglen;
-
-    while (*query != '\0' && *query != '&') {
-      if (eq == NULL && *query == '=')
-        eq = query;
-      query++;
-    }
-    arglen = (size_t) (query - arg);
-    keylen = eq != NULL ? (size_t) (eq - arg) : arglen;
-    if (!hts_query_key_stripped(arg, keylen, strip)) {
-      htsbuff_catc(&cb, wrote ? '&' : '?');
-      htsbuff_catn(&cb, arg, arglen);
-      wrote = 1;
-    }
-    if (*query == '\0')
-      break;
-    query++;
-  }
-  return fil_normalized_ex(tmp, dest, do_slash, do_query);
-}
-
-/* see htscore.h */
-char *fil_normalized_filtered(const char *source, char *dest,
-                              const char *strip) {
-  return fil_normalized_filtered_ex(source, dest, strip, 1, 1);
-}
-
-/* see htscore.h */
-const char *hts_query_strip_keys(const char *rules, const char *adr,
-                                 const char *fil, char *dest, size_t destsize) {
-  const char *p, *q;
-  const char *result = NULL;
-  char BIGSTK url[HTS_URLMAXSIZE * 2];
-
-  if (rules == NULL || *rules == '\0' || destsize == 0)
-    return NULL;
-
-  /* Match string = normalized host/path, query removed. jump_normalized_const
-     collapses www+scheme/auth so read and write (double-normalized) agree;
-     query excluded keeps the decision on host/path only. */
-  url[0] = '\0';
-  strcatbuff(url, jump_normalized_const(adr));
-  if (fil[0] != '/')
-    strcatbuff(url, "/");
-  q = strchr(fil, '?');
-  if (q != NULL)
-    strncatbuff(url, fil, (int) (q - fil));
-  else
-    strcatbuff(url, fil);
-
-  /* Walk the '\n' entries; last match wins (like the +/- filter eval). Each is
-     "pattern=keys"; no '=' is the bare form, pattern "*". */
-  for (p = rules; *p != '\0';) {
-    const char *const line = p;
-    const char *eol, *eq, *keys;
-    char BIGSTK pat[HTS_URLMAXSIZE * 2];
-
-    while (*p != '\0' && *p != '\n')
-      p++;
-    eol = p;
-    if (*p == '\n')
-      p++;
-    if (eol == line)
-      continue;
-    eq = memchr(line, '=', (size_t) (eol - line));
-    if (eq != NULL) {
-      size_t patlen = (size_t) (eq - line);
-
-      if (patlen >= sizeof(pat))
-        patlen = sizeof(pat) - 1;
-      memcpy(pat, line, patlen);
-      pat[patlen] = '\0';
-      keys = eq + 1;
-    } else {
-      pat[0] = '*';
-      pat[1] = '\0';
-      keys = line;
-    }
-    if (strjoker(url, pat, NULL, NULL) != NULL) {
-      size_t klen = (size_t) (eol - keys);
-
-      if (klen >= destsize)
-        klen = destsize - 1;
-      memcpy(dest, keys, klen);
-      dest[klen] = '\0';
-      result = dest;
-    }
-  }
-  return result;
-}
-
 #define endwith(a) ( (len >= (sizeof(a)-1)) ? ( strncmp(dest, a+len-(sizeof(a)-1), sizeof(a)-1) == 0 ) : 0 );
 HTSEXT_API char *adr_normalized_sized(const char *source, char *dest,
                                      size_t destsize) {
@@ -6040,11 +5890,7 @@ HTSEXT_API httrackp *hts_create_opt(void) {
  opt->verbosedisplay = HTS_VERBOSE_NONE; // no text animation
  opt->sizehack = HTS_FALSE;
  opt->urlhack = HTS_TRUE;
-  opt->no_www_dedup = HTS_FALSE;
-  opt->no_slash_dedup = HTS_FALSE;
-  opt->no_query_dedup = HTS_FALSE;
  StringCopy(opt->footer, HTS_DEFAULT_FOOTER);
-  StringCopy(opt->strip_query, "");
  opt->ftp_proxy = HTS_TRUE;
  opt->convert_utf8 = HTS_TRUE;
  StringCopy(opt->filelist, "");
@@ -6189,7 +6035,6 @@ HTSEXT_API void hts_free_opt(httrackp * opt) {
    StringFree(opt->urllist);
    StringFree(opt->footer);
    StringFree(opt->mod_blacklist);
-    StringFree(opt->strip_query);

    StringFree(opt->path_html);
    StringFree(opt->path_html_utf8);
--- a/src/htsname.c
+++ b/src/htsname.c
@@ -198,13 +198,6 @@ int url_savename(lien_adrfilsave *const afs,
  // copy of fil, used for lookups (see urlhack)
  const char *normadr = adr;
  const char *normfil = fil_complete;
-  /* query keys to strip for this URL (NULL = none); decoupled from urlhack */
-  char BIGSTK stripkeys[HTS_URLMAXSIZE];
-  const char *const strip =
-      StringNotEmpty(opt->strip_query)
-          ? hts_query_strip_keys(StringBuff(opt->strip_query), adr,
-                                 fil_complete, stripkeys, sizeof(stripkeys))
-          : NULL;
  const char *const print_adr = jump_protocol_const(adr);
  const char *start_pos = NULL, *nom_pos = NULL, *dot_pos = NULL;     // Position nom et point

@@ -237,13 +230,9 @@ int url_savename(lien_adrfilsave *const afs,
  // www-42.foo.com -> foo.com
  // foo.com/bar//foobar -> foo.com/bar/foobar
  if (opt->urlhack) {
-    // dedup-lookup key; honor the per-feature negatives like htshash.c so
-    // distinct URLs keep distinct savenames (else keep normadr = adr)
-    if (!opt->no_www_dedup)
-      normadr = adr_normalized_sized(adr, normadr_, sizeof(normadr_));
-    normfil =
-        fil_normalized_filtered_ex(fil_complete, normfil_, strip,
-                                   !opt->no_slash_dedup, !opt->no_query_dedup);
+    // copy of adr (without protocol), used for lookups (see urlhack)
+    normadr = adr_normalized_sized(adr, normadr_, sizeof(normadr_));
+    normfil = fil_normalized(fil_complete, normfil_);
  } else {
    if (link_has_authority(adr_complete)) {     // https or other protocols : in "http/" subfolder
      char *pos = strchr(adr_complete, ':');
@@ -256,11 +245,6 @@ int url_savename(lien_adrfilsave *const afs,
        normadr = normadr_;
      }
    }
-    // strip still applies with urlhack off (host left untouched); no // or
-    // query-sort here, to match the hash key (norm_slash/norm_query are 0 when
-    // urlhack is off) so a URL is looked up under the key it was stored with
-    if (strip != NULL)
-      normfil = fil_normalized_filtered_ex(fil_complete, normfil_, strip, 0, 0);
  }

  // à afficher sans ftp://
--- a/src/htsopt.h
+++ b/src/htsopt.h
@@ -529,12 +529,6 @@ struct httrackp {
  htslibhandles libHandles; /**< loaded external module handles */
  //
  htsoptstate state; /**< embedded live engine state */
-  String strip_query; /**< query keys to drop when deduping URLs (-strip-query);
-                           appended at the tail to keep field offsets stable */
-  hts_boolean
-      no_www_dedup; /**< with urlhack, keep www.host distinct from host */
-  hts_boolean no_slash_dedup; /**< with urlhack, keep redundant // in paths */
-  hts_boolean no_query_dedup; /**< with urlhack, keep query-argument order */
 };

 /* Running statistics for a mirror. */
--- a/src/htsparse.c
+++ b/src/htsparse.c
@@ -3602,28 +3602,16 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
             ident_url_relatif(mov_url, urladr(), urlfil(), moved)) >= 0) {
          int set_prio_to = 0;  // pas de priotité fixéd par wizard

-          // check whether URLHack is harmless or not (per the effective
-          // sub-flags)
-          if (opt->urlhack && (!opt->no_www_dedup || !opt->no_slash_dedup ||
-                               !opt->no_query_dedup)) {
-            const int norm_host = !opt->no_www_dedup;
-            const int norm_slash = !opt->no_slash_dedup;
-            const int norm_query = !opt->no_query_dedup;
+          // check whether URLHack is harmless or not
+          if (opt->urlhack) {
            char BIGSTK n_adr[HTS_URLMAXSIZE * 2], n_fil[HTS_URLMAXSIZE * 2];
            char BIGSTK pn_adr[HTS_URLMAXSIZE * 2], pn_fil[HTS_URLMAXSIZE * 2];

-            strlcpybuff(n_adr,
-                        norm_host ? jump_normalized_const(moved->adr)
-                                  : jump_identification_const(moved->adr),
-                        sizeof(n_adr));
-            strlcpybuff(pn_adr,
-                        norm_host ? jump_normalized_const(urladr())
-                                  : jump_identification_const(urladr()),
-                        sizeof(pn_adr));
-            fil_normalized_filtered_ex(moved->fil, n_fil, NULL, norm_slash,
-                                       norm_query);
-            fil_normalized_filtered_ex(urlfil(), pn_fil, NULL, norm_slash,
-                                       norm_query);
+            n_adr[0] = n_fil[0] = '\0';
+            (void) adr_normalized_sized(moved->adr, n_adr, sizeof(n_adr));
+            (void) fil_normalized(moved->fil, n_fil);
+            (void) adr_normalized_sized(urladr(), pn_adr, sizeof(pn_adr));
+            (void) fil_normalized(urlfil(), pn_fil);
            if (strcasecmp(n_adr, pn_adr) == 0
                && strcasecmp(n_fil, pn_fil) == 0) {
              hts_log_print(opt, LOG_WARNING,
--- a/src/htsselftest.c
+++ b/src/htsselftest.c
@@ -524,32 +524,6 @@ static int st_filter(httrackp *opt, int argc, char **argv) {
  return 0;
 }

-/* Size-aware filter verdict via fa_strjoker: a negative <size> means the size
-   is still unknown (scan time), so a size rule like -*.jpg*[<10] must stay
-   neutral. */
-static int st_filtersize(httrackp *opt, int argc, char **argv) {
-  LLint sz;
-  int size_flag = 0, verdict, known;
-
-  (void) opt;
-  if (argc < 3) {
-    fprintf(stderr, "filtersize: needs <size> <string> <filter> [filter...]\n");
-    return 1;
-  }
-  known = (argv[0][0] != '-'); /* "-1"/"-" => size unknown */
-  sz = -1;
-  if (known)
-    sscanf(argv[0], LLintP, &sz);
-  verdict = fa_strjoker(0, &argv[2], argc - 2, argv[1], known ? &sz : NULL,
-                        known ? &size_flag : NULL, NULL);
-  printf("verdict=%s size_flag=%d\n",
-         verdict > 0   ? "allowed"
-         : verdict < 0 ? "forbidden"
-                       : "unknown",
-         size_flag);
-  return 0;
-}
-
 static int st_simplify(httrackp *opt, int argc, char **argv) {
  (void) opt;
  if (argc < 1) {
@@ -1052,173 +1026,6 @@ static int st_cookies(httrackp *opt, int argc, char **argv) {
  return err;
 }

-/* --strip-query: resolver + fil_normalized_filtered, end to end. */
-static int st_stripquery(httrackp *opt, int argc, char **argv) {
-  char dest[1024], keys[256], ref[1024];
-  const char *k;
-
-  (void) opt;
-  (void) argc;
-  (void) argv;
-
-  /* empty rules == plain fil_normalized */
-  assertf(hts_query_strip_keys(NULL, "h.com", "/p?a=1", keys, sizeof(keys)) ==
-          NULL);
-  assertf(hts_query_strip_keys("", "h.com", "/p?a=1", keys, sizeof(keys)) ==
-          NULL);
-  assertf(strcmp(fil_normalized_filtered("/p?b=2&a=1", dest, NULL),
-                 fil_normalized("/p?b=2&a=1", ref)) == 0);
-
-  /* bare form (*=keys): strip the key everywhere, keep+sort the rest */
-  k = hts_query_strip_keys("sid", "any.com", "/p?b=2&sid=x&a=1", keys,
-                           sizeof(keys));
-  assertf(k != NULL && strcmp(k, "sid") == 0);
-  assertf(strcmp(fil_normalized_filtered("/p?b=2&sid=x&a=1", dest, k),
-                 "/p?a=1&b=2") == 0);
-
-  /* reordered variant + an extra stripped key == the clean URL */
-  assertf(strcmp(fil_normalized_filtered("/p?sid=y&a=1&b=2", dest, "sid"),
-                 fil_normalized("/p?a=1&b=2", ref)) == 0);
-
-  /* host pattern matches only that host, incl. its www-normalized forms */
-  assertf(hts_query_strip_keys("ex.com/*=utm", "other.com", "/p?utm=1", keys,
-                               sizeof(keys)) == NULL);
-  assertf(hts_query_strip_keys("ex.com/*=utm", "ex.com", "/p?utm=1", keys,
-                               sizeof(keys)) != NULL);
-  assertf(hts_query_strip_keys("ex.com/*=utm", "www.ex.com", "/p?utm=1", keys,
-                               sizeof(keys)) != NULL);
-  assertf(hts_query_strip_keys("ex.com/*=utm", "http://www-3.ex.com",
-                               "/p?utm=1", keys, sizeof(keys)) != NULL);
-
-  /* last match wins, wholesale: host rule overrides global, no union */
-  k = hts_query_strip_keys("*=sid\nex.com/*=utm", "ex.com",
-                           "/p?sid=1&utm=2&a=3", keys, sizeof(keys));
-  assertf(k != NULL && strcmp(k, "utm") == 0);
-  assertf(strcmp(fil_normalized_filtered("/p?sid=1&utm=2&a=3", dest, k),
-                 "/p?a=3&sid=1") == 0);
-  k = hts_query_strip_keys("*=sid\nex.com/*=utm", "z.com", "/p?sid=1&a=3", keys,
-                           sizeof(keys));
-  assertf(k != NULL && strcmp(k, "sid") == 0);
-
-  /* whole-key match, not prefix: "utm" must not strip utm_source */
-  assertf(strcmp(fil_normalized_filtered("/p?utm_source=x&a=1", dest, "utm"),
-                 "/p?a=1&utm_source=x") == 0);
-
-  /* "*" drops every param; a fully-stripped single-arg query loses its '?' */
-  assertf(strcmp(fil_normalized_filtered("/p?a=1&b=2", dest, "*"), "/p") == 0);
-  assertf(strcmp(fil_normalized_filtered("/p?utm=1", dest, "utm"), "/p") == 0);
-
-  /* degenerate forms a=, b, c== (key 'c'); strip c keeps a= and b */
-  assertf(strcmp(fil_normalized_filtered("/p?a=&b&c==", dest, "c"),
-                 "/p?a=&b") == 0);
-  /* short key must not strip a longer one: 'c' must not touch 'cc' */
-  assertf(strcmp(fil_normalized_filtered("/p?cc=1&c=2", dest, "c"),
-                 "/p?cc=1") == 0);
-
-  /* repeated key: every occurrence is stripped, not just the first */
-  assertf(
-      strcmp(fil_normalized_filtered("/p?foo=42&bar=13&foo=43", dest, "foo"),
-             "/p?bar=13") == 0);
-  /* repeated key mixing missing/empty values */
-  assertf(
-      strcmp(fil_normalized_filtered("/p?foo&bar=13&foo=42&foo=", dest, "foo"),
-             "/p?bar=13") == 0);
-  /* repeated key kept (no match): all occurrences retained, then sorted */
-  assertf(strcmp(fil_normalized_filtered("/p?foo=42&bar=13&foo=43", dest, "z"),
-                 "/p?bar=13&foo=42&foo=43") == 0);
-
-  /* value containing '=': the key is only the part before the first '='. Strip
-     'foo' drops "foo=42=17" whole; the '=' in the value is not a delimiter. */
-  assertf(strcmp(fil_normalized_filtered("/p?foo=42=17&bar=", dest, "foo"),
-                 "/p?bar=") == 0);
-  /* keeping it preserves the embedded '=' verbatim */
-  assertf(strcmp(fil_normalized_filtered("/p?foo=42=17&bar=", dest, "bar"),
-                 "/p?foo=42=17") == 0);
-  /* a value segment is not a key: stripping "42" must not touch foo=42=17 */
-  assertf(strcmp(fil_normalized_filtered("/p?foo=42=17", dest, "42"),
-                 "/p?foo=42=17") == 0);
-
-  /* Idempotency: the read path re-normalizes an already-normalized fil, so the
-     result must be a fixpoint or dedup misses (catches a dropped empty/trailing
-     arg like "?&&", "a&"). */
-  {
-    static const char *const qs[] = {"/p?a=&b&c==",
-                                     "/p?a&&b",
-                                     "/p?&a",
-                                     "/p?a&",
-                                     "/p?",
-                                     "/p?=v",
-                                     "/p?&&",
-                                     "/p?b=2&a=1",
-                                     "/p?utm=x&",
-                                     "/p?&utm=x",
-                                     "/p?foo=42&bar=13&foo=43",
-                                     "/p?foo&bar=13&foo=42&foo=",
-                                     "/p?foo=42=17&bar="};
-    static const char *const strips[] = {NULL, "z", "utm", "*", "a", "foo"};
-    char once[1024], twice[1024];
-    size_t i, j;
-
-    for (i = 0; i < sizeof(qs) / sizeof(qs[0]); i++) {
-      for (j = 0; j < sizeof(strips) / sizeof(strips[0]); j++) {
-        fil_normalized_filtered(qs[i], once, strips[j]);
-        fil_normalized_filtered(once, twice, strips[j]);
-        assertf(strcmp(once, twice) == 0);
-      }
-    }
-  }
-
-  printf("strip-query self-test OK\n");
-  return 0;
-}
-
-/* -%u url-hack split (#271): each sub-flag must toggle independently. */
-static int st_urlhack(httrackp *opt, int argc, char **argv) {
-  (void) argc;
-  (void) argv;
-#define EQ(aa, fa, ab, fb) hash_url_equals(opt, aa, fa, ab, fb)
-  /* urlhack on, no opt-outs: www, // and query order all collapse */
-  opt->urlhack = HTS_TRUE;
-  opt->no_www_dedup = opt->no_slash_dedup = opt->no_query_dedup = HTS_FALSE;
-  assertf(EQ("www.foo.com", "/a", "foo.com", "/a"));
-  assertf(EQ("foo.com", "/a//b", "foo.com", "/a/b"));
-  assertf(EQ("foo.com", "/p?b=2&a=1", "foo.com", "/p?a=1&b=2"));
-
-  /* keep-www-prefix: host off; // and query still collapse */
-  opt->no_www_dedup = HTS_TRUE;
-  assertf(!EQ("www.foo.com", "/a", "foo.com", "/a"));
-  assertf(EQ("foo.com", "/a//b", "foo.com", "/a/b"));
-  assertf(EQ("foo.com", "/p?b=2&a=1", "foo.com", "/p?a=1&b=2"));
-  opt->no_www_dedup = HTS_FALSE;
-
-  /* keep-double-slashes: // significant; www, query order still collapse */
-  opt->no_slash_dedup = HTS_TRUE;
-  assertf(!EQ("foo.com", "/a//b", "foo.com", "/a/b"));
-  assertf(EQ("www.foo.com", "/a", "foo.com", "/a"));
-  assertf(EQ("foo.com", "/p?b=2&a=1", "foo.com", "/p?a=1&b=2"));
-  opt->no_slash_dedup = HTS_FALSE;
-
-  /* keep-query-order: query order significant; www and // still collapse */
-  opt->no_query_dedup = HTS_TRUE;
-  assertf(!EQ("foo.com", "/p?b=2&a=1", "foo.com", "/p?a=1&b=2"));
-  assertf(EQ("www.foo.com", "/a", "foo.com", "/a"));
-  assertf(EQ("foo.com", "/a//b", "foo.com", "/a/b"));
-  opt->no_query_dedup = HTS_FALSE;
-
-  /* all opt-outs == urlhack off entirely */
-  opt->no_www_dedup = opt->no_slash_dedup = opt->no_query_dedup = HTS_TRUE;
-  assertf(!EQ("www.foo.com", "/a", "foo.com", "/a"));
-  assertf(!EQ("foo.com", "/a//b", "foo.com", "/a/b"));
-  assertf(!EQ("foo.com", "/p?b=2&a=1", "foo.com", "/p?a=1&b=2"));
-  opt->urlhack = HTS_FALSE;
-  opt->no_www_dedup = opt->no_slash_dedup = opt->no_query_dedup = HTS_FALSE;
-  assertf(!EQ("www.foo.com", "/a", "foo.com", "/a"));
-  assertf(!EQ("foo.com", "/a//b", "foo.com", "/a/b"));
-#undef EQ
-  printf("urlhack self-test OK\n");
-  return 0;
-}
-
 /* ------------------------------------------------------------ */
 /* Registry: name -> handler, with a usage hint and a one-line description. */
 /* ------------------------------------------------------------ */
@@ -1231,14 +1038,7 @@ static const struct selftest_entry {
 } selftests[] = {
    {"filter", "<pattern> <string>", "match a string against a wildcard filter",
     st_filter},
-    {"filtersize", "<size> <string> <filter>...",
-     "size-aware filter verdict (negative size = unknown/scan time)",
-     st_filtersize},
    {"simplify", "<path>", "collapse ./ and ../ in a path", st_simplify},
-    {"stripquery", "", "--strip-query pattern/key stripping self-test",
-     st_stripquery},
-    {"urlhack", "", "-%u url-hack sub-flag (www/slash/query) self-test",
-     st_urlhack},
    {"mime", "<filename>", "MIME type for a filename", st_mime},
    {"charset", "<charset> <string>",
     "convert a string to UTF-8 from a charset", st_charset},
--- a/tests/01_engine-filelist.test
+++ b/tests/01_engine-filelist.test
@@ -1,65 +0,0 @@
-#!/bin/bash
-#
-# -%L URL-list loading (#49): a readable list is honored; an unusable one fails
-# with the reason (errno / not-a-regular-file), not a bare "Could not include
-# URL list". Offline: file:// fixture, no server. Asserts on httrack's own
-# strings and the message shape, so it is locale-independent.
-
-set -euo pipefail
-
-tmp=$(mktemp -d "${TMPDIR:-/tmp}/httrack_filelist.XXXXXX") || exit 1
-trap 'rm -rf "$tmp"' EXIT HUP INT QUIT PIPE TERM
-
-echo '<html><body>hi</body></html>' >"$tmp/index.html"
-
-# run httrack with the given -%L target; structured log lands in $out/hts-log.txt
-run() {
-    local out="$1" list="$2"
-    rm -rf "$out"
-    mkdir -p "$out"
-    httrack -O "$out" --quiet -n "-%L" "$list" >"$out/.stdout" 2>&1 || true
-    LOG="$out/hts-log.txt"
-}
-
-fail() {
-    echo "FAIL: $1"
-    cat "$LOG"
-    exit 1
-}
-loghas() {
-    grep -Eq "$1" "$LOG" || fail "expected /$1/ in $LOG"
-}
-lognot() {
-    if grep -Eq "$1" "$LOG"; then fail "unexpected /$1/ in $LOG"; fi
-}
-
-# readable list: its one URL is loaded and counted (count must be non-zero)
-printf 'file://%s/index.html\n' "$tmp" >"$tmp/urls.txt"
-run "$tmp/ok" "$tmp/urls.txt"
-loghas '[1-9][0-9]* links added from'
-
-# missing file: quoted name + a non-empty reason, never the old reasonless
-# "Could not include URL list: <name>". The reason is the stat() errno, not the
-# directory fallback literal (guards against dropping the errno lookup).
-run "$tmp/miss" "$tmp/nope.txt"
-loghas 'Could not include URL list "[^"]+": .+'
-lognot 'Could not include URL list: '
-lognot 'not a regular file'
-
-# a directory is rejected with our own reason (locale-independent)
-mkdir -p "$tmp/adir"
-run "$tmp/dir" "$tmp/adir"
-loghas 'Could not include URL list "[^"]+": not a regular file'
-
-# unreadable regular file: the fopen() errno arm fires, distinct from the
-# directory branch. Root bypasses mode 000, so skip it there.
-if test "$(id -u)" -ne 0; then
-    : >"$tmp/noperm.txt"
-    chmod 000 "$tmp/noperm.txt"
-    run "$tmp/perm" "$tmp/noperm.txt"
-    chmod 644 "$tmp/noperm.txt"
-    loghas 'Could not include URL list "[^"]+": .+'
-    lognot 'not a regular file'
-fi
-
-exit 0
--- a/tests/01_engine-filter.test
+++ b/tests/01_engine-filter.test
@@ -71,27 +71,3 @@ nomatch '*[\[\]]' '[' # not matched, despite the docs
 match '*[\[\]]' ']'   # only via the empty class-match + trailing ']'
 match '*[\[\]]' '[]'  # one of {'[','\'} then the trailing ']'
 nomatch '*[\[\]]' '[]x'
-
-# Size-based rules (-#test=filtersize <size> <string> <filter...>): a negative size
-# means the size is still unknown (scan time). A size exclusion must stay neutral
-# then, so the file is fetched and only cancelled once its size is known (#143).
-fsize() {
-    local want="$1"
-    shift
-    test "$(httrack -O /dev/null -#test=filtersize "$@")" == "$want" || exit 1
-}
-fsize 'verdict=allowed size_flag=0' -1 foo.jpg -* '+*.jpg' '-*.jpg*[<10]'   # scan time: keep
-fsize 'verdict=forbidden size_flag=1' 5 foo.jpg -* '+*.jpg' '-*.jpg*[<10]'  # <10KB: cancel
-fsize 'verdict=allowed size_flag=1' 20 foo.jpg -* '+*.jpg' '-*.jpg*[<10]'   # >=10KB: keep
-fsize 'verdict=forbidden size_flag=0' -1 foo.txt -* '+*.jpg' '-*.jpg*[<10]' # not a jpg
-# the '>' operator is just as neutral at scan time, and fires once size is known
-fsize 'verdict=allowed size_flag=0' -1 foo.jpg -* '+*.jpg' '-*.jpg*[>10]'   # scan time: keep
-fsize 'verdict=forbidden size_flag=1' 20 foo.jpg -* '+*.jpg' '-*.jpg*[>10]' # >10KB: cancel
-
-# [name]/[file]/[path] never span '?' mid-string; a trailing query is still
-# tolerated by the global '?' rule (same as plain *.aspx), not the class (#144).
-nomatch '*[path]/end' 'a?b/end'
-nomatch '*[file]end' 'foo?xend'
-nomatch '*[name]X' 'abc?X'
-match '*[file]' 'foo?x=1' # trailing query: tolerated, as for *.aspx
-match '*.aspx' 'page.aspx?y=2'
--- a/tests/01_engine-stripquery.test
+++ b/tests/01_engine-stripquery.test
@@ -1,8 +0,0 @@
-#!/bin/bash
-#
-
-set -euo pipefail
-
-# --strip-query: pattern-scoped query-key stripping for dedup. All assertions
-# live in the engine self-test (hts_query_strip_keys + fil_normalized_filtered).
-httrack -O /dev/null -#test=stripquery | grep -q "strip-query self-test OK"
--- a/tests/01_engine-urlhack.test
+++ b/tests/01_engine-urlhack.test
@@ -1,8 +0,0 @@
-#!/bin/bash
-#
-
-set -euo pipefail
-
-# -%u url-hack split (#271): www / // / query-order dedup toggle independently.
-# All assertions live in the engine self-test (hash compare flag resolution).
-httrack -O /dev/null -#test=urlhack run | grep -q "urlhack self-test OK"
--- a/tests/25_local-mime-exclude.test
+++ b/tests/25_local-mime-exclude.test
@@ -1,16 +0,0 @@
-#!/bin/bash
-#
-# A -mime: exclusion must abort the transfer on the response Content-Type, not
-# fetch the whole 1 MB body then discard it (#58). The bytes-received guard is
-# the real one: the file is absent either way, but only the fix keeps the count
-# tiny (header only) instead of pulling the body. Match it positively (a small,
-# <=4-digit count) so a vanished/reworded summary line fails rather than passes.
-
-: "${top_srcdir:=..}"
-
-bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
-    --found 'mimex/real.html' \
-    --not-found 'mimex/blob.pdf' \
-    --log-found 'excluded by MIME type filter' \
-    --log-found '\[[0-9]{1,4} bytes received' \
-    httrack 'BASEURL/mimex/index.html' '-mime:application/pdf'
--- a/tests/26_local-strip-query.test
+++ b/tests/26_local-strip-query.test
@@ -1,23 +0,0 @@
-#!/bin/bash
-#
-# End-to-end --strip-query (#112): two links to one resource differing only by
-# ?utm_source dedup to a single saved file (2 files written: index + resource);
-# the control crawl without the option keeps both variants (3 files). Locks the
-# CLI->opt->hash plumbing the engine self-test can't reach.
-
-set -e
-
-: "${top_srcdir:=..}"
-
-# stripped: the two ?utm_source variants collapse to one resource
-bash "$top_srcdir/tests/local-crawl.sh" --errors 0 --files 2 \
-    httrack 'BASEURL/stripquery/index.html' --strip-query 'utm_source'
-
-# control: no stripping -> both query-named variants are saved
-bash "$top_srcdir/tests/local-crawl.sh" --errors 0 --files 3 \
-    httrack 'BASEURL/stripquery/index.html'
-
-# strip still applies with url-hack off (-%u0): exercises the urlhack-off
-# savename branch, which must normalize the dedup key the same way the hash does
-bash "$top_srcdir/tests/local-crawl.sh" --errors 0 --files 2 \
-    httrack 'BASEURL/stripquery/index.html' -%u0 --strip-query 'utm_source'
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -5,7 +5,6 @@ EXTRA_DIST = $(TESTS) crawl-test.sh run-all-tests.sh check-network.sh \
 	proxy-https-server.py \
 	local-crawl.sh local-server.py server.crt server.key \
 	server-root/simple/basic.html server-root/simple/link.html \
-	server-root/stripquery/index.html server-root/stripquery/a.html \
 	fixtures/cache-golden/hts-cache/new.zip

 TESTS_ENVIRONMENT =
@@ -35,7 +34,6 @@ TESTS = \
 	01_engine-dns.test \
 	01_engine-doitlog.test \
 	01_engine-entities.test \
-	01_engine-filelist.test \
 	01_engine-filter.test \
 	01_engine-hashtable.test \
 	01_engine-idna.test \
@@ -46,9 +44,7 @@ TESTS = \
 	01_engine-savename.test \
 	01_engine-selftest-dispatch.test \
 	01_engine-simplify.test \
-	01_engine-stripquery.test \
 	01_engine-strsafe.test \
-	01_engine-urlhack.test \
 	02_manpage-regen.test \
 	02_update-cache.test \
 	10_crawl-simple.test \
@@ -70,8 +66,6 @@ TESTS = \
 	21_local-intl-update.test \
 	22_local-broken-size.test \
 	23_local-errpage.test \
-	24_local-resume-overlap.test \
-	25_local-mime-exclude.test \
-	26_local-strip-query.test
+	24_local-resume-overlap.test

 CLEANFILES = check-network_sh.cache
--- a/tests/local-server.py
+++ b/tests/local-server.py
@@ -177,24 +177,6 @@ class Handler(SimpleHTTPRequestHandler):
        body, ctype = self.TYPE_MATRIX[path]
        self.send_raw(body, ctype)

-    # --- MIME-type exclusion abort (issue #58) -----------------------------
-    # A -mime:application/pdf filter must abort the transfer once the header
-    # arrives, not download the whole body and discard it.
-    def route_mimex_index(self):
-        self.send_html(
-            '\t<a href="blob.pdf">pdf</a>\n' '\t<a href="real.html">real</a>\n'
-        )
-
-    # 1 MB body: the fix aborts after the header, so httrack's "bytes received"
-    # stays tiny; without it the engine reads the body and the count jumps.
-    MIMEX_BLOB = b"%PDF-1.4\n" + b"\x00" * (1024 * 1024)
-
-    def route_mimex_blob(self):
-        self.send_raw(self.MIMEX_BLOB, "application/pdf")
-
-    def route_mimex_real(self):
-        self.send_raw(b"<html><body>real</body></html>", "text/html")
-
    # --- special chars in URLs across an update (issue #157) ---------------
    # A dotless, accented basename served as text/html (MediaWiki style). The
    # name the first crawl picks (.html) must survive the update pass.
@@ -373,9 +355,6 @@ class Handler(SimpleHTTPRequestHandler):
        "/errpage/good.html": route_errpage_good,
        "/errpage/missing.html": route_errpage_missing,
        "/errpage/empty.html": route_errpage_empty,
-        "/mimex/index.html": route_mimex_index,
-        "/mimex/blob.pdf": route_mimex_blob,
-        "/mimex/real.html": route_mimex_real,
    }

    # --- dispatch ----------------------------------------------------------
--- a/tests/server-root/stripquery/a.html
+++ b/tests/server-root/stripquery/a.html
@@ -1 +0,0 @@
-<html><body>resource A</body></html>
--- a/tests/server-root/stripquery/index.html
+++ b/tests/server-root/stripquery/index.html
@@ -1,5 +0,0 @@
-<html><body>
-Two links to one resource, differing only by a tracking parameter.
-<a href="a.html?utm_source=x">x</a>
-<a href="a.html?utm_source=y">y</a>
-</body></html>