tests: run 24_local-resume-overlap under set -e

Follow the golden rule for shell scripts: start with set -e so that a failure in any command other than the last can't be masked. Guard the backgrounded-crawl kill/wait spots with || true so the expected SIGTERM exit doesn't abort the run.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Xavier Roche <roche@httrack.com>
Harden #198 fix: verify the truncate, assert the test hit the resume path
2026-06-26 12:07:54 +03:00 · 2026-06-26 09:37:15 +02:00 · 2026-06-26 09:26:10 +02:00 · 2026-06-26 09:11:31 +02:00 · 2026-06-26 08:05:59 +02:00 · 2026-06-26 06:46:59 +02:00
41 changed files with 2091 additions and 1081 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -33,8 +33,9 @@ the operational checklist: toolchain, invariants, and how to ship a change.
 - Be terse. Comment the why, in English; translate French comments you touch.
 - Strip AI tells from prose (em-dash overuse, rule-of-three, filler, vague
  attributions). Ref: Wikipedia "Signs of AI writing". Claude Code: `/humanizer`.
- Behavior change → add a test. Fast path: a hidden `httrack -#N` debug
-  subcommand (`htscoremain.c`) driven by a `tests/NN_*.test`, over a slow crawl.
+- Behavior change → add a test. Fast path: a hidden `httrack -#test=NAME` engine
+  self-test (registry in `htsselftest.c`; `-#test` lists them) driven by a
+  `tests/NN_*.test`, rather than a slow crawl.

 ## Review your change adversarially (strongly suggested)
 Before pushing, and when reviewing others, don't skim for bugs:
--- a/man/httrack.1
+++ b/man/httrack.1
@@ -3,7 +3,7 @@
 .\"
 .\" This file is generated by man/makeman.sh; do not edit by hand.
 .\" SPDX-License-Identifier: GPL-3.0-or-later
-.TH httrack 1 "13 June 2026" "httrack website copier"
+.TH httrack 1 "26 June 2026" "httrack website copier"
 .SH NAME
 httrack \- offline browser : copy websites to a local directory
 .SH SYNOPSIS
@@ -313,12 +313,8 @@ debug HTTP headers in logfile (\-\-debug\-headers)
 .SS Guru options: (do NOT use if possible)
 .IP \-#X
 *use optimized engine (limited memory boundary checks) (\-\-fast\-engine)
-.IP \-#0
-filter test (\-#0 '*.gif' 'www.bar.com/foo.gif') (\-\-debug\-testfilters <param>)
-.IP \-#1
-simplify test (\-#1 ./foo/bar/../foobar)
-.IP \-#2
-type test (\-#2 /foo/bar.php)
+.IP \-#test
+list engine self\-tests (run one with \-#test=NAME [args])
 .IP \-#C
 cache list (\-#C '*.com/spider*.gif' (\-\-debug\-cache <param>)
 .IP \-#R
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -56,7 +56,7 @@ whttrackrundir = $(bindir)
 whttrackrun_SCRIPTS = webhttrack

 libhttrack_la_SOURCES =  htscore.c htsparse.c htsback.c htscache.c \
-	htscache_selftest.c htsdns_selftest.c \
+	htscache_selftest.c htsdns_selftest.c htsselftest.c \
 	htscatchurl.c htsfilters.c htsftp.c htshash.c coucal/coucal.c \
 	htshelp.c htslib.c htscoremain.c \
 	htsname.c htsrobots.c htstools.c htswizard.c \
@@ -66,7 +66,7 @@ libhttrack_la_SOURCES =  htscore.c htsparse.c htsback.c htscache.c \
 	md5.c \
 	minizip/ioapi.c minizip/mztools.c minizip/unzip.c minizip/zip.c \
 	hts-indextmpl.h htsalias.h htsback.h htsbase.h htssafe.h \
-	htsbasenet.h htsbauth.h htscache.h htscache_selftest.h htsdns_selftest.h htscatchurl.h  \
+	htsbasenet.h htsbauth.h htscache.h htscache_selftest.h htsdns_selftest.h htsselftest.h htscatchurl.h  \
 	htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h  \
 	htsfilters.h htsftp.h htsglobal.h htshash.h coucal/coucal.h \
 	htshelp.h htsindex.h htslib.h htsmd5.h \
--- a/src/htsback.c
+++ b/src/htsback.c
@@ -57,7 +57,10 @@ Please visit our Website: http://www.httrack.com
 // DOS
 #include <process.h>            /* _beginthread, _endthread */
 #endif
+#include <io.h> /* _chsize_s */
+#define HTS_FTRUNCATE(fp, sz) _chsize_s(_fileno(fp), (sz))
 #else
+#define HTS_FTRUNCATE(fp, sz) ftruncate(fileno(fp), (sz))
 #endif

 #define VT_CLREOL       "\33[K"
@@ -3774,35 +3777,70 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
                    // xxc SI CHUNK VERIFIER QUE CA MARCHE??
                    if (back[i].r.statuscode == 206) {  // on nous envoie un morceau (la fin) coz une partie sur disque!
                      off_t sz = fsize_utf8(back[i].url_sav);
+                      /* RFC 7233: resume at the server's Content-Range start,
+                         not the offset we requested; a server may resume
+                         earlier and appending the overlap duplicates bytes
+                         (#198). */
+                      const LLint resume = back[i].r.crange_start;
+                      const hts_boolean range_ok =
+                          back[i].r.crange > 0 && resume >= 0 &&
+                          resume <= (LLint) sz &&
+                          back[i].r.crange_end + 1 == back[i].r.crange &&
+                          (back[i].r.totalsize < 0 ||
+                           back[i].r.totalsize ==
+                               back[i].r.crange_end - resume + 1);

 #if HDEBUG
                      printf("partial content: " LLintP " on disk..\n",
                             (LLint) sz);
 #endif
-                      if (sz >= 0) {
+                      if (sz >= 0 && range_ok) {
                        if (!is_hypertext_mime(opt, back[i].r.contenttype, back[i].url_sav)) {  // pas HTML
                          if (opt->getmode & HTS_GETMODE_NONHTML) {
                            filenote(&opt->state.strc, back[i].url_sav, NULL);  // noter fichier comme connu
                            file_notify(opt, back[i].url_adr, back[i].url_fil,
                                        back[i].url_sav, 0, 1,
                                        back[i].r.notmodified);
-                            back[i].r.out = FOPEN(fconv(catbuff, sizeof(catbuff), back[i].url_sav), "ab");       // append
+                            back[i].r.out =
+                                FOPEN(fconv(catbuff, sizeof(catbuff),
+                                            back[i].url_sav),
+                                      "r+b"); // resume in place
                            if (back[i].r.out && opt->cache != 0) {
-                              back[i].r.is_write = 1;   // écrire
-                              back[i].r.size = sz;      // déja écrit
-                              back[i].r.statuscode = HTTP_OK;   // Forcer 'OK'
+                              back[i].r.is_write = 1;
+                              back[i].r.size = resume; // bytes already on disk
+                              back[i].r.statuscode = HTTP_OK; // force 'OK'
                              if (back[i].r.totalsize >= 0)
-                                back[i].r.totalsize += sz;      // plus en fait
-                              fseek(back[i].r.out, 0, SEEK_END);        // à la fin
-                              /* create a temporary reference file in case of broken mirror */
-                              if (back_serialize_ref(opt, &back[i]) != 0) {
-                                hts_log_print(opt, LOG_WARNING,
-                                              "Could not create temporary reference file for %s%s",
-                                              back[i].url_adr, back[i].url_fil);
-                              }
+                                back[i].r.totalsize += resume; // -> full size
+                              // drop bytes past the resume point; a silent
+                              // failure could leave a stale tail, so on error
+                              // drop the partial and refetch the whole file
+                              if (HTS_FTRUNCATE(back[i].r.out,
+                                                (off_t) resume) != 0) {
+                                fclose(back[i].r.out);
+                                back[i].r.out = NULL;
+                                url_savename_refname_remove(
+                                    opt, back[i].url_adr, back[i].url_fil);
+                                UNLINK(back[i].url_sav);
+                                back[i].status = STATUS_READY;
+                                back_set_finished(sback, i);
+                                strcpybuff(back[i].r.msg,
+                                           "Can not truncate partial file, "
+                                           "restarting");
+                              } else {
+                                fseeko(back[i].r.out, (off_t) resume, SEEK_SET);
+                                /* create a temporary reference file in case of
+                                 * broken mirror */
+                                if (back_serialize_ref(opt, &back[i]) != 0) {
+                                  hts_log_print(opt, LOG_WARNING,
+                                                "Could not create temporary "
+                                                "reference file for %s%s",
+                                                back[i].url_adr,
+                                                back[i].url_fil);
+                                }
 #if HDEBUG
-                              printf("continue interrupted file\n");
+                                printf("continue interrupted file\n");
 #endif
+                              }
                            } else {    // On est dans la m**
                              back[i].status = STATUS_READY;    // terminé (voir plus loin)
                              back_set_finished(sback, i);
@@ -3814,17 +3852,18 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
                          FILE *fp =
                            FOPEN(fconv(catbuff, sizeof(catbuff), back[i].url_sav), "rb");
                          if (fp) {
-                            LLint alloc_mem = sz + 1;
+                            LLint alloc_mem = resume + 1;

                            if (back[i].r.totalsize >= 0)
                              alloc_mem += back[i].r.totalsize; // AJOUTER RESTANT!
                            if (deleteaddr(&back[i].r)
                                && (back[i].r.adr =
                                    (char *) malloct((size_t) alloc_mem))) {
-                              back[i].r.size = sz;
+                              back[i].r.size = resume;
                              if (back[i].r.totalsize >= 0)
-                                back[i].r.totalsize += sz;      // plus en fait
-                              if ((fread(back[i].r.adr, 1, sz, fp)) != sz) {
+                                back[i].r.totalsize += resume; // -> full size
+                              if ((fread(back[i].r.adr, 1, (size_t) resume,
+                                         fp)) != (size_t) resume) {
                                back[i].status = STATUS_READY;  // terminé (voir plus loin)
                                back_set_finished(sback, i);
                                strcpybuff(back[i].r.msg,
@@ -3842,14 +3881,30 @@ void back_wait(struct_back * sback, httrackp * opt, cache_back * cache,
                                         "No memory for partial file");
                            }
                            fclose(fp);
-                          } else {      // Argh.. 
+                          } else {                              // open failed
                            back[i].status = STATUS_READY;      // terminé (voir plus loin)
                            back_set_finished(sback, i);
                            strcpybuff(back[i].r.msg,
                                       "Can not open partial file");
                          }
                        }
-                      } else {  // Non trouvé??
+                      } else if (sz >=
+                                 0) { // unusable range -> restart whole file
+                        hts_log_print(opt, LOG_WARNING,
+                                      "Unusable partial-content range for %s%s "
+                                      "(have " LLintP " bytes, got " LLintP
+                                      "-" LLintP "/" LLintP "), restarting",
+                                      back[i].url_adr, back[i].url_fil,
+                                      (LLint) sz, back[i].r.crange_start,
+                                      back[i].r.crange_end, back[i].r.crange);
+                        url_savename_refname_remove(opt, back[i].url_adr,
+                                                    back[i].url_fil);
+                        UNLINK(back[i].url_sav);
+                        back[i].status = STATUS_READY;
+                        back_set_finished(sback, i);
+                        strcpybuff(back[i].r.msg,
+                                   "Unusable partial content, restarting");
+                      } else {                          // partial not found
                        back[i].status = STATUS_READY;  // terminé (voir plus loin)
                        back_set_finished(sback, i);
                        strcpybuff(back[i].r.msg, "Can not find partial file");
--- a/src/htscache.c
+++ b/src/htscache.c
@@ -220,6 +220,25 @@ struct cache_back_zip_entry {
 	} \
 } while(0)

+/* A cache (new.zip) write failed: storage is gone (disk full / dropped share),
+   so the mirror is doomed too. Abort it via exit_xh, don't crash as assertf
+   did. */
+static void cache_zip_write_failed(httrackp *opt, cache_back *cache,
+                                   const char *what, int zErr) {
+  if (!cache->zipWriteFailed) {
+    cache->zipWriteFailed = HTS_TRUE;
+    if (check_fatal_io_errno()) {
+      hts_log_print(opt, LOG_ERROR,
+                    "Mirror aborted: disk full or filesystem problems");
+    } else {
+      hts_log_print(opt, LOG_ERROR,
+                    "Mirror aborted: cache write failed (%s): %s", what,
+                    hts_get_zerror(zErr));
+    }
+  }
+  opt->state.exit_xh = -1; /* fatal: stop the mirror, exit non-zero */
+}
+
 /* Ajout d'un fichier en cache */
 void cache_add(httrackp * opt, cache_back * cache, const htsblk * r,
               const char *url_adr, const char *url_fil, const char *url_save,
@@ -236,6 +255,10 @@ void cache_add(httrackp * opt, cache_back * cache, const htsblk * r,
  const char *url_save_suffix = url_save;
  int zErr;

+  /* already failed and aborting; don't touch the broken stream again */
+  if (cache->zipWriteFailed)
+    return;
+
  // robots.txt hack
  if (url_save == NULL) {
    dataincache = 0;            // testing links
@@ -346,9 +369,8 @@ void cache_add(httrackp * opt, cache_back * cache, const htsblk * r,
                                   */
                                  headers, (uInt) strlen(headers), NULL, 0, NULL,       /* comment */
                                  Z_DEFLATED, Z_DEFAULT_COMPRESSION)) != Z_OK) {
-    int zip_zipOpenNewFileInZip_failed = 0;
-
-    assertf(zip_zipOpenNewFileInZip_failed);
+    cache_zip_write_failed(opt, cache, "opening a cache entry", zErr);
+    return;
  }

  /* Write data in cache */
@@ -358,9 +380,8 @@ void cache_add(httrackp * opt, cache_back * cache, const htsblk * r,
        if ((zErr =
             zipWriteInFileInZip((zipFile) cache->zipOutput, r->adr,
                                 (int) r->size)) != Z_OK) {
-          int zip_zipWriteInFileInZip_failed = 0;
-
-          assertf(zip_zipWriteInFileInZip_failed);
+          cache_zip_write_failed(opt, cache, "writing to the cache", zErr);
+          return;
        }
      }
    } else {
@@ -381,9 +402,10 @@ void cache_add(httrackp * opt, cache_back * cache, const htsblk * r,
              if ((zErr =
                   zipWriteInFileInZip((zipFile) cache->zipOutput, buff,
                                       (int) nl)) != Z_OK) {
-                int zip_zipWriteInFileInZip_failed = 0;
-
-                assertf(zip_zipWriteInFileInZip_failed);
+                cache_zip_write_failed(opt, cache, "writing to the cache",
+                                       zErr);
+                fclose(fp);
+                return;
              }
            }
          } while(nl > 0);
@@ -397,16 +419,14 @@ void cache_add(httrackp * opt, cache_back * cache, const htsblk * r,

  /* Close */
  if ((zErr = zipCloseFileInZip((zipFile) cache->zipOutput)) != Z_OK) {
-    int zip_zipCloseFileInZip_failed = 0;
-
-    assertf(zip_zipCloseFileInZip_failed);
+    cache_zip_write_failed(opt, cache, "closing a cache entry", zErr);
+    return;
  }

  /* Flush */
  if ((zErr = zipFlush((zipFile) cache->zipOutput)) != 0) {
-    int zip_zipFlush_failed = 0;
-
-    assertf(zip_zipFlush_failed);
+    cache_zip_write_failed(opt, cache, "flushing the cache", zErr);
+    return;
  }
 }

--- a/src/htscache_selftest.c
+++ b/src/htscache_selftest.c
@@ -47,6 +47,7 @@ Please visit our Website: http://www.httrack.com
 #include "htslib.h"
 #include "htszlib.h"

+#include <errno.h>
 #include <stdio.h>
 #include <string.h>

@@ -316,6 +317,136 @@ static int disk_fallback_selftest(httrackp *opt) {
  return fail;
 }

+typedef struct {
+  size_t budget;  /**< bytes allowed through before writes start failing */
+  int fail_errno; /**< errno set on the failing write (ENOSPC, EIO, ...) */
+  int writes;     /**< zwrite call count, to detect re-entry into the stream */
+} writefail_inject;
+
+/* zwrite that passes each write through while it fits the remaining budget,
+   then fails with inj->fail_errno (the #174/#219 condition). Counts calls so
+   the test can prove a flagged cache never re-enters the stream. */
+static uLong selftest_failing_zwrite(voidpf opaque, voidpf stream,
+                                     const void *buf, uLong size) {
+  writefail_inject *inj = (writefail_inject *) opaque;
+
+  inj->writes++;
+  if (inj->budget >= (size_t) size) {
+    inj->budget -= (size_t) size;
+    return (uLong) fwrite(buf, 1, (size_t) size, (FILE *) stream);
+  }
+  errno = inj->fail_errno;
+  return 0; /* short write -> the minizip op returns an error */
+}
+
+/* Open a ZIP whose writes fail past inj->budget, so cache_add() hits an error.
+ */
+static zipFile selftest_open_failing_zip(const char *path,
+                                         writefail_inject *inj) {
+  zlib_filefunc_def ff;
+
+  fill_fopen_filefunc(&ff); /* real fopen/read/seek/close; ignores opaque */
+  ff.zwrite_file = selftest_failing_zwrite;
+  ff.opaque = inj;
+  return zipOpen2(path, APPEND_STATUS_CREATE, NULL, &ff);
+}
+
+/* Store one octet-stream body into `cache` (all-in-cache, body in the ZIP). */
+static void writefail_store(httrackp *opt, cache_back *cache, const char *fil,
+                            const char *body, size_t body_len) {
+  htsblk r;
+  char locbuf[4];
+  char *bodycopy = malloct(body_len);
+
+  hts_init_htsblk(&r);
+  r.statuscode = 200;
+  r.size = (LLint) body_len;
+  strcpybuff(r.msg, "OK");
+  strcpybuff(r.contenttype, "application/octet-stream");
+  locbuf[0] = '\0';
+  r.location = locbuf;
+  r.is_write = 0;
+  memcpy(bodycopy, body, body_len);
+  r.adr = bodycopy;
+  cache_add(opt, cache, &r, "example.com", fil, "example.com/blob.bin", 1,
+            NULL);
+  freet(bodycopy);
+}
+
+/* #174/#219: a failing cache write used to crash via assertf(); it must instead
+   stop the mirror (exit_xh = -1) without crashing. Assert that, plus the cache
+   is flagged and a sibling write doesn't re-enter the broken stream. */
+int cache_write_failure_selftest(httrackp *opt, const char *dir) {
+  int fail = 0;
+  char path[HTS_URLMAXSIZE];
+  /* incompressible + big, so deflate flushes (and fails) mid-write, before
+   * close */
+  static const size_t body_len = 256 * 1024;
+  char *body = malloct(body_len);
+  int phase;
+
+  gen_body(body, body_len, 1 /* incompressible */);
+  fconcat(path, sizeof(path), dir, "/wfail.zip");
+
+  /* phase 0: fail during the body write, fatal errno (ENOSPC, disk-full
+     branch). phase 1: fail on the first backend write (zero budget), non-fatal
+     errno (EIO, dropped-share branch). Both must abort the mirror. */
+  for (phase = 0; phase < 2; phase++) {
+    cache_back cache;
+    writefail_inject inj;
+    int writes_after_fail;
+
+    inj.budget = (phase == 0) ? 4096 : 0;
+    inj.fail_errno = (phase == 0) ? ENOSPC : EIO;
+    inj.writes = 0;
+    memset(&cache, 0, sizeof(cache));
+    cache.type = 1;
+    cache.log = stderr;
+    cache.errlog = stderr;
+    cache.hashtable = coucal_new(0);
+    cache.zipOutput = selftest_open_failing_zip(path, &inj);
+    if (cache.zipOutput == NULL) {
+      fprintf(stderr, "cache-writefail: could not open injected ZIP\n");
+      fail++;
+      continue;
+    }
+
+    opt->state.exit_xh = 0; /* clear; the failing write must set it to -1 */
+    writefail_store(opt, &cache, "/blob.bin", body, body_len);
+    if (!cache.zipWriteFailed) {
+      fprintf(stderr, "cache-writefail: phase %d: write error not caught\n",
+              phase);
+      fail++;
+    }
+    if (opt->state.exit_xh != -1) {
+      fprintf(stderr,
+              "cache-writefail: phase %d: mirror not aborted (exit_xh=%d)\n",
+              phase, opt->state.exit_xh);
+      fail++;
+    }
+
+    /* a flagged cache must no-op a sibling write: no further backend write */
+    writes_after_fail = inj.writes;
+    writefail_store(opt, &cache, "/blob2.bin", body, 16);
+    if (inj.writes != writes_after_fail) {
+      fprintf(stderr,
+              "cache-writefail: phase %d: sibling write re-entered the broken "
+              "stream (%d extra backend writes)\n",
+              phase, inj.writes - writes_after_fail);
+      fail++;
+    }
+
+    if (cache.zipOutput != NULL) {
+      zipClose(cache.zipOutput,
+               NULL); /* best-effort; may fail on the backend */
+      cache.zipOutput = NULL;
+    }
+  }
+
+  freet(body);
+  return fail;
+}
+
 int cache_selftests(httrackp *opt, const char *dir) {
  int failures = 0;
  cache_back cache;
--- a/src/htscache_selftest.h
+++ b/src/htscache_selftest.h
@@ -52,6 +52,10 @@ int cache_selftests(httrackp *opt, const char *dir);
   committed file, never by the test). Returns the failed-check count. */
 int cache_golden_selftest(httrackp *opt, const char *dir, int regen);

+/* #174/#219: assert a failing cache write aborts the mirror cleanly instead of
+   crashing. Returns the failed-check count. */
+int cache_write_failure_selftest(httrackp *opt, const char *dir);
+
 #endif

 #endif
--- a/src/htscore.h
+++ b/src/htscore.h
@@ -214,6 +214,8 @@ struct cache_back {
  cache_back_zip_entry *zipEntries;
  int zipEntriesOffs;
  int zipEntriesCapa;
+  hts_boolean
+      zipWriteFailed; /**< a cache write failed; stop touching the stream */
 };

 #ifndef HTS_DEF_FWSTRUCT_hash_struct
--- a/src/htscoremain.c
+++ b/src/htscoremain.c
@@ -45,9 +45,7 @@ Please visit our Website: http://www.httrack.com
 #include "htsmodules.h"
 #include "htszlib.h"
 #include "htscharset.h"
-#include "htsencoding.h"
-#include "htscache_selftest.h"
-#include "htsdns_selftest.h"
+#include "htsselftest.h"
 #include "htsmd5.h"

 #include <ctype.h>
@@ -114,442 +112,6 @@ HTSEXT_API int hts_main(int argc, char **argv) {
  return ret;
 }

-// very minimalistic internal tests
-static void basic_selftests(void) {
-  // BUG 756328
-  const char *const source = "/intent/tweet?url=https%3A%2F%2Fwww.httrack.com%2Fvacatures%2F1562519%2Fmedewerker-data-services&text=Medewerker+Data+Services&via=httrackcom";
-  char buffer[1024];
-  fil_normalized(source, buffer);
-  // MD5 selftests
-  md5selftest();
-  // cookie_get field extraction (tab-separated, 0-based)
-  {
-    char cbuf[8192];
-
-    assertf(strcmp(cookie_get(cbuf, "a\tb\tc", 0), "a") == 0);
-    assertf(strcmp(cookie_get(cbuf, "a\tb\tc", 1), "b") == 0);
-    assertf(strcmp(cookie_get(cbuf, "a\tb\tc", 2), "c") == 0);
-    // multi-char fields catch length/boundary bugs that 1-char fields hide
-    assertf(strcmp(cookie_get(cbuf, "host\tx\t/path/to", 0), "host") == 0);
-    assertf(strcmp(cookie_get(cbuf, "host\tx\t/path/to", 2), "/path/to") == 0);
-    assertf(strcmp(cookie_get(cbuf, "a\t\tc", 1), "") == 0);  // empty field
-    assertf(strcmp(cookie_get(cbuf, "a\tb\tc", 9), "") == 0); // beyond last
-  }
-  // back_infostr() status-line formatting (no sockets: pure formatting over
-  // in-memory slots). Stresses a few thousand entries across every status-code
-  // arm. Regression for a clobber bug where the size/totalsize trailer was
-  // written straight into the destination, wiping the URL it had just built.
-  {
-    static const struct {
-      int code;
-      const char *tag;
-    } cases[] = {
-        {200, "READY "},     {-1, "ERROR "},       {-2, "TIMEOUT "},
-        {-3, "TOOSLOW "},    {400, "BADREQUEST "}, {403, "FORBIDDEN "},
-        {404, "NOT FOUND "}, {500, "SERVERROR "},  {999, "ERROR(999)"},
-    };
-    const int ncases = (int) (sizeof(cases) / sizeof(cases[0]));
-    const int n = 2000;
-    lien_back *slots = calloct(n, sizeof(lien_back));
-    char line[HTS_URLMAXSIZE * 4 + 1024];
-    char expect[HTS_URLMAXSIZE * 4 + 1024];
-    struct_back sb;
-    int idx;
-
-    sb.lnk = slots;
-    sb.count = n;
-    sb.ready = NULL;
-    sb.ready_size_bytes = 0;
-    for (idx = 0; idx < n; idx++) {
-      lien_back *const slot = &slots[idx];
-
-      slot->r.location = slot->location_buffer;
-      slot->status = STATUS_READY;
-      slot->r.statuscode = cases[idx % ncases].code;
-      slot->r.size = idx;
-      slot->r.totalsize = idx + 1;
-      snprintf(slot->url_adr, sizeof(slot->url_adr), "http://h%d.example", idx);
-      snprintf(slot->url_fil, sizeof(slot->url_fil), "/p/%d.html", idx);
-    }
-    for (idx = 0; idx < n; idx++) {
-      line[0] = '\0';
-      back_infostr(&sb, idx, 3, line, sizeof(line));
-      // Exact match (not substring): pins tag/URL/trailer order and rejects a
-      // partial clobber, duplication, or truncation that a presence check would
-      // let through. The expected format is stated here independently.
-      snprintf(expect, sizeof(expect),
-               "%s\"http://h%d.example/p/%d.html\" " LLintP " " LLintP " ",
-               cases[idx % ncases].tag, idx, idx, (LLint) idx,
-               (LLint) (idx + 1));
-      assertf(strcmp(line, expect) == 0);
-    }
-    // Near-maximal URL, driven through back_info() (which owns the status
-    // buffer internally and prints to a FILE*). url_adr + url_fil together
-    // overrun the old HTS_URLMAXSIZE*2+1024 buffer, so the bounded appends
-    // would abort unless that buffer is sized to hold both fields. Regression
-    // for that sizing -- exercising back_infostr() directly would miss it,
-    // since the caller's buffer is what matters.
-    {
-      lien_back *const slot = &slots[0];
-      const size_t adrlen = sizeof(slot->url_adr) - 8;
-      const size_t fillen = sizeof(slot->url_fil) - 8;
-      FILE *const fp = tmpfile();
-      size_t got;
-
-      assertf(fp != NULL);
-      slot->status = STATUS_READY;
-      slot->r.statuscode = 200;
-      slot->r.size = 1;
-      slot->r.totalsize = 2;
-      memset(slot->url_adr, 'a', adrlen);
-      slot->url_adr[adrlen] = '\0';
-      slot->url_fil[0] = '/';
-      memset(slot->url_fil + 1, 'b', fillen - 1);
-      slot->url_fil[fillen] = '\0';
-      back_info(&sb, 0, 3, fp);
-      rewind(fp);
-      got = fread(line, 1, sizeof(line) - 1, fp);
-      line[got] = '\0';
-      fclose(fp);
-      snprintf(expect, sizeof(expect),
-               "READY \"%s%s\" " LLintP " " LLintP " " LF, slot->url_adr,
-               slot->url_fil, (LLint) 1, (LLint) 2);
-      assertf(strcmp(line, expect) == 0);
-    }
-    freet(slots);
-  }
-  // next_token(): in-place token scanner. Strips surrounding quotes, unescapes
-  // \" and \\ when flag is set, and returns the token terminator (the space, or
-  // NULL at end of string). The unquote/unescape rewrites the string in place
-  // by shifting left, so the result is always shorter -- regression for that
-  // compaction.
-  {
-    char tok[64];
-
-    // plain token: unchanged, returns a pointer AT the separating space (exact
-    // position, not just any space -- a strchr-style impl would land elsewhere
-    // once quotes shift the content)
-    strcpybuff(tok, "abc def");
-    {
-      char *const end = next_token(tok, 0);
-      assertf(end == tok + 3 && *end == ' ' && strcmp(tok, "abc def") == 0);
-    }
-    // surrounding quotes stripped, returns the (post-shift) trailing space
-    strcpybuff(tok, "\"ab\" cd");
-    {
-      char *const end = next_token(tok, 1);
-      assertf(end == tok + 2 && *end == ' ' && strcmp(tok, "ab cd") == 0);
-    }
-    // a space inside quotes does not end the token; end of string returns NULL
-    strcpybuff(tok, "\"a b\"c");
-    {
-      char *const end = next_token(tok, 1);
-      assertf(end == NULL && strcmp(tok, "a bc") == 0);
-    }
-    // \" and \\ are unescaped to literal " and \ in place
-    strcpybuff(tok, "\"a\\\"b\\\\c\"");
-    {
-      char *const end = next_token(tok, 1);
-      assertf(end == NULL && strcmp(tok, "a\"b\\c") == 0);
-    }
-    // unterminated quote: the opening quote is dropped, the rest survives, and
-    // the scan runs to the NUL (returns NULL)
-    strcpybuff(tok, "\"ab");
-    {
-      char *const end = next_token(tok, 1);
-      assertf(end == NULL && strcmp(tok, "ab") == 0);
-    }
-    // trailing lone backslash in a quote: *(p+1) is the NUL, not an escape, so
-    // the backslash is kept intact (and there is no over-read past the NUL)
-    strcpybuff(tok, "\"a\\");
-    {
-      char *const end = next_token(tok, 1);
-      assertf(end == NULL && strcmp(tok, "a\\") == 0);
-    }
-  }
-  // fil_normalized(): canonicalizes a URL path. Query arguments are sorted
-  // alphabetically (by the text after each '?'/'&') and the query is rebuilt
-  // through a bounded builder; outside the query, "//" collapses to "/".
-  // Regression for that builder.
-  {
-    char norm[256];
-
-    assertf(strcmp(fil_normalized("/p?b=2&a=1&c=3", norm), "/p?a=1&b=2&c=3") ==
-            0);
-    assertf(strcmp(fil_normalized("/a//b", norm), "/a/b") == 0);
-    // "//" is collapsed only before the query; inside the query it is kept
-    assertf(strcmp(fil_normalized("/a//b?x=c//d", norm), "/a/b?x=c//d") == 0);
-  }
-  // give_mimext(): mime type -> file extension, bounded into the caller buffer.
-  // Returns 1 when an extension was written, 0 otherwise.
-  {
-    char ext[16];
-
-    assertf(give_mimext(ext, sizeof(ext), "image/gif") == 1);
-    assertf(strcmp(ext, "gif") == 0);
-    assertf(give_mimext(ext, sizeof(ext), "text/html") == 1);
-    assertf(strcmp(ext, "html") == 0);
-    assertf(give_mimext(ext, sizeof(ext), "no/such-mime-type") == 0);
-    assertf(ext[0] == '\0');
-  }
-  // convtolower(): lower-cases into the caller buffer (bounded by its size).
-  {
-    char low[64];
-
-    assertf(strcmp(convtolower(low, sizeof(low), "ABC/Def.HTML"),
-                   "abc/def.html") == 0);
-  }
-  // cut_path(): splits a path into directory (with trailing '/') and basename,
-  // each bounded by its buffer size.
-  {
-    char path[256];
-    char pname[256];
-
-    {
-      char full[] = "/dir/sub/file.html";
-
-      cut_path(full, path, sizeof(path), pname, sizeof(pname));
-      assertf(strcmp(path, "/dir/sub/") == 0);
-      assertf(strcmp(pname, "file.html") == 0);
-    }
-    { // a trailing slash is trimmed before the split
-      char full[] = "/dir/sub/";
-
-      cut_path(full, path, sizeof(path), pname, sizeof(pname));
-      assertf(strcmp(path, "/dir/") == 0);
-      assertf(strcmp(pname, "sub") == 0);
-    }
-    { // a path of length <= 1 yields empty results
-      char full[] = "/";
-
-      cut_path(full, path, sizeof(path), pname, sizeof(pname));
-      assertf(path[0] == '\0' && pname[0] == '\0');
-    }
-  }
-  // get_httptype_sized(): a long MIME type (Office OOXML reaches 73 chars) is
-  // written whole into a contenttype-sized buffer; returns 1 on a match, 0 when
-  // flag==0 and nothing matched. Regression for the old contenttype[64]
-  // overflow.
-  {
-    httrackp *opt = hts_create_opt();
-    htsblk r; // write into the real struct field, not a stand-in
-
-    assertf(opt != NULL);
-    // a long MIME (Office OOXML reaches 73 chars) must fit htsblk.contenttype
-    // whole: a [64] field would make this bounded copy abort.
-    assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype),
-                               "deck.pptx", 0) == 1);
-    assertf(strcmp(r.contenttype,
-                   "application/vnd.openxmlformats-officedocument."
-                   "presentationml.presentation") == 0);
-    assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype),
-                               "x.gif", 0) == 1);
-    assertf(strcmp(r.contenttype, "image/gif") == 0);
-    // no extension and flag==0: nothing written, returns 0
-    assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype),
-                               "noextfile", 0) == 0);
-    assertf(r.contenttype[0] == '\0');
-    // no extension and flag==1: octet-stream fallback, returns 1
-    assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype),
-                               "noextfile", 1) == 1);
-    assertf(strcmp(r.contenttype, "application/octet-stream") == 0);
-    // a user --assume rule with an empty value matches but writes nothing:
-    // get_userhttptype returns 1 with the buffer empty, so get_httptype_sized
-    // must still report 0 (callers test the return like the old
-    // strnotempty(s)).
-    StringCopy(opt->mimedefs, "\ncgi=\n");
-    assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype),
-                               "/x.cgi", 0) == 0);
-    assertf(r.contenttype[0] == '\0');
-    StringCopy(opt->mimedefs, "\ncgi=text/html\n");
-    assertf(get_httptype_sized(opt, r.contenttype, sizeof(r.contenttype),
-                               "/x.cgi", 0) == 1);
-    assertf(strcmp(r.contenttype, "text/html") == 0);
-    hts_free_opt(opt);
-  }
-  // adr_normalized_sized(): bounded host normalization (passthrough when
-  // already normal).
-  {
-    char n[HTS_URLMAXSIZE];
-
-    assertf(strcmp(adr_normalized_sized("example.com", n, sizeof(n)),
-                   "example.com") == 0);
-  }
-  // standard_name(): builds "<name><md5?>.<ext>" into a bounded buffer. The md5
-  // is appended (4 chars) only when the URL has a query string (see url_md5),
-  // so test both; pin the structure (name + ext, lengths), not the md5 chars.
-  {
-    char b[HTS_URLMAXSIZE * 2];
-    const char *nom = "index.html"; // name part
-    const char *dot = nom + 5;      // points at ".html"
-    size_t len;
-
-    // no query -> no md5: "index" + ".html"
-    standard_name(b, sizeof(b), dot, nom, "http://example.com/index.html", 0);
-    assertf(strcmp(b, "index.html") == 0);
-    // query -> 4 md5 chars between name and ext: "index" + md5(4) + ".html"
-    standard_name(b, sizeof(b), dot, nom, "http://example.com/index.html?v=1",
-                  0);
-    len = strlen(b);
-    assertf(len == 5 + 4 + 5);
-    assertf(strncmp(b, "index", 5) == 0);
-    assertf(strcmp(b + len - 5, ".html") == 0);
-    // short names: name kept (<=8), the extension is clamped to 3 -> ".htm"
-    standard_name(b, sizeof(b), dot, nom, "http://example.com/index.html?v=1",
-                  1);
-    len = strlen(b);
-    assertf(len == 5 + 4 + 4);
-    assertf(strcmp(b + len - 4, ".htm") == 0);
-    // short names with a >8-char name: the name is clamped to 8 ("indexpag")
-    {
-      const char *lnom = "indexpage.html";
-      const char *ldot = lnom + 9; // points at ".html"
-
-      standard_name(b, sizeof(b), ldot, lnom,
-                    "http://example.com/indexpage.html?v=1", 1);
-      len = strlen(b);
-      assertf(len == 8 + 4 + 4);
-      assertf(strncmp(b, "indexpag", 8) == 0);
-      assertf(strcmp(b + len - 4, ".htm") == 0);
-    }
-  }
-  // longfile_to_83(): single-name 8-3 (mode 1) / ISO9660 (mode 2) conversion;
-  // uppercases, clamps the name (8 / 31) and the extension (3). It rewrites
-  // 'save' in place, so pass a mutable array.
-  {
-    char n83[256];
-
-    {
-      char save[] = "longfilename.html";
-
-      longfile_to_83(1, n83, sizeof(n83), save); // 8-3: name->8, ext->3
-      assertf(strcmp(n83, "LONGFILE.HTM") == 0);
-    }
-    {
-      char save[] = "longfilename.html";
-
-      longfile_to_83(2, n83, sizeof(n83), save); // ISO9660: name->31, ext->3
-      assertf(strcmp(n83, "LONGFILENAME.HTM") == 0);
-    }
-    { // sanitization: leading '.'->'_', interior dots
-      char save[] = ".a b.c.d e"; // collapse to '_', spaces/specials -> '_'
-                                  // (only the last dot stays as the separator)
-      longfile_to_83(1, n83, sizeof(n83), save);
-      assertf(strcmp(n83, "_A_B_C.D_E") == 0);
-    }
-  }
-  // long_to_83(): per-segment 8-3 conversion of a whole path.
-  {
-    char n83[HTS_URLMAXSIZE * 2];
-    char save[] = "dir/longfilename.html";
-
-    long_to_83(1, n83, sizeof(n83), save);
-    assertf(strcmp(n83, "DIR/LONGFILE.HTM") == 0);
-  }
-  // lienrelatif(): relative path from the directory of curr_fil to link.
-  {
-    char s[HTS_URLMAXSIZE * 2];
-
-    // same directory -> just the basename
-    assertf(lienrelatif(s, sizeof(s), "dir/page.html", "dir/index.html") == 0);
-    assertf(strcmp(s, "page.html") == 0);
-    // link one level up -> a "../" prefix
-    assertf(lienrelatif(s, sizeof(s), "a.html", "dir/index.html") == 0);
-    assertf(strcmp(s, "../a.html") == 0);
-  }
-}
-
-/* Self-tests for the htssafe.h bounded string ops (driven by httrack -#8).
-   Returns 0 if every bounded operation behaved correctly, 1 otherwise.
-   The abort-on-overflow guarantee is checked separately by the -#8 "overflow"
-   sub-mode (it aborts the process by design). */
-static int string_safety_selftests(void) {
-  char buf[8];
-
-  /* strcpybuff into a sized array: exact copy */
-  strcpybuff(buf, "abc");
-  if (strcmp(buf, "abc") != 0)
-    return 1;
-
-  /* strcatbuff append within capacity */
-  strcatbuff(buf, "de");
-  if (strcmp(buf, "abcde") != 0)
-    return 1;
-
-  /* strncatbuff appends at most N source chars */
-  strcpybuff(buf, "ab");
-  strncatbuff(buf, "cdef", 2);
-  if (strcmp(buf, "abcd") != 0)
-    return 1;
-
-  /* strlcpybuff: explicit-capacity copy into a pointer destination, the form
-     the migration moves toward */
-  {
-    char storage[8];
-    char *const p = storage;
-
-    strlcpybuff(p, "hello", sizeof(storage));
-    if (strcmp(p, "hello") != 0)
-      return 1;
-  }
-
-  /* strcpybuff into a pointer destination: routes through the unchecked
-     strcpybuff_ptr_ fallback (the path the -#8 warning flags). The warning is
-     intentional here; we only verify the fallback still copies correctly. */
-#if defined(__GNUC__)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wattribute-warning"
-#endif
-  {
-    char storage[8];
-    char *const p = storage;
-
-    strcpybuff(p, "ptr");
-    if (strcmp(p, "ptr") != 0)
-      return 1;
-  }
-#if defined(__GNUC__)
-#pragma GCC diagnostic pop
-#endif
-
-  /* htsbuff: bounded builder over a fixed array (append, truncating append,
-     reset, and length tracking) */
-  {
-    char dst[8];
-    htsbuff b = htsbuff_array(dst);
-
-    htsbuff_cat(&b, "ab");
-    htsbuff_cat(&b, "cd");
-    if (strcmp(htsbuff_str(&b), "abcd") != 0 || b.len != 4)
-      return 1;
-
-    htsbuff_catn(&b, "efghij", 2);      /* append at most 2 */
-    if (strcmp(htsbuff_str(&b), "abcdef") != 0)
-      return 1;
-
-    htsbuff_cpy(&b, "xyz");             /* reset */
-    if (strcmp(htsbuff_str(&b), "xyz") != 0 || b.len != 3)
-      return 1;
-
-    htsbuff_catc(&b, '!'); /* single character */
-    if (strcmp(htsbuff_str(&b), "xyz!") != 0 || b.len != 4)
-      return 1;
-  }
-
-  /* boundary: filling to exactly cap-1 must succeed (one more aborts, which the
-     -#8 overflow-buff mode checks) */
-  {
-    char d2[4];
-    htsbuff c = htsbuff_array(d2);
-
-    htsbuff_cat(&c, "abc");
-    if (strcmp(htsbuff_str(&c), "abc") != 0 || c.len != 3)
-      return 1;
-  }
-
-  return 0;
-}
-
 static int hts_main_internal(int argc, char **argv, httrackp * opt);

 // Main, récupère les paramètres et appelle le robot
@@ -1344,6 +906,25 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
     }
   */

+  /* Engine self-tests: -#test lists them, -#test=NAME [args] runs one. Handled
+     here, ahead of the no-URL usage gate below, so they need no dummy URL. */
+  {
+    int k;
+
+    for (k = 1; k < argc; k++) { /* argv[0] is not scanned */
+      const char *const a = argv[k];
+      /* a[6] is read only after "-#test" matched, so it stays in bounds */
+      if (a[0] == '-' && a[1] == '#' && strncmp(a + 2, "test", 4) == 0 &&
+          (a[6] == '\0' || a[6] == '=')) {
+        const char *const name = a[6] == '=' ? a + 7 : NULL;
+        const int code = hts_selftest(opt, name, argc - (k + 1), &argv[k + 1]);
+        /* args after the flag went to the self-test; its result is returned */
+        htsmain_free();
+        return code;
+      }
+    }
+  }
+
  // Pas d'URL
 #if DEBUG_STEPS
  printf("Checking URLs\n");
@@ -2432,42 +2013,6 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
          case '#':{           // non documenté
              com++;
              switch (*com) {
-              case 'A': // cache self-test: httrack -#A <dir>
-                if (na + 1 < argc) {
-                  const int err = cache_selftests(opt, argv[na + 1]);
-
-                  printf("cache-selftest: %s\n", err ? "FAIL" : "OK");
-                  htsmain_free();
-                  return err;
-                } else {
-                  fprintf(stderr, "Option #A requires a directory argument\n");
-                  htsmain_free();
-                  return 1;
-                }
-                break;
-              case 'B': // golden cache fixture read: httrack -#B <dir> [regen]
-                if (na + 1 < argc) {
-                  const int regen =
-                      (na + 2 < argc && strcmp(argv[na + 2], "regen") == 0);
-                  const int err =
-                      cache_golden_selftest(opt, argv[na + 1], regen);
-
-                  printf("cache-golden: %s\n", err ? "FAIL" : "OK");
-                  htsmain_free();
-                  return err;
-                } else {
-                  fprintf(stderr, "Option #B requires a directory argument\n");
-                  htsmain_free();
-                  return 1;
-                }
-                break;
-              case 'D': { // DNS resolver/cache self-test (mock getaddrinfo)
-                const int err = dns_selftests(opt);
-
-                printf("dns-selftest: %s\n", err ? "FAIL" : "OK");
-                htsmain_free();
-                return err;
-              } break;
              case 'C':        // list cache files : httrack -#C '*spid*.gif' will attempt to find the matching file
                {
                  int hasFilter = 0;
@@ -2765,468 +2310,6 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
                }
                break;

-              case '0':        /* test #0 : filters */
-                if (na + 2 >= argc) {
-                  HTS_PANIC_PRINTF
-                    ("Option #0 needs to be followed by a filter string and a string");
-                  printf("Example: '-#0' '*.gif' 'foo.gif'\n");
-                  htsmain_free();
-                  return -1;
-                } else {
-                  if (strjoker(argv[na + 2], argv[na + 1], NULL, NULL))
-                    printf("%s does match %s\n", argv[na + 2], argv[na + 1]);
-                  else
-                    printf("%s does NOT match %s\n", argv[na + 2],
-                           argv[na + 1]);
-                  htsmain_free();
-                  return 0;
-                }
-                break;
-              case '1':        /* test #1 : fil_simplifie */
-                if (na + 1 >= argc) {
-                  HTS_PANIC_PRINTF("Option #1 needs to be followed by an URL");
-                  printf("Example: '-#1' ./foo/bar/../foobar\n");
-                  htsmain_free();
-                  return -1;
-                } else {
-                  fil_simplifie(argv[na + 1]);
-                  printf("simplified=%s\n", argv[na + 1]);
-                  htsmain_free();
-                  return 0;
-                }
-                break;
-              case 'l': /* lienrelatif: relative link from curr_fil to link */
-                if (na + 2 >= argc) {
-                  HTS_PANIC_PRINTF(
-                      "Option #l needs a link and a current-file path");
-                  printf(
-                      "Example: '-#l' 'host/dir/img.gif' 'host/dir/p.html'\n");
-                  htsmain_free();
-                  return -1;
-                } else {
-                  char s[HTS_URLMAXSIZE * 2];
-
-                  if (lienrelatif(s, sizeof(s), argv[na + 1], argv[na + 2]) ==
-                      0)
-                    printf("relative=%s\n", s);
-                  else
-                    printf("relative=<ERROR>\n");
-                  htsmain_free();
-                  return 0;
-                }
-                break;
-              case 'i': /* ident_url_relatif: resolve a link -> adr/fil */
-                if (na + 3 >= argc) {
-                  HTS_PANIC_PRINTF(
-                      "Option #i needs a link, an origin address and file");
-                  printf("Example: '-#i' '../img.gif' 'www.foo.com' "
-                         "'/d/p.html'\n");
-                  htsmain_free();
-                  return -1;
-                } else {
-                  lien_adrfil af;
-                  const int r = ident_url_relatif(argv[na + 1], argv[na + 2],
-                                                  argv[na + 3], &af);
-
-                  if (r == 0)
-                    printf("adr=%s fil=%s\n", af.adr, af.fil);
-                  else
-                    printf("error=%d\n", r);
-                  htsmain_free();
-                  return 0;
-                }
-                break;
-              case '2':        // mimedefs
-                if (na + 1 >= argc) {
-                  HTS_PANIC_PRINTF("Option #2 needs to be followed by an URL");
-                  printf("Example: '-#2' /foo/bar.php\n");
-                  htsmain_free();
-                  return -1;
-                } else {
-                  char mime[256];
-
-                  // initialiser mimedefs
-                  //get_userhttptype(opt,1,opt->mimedefs,NULL);
-                  // check
-                  if (get_httptype_sized(opt, mime, sizeof(mime), argv[na + 1],
-                                         0)) {
-                    char ext[256];
-
-                    printf("%s is '%s'\n", argv[na + 1], mime);
-                    if (give_mimext(ext, sizeof(ext), mime)) {
-                      printf("and its local type is '.%s'\n", ext);
-                    }
-                  } else {
-                    printf("%s is of an unknown MIME type\n", argv[na + 1]);
-                  }
-                  htsmain_free();
-                  return 0;
-                }
-                break;
-              case '3':        // charset tests: httrack -#3 "iso-8859-1" "café"
-                if (++na + 1 < argc) {
-                  char *s =
-                    hts_convertStringToUTF8(argv[na+1], strlen(argv[na+1]), argv[na]);
-                  if (s != NULL) {
-                    printf("%s\n", s);
-                    free(s);
-                  } else {
-                    fprintf(stderr, "invalid string for charset %s\n", argv[na]);
-                  }
-                  na += 2;
-                } else {
-                  fprintf(stderr,
-                    "Option #3 needs to be followed by a charset and a string");
-                }
-                htsmain_free();
-                return 0;
-                break;
-              case '4':  // IDNA encoder: httrack -#4 "www.café.com"
-                if (++na < argc) {
-                  char *s = hts_convertStringUTF8ToIDNA(argv[na], strlen(argv[na]));
-                  if (s != NULL) {
-                    printf("%s\n", s);
-                    free(s);
-                  } else {
-                    fprintf(stderr, "invalid string '%s'\n", argv[na]);
-                  }
-                  na += 1;
-                } else {
-                  fprintf(stderr,
-                    "Option #4 needs to be followed by an IDNA string");
-                }
-                htsmain_free();
-                return 0;
-                break;
-              case '5':  // IDNA encoder: httrack -#5
-                if (++na < argc) {
-                  char *s = hts_convertStringIDNAToUTF8(argv[na], strlen(argv[na]));
-                  if (s != NULL) {
-                    printf("%s\n", s);
-                    free(s);
-                  } else {
-                    fprintf(stderr, "invalid string '%s'\n", argv[na]);
-                  }
-                  na += 1;
-                } else {
-                  fprintf(stderr,
-                    "Option #5 needs to be followed by an IDNA string");
-                }
-                htsmain_free();
-                return 0;
-                break;
-              case '6':  // entities: httrack -#6 "&foo;" ["encoding"]
-                if (++na < argc) {
-                  char *const s = strdup(argv[na]);
-                  const char *const enc = na + 1 < argc ? argv[na + 1] : "UTF-8";
-                  if (s != NULL 
-                    && hts_unescapeEntitiesWithCharset(s, s, strlen(s), 
-                                                       enc) == 0) {
-                    printf("%s\n", s);
-                    free(s);
-                  } else {
-                    fprintf(stderr, "invalid string '%s'\n", argv[na]);
-                  }
-                  na += 1;
-                } else {
-                  fprintf(stderr,
-                    "Option #6 needs to be followed by a string");
-                }
-                htsmain_free();
-                return 0;
-                break;
-              case '8':        /* string-safety selftest: httrack -#8 [overflow <bigstr>] */
-                if (na + 1 < argc
-                    && strncmp(argv[na + 1], "overflow", 8) == 0) {
-                  /* Deliberately exceed a sized buffer: the bounded op must
-                     abort. The source comes from argv so its length is opaque
-                     to the compiler (no static -Wstringop-overflow, genuine
-                     runtime check). "overflow-buff" exercises htsbuff. */
-                  char small[4];
-                  const char *const src =
-                    (na + 2 < argc) ? argv[na + 2] : "overflowing";
-
-                  if (strcmp(argv[na + 1], "overflow-buff") == 0) {
-                    htsbuff b = htsbuff_array(small);
-
-                    htsbuff_cat(&b, src);
-                  } else {
-                    strcpybuff(small, src);
-                  }
-                  printf("strsafe: NOT aborted\n");     /* must be unreachable */
-                  htsmain_free();
-                  return 1;
-                } else {
-                  const int err = string_safety_selftests();
-
-                  printf("strsafe: %s\n", err ? "FAIL" : "OK");
-                  htsmain_free();
-                  return err;
-                }
-                break;
-              case '7':  // hashtable selftest: httrack -#7 nb_entries
-                basic_selftests();
-                if (++na < argc) {
-                  char *const snum = strdup(argv[na]);
-                  unsigned long count = 0;
-                  const char *const names[] = {
-                    "", "add", "delete", "dry-add", "dry-del",
-                    "test-exists", "test-not-exist"
-                  };
-                  const struct {
-                    enum {
-                      DO_END,
-                      DO_ADD,
-                      DO_DEL,
-                      DO_DRY_ADD,
-                      DO_DRY_DEL,
-                      TEST_ADD,
-                      TEST_DEL
-                    } type;
-                    size_t modulus;
-                    size_t offset;
-                  } bench[] = {
-                    { DO_ADD, 4, 0 },     /* add 4/0 */
-                    { TEST_ADD, 4, 0 },   /* check 4/0 */
-                    { TEST_DEL, 4, 1 },   /* check 4/1 */
-                    { TEST_DEL, 4, 2 },   /* check 4/2 */
-                    { TEST_DEL, 4, 3 },   /* check 4/3 */
-                    { DO_DRY_DEL, 4, 1 }, /* del 4/1 */
-                    { DO_DRY_DEL, 4, 2 }, /* del 4/2 */
-                    { DO_DRY_DEL, 4, 3 }, /* del 4/3 */
-                    { DO_ADD, 4, 1 },     /* add 4/1 */
-                    { DO_DRY_ADD, 4, 1 }, /* add 4/1 */
-                    { TEST_ADD, 4, 0 },   /* check 4/0 */
-                    { TEST_ADD, 4, 1 },   /* check 4/1 */
-                    { TEST_DEL, 4, 2 },   /* check 4/2 */
-                    { TEST_DEL, 4, 3 },   /* check 4/3 */
-                    { DO_ADD, 4, 2 },     /* add 4/2 */
-                    { DO_DRY_DEL, 4, 3 }, /* del 4/3 */
-                    { DO_ADD, 4, 3 },     /* add 4/3 */
-                    { DO_DEL, 4, 3 },     /* del 4/3 */
-                    { TEST_ADD, 4, 0 },   /* check 4/0 */
-                    { TEST_ADD, 4, 1 },   /* check 4/1 */
-                    { TEST_ADD, 4, 2 },   /* check 4/2 */
-                    { TEST_DEL, 4, 3 },   /* check 4/3 */
-                    { DO_DEL, 4, 0 },     /* del 4/0 */
-                    { DO_DEL, 4, 1 },     /* del 4/1 */
-                    { DO_DEL, 4, 2 },     /* del 4/2 */
-                    /* empty here */
-                    { TEST_DEL, 1, 0 },   /* check */
-                    { DO_ADD, 4, 0 },     /* add 4/0 */
-                    { DO_ADD, 4, 1 },     /* add 4/1 */
-                    { DO_ADD, 4, 2 },     /* add 4/2 */
-                    { DO_DEL, 42, 0 },    /* add 42/0 */
-                    { TEST_DEL, 42, 0 },  /* check 42/0 */
-                    { TEST_ADD, 42, 2 },  /* check 42/2 */
-                    { DO_END }
-                  };
-                  char *buff = NULL;
-                  const char **strings = NULL;
-
-                  /* produce key #i */
-#define FMT() \
-                  char buffer[256]; \
-                  const char *name; \
-                  const long expected = (long) i * 1664525 + 1013904223; \
-                  do { \
-                    if (strings == NULL) { \
-                      snprintf(buffer, sizeof(buffer), \
-                        "http://www.example.com/website/sample/for/hashtable/" \
-                        "%ld/index.html?foo=%ld&bar", \
-                        (long) i, (long) (expected)); \
-                      name = buffer; \
-                    } else { \
-                      name = strings[i]; \
-                    } \
-                  } while(0)
-
-                  /* produce random patterns, or read from a file */
-                  if (sscanf(snum, "%lu", &count) != 1) {
-                    const off_t size = fsize(snum);
-                    FILE *fp = fopen(snum, "rb");
-                    if (fp != NULL) {
-                      buff = malloc(size);
-                      if (buff != NULL && fread(buff, 1, size, fp) == size) {
-                        size_t capa = 0;
-                        size_t i, last;
-                        for(i = 0, last = 0, count = 0 ; i < size ; i++) {
-                          if (buff[i] == 10 || buff[i] == 0) {
-                            buff[i] = '\0';
-                            if (capa == count) {
-                              if (capa == 0) {
-                                capa = 16;
-                              } else {
-                                capa <<= 1;
-                              }
-                              strings = (const char **) realloc((void*) strings, capa*sizeof(char*));
-                            }
-                            strings[count++] = &buff[last];
-                            last = i + 1;
-                          }
-                        }
-                      }
-                      fclose(fp);
-                    }
-                  }
-
-                  /* successfully read */
-                  if (count > 0) {
-                    coucal hashtable = coucal_new(0);
-                    size_t loop;
-                    for(loop = 0 ; bench[loop].type != DO_END ; loop++) {
-                      size_t i;
-                      for(i = bench[loop].offset ; i < (size_t) count
-                          ; i += bench[loop].modulus) {
-                        int result;
-                        FMT();
-                        if (bench[loop].type == DO_ADD
-                            || bench[loop].type == DO_DRY_ADD) {
-                          size_t k;
-                          result = coucal_write(hashtable, name, (uintptr_t) expected);
-                          for(k = 0 ; k < /* stash_size*2 */ 32 ; k++) {
-                            (void) coucal_write(hashtable, name, (uintptr_t) expected);
-                          }
-                          /* revert logic */
-                          if (bench[loop].type == DO_DRY_ADD) {
-                            result = result ? 0 : 1;
-                          }
-                        }
-                        else if (bench[loop].type == DO_DEL
-                            || bench[loop].type == DO_DRY_DEL) {
-                          size_t k;
-                          result = coucal_remove(hashtable, name);
-                          for(k = 0 ; k < /* stash_size*2 */ 32 ; k++) {
-                            (void) coucal_remove(hashtable, name);
-                          }
-                          /* revert logic */
-                          if (bench[loop].type == DO_DRY_DEL) {
-                            result = result ? 0 : 1;
-                          }
-                        }
-                        else if (bench[loop].type == TEST_ADD
-                            || bench[loop].type == TEST_DEL) {
-                          intptr_t value = -1;
-                          result = coucal_readptr(hashtable, name, &value);
-                          if (bench[loop].type == TEST_ADD && result
-                              && value != expected) {
-                            fprintf(stderr, "value failed for %s (expected %ld, got %ld)\n",
-                                    name, (long) expected, (long) value);
-                            exit(EXIT_FAILURE);
-                          }
-                          /* revert logic */
-                          if (bench[loop].type == TEST_DEL) {
-                            result = result ? 0 : 1;
-                          }
-                        }
-                        if (!result) {
-                          fprintf(stderr, "failed %s{%d/+%d} test on loop %ld"
-                                  " at offset %ld for %s\n",
-                                  names[bench[loop].type],
-                                  (int) bench[loop].modulus,
-                                  (int) bench[loop].offset,
-                                  (long) loop, (long) i, name);
-                          exit(EXIT_FAILURE);
-                        }
-                      }
-                    }
-                    coucal_delete(&hashtable);
-                    fprintf(stderr, "all hashtable tests were successful!\n");
-                  } else {
-                    fprintf(stderr, "Malformed number\n");
-                    exit(EXIT_FAILURE);
-                  }
-#undef FMT
-                } else {
-                  fprintf(stderr,
-                    "Option #7 needs to be followed by a number");
-                  exit(EXIT_FAILURE);
-                }
-                htsmain_free();
-                return 0;
-                break;
-              case '9': { // copy_htsopt selftest: httrack -#9
-                httrackp *from = hts_create_opt();
-                httrackp *to = hts_create_opt();
-                int err = 0;
-
-                /* from-values differ from both the to-values and the
-                   hts_create_opt() defaults (nearlink FALSE, errpage/parseall
-                   TRUE), so a copy that no-ops or just resets to defaults is
-                   caught too, not only the unsigned-guard bug. */
-                from->retry = 7; /* int field: positive control */
-                to->retry = 0;
-                from->nearlink = HTS_TRUE;
-                to->nearlink = HTS_FALSE;
-                from->errpage = HTS_FALSE;
-                to->errpage = HTS_TRUE;
-                from->parseall = HTS_FALSE;
-                to->parseall = HTS_TRUE;
-
-                copy_htsopt(from, to);
-
-                if (to->retry != 7)
-                  err = 1;
-                if (to->nearlink != HTS_TRUE)
-                  err = 1;
-                if (to->errpage != HTS_FALSE)
-                  err = 1;
-                if (to->parseall != HTS_FALSE)
-                  err = 1;
-
-                /* HTS_DEFAULT (-1) is "unspecified": copy_htsopt must skip it,
-                   leaving the target intact. Only a signed (int-backed) field
-                   can hold -1, so this also guards the type against regressing
-                   to an unsigned hts_boolean. */
-                from->parseall = HTS_DEFAULT;
-                to->parseall = HTS_TRUE;
-                copy_htsopt(from, to);
-                if (to->parseall != HTS_TRUE)
-                  err = 1;
-
-                hts_free_opt(from);
-                hts_free_opt(to);
-                printf("copy-htsopt: %s\n", err ? "FAIL" : "OK");
-                htsmain_free();
-                return err;
-              } break;
-              case 'Q': { // cookie request-header selftest: httrack -#Q
-                static t_cookie cookie;
-                char hdr[1024];
-                /* RFC 6265: bare name=value pairs, no $Version/$Path (#151). */
-                const char *expected = "Cookie: name=value; has_js=1" H_CRLF;
-                int err = 0;
-
-                const char *dom = "www.example.com";
-                int added;
-
-                cookie.max_len = (int) sizeof(cookie.data);
-                cookie.data[0] = '\0';
-                added = cookie_add(&cookie, "name", "value", dom, "/");
-                added |= cookie_add(&cookie, "has_js", "1", dom, "/");
-                /* different domain: must be filtered out */
-                added |= cookie_add(&cookie, "junk", "x", "other.org", "/");
-                if (added) {
-                  printf("cookie-header: FAIL (cookie_add setup)\n");
-                  htsmain_free();
-                  return 1;
-                }
-
-                http_cookie_header_selftest(&cookie, dom, "/", hdr,
-                                            sizeof(hdr));
-                if (strcmp(hdr, expected) != 0)
-                  err = 1;
-                if (strstr(hdr, "$Version") != NULL ||
-                    strstr(hdr, "$Path") != NULL)
-                  err = 1;
-                if (strstr(hdr, "junk") != NULL) // wrong-domain cookie leaked
-                  err = 1;
-                printf("cookie-header: %s\n", err ? "FAIL" : "OK");
-                if (err)
-                  printf("  got: %s\n", hdr);
-                htsmain_free();
-                return err;
-              } break;
              case '!':
                HTS_PANIC_PRINTF
                  ("Option #! is disabled for security reasons");
--- a/src/htshelp.c
+++ b/src/htshelp.c
@@ -646,9 +646,7 @@ void help(const char *app, int more) {
  infomsg("");
  infomsg("Guru options: (do NOT use if possible)");
  infomsg(" #X *use optimized engine (limited memory boundary checks)");
-  infomsg(" #0  filter test (-#0 '*.gif' 'www.bar.com/foo.gif')");
-  infomsg(" #1  simplify test (-#1 ./foo/bar/../foobar)");
-  infomsg(" #2  type test (-#2 /foo/bar.php)");
+  infomsg(" #test  list engine self-tests (run one with -#test=NAME [args])");
  infomsg(" #C  cache list (-#C '*.com/spider*.gif'");
  infomsg(" #R  cache repair (damaged cache)");
  infomsg(" #d  debug parser");
--- a/src/htslib.c
+++ b/src/htslib.c
@@ -4177,9 +4177,10 @@ HTSEXT_API hts_boolean get_httptype_sized(httrackp *opt, char *s, size_t ssize,
    /* Check html -> text/html */
    const char *a = fil + strlen(fil) - 1;

-    while((*a != '.') && (*a != '/') && (a > fil))
+    /* a < fil when fil is empty: bound before dereferencing */
+    while ((a > fil) && (*a != '.') && (*a != '/'))
      a--;
-    if (*a == '.' && strlen(a) < 32) {
+    if (a >= fil && *a == '.' && strlen(a) < 32) {
      int j = 0;

      a++;
--- a/src/htsname.c
+++ b/src/htsname.c
@@ -760,9 +760,9 @@ int url_savename(lien_adrfilsave *const afs,
        strcatbuff(fil, DEFAULT_HTML);  // nommer page par défaut (à priori ici html depuis un proxy http)
    }
  }
-  // Changer extension?
-  // par exemple, php3 sera sauvé en html, cgi en html ou gif, xbm etc.. selon les cas
-  if (ext_chg && !opt->no_type_change) {                // changer ext
+  // Change the extension? e.g. php3 saved as html, cgi as html or gif/xbm
+  // depending on the resolved type.
+  if (ext_chg && !opt->no_type_change) {
    char *a = fil + strlen(fil) - 1;

    if ((opt->debug > 1) && (opt->log != NULL)) {
@@ -774,11 +774,19 @@ int url_savename(lien_adrfilsave *const afs,
                      adr_complete, fil_complete, ext);
    }
    if (ext_chg == 1) {
+      // Cut the old extension only when it is empty (a bare trailing dot), the
+      // new one, or a recognized one; an unknown trailing ".token" (e.g.
+      // /article-1.884291, #115) is part of the name, not an extension.
+      const char *const old_ext = get_ext(catbuff, sizeof(catbuff), fil);
+      const int known_ext = !*old_ext || strfield2(old_ext, ext) ||
+                            is_knowntype(opt, fil) || is_dyntype(old_ext) ||
+                            ishtml_ext(old_ext) != -1;
+
      while((a > fil) && (*a != '.') && (*a != '/'))
        a--;
-      if (*a == '.')
-        *a = '\0';              // couper
-      strcatbuff(fil, ".");     // recopier point
+      if (*a == '.' && known_ext)
+        *a = '\0';          // cut
+      strcatbuff(fil, "."); // re-add the dot
    } else {
      while((a > fil) && (*a != '/'))
        a--;
@@ -786,7 +794,7 @@ int url_savename(lien_adrfilsave *const afs,
        a++;
      *a = '\0';
    }
-    strcatbuff(fil, ext);       // copier ext/nom
+    strcatbuff(fil, ext); // append ext/name
  }
  // Rechercher premier / et dernier .
  {
@@ -1721,10 +1729,10 @@ char *url_savename_refname_fullpath(httrackp * opt, const char *adr,
    StringBuff(opt->path_log), digest_filename);
 }

-/* remove refname if any */
-void url_savename_refname_remove(httrackp * opt, const char *adr,
-                                 const char *fil) {
+/* remove refname if any; HTS_TRUE if it was removed */
+hts_boolean url_savename_refname_remove(httrackp *opt, const char *adr,
+                                        const char *fil) {
  char *filename = url_savename_refname_fullpath(opt, adr, fil);

-  (void) UNLINK(filename);
+  return UNLINK(filename) == 0 ? HTS_TRUE : HTS_FALSE;
 }
--- a/src/htsname.h
+++ b/src/htsname.h
@@ -104,8 +104,9 @@ char *url_md5(char *digest_buffer, const char *fil_complete);
 void url_savename_refname(const char *adr, const char *fil, char *filename);
 char *url_savename_refname_fullpath(httrackp * opt, const char *adr,
                                    const char *fil);
-void url_savename_refname_remove(httrackp * opt, const char *adr,
-                                 const char *fil);
+/* Remove the temp-ref for (adr,fil); HTS_TRUE if it was removed. */
+hts_boolean url_savename_refname_remove(httrackp *opt, const char *adr,
+                                        const char *fil);
 #endif

 #endif
--- a/src/htsparse.c
+++ b/src/htsparse.c
@@ -3749,44 +3749,60 @@ int hts_mirror_check_moved(htsmoduleStruct * str,

      }                         // bloc
      // erreur HTTP (ex: 404, not found)
-    } else if ((r->statuscode == HTTP_PRECONDITION_FAILED)
-               || (r->statuscode == HTTP_REQUESTED_RANGE_NOT_SATISFIABLE)
-      ) {                       // Precondition Failed, c'est à dire pour nous redemander TOUT le fichier
-      if (fexist_utf8(heap(ptr)->sav)) {
-        remove(heap(ptr)->sav);        // Eliminer
-      } else {
-        hts_log_print(opt, LOG_WARNING,
-                      "Unexpected 412/416 error (%s) for %s%s, '%s' could not be found on disk",
-                      r->msg, urladr(), urlfil(),
-                      heap(ptr)->sav != NULL ? heap(ptr)->sav : "");
+    } else if ((r->statuscode == HTTP_PRECONDITION_FAILED) ||
+               (r->statuscode == HTTP_REQUESTED_RANGE_NOT_SATISFIABLE)) {
+      // 412/416: the resume partial is stale; re-get the whole file (#206)
+      lien_back *itemback = NULL;
+      int had_partial = 0;
+      int ref_existed = 0;
+      int ref_gone;
+
+      // Drop the temp-ref, its partial, and heap->sav so the re-get carries no
+      // Range; else back_add rebuilds the same Range and loops.
+      if (back_unserialize_ref(opt, heap(ptr)->adr, heap(ptr)->fil,
+                               &itemback) == 0) {
+        had_partial = 1;
+        ref_existed = 1;
+        // best-effort: an orphaned partial cannot re-Range once the ref is gone
+        if (fexist_utf8(itemback->url_sav))
+          (void) UNLINK(fconv(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
+                              itemback->url_sav));
+        back_clear_entry(itemback);
+        freet(itemback);
      }
-      if (!fexist_utf8(heap(ptr)->sav)) {    // Bien éliminé? (sinon on boucle..)
-#if HDEBUG
-        printf("Partial content NOT up-to-date, reget all file for %s\n",
-               heap(ptr)->sav);
-#endif
+      // don't re-record if the ref survived (it would re-Range and loop)
+      ref_gone =
+          url_savename_refname_remove(opt, heap(ptr)->adr, heap(ptr)->fil) ||
+          !ref_existed;
+      if (fexist_utf8(heap(ptr)->sav)) {
+        had_partial = 1;
+        remove(heap(ptr)->sav);
+      }
+
+      // Re-get once, only if a partial existed and both Range triggers are
+      // gone; a failed removal gives up rather than looping. range_used is
+      // unreliable (it does not survive the delayed-type two-pass).
+      if (had_partial && ref_gone && !fexist_utf8(heap(ptr)->sav)) {
        hts_log_print(opt, LOG_DEBUG, "Partial file reget (%s) for %s%s",
                      r->msg, urladr(), urlfil());
-        // enregistrer le MEME lien
        if (hts_record_link(opt, heap(ptr)->adr, heap(ptr)->fil, heap(ptr)->sav, "", "", NULL)) {
-          heap_top()->testmode = heap(ptr)->testmode;   // mode test?
-          heap_top()->link_import = 0;   // pas mode import
+          heap_top()->testmode = heap(ptr)->testmode;
+          heap_top()->link_import = 0;
          heap_top()->depth = heap(ptr)->depth;
          heap_top()->pass2 = max(heap(ptr)->pass2, numero_passe);
          heap_top()->retry = heap(ptr)->retry;
          heap_top()->premier = heap(ptr)->premier;
          heap_top()->precedent = ptr;
-          //
-          // canceller lien actuel
          error = 1;
-          hts_invalidate_link(opt, ptr);  // invalidate hashtable entry
-          //
-        } else {              // oups erreur, plus de mémoire!!
-          XH_uninit;          // désallocation mémoire & buffers
+          hts_invalidate_link(opt, ptr); // invalidate hashtable entry
+        } else {                         // out of memory
+          XH_uninit;
          return 0;
        }
      } else {
-        hts_log_print(opt, LOG_ERROR, "Can not remove old file %s", urlfil());
+        hts_log_print(opt, LOG_WARNING,
+                      "Giving up on partial reget (%s) for %s%s", r->msg,
+                      urladr(), urlfil());
        error = 1;
      }

--- a/src/htsselftest.c
+++ b/src/htsselftest.c
--- a/src/htsselftest.h
+++ b/src/htsselftest.h
@@ -0,0 +1,52 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) 2026 Xavier Roche and other contributors
+
+SPDX-License-Identifier: GPL-3.0-or-later
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Ethical use: we kindly ask that you NOT use this software to harvest email
+addresses or to collect any other private information about people. Doing so
+would dishonor our work and waste the many hours we have spent on it.
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: htsselftest.h                                          */
+/*       named dispatch for the hidden engine self-tests        */
+/* Author: Xavier Roche                                         */
+/* ------------------------------------------------------------ */
+
+#ifndef HTSSELFTEST_DEFH
+#define HTSSELFTEST_DEFH
+
+#ifdef HTS_INTERNAL_BYTECODE
+
+#ifndef HTS_DEF_FWSTRUCT_httrackp
+#define HTS_DEF_FWSTRUCT_httrackp
+typedef struct httrackp httrackp;
+#endif
+
+/* Run engine self-test `name` over the positional args argv[0..argc-1], or list
+   the available tests when name is NULL, empty, or "list". Prints the result;
+   returns the process exit code (0 == success). The caller owns option cleanup.
+   Reached through the hidden `httrack -#test[=NAME ...]` subcommand. */
+int hts_selftest(httrackp *opt, const char *name, int argc, char **argv);
+
+#endif
+
+#endif
--- a/tests/01_engine-cache-golden.test
+++ b/tests/01_engine-cache-golden.test
@@ -4,7 +4,7 @@
 # POSIX /bin/sh on some platforms (e.g. macOS), so avoid bashisms and GNU-only
 # tool flags despite the #!/bin/bash above.

-# Golden cache-format regression test (driven by 'httrack -#B <dir>').
+# Golden cache-format regression test (driven by 'httrack -#test=cache-golden <dir>').
 #
 # 01_engine-cache.test writes the cache with the same build it reads back (a
 # round-trip), so it cannot catch a read-path or ZIP-format regression where
@@ -13,7 +13,7 @@
 # byte-exact.
 #
 # Regenerate the fixture after a deliberate format change with
-# 'httrack -#B <dir> regen', then copy <dir>/hts-cache/new.zip over the
+# 'httrack -#test=cache-golden <dir> regen', then copy <dir>/hts-cache/new.zip over the
 # committed file.

 set -eu
@@ -37,11 +37,11 @@ trap 'rm -rf "$dir"' EXIT
 mkdir -p "$dir/hts-cache"
 cp "$fixture/hts-cache/new.zip" "$dir/hts-cache/new.zip"

-out=$(httrack -#B "$dir")
+out=$(httrack -#test=cache-golden "$dir")

 # Match the exact success line: the read must have found and verified every
-# entry, not merely failed to enter the mode (a bad -#B falls through to the
-# usage screen, which also exits non-zero but never prints this).
+# entry, not merely failed to enter the mode (a renamed/removed test prints the
+# registry to stderr, which also exits non-zero but never prints this).
 test "$out" = "cache-golden: OK" || {
    echo "expected 'cache-golden: OK', got: $out" >&2
    exit 1
--- a/tests/01_engine-cache-writefail.test
+++ b/tests/01_engine-cache-writefail.test
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Keep this POSIX-portable: the harness runs it via $(BASH), which is a plain
+# POSIX /bin/sh on some platforms (e.g. macOS), so avoid bashisms and GNU-only
+# tool flags despite the #!/bin/bash above.
+
+# Cache write-failure handling (httrack -#test=cache-writefail <dir>). #174/#219.
+# A failing new.zip write (disk full) used to crash the process via assertf; it
+# must instead stop the mirror with a fatal error (exit_xh=-1), no crash. The
+# self-test asserts that; reverting the fix makes -#test=cache-writefail abort (SIGABRT) and fail.
+
+set -eu
+
+dir=$(mktemp -d)
+trap 'rm -rf "$dir"' EXIT
+
+out=$(httrack -#test=cache-writefail "$dir")
+
+# Match the exact success line (error logs also go to stdout); a renamed/removed
+# test prints the registry to stderr, which exits non-zero but never prints this.
+printf '%s\n' "$out" | grep -qx "cache-writefail: OK" || {
+    echo "expected 'cache-writefail: OK', got: $out" >&2
+    exit 1
+}
--- a/tests/01_engine-cache.test
+++ b/tests/01_engine-cache.test
@@ -4,7 +4,7 @@
 # POSIX /bin/sh on some platforms (e.g. macOS), so avoid bashisms and GNU-only
 # tool flags despite the #!/bin/bash above.

-# Cache create/read/update logic (driven by 'httrack -#A <dir>').
+# Cache create/read/update logic (driven by 'httrack -#test=cache <dir>').
 #
 # The in-process self-test stores several hand-crafted edge entries (normal
 # HTML, an empty redirect with a near-limit location, a non-HTML body kept via
@@ -20,13 +20,13 @@ set -eu
 dir=$(mktemp -d)
 trap 'rm -rf "$dir"' EXIT

-# Like the other -# debug modes, a trailing token (the working directory) is
-# required; a bare '-#A' falls through to the usage screen.
-out=$(httrack -#A "$dir")
+# The working directory is a required argument; without it the test prints a
+# usage line to stderr and returns non-zero.
+out=$(httrack -#test=cache "$dir")

 # Match the exact success line, so the test cannot pass for an unrelated reason
-# (e.g. the -#A mode being gone and falling through to the usage screen, which
-# also exits non-zero but never prints this).
+# (e.g. the cache test being gone, which prints the registry to stderr but
+# never prints this line).
 test "$out" = "cache-selftest: OK" || {
    echo "expected 'cache-selftest: OK', got: $out" >&2
    exit 1
--- a/tests/01_engine-charset.test
+++ b/tests/01_engine-charset.test
@@ -4,13 +4,13 @@
 set -euo pipefail

 # charset -> UTF-8 conversion (hts_convertStringToUTF8).
-# -#3 <charset> <string> prints the string re-decoded from <charset> as UTF-8.
+# -#test=charset <charset> <string> prints the string re-decoded from <charset> as UTF-8.
 conv() {
-    test "$(httrack -O /dev/null -#3 "$1" "$2")" == "$3" || exit 1
+    test "$(httrack -O /dev/null -#test=charset "$1" "$2")" == "$3" || exit 1
 }
 # crash probe: malformed input must exit cleanly, not abort.
 runs() {
-    httrack -O /dev/null -#3 "$1" "$2" >/dev/null 2>&1 || exit 1
+    httrack -O /dev/null -#test=charset "$1" "$2" >/dev/null 2>&1 || exit 1
 }

 # the source bytes below are UTF-8 (this file is UTF-8); "café" is 0x63 61 66 C3 A9.
@@ -31,7 +31,7 @@ conv 'us-ascii' 'hello' 'hello'
 # unknown charset: ASCII passes through unchanged, but non-ASCII input cannot be
 # decoded and yields empty output (an error is printed to stderr).
 conv 'no-such-charset-xyz' 'abc' 'abc'
-test "$(httrack -O /dev/null -#3 'no-such-charset-xyz' 'café' 2>/dev/null)" == "" || exit 1
+test "$(httrack -O /dev/null -#test=charset 'no-such-charset-xyz' 'café' 2>/dev/null)" == "" || exit 1

 # malformed UTF-8 (lone continuation byte, truncated lead byte) must not crash
 runs 'utf-8' $'\x80'
--- a/tests/01_engine-cookies.test
+++ b/tests/01_engine-cookies.test
@@ -1,14 +1,15 @@
 #!/bin/bash
 #
 # Issue #151 guard: the request Cookie header must be bare RFC 6265 name=value
-# pairs, no $Version/$Path attributes. Driven by the 'httrack -#Q' selftest.
+# pairs, no $Version/$Path attributes. Driven by the 'httrack -#test=cookies' selftest.

 set -eu

-# A trailing token is required; a bare '-#Q' falls through to the usage screen.
-out=$(httrack -#Q run)
+# 'run' is an ignored placeholder argument.
+out=$(httrack -#test=cookies run)

-# Exact-match the success line so a fall-through to usage can't pass the test.
+# Exact-match the success line so a renamed/removed test (it prints the registry
+# to stderr) can't pass.
 test "$out" = "cookie-header: OK" || {
    echo "expected 'cookie-header: OK', got: $out" >&2
    exit 1
--- a/tests/01_engine-copyopt.test
+++ b/tests/01_engine-copyopt.test
@@ -2,15 +2,16 @@
 #
 # Regression guard for the unsigned-enum sentinel trap: copy_htsopt's
 # `if (from->X > -1)` guard is always false for unsigned hts_boolean fields, so
-# they silently stop being copied. Driven by the in-process 'httrack -#9' test.
+# they silently stop being copied. Driven by the in-process 'httrack -#test=copyopt' test.
 # Keep POSIX-portable (harness runs it via $(BASH), a plain /bin/sh on macOS).

 set -eu

-# A trailing token is required; a bare '-#9' falls through to the usage screen.
-out=$(httrack -#9 run)
+# 'run' is an ignored placeholder argument.
+out=$(httrack -#test=copyopt run)

-# Exact-match the success line so a fall-through to usage can't pass the test.
+# Exact-match the success line so a renamed/removed test (it prints the registry
+# to stderr) can't pass.
 test "$out" = "copy-htsopt: OK" || {
    echo "expected 'copy-htsopt: OK', got: $out" >&2
    exit 1
--- a/tests/01_engine-dns.test
+++ b/tests/01_engine-dns.test
@@ -5,9 +5,8 @@ set -euo pipefail

 # DNS resolver/cache self-test: a mock getaddrinfo (no network) checks address
 # family, single-address selection, the -@i4/-@i6 family filter, and cache reuse.
-# The trailing token is required, like the other -# selftests, so a bare command
-# line isn't treated as "no arguments" and routed to the usage screen.
-out=$(httrack -#D run)
+# 'run' is an ignored placeholder argument.
+out=$(httrack -#test=dns run)

 test "$out" = "dns-selftest: OK" || {
    echo "expected 'dns-selftest: OK', got: $out" >&2
--- a/tests/01_engine-entities.test
+++ b/tests/01_engine-entities.test
@@ -4,13 +4,13 @@
 set -euo pipefail

 # HTML entity unescaping (hts_unescapeEntitiesWithCharset).
-# -#6 <string> prints the string with entities decoded (UTF-8 output).
+# -#test=entities <string> prints the string with entities decoded (UTF-8 output).
 ent() {
-    test "$(httrack -O /dev/null -#6 "$1")" == "$2" || exit 1
+    test "$(httrack -O /dev/null -#test=entities "$1")" == "$2" || exit 1
 }
 # crash probe: malformed input must exit cleanly, not abort.
 runs() {
-    httrack -O /dev/null -#6 "$1" >/dev/null 2>&1 || exit 1
+    httrack -O /dev/null -#test=entities "$1" >/dev/null 2>&1 || exit 1
 }

 # named entities
--- a/tests/01_engine-filter.test
+++ b/tests/01_engine-filter.test
@@ -4,13 +4,13 @@
 set -euo pipefail

 # wildcard filter engine (strjoker), the core of +/- include/exclude rules.
-# -#0 <filter> <string> prints "<string> does match <filter>" or "... does NOT match ...".
+# -#test=filter <filter> <string> prints "<string> does match <filter>" or "... does NOT match ...".

 match() {
-    test "$(httrack -O /dev/null -#0 "$1" "$2")" == "$2 does match $1" || exit 1
+    test "$(httrack -O /dev/null -#test=filter "$1" "$2")" == "$2 does match $1" || exit 1
 }
 nomatch() {
-    test "$(httrack -O /dev/null -#0 "$1" "$2")" == "$2 does NOT match $1" || exit 1
+    test "$(httrack -O /dev/null -#test=filter "$1" "$2")" == "$2 does NOT match $1" || exit 1
 }

 # bare star matches everything
--- a/tests/01_engine-hashtable.test
+++ b/tests/01_engine-hashtable.test
@@ -3,5 +3,7 @@

 set -euo pipefail

-# httrack internal hashtable autotest on 100K keys
-httrack -#7 100000
+# httrack internal hashtable autotest on 100K keys. Assert the success line (on
+# stderr) so a misrouted registry entry can't pass on exit code alone.
+out=$(httrack -#test=hashtable 100000 2>&1)
+printf '%s\n' "$out" | grep -q "all hashtable tests were successful!" || exit 1
--- a/tests/01_engine-idna.test
+++ b/tests/01_engine-idna.test
@@ -3,13 +3,13 @@

 set -euo pipefail

-# IDNA / punycode encode (-#4) and decode (-#5). This code has a CVE history,
+# IDNA / punycode encode (-#test=idna-encode) and decode (-#test=idna-decode). This code has a CVE history,
 # so the edge cases below cover passthrough, round-trips, and malformed input.

-enc() { test "$(httrack -O /dev/null -#4 "$1")" == "$2" || exit 1; }
-dec() { test "$(httrack -O /dev/null -#5 "$1")" == "$2" || exit 1; }
+enc() { test "$(httrack -O /dev/null -#test=idna-encode "$1")" == "$2" || exit 1; }
+dec() { test "$(httrack -O /dev/null -#test=idna-decode "$1")" == "$2" || exit 1; }
 # crash probe: malformed ACE input must exit cleanly, not abort.
-runs() { httrack -O /dev/null -#5 "$1" >/dev/null 2>&1 || exit 1; }
+runs() { httrack -O /dev/null -#test=idna-decode "$1" >/dev/null 2>&1 || exit 1; }

 # encode
 enc 'www.café.com' 'www.xn--caf-dma.com'
--- a/tests/01_engine-mime.test
+++ b/tests/01_engine-mime.test
@@ -4,13 +4,13 @@
 set -euo pipefail

 # MIME type guessing from extension (get_httptype / give_mimext).
-# -#2 <path> prints "<path> is '<mime>'" then "and its local type is '.<ext>'".
+# -#test=mime <path> prints "<path> is '<mime>'" then "and its local type is '.<ext>'".

 mime() {
-    test "$(httrack -O /dev/null -#2 "$1" | head -1)" == "$1 is '$2'" || exit 1
+    test "$(httrack -O /dev/null -#test=mime "$1" | head -1)" == "$1 is '$2'" || exit 1
 }
 unknown() {
-    test "$(httrack -O /dev/null -#2 "$1" | head -1)" == "$1 is of an unknown MIME type" || exit 1
+    test "$(httrack -O /dev/null -#test=mime "$1" | head -1)" == "$1 is of an unknown MIME type" || exit 1
 }

 mime '/a/b.html' 'text/html'
--- a/tests/01_engine-relative.test
+++ b/tests/01_engine-relative.test
@@ -8,7 +8,7 @@ set -euo pipefail
 # relative path from <curr>'s directory to <link>
 rel() {
    local got
-    got=$(httrack -O /dev/null -#l "$1" "$2")
+    got=$(httrack -O /dev/null -#test=relative "$1" "$2")
    test "$got" == "relative=$3" ||
        {
            echo "FAIL rel($1, $2): got '$got' want 'relative=$3'"
@@ -19,7 +19,7 @@ rel() {
 # resolve <link> against origin <adr>/<fil> -> adr=.. fil=..
 ident() {
    local got
-    got=$(httrack -O /dev/null -#i "$1" "$2" "$3")
+    got=$(httrack -O /dev/null -#test=resolve "$1" "$2" "$3")
    test "$got" == "$4" ||
        {
            echo "FAIL ident($1, $2, $3): got '$got' want '$4'"
--- a/tests/01_engine-savename.test
+++ b/tests/01_engine-savename.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+
+set -euo pipefail
+
+# Local save-name extension resolution (url_savename via -#test=savename <fil> <content-type>).
+# Asserts on the basename of "savename: <path>".
+
+name() {
+    out="$(httrack -O /dev/null -#test=savename "$1" "$2" | sed -n 's/^savename: //p')"
+    test "${out##*/}" == "$3" || {
+        echo "FAIL: '$1' '$2' -> '$out' (want '$3')"
+        exit 1
+    }
+}
+
+# #115: an unknown trailing ".token" is part of the name, keep it and append the type.
+name '/article-1.884291' 'text/html' 'article-1.884291.html'
+name '/news/story-12345.987654' 'text/html' 'story-12345.987654.html'
+
+# Recognized extensions still collapse to the resolved type.
+name '/page.php' 'text/html' 'page.html'
+name '/page.asp' 'text/html' 'page.html'
+name '/foo' 'text/html' 'foo.html'
+
+# A bare trailing dot is not a tail to keep.
+name '/page.' 'text/html' 'page.html'
+
+# Soft-404 (#267/#408): a binary URL served as HTML is named .html.
+name '/x.pdf' 'text/html' 'x.html'
+name '/x.gif' 'text/html' 'x.html'
+
+# Type agrees with the extension: keep it, no churn, no double extension.
+name '/x.pdf' 'application/pdf' 'x.pdf'
+name '/x.jpg' 'image/jpeg' 'x.jpg'
+name '/x.html' 'text/html' 'x.html'
+name '/x.js' 'application/x-javascript' 'x.js'
+name '/types/data.json' 'application/json' 'data.json'
+
+# Agreeing type must not rewrite the extension's casing (no strip-and-reappend).
+name '/x.JPG' 'image/jpeg' 'x.JPG'
--- a/tests/01_engine-selftest-dispatch.test
+++ b/tests/01_engine-selftest-dispatch.test
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# The -#test dispatch itself: a bare -#test lists the registry, and an unknown
+# name errors (non-zero, diagnostic) instead of silently passing.
+
+set -eu
+
+# Bare -#test lists known tests (printed to stderr).
+list=$(httrack -#test 2>&1)
+printf '%s\n' "$list" | grep -q "filter" || exit 1
+printf '%s\n' "$list" | grep -q "cache-writefail" || exit 1
+
+# Unknown name: non-zero exit + diagnostic, and no test result line.
+rc=0
+err=$(httrack -#test=bogus 2>&1) || rc=$?
+test "$rc" -ne 0 || exit 1
+printf '%s\n' "$err" | grep -q "Unknown self-test" || exit 1
--- a/tests/01_engine-simplify.test
+++ b/tests/01_engine-simplify.test
@@ -5,7 +5,7 @@ set -euo pipefail

 # path simplify engine (fil_simplifie): collapses ./ and ../ segments.
 simp() {
-    test "$(httrack -O /dev/null -#1 "$1")" == "simplified=$2" || exit 1
+    test "$(httrack -O /dev/null -#test=simplify "$1")" == "simplified=$2" || exit 1
 }

 simp './foo/bar/' 'foo/bar/'
--- a/tests/01_engine-strsafe.test
+++ b/tests/01_engine-strsafe.test
@@ -3,23 +3,22 @@

 set -euo pipefail

-# htssafe.h bounded string operations (driven by 'httrack -#8').
+# htssafe.h bounded string operations (driven by 'httrack -#test=strsafe').

 # Success path: every bounded op (strcpybuff/strcatbuff/strncatbuff/strlcpybuff)
-# must behave correctly. Like the other -# debug modes, a trailing token is
-# required (a bare '-#8' falls through to the usage screen).
+# must behave correctly. 'run' selects the success path (vs the overflow modes).
 rc=0
-out=$(httrack -#8 run) || rc=$?
+out=$(httrack -#test=strsafe run) || rc=$?
 test "$rc" -eq 0 || exit 1
 test "$out" == "strsafe: OK" || exit 1

 # Overflow path: an over-capacity write into a sized buffer must be caught by
 # the bounded macro and abort the process, not be silently truncated/completed.
 # Assert the htssafe abort signature specifically, so the test cannot pass for
-# an unrelated reason (e.g. the -#8 mode being gone and falling through to the
-# usage screen, which also exits non-zero).
+# an unrelated reason (e.g. the strsafe test being gone, which prints the
+# registry to stderr and also exits non-zero).
 # the bounded macro aborts (non-zero exit), so don't let set -e trip on it
-err=$(httrack -#8 overflow "this string is far too long for the buffer" 2>&1) || true
+err=$(httrack -#test=strsafe overflow "this string is far too long for the buffer" 2>&1) || true
 case "$err" in
 *"strsafe: NOT aborted"*)
    echo "over-capacity write was NOT caught" >&2
@@ -36,7 +35,7 @@ esac
 # capacity (4 bytes into a 4-byte buffer), so this also pins the boundary: a
 # '<=' off-by-one in the capacity check would let it through (and print "NOT
 # aborted"). Match the specific htsbuff abort message, not just any assert.
-err=$(httrack -#8 overflow-buff "abcd" 2>&1) || true
+err=$(httrack -#test=strsafe overflow-buff "abcd" 2>&1) || true
 case "$err" in
 *"strsafe: NOT aborted"*)
    echo "htsbuff over-capacity write was NOT caught" >&2
--- a/tests/20_local-resume-loop.test
+++ b/tests/20_local-resume-loop.test
@@ -0,0 +1,113 @@
+#!/bin/bash
+# Issue #206: a continue/update crawl looped forever when the resume Range got a
+# 416. Pass 1 leaves a partial + temp-ref; pass 2 must terminate and not loop.
+set -u
+
+: "${top_srcdir:=..}"
+testdir=$(cd "$(dirname "$0")" && pwd)
+server="${testdir}/local-server.py"
+
+command -v python3 >/dev/null || ! echo "python3 not found; skipping" || exit 77
+
+tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/httrack_206.XXXXXX") || exit 1
+serverpid=
+crawlpid=
+cleanup() {
+    test -n "$crawlpid" && kill -9 "$crawlpid" 2>/dev/null
+    if test -n "$serverpid"; then
+        kill "$serverpid" 2>/dev/null
+        wait "$serverpid" 2>/dev/null
+    fi
+    rm -rf "$tmpdir"
+}
+trap cleanup EXIT HUP INT QUIT PIPE TERM
+
+# --- start the server, discover its ephemeral port --------------------------
+# RESUME_COUNTER gets a byte per /resume/blob.txt request (pass-2 delta bounds re-gets).
+serverlog="${tmpdir}/server.log"
+counter="${tmpdir}/blobcount"
+RESUME_COUNTER="$counter" python3 "$server" --root "${testdir}/server-root" >"$serverlog" 2>&1 &
+serverpid=$!
+port=
+for _ in $(seq 1 50); do
+    line=$(head -n1 "$serverlog" 2>/dev/null)
+    if test "${line%% *}" == "PORT"; then
+        port="${line#PORT }"
+        break
+    fi
+    kill -0 "$serverpid" 2>/dev/null || {
+        echo "server exited early: $(cat "$serverlog")"
+        exit 1
+    }
+    sleep 0.1
+done
+test -n "$port" || {
+    echo "could not discover server port"
+    exit 1
+}
+base="http://127.0.0.1:${port}"
+
+command -v httrack >/dev/null || { # POSIX lookup, as for python3; `which` may be absent
+    echo "could not find httrack"
+    exit 1
+}
+out="${tmpdir}/crawl"
+mkdir "$out"
+common=(-O "$out" --quiet --disable-security-limits --robots=0 --timeout=30 --retries=0)
+refdir="${out}/hts-cache/ref"
+
+# --- pass 1: crawl, interrupt once the blob download is underway -------------
+printf '[pass 1: interrupt mid-download] ..\t'
+httrack "${common[@]}" "${base}/resume/index.html" >"${tmpdir}/log1" 2>&1 &
+crawlpid=$!
+# Wait until blob.txt is requested, then SIGTERM so httrack's exit handler
+# finalizes the cache and serializes the temp-ref.
+for _ in $(seq 1 300); do
+    test -s "$counter" && break
+    kill -0 "$crawlpid" 2>/dev/null || break
+    sleep 0.1
+done
+sleep 0.5
+kill -TERM "$crawlpid" 2>/dev/null
+wait "$crawlpid" 2>/dev/null
+crawlpid=
+test -n "$(find "$refdir" -name '*.ref' 2>/dev/null)" || {
+    echo "FAIL: no temp-ref survived pass 1; cannot drive #206"
+    exit 1
+}
+echo "OK (temp-ref present)"
+before=$(wc -c <"$counter" 2>/dev/null || echo 0)
+
+# --- pass 2: --continue -> resume Range -> 416, bounded against the #206 loop -
+# Kill pass 2 after a deadline (portable stand-in for `timeout`, absent on macOS).
+printf '[pass 2: resume must terminate] ..\t'
+HANG_RC=137 # 128 + SIGKILL
+httrack "${common[@]}" --continue "${base}/resume/index.html" >"${tmpdir}/log2" 2>&1 &
+crawlpid=$!
+(sleep 30 && kill -9 "$crawlpid" 2>/dev/null) &
+guard=$!
+rc=0
+wait "$crawlpid" 2>/dev/null || rc=$?
+crawlpid=
+kill "$guard" 2>/dev/null || true
+wait "$guard" 2>/dev/null || true
+if test "$rc" -eq "$HANG_RC"; then
+    echo "FAIL: pass 2 did not terminate (#206 resume->416 loop)"
+    exit 1
+fi
+echo "OK (terminated, rc=$rc)"
+
+# The fix re-gets once (resume Range + range-less re-get = 2): the lower bound
+# rejects a drop-the-link non-fix (1), the upper bound rejects the loop (many).
+after=$(wc -c <"$counter" 2>/dev/null || echo 0)
+hits=$((after - before))
+printf '[bounded re-get count] ..\t'
+if test "$hits" -lt 2; then
+    echo "FAIL: only ${hits} pass-2 request(s); the stale partial was not re-got"
+    exit 1
+fi
+if test "$hits" -gt 8; then
+    echo "FAIL: ${hits} pass-2 requests for blob.txt (resume is looping)"
+    exit 1
+fi
+echo "OK (${hits} requests)"
--- a/tests/21_local-intl-update.test
+++ b/tests/21_local-intl-update.test
@@ -0,0 +1,11 @@
+#!/bin/bash
+#
+# #157: a dotless, accented URL named .html on the first crawl must keep .html
+# across an update -- not revert to the extensionless name.
+
+: "${top_srcdir:=..}"
+
+bash "$top_srcdir/tests/local-crawl.sh" --errors 0 --rerun \
+    --found 'intl/Instalação_CVS_no_Ubuntu.html' \
+    --not-found 'intl/Instalação_CVS_no_Ubuntu' \
+    httrack 'BASEURL/intl/index.html'
--- a/tests/22_local-broken-size.test
+++ b/tests/22_local-broken-size.test
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Issues #32/#41: a Content-Length that disagrees with the body warns "bogus
+# state (broken size)" and skips the cache; -%B (tolerant) accepts it.
+set -e # a failed default pass must fail the test, not be masked by the -%B pass
+
+: "${top_srcdir:=..}"
+
+# Default: warn, but the file is still written.
+bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
+    --found 'size/oversize.bin' \
+    --log-found 'bogus state \(broken size' \
+    httrack 'BASEURL/size/index.html'
+
+# -%B (tolerant): no warning, file written.
+bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
+    --found 'size/oversize.bin' \
+    --log-not-found 'bogus state' \
+    httrack 'BASEURL/size/index.html' '-%B'
--- a/tests/23_local-errpage.test
+++ b/tests/23_local-errpage.test
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Issue #17: with "no error pages" (-o0), 4xx/5xx bodies must not be written;
+# a genuine 0-byte 200 stays. Default (-o1) writes the error page. (#17's purge
+# half also does not reproduce; the purge path is not exercised here.)
+set -e
+
+: "${top_srcdir:=..}"
+
+# -o0: 404 suppressed, good page and the legit 0-byte 200 kept.
+bash "$top_srcdir/tests/local-crawl.sh" --errors 1 \
+    --found 'errpage/good.html' \
+    --found 'errpage/empty.html' \
+    --not-found 'errpage/missing.html' \
+    httrack 'BASEURL/errpage/index.html' '-o0'
+
+# Control -o1 (default): the 404 error page is written.
+bash "$top_srcdir/tests/local-crawl.sh" --errors 1 \
+    --found 'errpage/missing.html' \
+    httrack 'BASEURL/errpage/index.html' '-o1'
--- a/tests/24_local-resume-overlap.test
+++ b/tests/24_local-resume-overlap.test
@@ -0,0 +1,109 @@
+#!/bin/bash
+# Issue #198: on a resumed download the server may answer the Range with a 206
+# that starts *before* the offset we asked for (block-aligned ranges). httrack
+# must honor the returned Content-Range, not blindly append, or the overlap
+# bytes get duplicated and the file grows (corrupt PDFs). Pass 1 interrupts
+# flaky.bin mid-body (partial + temp-ref); pass 2 resumes against a 206 that
+# backs up 8 bytes. The result must equal the same bytes fetched whole (full.bin).
+set -eu
+
+: "${top_srcdir:=..}"
+testdir=$(cd "$(dirname "$0")" && pwd)
+server="${testdir}/local-server.py"
+
+command -v python3 >/dev/null || ! echo "python3 not found; skipping" || exit 77
+
+tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/httrack_198.XXXXXX") || exit 1
+serverpid=
+crawlpid=
+cleanup() {
+    if test -n "$crawlpid"; then kill -9 "$crawlpid" 2>/dev/null || true; fi
+    if test -n "$serverpid"; then
+        kill "$serverpid" 2>/dev/null || true
+        wait "$serverpid" 2>/dev/null || true
+    fi
+    rm -rf "$tmpdir"
+}
+trap cleanup EXIT HUP INT QUIT PIPE TERM
+
+# OVERLAP_COUNTER gets a byte per flaky.bin request so pass 1 knows when to interrupt.
+serverlog="${tmpdir}/server.log"
+counter="${tmpdir}/hits"
+resumed="${tmpdir}/resumed" # gets a byte when the server serves a resume 206
+OVERLAP_COUNTER="$counter" OVERLAP_RESUMED="$resumed" \
+    python3 "$server" --root "${testdir}/server-root" \
+    >"$serverlog" 2>&1 &
+serverpid=$!
+port=
+for _ in $(seq 1 50); do
+    line=$(head -n1 "$serverlog" 2>/dev/null) || true # log may not exist yet; don't trip set -e
+    if test "${line%% *}" = "PORT"; then
+        port="${line#PORT }"
+        break
+    fi
+    kill -0 "$serverpid" 2>/dev/null || {
+        echo "server exited early: $(cat "$serverlog")"
+        exit 1
+    }
+    sleep 0.1
+done
+test -n "$port" || {
+    echo "could not discover server port"
+    exit 1
+}
+base="http://127.0.0.1:${port}"
+
+command -v httrack >/dev/null || { # POSIX lookup; `which` is not guaranteed to exist
+    echo "could not find httrack"
+    exit 1
+}
+out="${tmpdir}/crawl"
+common=(-O "$out" --quiet --disable-security-limits --robots=0 --timeout=30 --retries=0 -c1)
+refdir="${out}/hts-cache/ref"
+
+# pass 1: interrupt once flaky.bin's prefix is streaming (partial + temp-ref).
+printf '[pass 1: interrupt flaky.bin] ..\t'
+httrack "${common[@]}" "${base}/overlap/index.html" >"${tmpdir}/log1" 2>&1 &
+crawlpid=$!
+for _ in $(seq 1 300); do
+    test -s "$counter" && break
+    kill -0 "$crawlpid" 2>/dev/null || break
+    sleep 0.1
+done
+sleep 0.5
+kill -TERM "$crawlpid" 2>/dev/null || true
+wait "$crawlpid" 2>/dev/null || true
+crawlpid=
+test -n "$(find "$refdir" -name '*.ref' 2>/dev/null)" || {
+    echo "FAIL: no temp-ref survived pass 1; cannot drive the resume"
+    exit 1
+}
+echo "OK (temp-ref present)"
+
+# pass 2: --continue -> resume Range -> 206 that starts 8 bytes early.
+printf '[pass 2: resume flaky.bin] ..\t'
+httrack "${common[@]}" --continue "${base}/overlap/index.html" >"${tmpdir}/log2" 2>&1 || true
+echo "OK"
+
+# Guard against a silent full re-download: the byte-compare below only tests the
+# fix if pass 2 actually went through the resume Range -> 206 path.
+printf '[resume path was exercised] ..\t'
+if ! test -s "$resumed"; then
+    echo "FAIL: pass 2 never triggered a resume 206; the overlap fix was not exercised"
+    exit 1
+fi
+echo "OK"
+
+printf '[resumed file is not corrupted] ..\t'
+dir=$(find "$out" -maxdepth 1 -type d -name '127.0.0.1*' | head -n1) # single host root
+flaky="${dir}/overlap/flaky.bin"
+full="${dir}/overlap/full.bin"
+if ! test -f "$flaky" || ! test -f "$full"; then
+    echo "FAIL: flaky.bin or full.bin missing after pass 2"
+    exit 1
+fi
+if ! cmp -s "$flaky" "$full"; then
+    echo "FAIL: resumed flaky.bin ($(wc -c <"$flaky")) != full.bin ($(wc -c <"$full")); overlap duplicated"
+    exit 1
+fi
+echo "OK ($(wc -c <"$flaky") bytes, byte-identical)"
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -26,6 +26,7 @@ TESTS = \
 	00_runnable.test \
 	01_engine-cache.test \
 	01_engine-cache-golden.test \
+	01_engine-cache-writefail.test \
 	01_engine-charset.test \
 	01_engine-cmdline.test \
 	01_engine-cookies.test \
@@ -40,6 +41,8 @@ TESTS = \
 	01_engine-parse.test \
 	01_engine-rcfile.test \
 	01_engine-relative.test \
+	01_engine-savename.test \
+	01_engine-selftest-dispatch.test \
 	01_engine-simplify.test \
 	01_engine-strsafe.test \
 	02_manpage-regen.test \
@@ -58,6 +61,11 @@ TESTS = \
 	16_local-assume.test \
 	17_local-empty-ct.test \
 	18_local-update.test \
-	19_local-connect-fallback.test
+	19_local-connect-fallback.test \
+	20_local-resume-loop.test \
+	21_local-intl-update.test \
+	22_local-broken-size.test \
+	23_local-errpage.test \
+	24_local-resume-overlap.test

 CLEANFILES = check-network_sh.cache
--- a/tests/local-crawl.sh
+++ b/tests/local-crawl.sh
@@ -14,7 +14,9 @@
 # Usage:
 #   bash local-crawl.sh [--tls] [--root DIR] \
 #       --errors N --files N --found PATH ... --directory PATH ... \
+#       --log-found REGEX ... --log-not-found REGEX ... \
 #       httrack BASEURL/some/path [httrack-args...]
+# --log-found/--log-not-found grep (ERE) the crawl's hts-log.txt.

 set -u

@@ -107,7 +109,7 @@ while test "$pos" -lt "$nargs"; do
        audit+=("${args[$pos]}" "${args[$((pos + 1))]}")
        pos=$((pos + 1))
        ;;
-    --found | --not-found | --directory)
+    --found | --not-found | --directory | --log-found | --log-not-found)
        audit+=("${args[$pos]}" "${args[$((pos + 1))]}")
        pos=$((pos + 1))
        ;;
@@ -196,6 +198,15 @@ if test -n "$rerun"; then
        exit 1
    }
    result "OK (update)"
+    # The update summary reports "files updated"; a fresh crawl never does. Assert
+    # it so a regression that bypasses the cache (re-crawls fresh) can't pass.
+    info "checking update used the cache"
+    if grep -aqE "mirror complete in .*files updated" "${out}/hts-log.txt"; then
+        result "OK"
+    else
+        result "update pass did not report cache activity"
+        exit 1
+    fi
 fi

 # --- discover the single host root (127.0.0.1_<port> or 127.0.0.1) -----------
@@ -248,6 +259,22 @@ while test "$i" -lt "${#audit[@]}"; do
            exit 1
        fi
        ;;
+    --log-found)
+        i=$((i + 1)) # the ERE operand; passed with -e so a leading '-' is not an option
+        info "checking log matches ${audit[$i]}"
+        if grep -aqE -e "${audit[$i]}" "${out}/hts-log.txt"; then result "OK"; else
+            result "not in log"
+            exit 1
+        fi
+        ;;
+    --log-not-found)
+        i=$((i + 1)) # the ERE operand; passed with -e so a leading '-' is not an option
+        info "checking log lacks ${audit[$i]}"
+        if grep -aqE -e "${audit[$i]}" "${out}/hts-log.txt"; then
+            result "present in log"
+            exit 1
+        else result "OK"; fi
+        ;;
    esac
    i=$((i + 1))
 done
--- a/tests/local-server.py
+++ b/tests/local-server.py
@@ -15,6 +15,7 @@ stdlib only (http.server + ssl) -- no new build or runtime dependency.

 import argparse
 import os
+import time
 from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer
 from urllib.parse import quote, unquote, urlsplit

@@ -176,6 +177,152 @@ class Handler(SimpleHTTPRequestHandler):
        body, ctype = self.TYPE_MATRIX[path]
        self.send_raw(body, ctype)

+    # --- special chars in URLs across an update (issue #157) ---------------
+    # A dotless, accented basename served as text/html (MediaWiki style). The
+    # name the first crawl picks (.html) must survive the update pass.
+    INTL_NAME = "Instalação_CVS_no_Ubuntu"
+
+    def route_intl_index(self):
+        self.send_html('\t<a href="%s">accented</a>\n' % self.INTL_NAME)
+
+    def route_intl_page(self):
+        self.send_raw(b"<html><body>accented page</body></html>\n", "text/html")
+
+    # resume / 416 loop (#206): the first GET stalls after a prefix so the crawl
+    # can be interrupted (partial + temp-ref); every later request is 416.
+    RESUME_PREFIX = b"PARTIAL-" + b"x" * 4096  # flushed before the stall
+    RESUME_LEN = len(RESUME_PREFIX) + 4096  # declared length never delivered
+    _resume_started = False
+
+    def route_resume_index(self):
+        self.send_html('\t<a href="blob.txt">blob</a>')
+
+    def route_resume(self):
+        counter = os.environ.get("RESUME_COUNTER")
+        if counter:
+            with open(counter, "a") as fp:
+                fp.write("x")
+        # First GET: stall mid-body so the crawl can be interrupted with a partial.
+        if not Handler._resume_started:
+            Handler._resume_started = True
+            self.send_response(200)
+            self.send_header("Content-Type", "image/png")
+            self.send_header("Content-Length", str(self.RESUME_LEN))
+            self.send_header("Accept-Ranges", "bytes")
+            self.end_headers()
+            if self.command != "HEAD":
+                self.wfile.write(self.RESUME_PREFIX)
+                self.wfile.flush()
+                try:
+                    while True:
+                        time.sleep(3600)
+                except OSError:
+                    pass
+            return
+        self.send_response(416, "Requested Range Not Satisfiable")
+        self.send_header("Content-Type", "image/png")
+        self.send_header("Content-Range", "bytes */%d" % self.RESUME_LEN)
+        self.send_header("Content-Length", "0")
+        self.end_headers()
+
+    # 206 resume must honor the server's Content-Range, not the offset we asked
+    # for (#198): a server resuming a few bytes *before* the request must not
+    # leave httrack duplicating the overlap onto the partial. flaky.bin
+    # interrupts once then resumes OVERLAP_EARLY bytes early; full.bin serves
+    # the identical bytes in one shot, so the test can compare the two.
+    OVERLAP_BLOB = b"%PDF-1.4\n" + bytes((i * 37 + 11) % 256 for i in range(8000))
+    OVERLAP_EARLY = 8
+    OVERLAP_PREFIX_LEN = 4000  # flushed before the stall
+    _overlap_started = False
+
+    def route_overlap_index(self):
+        self.send_html('\t<a href="flaky.bin">flaky</a>\n\t<a href="full.bin">full</a>')
+
+    def route_overlap_full(self):
+        self.send_raw(self.OVERLAP_BLOB, "application/octet-stream")
+
+    def route_overlap(self):
+        counter = os.environ.get("OVERLAP_COUNTER")
+        if counter:
+            with open(counter, "a") as fp:
+                fp.write("x")
+        blob = self.OVERLAP_BLOB
+        rng = self.headers.get("Range")
+        # First GET: stream a prefix then stall, so the crawl can be interrupted
+        # mid-body (partial + temp-ref on disk).
+        if rng is None and not Handler._overlap_started:
+            Handler._overlap_started = True
+            self.send_response(200)
+            self.send_header("Content-Type", "application/octet-stream")
+            self.send_header("Content-Length", str(len(blob)))
+            self.send_header("Accept-Ranges", "bytes")
+            self.end_headers()
+            if self.command != "HEAD":
+                self.wfile.write(blob[: self.OVERLAP_PREFIX_LEN])
+                self.wfile.flush()
+                try:
+                    while True:
+                        time.sleep(3600)
+                except OSError:
+                    pass
+            return
+        if rng is None:  # no resume request: serve the whole file
+            return self.route_overlap_full()
+        # Resume: honor the Range, but back up OVERLAP_EARLY bytes.
+        start = (
+            int(rng[len("bytes=") :].split("-")[0]) if rng.startswith("bytes=") else 0
+        )
+        start = max(0, start - self.OVERLAP_EARLY)
+        # Signal that the resume Range -> 206 path actually fired, so the test
+        # can prove it was exercised (not a silent full re-download).
+        resumed = os.environ.get("OVERLAP_RESUMED")
+        if resumed:
+            with open(resumed, "a") as fp:
+                fp.write("x")
+        part = blob[start:]
+        self.send_response(206, "Partial Content")
+        self.send_header("Content-Type", "application/octet-stream")
+        self.send_header("Content-Length", str(len(part)))
+        self.send_header(
+            "Content-Range", "bytes %d-%d/%d" % (start, len(blob) - 1, len(blob))
+        )
+        self.end_headers()
+        if self.command != "HEAD":
+            self.wfile.write(part)
+
+    # error pages / 0-byte files (#17): -o0 ("no error pages") must keep 4xx/5xx
+    # bodies off disk; a genuine 0-byte 200 is a valid file and stays.
+    def route_errpage_index(self):
+        self.send_html(
+            '\t<a href="good.html">good</a>\n'
+            '\t<a href="missing.html">missing</a>\n'
+            '\t<a href="empty.html">empty</a>\n'
+        )
+
+    def route_errpage_good(self):
+        self.send_raw(b"<html><body>good page</body></html>\n", "text/html")
+
+    def route_errpage_missing(self):
+        self.send_html("\t404 error body", status=404, extra_status="Not Found")
+
+    def route_errpage_empty(self):
+        self.send_raw(b"", "text/html")
+
+    # broken Content-Length (#32/#41): declared size != bytes sent. httrack
+    # warns "bogus state (broken size)" and skips the cache unless -%B.
+    def route_size_index(self):
+        self.send_html('\t<a href="oversize.bin">over</a>\n')
+
+    def route_size_oversize(self):
+        body = b"A" * 100
+        self.send_response(200)
+        self.send_header("Content-Type", "application/octet-stream")
+        self.send_header("Content-Length", str(len(body) - 2))  # lie: too short
+        self.send_header("Connection", "close")
+        self.end_headers()
+        if self.command != "HEAD":
+            self.wfile.write(body)
+
    ROUTES = {
        "/cookies/entrance.php": route_entrance,
        "/cookies/second.php": route_second,
@@ -195,6 +342,19 @@ class Handler(SimpleHTTPRequestHandler):
        "/types/style.css": route_types,
        "/types/data.json": route_types,
        "/types/gen.php": route_types,
+        "/intl/index.html": route_intl_index,
+        "/intl/" + INTL_NAME: route_intl_page,
+        "/resume/index.html": route_resume_index,
+        "/resume/blob.txt": route_resume,
+        "/overlap/index.html": route_overlap_index,
+        "/overlap/flaky.bin": route_overlap,
+        "/overlap/full.bin": route_overlap_full,
+        "/size/index.html": route_size_index,
+        "/size/oversize.bin": route_size_oversize,
+        "/errpage/index.html": route_errpage_index,
+        "/errpage/good.html": route_errpage_good,
+        "/errpage/missing.html": route_errpage_missing,
+        "/errpage/empty.html": route_errpage_empty,
    }

    # --- dispatch ----------------------------------------------------------
@@ -202,7 +362,8 @@ class Handler(SimpleHTTPRequestHandler):
    def dispatch(self):
        self._set_cookies = []
        path = urlsplit(self.path).path
-        handler = self.ROUTES.get(path)
+        # Match percent-encoded paths (accented #157 route) by their decoded form.
+        handler = self.ROUTES.get(path) or self.ROUTES.get(unquote(path))
        if handler is not None:
            handler(self)
            return True
Author	SHA1	Message	Date
Xavier Roche	7754a5b2b9	tests: run 24_local-resume-overlap under set -e Follow the golden rule for shell scripts: start with set -e so a non-last failure can't be masked. Guard the backgrounded-crawl kill/wait spots with \|\| true so the expected SIGTERM exit doesn't abort the run. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Signed-off-by: Xavier Roche <roche@httrack.com>	2026-06-26 09:37:15 +02:00
Xavier Roche	828bdbd632	Harden #198 fix: verify the truncate, assert the test hit the resume path Two follow-ups from review of the resume fix. If HTS_FTRUNCATE fails the partial could keep a stale tail (only when the resource shrank between runs, sz > full, so the body write no longer covers the old end). Check its return and, on failure, drop the partial and refetch the whole file instead of writing a possibly-corrupt one. The resume test only compared the resumed bytes against the whole file, which also passes if httrack silently re-downloads the file with no Range (the bug never fires). Mark when the server actually serves a resume 206 and assert pass 2 hit that path, so a full re-download fails the test instead of passing it. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Signed-off-by: Xavier Roche <roche@httrack.com>	2026-06-26 09:26:10 +02:00
Xavier Roche	5640bb6837	Honor the server's Content-Range when resuming a partial download (#198 ) A resumed download (Range: bytes=N-) may be answered with a 206 whose range starts before N: block-aligned caches and CDNs routinely round the start down to a block boundary, and RFC 7233 lets the server pick the range it returns. httrack ignored the returned Content-Range and blindly appended the 206 body to the bytes already on disk, so the overlapping bytes were duplicated and the file grew by the overlap. With timing deciding which files get interrupted (and thus resumed), this surfaced as a random subset of files corrupted on each run, each a few bytes too large. Resume at the server's crange_start instead: ftruncate the partial to that offset and write the 206 body there (the in-memory branch keeps only that prefix). When the returned range is unusable (a forward gap, no/garbage Content-Range, or one that doesn't reach EOF) drop the partial and refetch the whole file rather than stitch a corrupt one. Reading the existing crange_start/crange_end/crange fields only, no ABI change. Driven by tests/24_local-resume-overlap.test: pass 1 interrupts a download mid-body, pass 2 resumes against a 206 that backs up 8 bytes, and the result must be byte-identical to the same content fetched whole. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Signed-off-by: Xavier Roche <roche@httrack.com>	2026-06-26 09:11:31 +02:00
Xavier Roche	bfc4a016ab	Replace single-letter -# self-tests with a named -#test=NAME registry (#427 ) The hidden engine self-tests had accreted into a grab-bag of arbitrary single-letter/-digit -# arms (-#0, -#A, -#W, ...) buried in the htscoremain.c option switch, with no mnemonics and stale --help text. Collapse them into one registry: -#test lists every test with a usage hint and one-line description, and -#test=NAME [args] runs one. The handlers and the two helpers they used (basic_selftests, string_safety_selftests) move to a new htsselftest.c keyed by a {name, args, desc, fn} table; htscoremain.c keeps only a small dispatch that runs ahead of the no-URL usage gate, so a bare -#test (or an arg-less test like copyopt/dns/cookies) no longer needs a dummy URL token to be reached. The genuine debug knobs (-#L, -#C, -#R, -#h, ...) stay as letters in the switch; only the unit self-tests, whose sole callers are tests/01_engine-*.test, are renamed, so this is internal-only with no compatibility surface. Behavior is preserved: each test prints the same result line and exit code, which the existing assertions pin. Three now-unused includes (htscache_selftest.h, htsdns_selftest.h, htsencoding.h) drop out of htscoremain.c. Tests: the engine tests move to -#test=NAME; 01_engine-hashtable now asserts its success line (not just exit code) so a misrouted registry row can't pass, and a new 01_engine-selftest-dispatch covers the bare-list and unknown-name paths. The --help/man "guru options" list now points at -#test instead of enumerating a stale subset. The lone vestigial alias --debug-testfilters still resolves to the removed -#0 (it was already non-functional: param1 supplies one argument, -#0 required two); it is left untouched because editing that array forces clang-format to reflow the whole untouched table. Signed-off-by: Xavier Roche <roche@httrack.com> Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>	2026-06-26 08:05:59 +02:00
Xavier Roche	756d8fb8bd	Stop the mirror with a fatal error on a cache write failure, don't crash (#174/#219) (#426 ) A failed write to the new.zip cache (zipOpenNewFileInZip / zipWriteInFileInZip / zipCloseFileInZip / zipFlush returning non-Z_OK) was a fatal assertf() that aborted the whole process and popped CRASH.TXT. The trigger is storage going away mid-crawl: a disk filling up overnight (#174) or a network share holding the mirror dropping (#219); WinHTTrack users commonly mirror to a NAS or mapped drive. The cache lives in the same output tree as the mirror, so a cache write failing means the mirror files can no longer be written either. Continuing would only produce a broken, incomplete mirror reported as success. So treat it the same way the engine already treats a failed mirror-file write (htscore.c:1961, htsback.c:2933): log the error and set opt->state.exit_xh = -1 to stop the mirror cleanly and exit non-zero. No crash, no CRASH.TXT. Route the cache_add() write sites through cache_zip_write_failed(), which logs once (the standard "disk full or filesystem problems" message when check_fatal_io_errno() confirms it) and flags the cache so sibling cache_add() calls don't re-enter the broken stream before the loop notices exit_xh. The flag is appended to the end of the engine-owned, non-installed struct cache_back, so the ABI is unchanged. Add an in-process self-test (httrack -#W) that drives cache_add() into a ZIP whose disk-full backend fails its writes; 01_engine-cache-writefail.test asserts httrack signals a fatal abort instead of crashing. Negative controls proven: reverting the fix makes -#W abort (SIGABRT); dropping the exit_xh assignment makes the test fail on the abort-signal check. Signed-off-by: Xavier Roche <roche@httrack.com> Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>	2026-06-26 06:46:59 +02:00
Xavier Roche	5501faa7b1	tests: lock "no error pages" (-o0) write-suppression (#17 ) (#425 ) #17 (WinHTTrack 3.47-19, 2013) reported 404 error pages and 0-byte files kept and unpurged with "no error pages" set. It does not reproduce on current master/Linux: -o0 keeps 4xx/5xx bodies off disk and out of the purge list, a genuine 0-byte 200 is correctly saved, and purge removes stale files on update. The report's .html names were the extension-mangle bug (Defect A, fixed in #408 — the reporter switched to HTTP/1.0 because binaries were renamed .html); the settings-revert-on-update path is fixed by the hts_tristate option work (`4549ec3`, #413). Add an /errpage/ route group to local-server.py and 23_local-errpage.test locking -o0 suppression with an -o1 control. Negative-control verified: neutering the errpage gate (htsparse.c:3902) makes the test fail. Signed-off-by: Xavier Roche <roche@httrack.com> Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>	2026-06-25 18:02:28 +02:00
Xavier Roche	6322b6fb1f	Lock --tolerant (-%B) on broken Content-Length, and fix an OOB it surfaced (#32/#41) (#424 ) * tests: lock --tolerant (-%B) behavior on broken Content-Length (#32/#41) A response whose Content-Length disagrees with the bytes actually sent warns "bogus state (broken size)" and is skipped from the cache, so it is re-fetched and re-warned on every run. --tolerant (-%B) already accepts such responses; either way the file reaches disk. Pin that contract with a local-server /size route (declares a length two bytes short of the body) and a test asserting the warning fires by default and is silenced under -%B, with the file present in both passes. Adds --log-found/--log-not-found ERE assertions on hts-log.txt to local-crawl.sh for the warning checks. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Signed-off-by: Xavier Roche <roche@httrack.com> * htslib: fix global-buffer-overflow in get_httptype_sized on empty filename get_httptype_sized() set a = fil + strlen(fil) - 1, then dereferenced *a in the extension scan before the a > fil bound was checked, so an empty fil ("") read one byte before the string. istoobig() passes a literal "" to is_hypertext_mime() whenever it classifies by mime alone (the quota check in back_checksize), so any octet-stream-ish download hit it. Bound the loop and the dot test before dereferencing. Latent (an OOB read of one .rodata byte); surfaced under ASan by the new 22_local-broken-size.test, whose oversize.bin is application/octet-stream. Adds a direct empty-fil case to the -#7 basic_selftests block as a fast, deterministic leaf-level regression (it aborts under ASan on the old code). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Signed-off-by: Xavier Roche <roche@httrack.com> --------- Signed-off-by: Xavier Roche <roche@httrack.com> Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>	2026-06-25 17:18:06 +02:00
Xavier Roche	58f368a91a	tests: lock special-char URL naming across an update (#157 ) (#423 ) #157 reported accented URLs (pt-BR MediaWiki) losing their .html extension on an update pass, observed with 3.49-2 on Windows. It does not reproduce on current master: the update resolves the cached content-type and re-applies .html consistently, for UTF-8 and ISO-8859-1 sources, raw Latin-1 href bytes, either percent-encoding case, and dotted tails. The original symptom was a Windows codepage vs UTF-8 X-Save filename mismatch that cannot occur on a UTF-8 filesystem. Add a regression test that locks the invariant: a dotless, accented basename served as text/html, crawled then updated, must keep its .html name and not leave an extensionless sibling. Also assert in the --rerun harness that the update pass reported "files updated" (a fresh crawl never does), so a regression that bypasses the cache and silently re-crawls fresh can no longer pass the update tests. Closes #157 Signed-off-by: Xavier Roche <roche@httrack.com> Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>	2026-06-24 22:35:55 +02:00
Xavier Roche	c97b3e233e	Stop the 412/416 partial-reget loop on continue/update (#206 ) (#422 ) On resume, the Range request is rebuilt by back_add from a temp-ref keyed on (adr,fil) that records the partial download's real save name. A 412/416 ("Range Not Satisfiable") means that partial is stale and the whole file must be re-fetched. The handler only removed heap->sav, so when the resume pass recomputed a save name different from the temp-ref's (the default delayed-type machinery renames freely), the partial was never cleared: back_add re-sent the same Range, earned the same 416, and the link was re-recorded forever, growing the scan counter without bound. Clear the whole partial wherever it lives -- the temp-ref and the file it points at, plus heap->sav -- so the re-record falls through to a plain full GET. Re-get only when there was a partial to discard and both Range triggers (the ref and the on-disk file) are actually gone; once they are, a fresh 416 with nothing left to drop means the whole-file GET itself failed, so the link gives up cleanly instead of re-queueing. A failed removal (read-only or full cache) also gives up rather than looping, since back_add would otherwise re-Range the surviving ref; url_savename_refname_remove now reports the removal result so the handler can tell. (The request's range_used flag would be the natural one-shot signal, but it does not survive the delayed-type two-pass, so we key off the partial instead.) tests/20_local-resume-loop.test drives it offline: pass 1 is interrupted (SIGTERM, so the exit handler finalizes the cache and the temp-ref) to leave a partial, then pass 2 --continue gets 416 on every resume request. A portable watchdog kills pass 2 if it loops; the test asserts it terminates and attempts exactly one whole-file re-get (2 <= requests <= 8). It fails on the pre-fix handler (loops) and on a re-get that silently drops the link. 
Signed-off-by: Xavier Roche <roche@httrack.com> Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>	2026-06-24 21:12:40 +02:00
Xavier Roche	b615a4e7fd	Keep unrecognized URL tails instead of mangling them to .html (#421 ) url_savename truncated any trailing ".token" when applying a resolved content-type, so /article-1.884291 served as text/html was saved as article-1.html, dropping the .884291 tail and colliding with every sibling sharing the prefix. Cut the old extension only when it is empty (a bare trailing dot), the resolved type, a known MIME extension, a dynamic-page extension, or an html-family extension; otherwise keep the tail and append the type (article-1.884291.html). Recognized extensions still collapse as before, so the #267/#408 soft-404 behavior (a binary URL served as HTML named .html) is preserved, and a type that agrees with the extension causes no churn. Add a hidden -#N <fil> <content-type> self-test driving url_savename offline, plus tests/01_engine-savename.test covering the matrix. Signed-off-by: Xavier Roche <roche@httrack.com> Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>	2026-06-24 18:33:52 +02:00