mirror of
https://github.com/xroche/httrack.git
synced 2026-06-18 16:22:55 +03:00
Compare commits
2 Commits
tests/cach
...
docs/api-h
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1142b64696 | ||
|
|
22d3eb44cd |
@@ -30,6 +30,12 @@ Please visit our Website: http://www.httrack.com
|
|||||||
/* Author: Xavier Roche */
|
/* Author: Xavier Roche */
|
||||||
/* ------------------------------------------------------------ */
|
/* ------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/** @file htsarrays.h
|
||||||
|
* Header-only generic dynamic array (a typed growable vector). All operations
|
||||||
|
* are macros parameterized by the array lvalue A; the element type T is fixed
|
||||||
|
* by the struct that TypedArray(T) declares. Counts and capacities are in
|
||||||
|
* elements, not bytes. The array owns its backing store: grow it via the Add/
|
||||||
|
* Append/EnsureRoom macros and release it with TypedArrayFree. */
|
||||||
#ifndef HTS_ARRAYS_DEFSTATIC
|
#ifndef HTS_ARRAYS_DEFSTATIC
|
||||||
#define HTS_ARRAYS_DEFSTATIC
|
#define HTS_ARRAYS_DEFSTATIC
|
||||||
|
|
||||||
@@ -39,7 +45,8 @@ Please visit our Website: http://www.httrack.com
|
|||||||
|
|
||||||
#include "htssafe.h"
|
#include "htssafe.h"
|
||||||
|
|
||||||
/* Memory allocation assertion failure */
|
/* Abort (with the failed byte count) when a growth allocation fails. The
|
||||||
|
array macros never return an out-of-memory error; they assert and abort. */
|
||||||
static void hts_record_assert_memory_failed(const size_t size) {
|
static void hts_record_assert_memory_failed(const size_t size) {
|
||||||
fprintf(stderr, "memory allocation failed (%lu bytes)", \
|
fprintf(stderr, "memory allocation failed (%lu bytes)", \
|
||||||
(long int) size); \
|
(long int) size); \
|
||||||
@@ -61,6 +68,8 @@ static void hts_record_assert_memory_failed(const size_t size) {
|
|||||||
/** Capacity. **/ \
|
/** Capacity. **/ \
|
||||||
size_t capa; \
|
size_t capa; \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Initializer for an empty array (no backing store, size and capacity 0). **/
|
||||||
#define EMPTY_TYPED_ARRAY { { NULL }, 0, 0 }
|
#define EMPTY_TYPED_ARRAY { { NULL }, 0, 0 }
|
||||||
|
|
||||||
/** Array size, in elements. **/
|
/** Array size, in elements. **/
|
||||||
@@ -84,7 +93,8 @@ static void hts_record_assert_memory_failed(const size_t size) {
|
|||||||
/** Size of T. **/
|
/** Size of T. **/
|
||||||
#define TypedArrayWidth(A) (sizeof(*TypedArrayElts(A)))
|
#define TypedArrayWidth(A) (sizeof(*TypedArrayElts(A)))
|
||||||
|
|
||||||
/** Nth element of the array. **/
|
/** Nth element of the array, as an lvalue. No bounds check; N must be
|
||||||
|
< TypedArraySize(A). **/
|
||||||
#define TypedArrayNth(A, N) (TypedArrayElts(A)[N])
|
#define TypedArrayNth(A, N) (TypedArrayElts(A)[N])
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -63,12 +63,15 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
/* GCC extension */
|
/* Compiler-portability attribute macros (no-ops on non-GCC). */
|
||||||
#ifndef HTS_UNUSED
|
#ifndef HTS_UNUSED
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
#define HTS_UNUSED __attribute__ ((unused))
|
#define HTS_UNUSED __attribute__ ((unused))
|
||||||
|
|
||||||
#define HTS_STATIC static __attribute__ ((unused))
|
#define HTS_STATIC static __attribute__ ((unused))
|
||||||
|
|
||||||
#define HTS_INLINE __inline__
|
#define HTS_INLINE __inline__
|
||||||
|
/* printf-style format check; fmt/arg are 1-based argument positions. */
|
||||||
#define HTS_PRINTF_FUN(fmt, arg) __attribute__ ((format (printf, fmt, arg)))
|
#define HTS_PRINTF_FUN(fmt, arg) __attribute__ ((format (printf, fmt, arg)))
|
||||||
#else
|
#else
|
||||||
#define HTS_UNUSED
|
#define HTS_UNUSED
|
||||||
@@ -78,29 +81,37 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* min/max evaluate their arguments twice; pass side-effect-free expressions. */
|
||||||
#undef min
|
#undef min
|
||||||
#undef max
|
#undef max
|
||||||
#define min(a,b) ((a)>(b)?(b):(a))
|
#define min(a,b) ((a)>(b)?(b):(a))
|
||||||
|
|
||||||
#define max(a,b) ((a)>(b)?(a):(b))
|
#define max(a,b) ((a)>(b)?(a):(b))
|
||||||
|
|
||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
#undef Sleep
|
#undef Sleep
|
||||||
#define min(a,b) ((a)>(b)?(b):(a))
|
#define min(a,b) ((a)>(b)?(b):(a))
|
||||||
|
|
||||||
#define max(a,b) ((a)>(b)?(a):(b))
|
#define max(a,b) ((a)>(b)?(a):(b))
|
||||||
|
|
||||||
|
/* Win32 Sleep() shim for POSIX; argument is milliseconds. */
|
||||||
#define Sleep(a) { if (((a)*1000)%1000000) usleep(((a)*1000)%1000000); if (((a)*1000)/1000000) sleep(((a)*1000)/1000000); }
|
#define Sleep(a) { if (((a)*1000)%1000000) usleep(((a)*1000)%1000000); if (((a)*1000)/1000000) sleep(((a)*1000)/1000000); }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// teste égalité de 2 chars, case insensitive
|
/* hichar: ASCII uppercasing of one char. streql: case-insensitive equality of
|
||||||
|
two chars. ASCII only; not locale-aware. */
|
||||||
#define hichar(a) ((((a)>='a') && ((a)<='z')) ? ((a)-('a'-'A')) : (a))
|
#define hichar(a) ((((a)>='a') && ((a)<='z')) ? ((a)-('a'-'A')) : (a))
|
||||||
|
|
||||||
#define streql(a,b) (hichar(a)==hichar(b))
|
#define streql(a,b) (hichar(a)==hichar(b))
|
||||||
|
|
||||||
// caractère maj
|
/* True if a is an ASCII uppercase letter. */
|
||||||
#define isUpperLetter(a) ( ((a) >= 'A') && ((a) <= 'Z') )
|
#define isUpperLetter(a) ( ((a) >= 'A') && ((a) <= 'Z') )
|
||||||
|
|
||||||
/* Library internal definictions */
|
/* Library-internal only (engine translation units that define
|
||||||
|
HTS_INTERNAL_BYTECODE); not part of the consumer surface. */
|
||||||
#ifdef HTS_INTERNAL_BYTECODE
|
#ifdef HTS_INTERNAL_BYTECODE
|
||||||
|
|
||||||
// functions
|
/* Resolve a symbol in an already-loaded dynamic module. */
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#define DynamicGet(handle, sym) GetProcAddress(handle, sym)
|
#define DynamicGet(handle, sym) GetProcAddress(handle, sym)
|
||||||
#else
|
#else
|
||||||
|
|||||||
@@ -31,6 +31,11 @@ Please visit our Website: http://www.httrack.com
|
|||||||
/* Author: Xavier Roche */
|
/* Author: Xavier Roche */
|
||||||
/* ------------------------------------------------------------ */
|
/* ------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/** @file htsbasenet.h
|
||||||
|
Base networking definitions: platform socket headers, the optional global
|
||||||
|
OpenSSL context, and the status-code/connection-state enumerations stored in
|
||||||
|
htsblk and lien_back. Pulled in by htsnet.h. */
|
||||||
|
|
||||||
#ifndef HTS_DEFBASENETH
|
#ifndef HTS_DEFBASENETH
|
||||||
#define HTS_DEFBASENETH
|
#define HTS_DEFBASENETH
|
||||||
|
|
||||||
@@ -80,7 +85,8 @@ extern "C" {
|
|||||||
/* OpenSSL structure */
|
/* OpenSSL structure */
|
||||||
#include <openssl/bio.h>
|
#include <openssl/bio.h>
|
||||||
|
|
||||||
/* Global SSL context */
|
/** Process-wide OpenSSL client context, created lazily on first TLS use;
|
||||||
|
shared by all connections. NULL until initialized. */
|
||||||
extern SSL_CTX *openssl_ctx;
|
extern SSL_CTX *openssl_ctx;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -31,51 +31,77 @@ Please visit our Website: http://www.httrack.com
|
|||||||
/* Author: Xavier Roche */
|
/* Author: Xavier Roche */
|
||||||
/* ------------------------------------------------------------ */
|
/* ------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/** @file htsbauth.h
|
||||||
|
HTTP Basic authentication storage: a per-session list of (URL-prefix,
|
||||||
|
credentials) pairs, plus the cookie jar that holds it. */
|
||||||
|
|
||||||
#ifndef HTSBAUTH_DEFH
|
#ifndef HTSBAUTH_DEFH
|
||||||
#define HTSBAUTH_DEFH
|
#define HTSBAUTH_DEFH
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
||||||
// robots wizard
|
/** One stored credential: the longest-prefix match against a request's
|
||||||
|
host+path selects which auth header to send. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_bauth_chain
|
#ifndef HTS_DEF_FWSTRUCT_bauth_chain
|
||||||
#define HTS_DEF_FWSTRUCT_bauth_chain
|
#define HTS_DEF_FWSTRUCT_bauth_chain
|
||||||
typedef struct bauth_chain bauth_chain;
|
typedef struct bauth_chain bauth_chain;
|
||||||
#endif
|
#endif
|
||||||
struct bauth_chain {
|
struct bauth_chain {
|
||||||
char prefix[1024]; /* www.foo.com/secure/ */
|
char prefix[1024]; /* host + path prefix, e.g. www.foo.com/secure/ */
|
||||||
char auth[1024]; /* base-64 encoded user:pass */
|
char auth[1024]; /* base-64 encoded user:pass (Authorization payload) */
|
||||||
struct bauth_chain *next; /* next element */
|
struct bauth_chain *next; /* next element, NULL-terminated list */
|
||||||
};
|
};
|
||||||
|
|
||||||
// buffer pour les cookies et authentification
|
/** Per-session cookie jar; also holds the basic-auth list head (auth).
|
||||||
|
The head node (auth) is embedded, not heap-allocated. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_t_cookie
|
#ifndef HTS_DEF_FWSTRUCT_t_cookie
|
||||||
#define HTS_DEF_FWSTRUCT_t_cookie
|
#define HTS_DEF_FWSTRUCT_t_cookie
|
||||||
typedef struct t_cookie t_cookie;
|
typedef struct t_cookie t_cookie;
|
||||||
#endif
|
#endif
|
||||||
struct t_cookie {
|
struct t_cookie {
|
||||||
int max_len;
|
int max_len; /* capacity of data[] in use */
|
||||||
char data[32768];
|
char data[32768]; /* raw cookie store (NUL-terminated field list) */
|
||||||
bauth_chain auth;
|
bauth_chain auth; /* embedded head of the basic-auth list */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Library internal definitions */
|
/* Library internal definitions */
|
||||||
#ifdef HTS_INTERNAL_BYTECODE
|
#ifdef HTS_INTERNAL_BYTECODE
|
||||||
|
|
||||||
// cookies
|
/* cookies */
|
||||||
int cookie_add(t_cookie * cookie, const char *cook_name, const char *cook_value,
|
int cookie_add(t_cookie * cookie, const char *cook_name, const char *cook_value,
|
||||||
const char *domain, const char *path);
|
const char *domain, const char *path);
|
||||||
|
|
||||||
int cookie_del(t_cookie * cookie, const char *cook_name, const char *domain, const char *path);
|
int cookie_del(t_cookie * cookie, const char *cook_name, const char *domain, const char *path);
|
||||||
|
|
||||||
int cookie_load(t_cookie * cookie, const char *path, const char *name);
|
int cookie_load(t_cookie * cookie, const char *path, const char *name);
|
||||||
|
|
||||||
int cookie_save(t_cookie * cookie, const char *name);
|
int cookie_save(t_cookie * cookie, const char *name);
|
||||||
|
|
||||||
void cookie_insert(char *s, size_t s_size, const char *ins);
|
void cookie_insert(char *s, size_t s_size, const char *ins);
|
||||||
|
|
||||||
void cookie_delete(char *s, size_t s_size, size_t pos);
|
void cookie_delete(char *s, size_t s_size, size_t pos);
|
||||||
|
|
||||||
const char *cookie_get(char *buffer, const char *cookie_base, int param);
|
const char *cookie_get(char *buffer, const char *cookie_base, int param);
|
||||||
|
|
||||||
char *cookie_find(char *s, const char *cook_name, const char *domain, const char *path);
|
char *cookie_find(char *s, const char *cook_name, const char *domain, const char *path);
|
||||||
|
|
||||||
char *cookie_nextfield(char *a);
|
char *cookie_nextfield(char *a);
|
||||||
|
|
||||||
// basic auth
|
/* basic auth */
|
||||||
|
|
||||||
|
/** Register credentials (auth = base-64 user:pass) for the prefix derived from
|
||||||
|
adr (host) and fil (path). No-op returning 0 if cookie is NULL, allocation
|
||||||
|
fails, or a matching prefix is already stored; returns 1 on insertion. */
|
||||||
int bauth_add(t_cookie * cookie, const char *adr, const char *fil, const char *auth);
|
int bauth_add(t_cookie * cookie, const char *adr, const char *fil, const char *auth);
|
||||||
|
|
||||||
|
/** Return the stored base-64 credentials whose prefix matches adr+fil, or NULL
|
||||||
|
if none (or cookie is NULL). Returned pointer aliases the jar's bauth_chain;
|
||||||
|
caller must not free it. */
|
||||||
char *bauth_check(t_cookie * cookie, const char *adr, const char *fil);
|
char *bauth_check(t_cookie * cookie, const char *adr, const char *fil);
|
||||||
|
|
||||||
|
/** Build the auth lookup key (host + path, query string stripped, truncated at
|
||||||
|
the last '/') from adr and fil into buffer; returns buffer. Caller must
|
||||||
|
supply a buffer of HTS_URLMAXSIZE * 2 bytes. */
|
||||||
char *bauth_prefix(char *buffer, const char *adr, const char *fil);
|
char *bauth_prefix(char *buffer, const char *adr, const char *fil);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
202
src/htscore.h
202
src/htscore.h
@@ -30,7 +30,9 @@ Please visit our Website: http://www.httrack.com
|
|||||||
/* Author: Xavier Roche */
|
/* Author: Xavier Roche */
|
||||||
/* ------------------------------------------------------------ */
|
/* ------------------------------------------------------------ */
|
||||||
|
|
||||||
// Fichier librairie .h
|
/* Core engine declarations. Not an installed header, but part of the de-facto
|
||||||
|
API surface: external consumers (e.g. httrack-android) read these structs and
|
||||||
|
constants and call functions declared here. */
|
||||||
#ifndef HTS_CORE_DEFH
|
#ifndef HTS_CORE_DEFH
|
||||||
#define HTS_CORE_DEFH
|
#define HTS_CORE_DEFH
|
||||||
|
|
||||||
@@ -38,7 +40,7 @@ Please visit our Website: http://www.httrack.com
|
|||||||
|
|
||||||
/* specific definitions */
|
/* specific definitions */
|
||||||
#include "htsbase.h"
|
#include "htsbase.h"
|
||||||
// Includes & définitions
|
/* Includes and definitions */
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
@@ -83,45 +85,45 @@ typedef struct filecreate_params filecreate_params;
|
|||||||
// options
|
// options
|
||||||
#include "htsopt.h"
|
#include "htsopt.h"
|
||||||
|
|
||||||
// INCLUDES .H PARTIES DE CODE HTTRACK
|
// HTTrack engine sub-headers
|
||||||
|
|
||||||
// routine main
|
// main entry point
|
||||||
#include "htscoremain.h"
|
#include "htscoremain.h"
|
||||||
|
|
||||||
// core routines
|
// core routines
|
||||||
#include "htscore.h"
|
#include "htscore.h"
|
||||||
|
|
||||||
// divers outils pour httrack.c
|
// misc tools for httrack.c
|
||||||
#include "htstools.h"
|
#include "htstools.h"
|
||||||
|
|
||||||
// aide pour la version en ligne de commande
|
// command-line help
|
||||||
#include "htshelp.h"
|
#include "htshelp.h"
|
||||||
|
|
||||||
// génération du nom de fichier à sauver
|
// build the on-disk save filename
|
||||||
#include "htsname.h"
|
#include "htsname.h"
|
||||||
|
|
||||||
// gestion ftp
|
// FTP support
|
||||||
#include "htsftp.h"
|
#include "htsftp.h"
|
||||||
|
|
||||||
// gestion interception d'URL
|
// URL interception
|
||||||
#include "htscatchurl.h"
|
#include "htscatchurl.h"
|
||||||
|
|
||||||
// gestion robots.txt
|
// robots.txt handling
|
||||||
#include "htsrobots.h"
|
#include "htsrobots.h"
|
||||||
|
|
||||||
// routines d'acceptation de liens
|
// link-acceptance rules
|
||||||
#include "htswizard.h"
|
#include "htswizard.h"
|
||||||
|
|
||||||
// routines de regexp
|
// regexp/filter routines
|
||||||
#include "htsfilters.h"
|
#include "htsfilters.h"
|
||||||
|
|
||||||
// gestion backing
|
// download backing (the back[] slot ring)
|
||||||
#include "htsback.h"
|
#include "htsback.h"
|
||||||
|
|
||||||
// gestion cache
|
// cache handling
|
||||||
#include "htscache.h"
|
#include "htscache.h"
|
||||||
|
|
||||||
// gestion hashage
|
// hashing
|
||||||
#include "htshash.h"
|
#include "htshash.h"
|
||||||
#include "coucal.h"
|
#include "coucal.h"
|
||||||
|
|
||||||
@@ -129,65 +131,74 @@ typedef struct filecreate_params filecreate_params;
|
|||||||
|
|
||||||
#include "hts-indextmpl.h"
|
#include "hts-indextmpl.h"
|
||||||
|
|
||||||
// adr, fil
|
/** A remote URL split into host and path, each a fixed inline buffer
|
||||||
|
(HTS_URLMAXSIZE*2 bytes, NUL-terminated). */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_lien_adrfil
|
#ifndef HTS_DEF_FWSTRUCT_lien_adrfil
|
||||||
#define HTS_DEF_FWSTRUCT_lien_adrfil
|
#define HTS_DEF_FWSTRUCT_lien_adrfil
|
||||||
typedef struct lien_adrfil lien_adrfil;
|
typedef struct lien_adrfil lien_adrfil;
|
||||||
#endif
|
#endif
|
||||||
struct lien_adrfil {
|
struct lien_adrfil {
|
||||||
char adr[HTS_URLMAXSIZE * 2]; // adresse
|
char adr[HTS_URLMAXSIZE * 2]; /**< host (address) */
|
||||||
char fil[HTS_URLMAXSIZE * 2]; // nom du fichier distant
|
char fil[HTS_URLMAXSIZE * 2]; /**< remote file path */
|
||||||
};
|
};
|
||||||
|
|
||||||
// adr, fil, save
|
/** A remote URL plus the local on-disk path it is saved to. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_lien_adrfilsave
|
#ifndef HTS_DEF_FWSTRUCT_lien_adrfilsave
|
||||||
#define HTS_DEF_FWSTRUCT_lien_adrfilsave
|
#define HTS_DEF_FWSTRUCT_lien_adrfilsave
|
||||||
typedef struct lien_adrfilsave lien_adrfilsave;
|
typedef struct lien_adrfilsave lien_adrfilsave;
|
||||||
#endif
|
#endif
|
||||||
struct lien_adrfilsave {
|
struct lien_adrfilsave {
|
||||||
lien_adrfil af;
|
lien_adrfil af;
|
||||||
char save[HTS_URLMAXSIZE * 2]; // nom à sauver sur disque (avec chemin éventuel)
|
char save[HTS_URLMAXSIZE * 2]; /**< local save path (with directory) */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** The download-slot ring: the set of concurrent transfers in flight.
|
||||||
|
Allocated/owned by the engine; consumers (status callbacks, the loop)
|
||||||
|
read it but do not resize or free it. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_struct_back
|
#ifndef HTS_DEF_FWSTRUCT_struct_back
|
||||||
#define HTS_DEF_FWSTRUCT_struct_back
|
#define HTS_DEF_FWSTRUCT_struct_back
|
||||||
typedef struct struct_back struct_back;
|
typedef struct struct_back struct_back;
|
||||||
#endif
|
#endif
|
||||||
struct struct_back {
|
struct struct_back {
|
||||||
lien_back *lnk;
|
lien_back *lnk; /**< slot array, valid indices [0..count-1]
|
||||||
int count;
|
(count+1 entries allocated); a slot is
|
||||||
coucal ready;
|
active iff lnk[i].status != STATUS_FREE.
|
||||||
LLint ready_size_bytes;
|
See struct lien_back in htsopt.h and the
|
||||||
|
STATUS_* codes in htsbasenet.h. */
|
||||||
|
int count; /**< number of usable slots (back_max) */
|
||||||
|
coucal ready; /**< index of slots whose transfer completed */
|
||||||
|
LLint ready_size_bytes; /**< total bytes buffered in completed slots */
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct cache_back_zip_entry cache_back_zip_entry;
|
typedef struct cache_back_zip_entry cache_back_zip_entry;
|
||||||
|
|
||||||
// cache
|
/** Open handle to the mirror cache (the read-from-old / write-to-new state
|
||||||
|
used to resume and to avoid re-fetching unchanged files). Engine-owned. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_cache_back
|
#ifndef HTS_DEF_FWSTRUCT_cache_back
|
||||||
#define HTS_DEF_FWSTRUCT_cache_back
|
#define HTS_DEF_FWSTRUCT_cache_back
|
||||||
typedef struct cache_back cache_back;
|
typedef struct cache_back cache_back;
|
||||||
#endif
|
#endif
|
||||||
struct cache_back {
|
struct cache_back {
|
||||||
int version; // 0 ou 1
|
int version; /**< cache-file format version being read */
|
||||||
/* */
|
/* */
|
||||||
int type;
|
int type;
|
||||||
int ro;
|
int ro; /**< read-only: no new cache is written */
|
||||||
FILE *dat, *ndx, *olddat;
|
FILE *dat, *ndx, *olddat; /**< new data, new index, old data files */
|
||||||
char *use; // liste des adr+fil
|
char *use; /**< in-memory list of cached adr+fil keys */
|
||||||
FILE *lst; // liste des fichiers pour la "purge"
|
FILE *lst; /**< file list, used for purge */
|
||||||
FILE *txt; // liste des fichiers (info)
|
FILE *txt; /**< human-readable file list (info) */
|
||||||
char lastmodified[256];
|
char lastmodified[256];
|
||||||
// HASH
|
// HASH
|
||||||
coucal hashtable;
|
coucal hashtable;
|
||||||
// HASH for tests (naming subsystem)
|
// HASH for tests (naming subsystem)
|
||||||
coucal cached_tests;
|
coucal cached_tests;
|
||||||
// fichiers log optionnels
|
/* optional log files */
|
||||||
FILE *log;
|
FILE *log;
|
||||||
FILE *errlog;
|
FILE *errlog;
|
||||||
// variables
|
/* read-ahead cursors into the old cache */
|
||||||
int ptr_ant; // pointeur pour anticiper
|
int ptr_ant;
|
||||||
int ptr_last; // pointeur pour anticiper
|
int ptr_last;
|
||||||
//
|
/* ZIP-backed cache backend (newer format) */
|
||||||
void *zipInput;
|
void *zipInput;
|
||||||
void *zipOutput;
|
void *zipOutput;
|
||||||
cache_back_zip_entry *zipEntries;
|
cache_back_zip_entry *zipEntries;
|
||||||
@@ -199,16 +210,19 @@ struct cache_back {
|
|||||||
#define HTS_DEF_FWSTRUCT_hash_struct
|
#define HTS_DEF_FWSTRUCT_hash_struct
|
||||||
typedef struct hash_struct hash_struct;
|
typedef struct hash_struct hash_struct;
|
||||||
#endif
|
#endif
|
||||||
|
/** Lookup indexes over the link heap: map save-name / URL back to a link, so a
|
||||||
|
URL seen twice resolves to one entry. The coucal tables index into liens;
|
||||||
|
they do not own the links. */
|
||||||
struct hash_struct {
|
struct hash_struct {
|
||||||
/* Links big array reference */
|
/* points at the engine's link array (opt->liens); not owned */
|
||||||
const lien_url *const*const*liens;
|
const lien_url *const*const*liens;
|
||||||
/* Savename (case insensitive ; lowercased) */
|
/* save-name -> link index (case-insensitive: keys lowercased) */
|
||||||
coucal sav;
|
coucal sav;
|
||||||
/* Address and path */
|
/* address+path -> link index */
|
||||||
coucal adrfil;
|
coucal adrfil;
|
||||||
/* Former address and path */
|
/* former address+path -> link index (renamed/moved entries) */
|
||||||
coucal former_adrfil;
|
coucal former_adrfil;
|
||||||
/** Buffers **/
|
/* scratch buffers reused across lookups (not reentrant) */
|
||||||
int normalized;
|
int normalized;
|
||||||
char normfil[HTS_URLMAXSIZE * 2];
|
char normfil[HTS_URLMAXSIZE * 2];
|
||||||
char normfil2[HTS_URLMAXSIZE * 2];
|
char normfil2[HTS_URLMAXSIZE * 2];
|
||||||
@@ -219,113 +233,175 @@ struct hash_struct {
|
|||||||
#define HTS_DEF_FWSTRUCT_filecreate_params
|
#define HTS_DEF_FWSTRUCT_filecreate_params
|
||||||
typedef struct filecreate_params filecreate_params;
|
typedef struct filecreate_params filecreate_params;
|
||||||
#endif
|
#endif
|
||||||
|
/** Parameters threaded through file-creation callbacks (filenote). */
|
||||||
struct filecreate_params {
|
struct filecreate_params {
|
||||||
FILE *lst;
|
FILE *lst; /**< open file list to append created paths to */
|
||||||
char path[HTS_URLMAXSIZE * 2];
|
char path[HTS_URLMAXSIZE * 2];
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Access macros. */
|
/* Convenience accessors over the link heap; assume `opt` (and where used,
|
||||||
|
`ptr`/`parent_relative`) are in scope. heap(N) is the Nth link;
|
||||||
|
heap_top_index() is the last recorded link's index. */
|
||||||
#define heap(N) (opt->liens[N])
|
#define heap(N) (opt->liens[N])
|
||||||
|
|
||||||
#define heap_top_index() (opt->lien_tot - 1)
|
#define heap_top_index() (opt->lien_tot - 1)
|
||||||
|
|
||||||
#define heap_top() (heap(heap_top_index()))
|
#define heap_top() (heap(heap_top_index()))
|
||||||
|
|
||||||
#define urladr() (heap(ptr)->adr)
|
#define urladr() (heap(ptr)->adr)
|
||||||
|
|
||||||
#define urlfil() (heap(ptr)->fil)
|
#define urlfil() (heap(ptr)->fil)
|
||||||
|
|
||||||
#define savename() (heap(ptr)->sav)
|
#define savename() (heap(ptr)->sav)
|
||||||
|
|
||||||
#define parenturladr() (heap(heap(ptr)->precedent)->adr)
|
#define parenturladr() (heap(heap(ptr)->precedent)->adr)
|
||||||
|
|
||||||
#define parenturlfil() (heap(heap(ptr)->precedent)->fil)
|
#define parenturlfil() (heap(heap(ptr)->precedent)->fil)
|
||||||
|
|
||||||
#define parentsavename() (heap(heap(ptr)->precedent)->sav)
|
#define parentsavename() (heap(heap(ptr)->precedent)->sav)
|
||||||
|
|
||||||
#define relativeurladr() ((!parent_relative)?urladr():parenturladr())
|
#define relativeurladr() ((!parent_relative)?urladr():parenturladr())
|
||||||
|
|
||||||
#define relativeurlfil() ((!parent_relative)?urlfil():parenturlfil())
|
#define relativeurlfil() ((!parent_relative)?urlfil():parenturlfil())
|
||||||
|
|
||||||
#define relativesavename() ((!parent_relative)?savename():parentsavename())
|
#define relativesavename() ((!parent_relative)?savename():parentsavename())
|
||||||
|
|
||||||
/* Library internal definictions */
|
/* Library-internal helpers (engine-only, HTS_INTERNAL_BYTECODE). */
|
||||||
#ifdef HTS_INTERNAL_BYTECODE
|
#ifdef HTS_INTERNAL_BYTECODE
|
||||||
|
|
||||||
|
/* True if a new cache is being written (plain or zip backend). */
|
||||||
HTS_STATIC int cache_writable(cache_back * cache) {
|
HTS_STATIC int cache_writable(cache_back * cache) {
|
||||||
return (cache != NULL && (cache->dat != NULL || cache->zipOutput != NULL));
|
return (cache != NULL && (cache->dat != NULL || cache->zipOutput != NULL));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* True if an old cache is available to read (plain or zip backend). */
|
||||||
HTS_STATIC int cache_readable(cache_back * cache) {
|
HTS_STATIC int cache_readable(cache_back * cache) {
|
||||||
return (cache != NULL && (cache->olddat != NULL || cache->zipInput != NULL));
|
return (cache != NULL && (cache->olddat != NULL || cache->zipInput != NULL));
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Fonctions
|
// Functions
|
||||||
|
|
||||||
// INCLUDES .H PARTIES DE CODE HTTRACK
|
/* Library-internal only (engine translation units). */
|
||||||
|
|
||||||
/* Library internal definictions */
|
|
||||||
#ifdef HTS_INTERNAL_BYTECODE
|
#ifdef HTS_INTERNAL_BYTECODE
|
||||||
|
|
||||||
char *hts_cancel_file_pop(httrackp * opt);
|
char *hts_cancel_file_pop(httrackp * opt);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// add a link on the heap
|
/* Record a link on the heap. All strings are copied (caller keeps ownership).
|
||||||
|
Returns 1 on success, 0 if the link limit (opt->maxlink) is reached. */
|
||||||
int hts_record_link(httrackp * opt,
|
int hts_record_link(httrackp * opt,
|
||||||
const char *address, const char *file, const char *save,
|
const char *address, const char *file, const char *save,
|
||||||
const char *ref_address, const char *ref_file,
|
const char *ref_address, const char *ref_file,
|
||||||
const char *codebase);
|
const char *codebase);
|
||||||
|
|
||||||
// index of the latest added link
|
/* Index of the most recently recorded link. */
|
||||||
size_t hts_record_link_latest(httrackp *opt);
|
size_t hts_record_link_latest(httrackp *opt);
|
||||||
|
|
||||||
// invalidate an entry
|
/* Mark link at index lpos as not to be processed (sets pass2 = -1). */
|
||||||
void hts_invalidate_link(httrackp * opt, int lpos);
|
void hts_invalidate_link(httrackp * opt, int lpos);
|
||||||
|
|
||||||
// wipe all records
|
/* Reset / free the engine's link heap. */
|
||||||
void hts_record_init(httrackp *opt);
|
void hts_record_init(httrackp *opt);
|
||||||
|
|
||||||
void hts_record_free(httrackp *opt);
|
void hts_record_free(httrackp *opt);
|
||||||
|
|
||||||
//int httpmirror(char* url,int level,httrackp opt);
|
/* Run the mirror for the given start URL(s) under opt. Top-level engine entry.
|
||||||
|
*/
|
||||||
int httpmirror(char *url1, httrackp * opt);
|
int httpmirror(char *url1, httrackp * opt);
|
||||||
|
|
||||||
|
/* Write len bytes of adr to local path s. url_adr/url_fil (may be NULL) name
|
||||||
|
the source URL for logging/notification. */
|
||||||
int filesave(httrackp * opt, const char *adr, int len, const char *s,
|
int filesave(httrackp * opt, const char *adr, int len, const char *s,
|
||||||
const char *url_adr /* = NULL */ ,
|
const char *url_adr /* = NULL */ ,
|
||||||
const char *url_fil /* = NULL */ );
|
const char *url_fil /* = NULL */ );
|
||||||
|
|
||||||
char *hts_cancel_file_pop(httrackp * opt);
|
char *hts_cancel_file_pop(httrackp * opt);
|
||||||
|
|
||||||
int check_fatal_io_errno(void);
|
int check_fatal_io_errno(void);
|
||||||
|
|
||||||
int engine_stats(void);
|
int engine_stats(void);
|
||||||
|
|
||||||
void host_ban(httrackp * opt, int ptr, struct_back * sback, const char *host);
|
void host_ban(httrackp * opt, int ptr, struct_back * sback, const char *host);
|
||||||
|
|
||||||
|
/* Open local file s for writing (filecreate, truncate) or appending
|
||||||
|
(fileappend), creating parent directories as needed. Return an open FILE*
|
||||||
|
the caller must fclose(), or NULL on failure. */
|
||||||
FILE *filecreate(filenote_strc * strct, const char *s);
|
FILE *filecreate(filenote_strc * strct, const char *s);
|
||||||
|
|
||||||
FILE *fileappend(filenote_strc * strct, const char *s);
|
FILE *fileappend(filenote_strc * strct, const char *s);
|
||||||
|
|
||||||
|
/* Create an empty file, return 1 on success, 0 on failure. */
|
||||||
int filecreateempty(filenote_strc * strct, const char *filename);
|
int filecreateempty(filenote_strc * strct, const char *filename);
|
||||||
|
|
||||||
int filenote(filenote_strc * strct, const char *s, filecreate_params * params);
|
int filenote(filenote_strc * strct, const char *s, filecreate_params * params);
|
||||||
|
|
||||||
void file_notify(httrackp * opt, const char *adr, const char *fil,
|
void file_notify(httrackp * opt, const char *adr, const char *fil,
|
||||||
const char *save, int create, int modify, int wasupdated);
|
const char *save, int create, int modify, int wasupdated);
|
||||||
|
|
||||||
void usercommand(httrackp * opt, int exe, const char *cmd, const char *file,
|
void usercommand(httrackp * opt, int exe, const char *cmd, const char *file,
|
||||||
const char *adr, const char *fil);
|
const char *adr, const char *fil);
|
||||||
|
|
||||||
void usercommand_exe(const char *cmd, const char *file);
|
void usercommand_exe(const char *cmd, const char *file);
|
||||||
|
|
||||||
int filters_init(char ***ptrfilters, int maxfilter, int filterinc);
|
int filters_init(char ***ptrfilters, int maxfilter, int filterinc);
|
||||||
|
|
||||||
int fspc(httrackp * opt, FILE * fp, const char *type);
|
int fspc(httrackp * opt, FILE * fp, const char *type);
|
||||||
|
|
||||||
char *next_token(char *p, int flag);
|
char *next_token(char *p, int flag);
|
||||||
|
|
||||||
//
|
/* Read a whole file into a freshly malloc'd, NUL-terminated buffer; the caller
|
||||||
|
owns it and must release it with freet(). Return NULL on missing/unreadable
|
||||||
|
file (readfile_or substitutes defaultdata instead). The byte content is NOT
|
||||||
|
transcoded except readfile_utf8, which expects a UTF-8 path. readfile2
|
||||||
|
reports the byte size (excluding the NUL) via *size when non-NULL. */
|
||||||
char *readfile(const char *fil);
|
char *readfile(const char *fil);
|
||||||
|
|
||||||
char *readfile2(const char *fil, LLint * size);
|
char *readfile2(const char *fil, LLint * size);
|
||||||
|
|
||||||
char *readfile_utf8(const char *fil);
|
char *readfile_utf8(const char *fil);
|
||||||
|
|
||||||
char *readfile_or(const char *fil, const char *defaultdata);
|
char *readfile_or(const char *fil, const char *defaultdata);
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
void check_rate(TStamp stat_timestart, int maxrate);
|
void check_rate(TStamp stat_timestart, int maxrate);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// liens
|
// links
|
||||||
int liens_record(char *adr, char *fil, char *save, char *former_adr,
|
int liens_record(char *adr, char *fil, char *save, char *former_adr,
|
||||||
char *former_fil, char *codebase);
|
char *former_fil, char *codebase);
|
||||||
|
|
||||||
// backing, routines externes
|
/* Backing (download-slot) scheduler. Operate on the back[] ring (struct_back).
|
||||||
|
Not thread-safe; call from the single crawl loop. */
|
||||||
|
|
||||||
|
/* How many new sockets may be opened now, honoring maxsoc and the maxconn rate
|
||||||
|
limit (>=0). _strict ignores reserved-slot headroom; the plain form leaves
|
||||||
|
room for naming tests and stops at 0 when the stack is nearly full. */
|
||||||
int back_pluggable_sockets(struct_back * sback, httrackp * opt);
|
int back_pluggable_sockets(struct_back * sback, httrackp * opt);
|
||||||
|
|
||||||
int back_pluggable_sockets_strict(struct_back * sback, httrackp * opt);
|
int back_pluggable_sockets_strict(struct_back * sback, httrackp * opt);
|
||||||
|
|
||||||
|
/* Schedule more links from the heap into free slots. Returns the number queued,
|
||||||
|
or <=0 if none could be added (no free slot / paused / stopped). */
|
||||||
int back_fill(struct_back * sback, httrackp * opt, cache_back * cache,
|
int back_fill(struct_back * sback, httrackp * opt, cache_back * cache,
|
||||||
int ptr, int numero_passe);
|
int ptr, int numero_passe);
|
||||||
int backlinks_done(const struct_back * sback, lien_url ** liens,
|
|
||||||
int lien_tot, int ptr);
|
/* Count of links already finished (in background or served from cache). */
|
||||||
|
int backlinks_done(const struct_back *sback, lien_url **liens, int lien_tot,
|
||||||
|
int ptr);
|
||||||
|
|
||||||
|
/* Like back_fill, but a no-op (returns -1) when in-memory buffered data already
|
||||||
|
exceeds opt->maxcache. */
|
||||||
int back_fillmax(struct_back * sback, httrackp * opt, cache_back * cache,
|
int back_fillmax(struct_back * sback, httrackp * opt, cache_back * cache,
|
||||||
int ptr, int numero_passe);
|
int ptr, int numero_passe);
|
||||||
|
|
||||||
|
/* Interactive prompt: continue an interrupted mirror? Returns nonzero to go on.
|
||||||
|
*/
|
||||||
int ask_continue(httrackp * opt);
|
int ask_continue(httrackp * opt);
|
||||||
|
|
||||||
|
/* Number of decimal digits in n. */
|
||||||
int nombre_digit(int n);
|
int nombre_digit(int n);
|
||||||
|
|
||||||
// Java
|
// Java
|
||||||
@@ -336,17 +412,23 @@ int hts_add_file(char *file, int file_position);
|
|||||||
// Polling
|
// Polling
|
||||||
#if HTS_POLL
|
#if HTS_POLL
|
||||||
int check_flot(T_SOC s);
|
int check_flot(T_SOC s);
|
||||||
|
|
||||||
int check_stdin(void);
|
int check_stdin(void);
|
||||||
|
|
||||||
int read_stdin(char *s, int max);
|
int read_stdin(char *s, int max);
|
||||||
#endif
|
#endif
|
||||||
|
/* Socket readiness probes: nonzero if the socket has an error / has data. */
|
||||||
int check_sockerror(T_SOC s);
|
int check_sockerror(T_SOC s);
|
||||||
|
|
||||||
int check_sockdata(T_SOC s);
|
int check_sockdata(T_SOC s);
|
||||||
|
|
||||||
/* external modules */
|
/* external modules: register a link discovered by a parser plugin. */
|
||||||
int htsAddLink(htsmoduleStruct * str, char *link);
|
int htsAddLink(htsmoduleStruct * str, char *link);
|
||||||
|
|
||||||
// Void
|
/* No-op function (used as a do-nothing callback / to defeat optimizers). */
|
||||||
void voidf(void);
|
void voidf(void);
|
||||||
|
|
||||||
|
/* HTML marker comment marking where the top index is spliced. */
|
||||||
#define HTS_TOPINDEX "TOP_INDEX_HTTRACK"
|
#define HTS_TOPINDEX "TOP_INDEX_HTTRACK"
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
113
src/htsdefines.h
113
src/htsdefines.h
@@ -30,11 +30,16 @@ Please visit our Website: http://www.httrack.com
|
|||||||
/* Author: Xavier Roche */
|
/* Author: Xavier Roche */
|
||||||
/* ------------------------------------------------------------ */
|
/* ------------------------------------------------------------ */
|
||||||
|
|
||||||
// Fichier librairie .h
|
/** @file htsdefines.h
|
||||||
|
* Public callback prototypes and the wrapper/plug-in interface: the function
|
||||||
|
* pointer types a parser or wrapper module implements, and the callback table
|
||||||
|
* the engine dispatches through. */
|
||||||
#ifndef HTS_DEFINES_DEFH
|
#ifndef HTS_DEFINES_DEFH
|
||||||
#define HTS_DEFINES_DEFH
|
#define HTS_DEFINES_DEFH
|
||||||
|
|
||||||
/* Forward definitions */
|
/* Forward declarations of engine structs, so this header is usable without
|
||||||
|
pulling in their full definitions. Each is guarded so multiple public
|
||||||
|
headers can repeat the typedef without clashing. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_httrackp
|
#ifndef HTS_DEF_FWSTRUCT_httrackp
|
||||||
#define HTS_DEF_FWSTRUCT_httrackp
|
#define HTS_DEF_FWSTRUCT_httrackp
|
||||||
typedef struct httrackp httrackp;
|
typedef struct httrackp httrackp;
|
||||||
@@ -64,7 +69,8 @@ typedef struct t_hts_callbackarg t_hts_callbackarg;
|
|||||||
typedef struct t_hts_callbackarg t_hts_callbackarg;
|
typedef struct t_hts_callbackarg t_hts_callbackarg;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* External callbacks */
|
/* Marks a symbol an external wrapper module exports back to the engine
|
||||||
|
(dllexport on Windows, nothing elsewhere). */
|
||||||
#ifndef EXTERNAL_FUNCTION
|
#ifndef EXTERNAL_FUNCTION
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#define EXTERNAL_FUNCTION __declspec(dllexport)
|
#define EXTERNAL_FUNCTION __declspec(dllexport)
|
||||||
@@ -73,78 +79,141 @@ typedef struct t_hts_callbackarg t_hts_callbackarg;
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* --wrapper plug function prototype */
|
/* Entry points of a --wrapper plug-in: hts_plug(opt, argv) is called once to
|
||||||
|
install the wrapper (argv is the wrapper's argument string), hts_unplug(opt)
|
||||||
|
once to tear it down. Both return non-zero on success. */
|
||||||
typedef int (*t_hts_plug) (httrackp * opt, const char *argv);
|
typedef int (*t_hts_plug) (httrackp * opt, const char *argv);
|
||||||
|
|
||||||
typedef int (*t_hts_unplug) (httrackp * opt);
|
typedef int (*t_hts_unplug) (httrackp * opt);
|
||||||
|
|
||||||
/* htsopt function callbacks definitions */
|
/* Engine callback prototypes. Each is one hook the engine fires at a defined
|
||||||
|
point of a mirror; a wrapper installs the ones it cares about in the
|
||||||
|
callback table below. carg carries the user-defined argument chain; int
|
||||||
|
returns are 1 to continue/accept, 0 to abort/refuse unless noted. */
|
||||||
|
|
||||||
|
/* Called once when the wrapper is installed; allocate per-run state here. */
|
||||||
typedef void (*t_hts_htmlcheck_init) (t_hts_callbackarg * carg);
|
typedef void (*t_hts_htmlcheck_init) (t_hts_callbackarg * carg);
|
||||||
|
|
||||||
|
/* Called once when the wrapper is removed; release per-run state here. */
|
||||||
typedef void (*t_hts_htmlcheck_uninit) (t_hts_callbackarg * carg);
|
typedef void (*t_hts_htmlcheck_uninit) (t_hts_callbackarg * carg);
|
||||||
|
|
||||||
|
/* Fired at the start of a mirror, after options are parsed. */
|
||||||
typedef int (*t_hts_htmlcheck_start) (t_hts_callbackarg * carg, httrackp * opt);
|
typedef int (*t_hts_htmlcheck_start) (t_hts_callbackarg * carg, httrackp * opt);
|
||||||
|
|
||||||
|
/* Fired at the end of a mirror. */
|
||||||
typedef int (*t_hts_htmlcheck_end) (t_hts_callbackarg * carg, httrackp * opt);
|
typedef int (*t_hts_htmlcheck_end) (t_hts_callbackarg * carg, httrackp * opt);
|
||||||
|
|
||||||
|
/* Fired while options are being changed, to validate or adjust them. */
|
||||||
typedef int (*t_hts_htmlcheck_chopt) (t_hts_callbackarg * carg, httrackp * opt);
|
typedef int (*t_hts_htmlcheck_chopt) (t_hts_callbackarg * carg, httrackp * opt);
|
||||||
|
|
||||||
|
/* Rewrite hook over an in-memory page: the html and len arguments point at the
|
||||||
|
buffer and its length (the callback may reallocate and resize it),
|
||||||
|
url_adresse and url_fichier name it. */
|
||||||
typedef int (*t_hts_htmlcheck_process) (t_hts_callbackarg * carg,
|
typedef int (*t_hts_htmlcheck_process) (t_hts_callbackarg * carg,
|
||||||
httrackp * opt, char **html, int *len,
|
httrackp * opt, char **html, int *len,
|
||||||
const char *url_adresse,
|
const char *url_adresse,
|
||||||
const char *url_fichier);
|
const char *url_fichier);
|
||||||
|
|
||||||
|
/* Same shape as process, run before HTML parsing. */
|
||||||
typedef t_hts_htmlcheck_process t_hts_htmlcheck_preprocess;
|
typedef t_hts_htmlcheck_process t_hts_htmlcheck_preprocess;
|
||||||
|
|
||||||
|
/* Same shape as process, run after HTML parsing. */
|
||||||
typedef t_hts_htmlcheck_process t_hts_htmlcheck_postprocess;
|
typedef t_hts_htmlcheck_process t_hts_htmlcheck_postprocess;
|
||||||
|
|
||||||
|
/* Inspect a page (read-only html/len) without rewriting it. */
|
||||||
typedef int (*t_hts_htmlcheck_check_html) (t_hts_callbackarg * carg,
|
typedef int (*t_hts_htmlcheck_check_html) (t_hts_callbackarg * carg,
|
||||||
httrackp * opt, char *html, int len,
|
httrackp * opt, char *html, int len,
|
||||||
const char *url_adresse,
|
const char *url_adresse,
|
||||||
const char *url_fichier);
|
const char *url_fichier);
|
||||||
|
|
||||||
|
/* Answer an engine query identified by 'question'; returns the answer string
|
||||||
|
(owned by the callback, must stay valid until the next call). */
|
||||||
typedef const char *(*t_hts_htmlcheck_query) (t_hts_callbackarg * carg,
|
typedef const char *(*t_hts_htmlcheck_query) (t_hts_callbackarg * carg,
|
||||||
httrackp * opt,
|
httrackp * opt,
|
||||||
const char *question);
|
const char *question);
|
||||||
|
|
||||||
|
/* Second query channel, same contract as query. */
|
||||||
typedef const char *(*t_hts_htmlcheck_query2) (t_hts_callbackarg * carg,
|
typedef const char *(*t_hts_htmlcheck_query2) (t_hts_callbackarg * carg,
|
||||||
httrackp * opt,
|
httrackp * opt,
|
||||||
const char *question);
|
const char *question);
|
||||||
|
|
||||||
|
/* Third query channel, same contract as query. */
|
||||||
typedef const char *(*t_hts_htmlcheck_query3) (t_hts_callbackarg * carg,
|
typedef const char *(*t_hts_htmlcheck_query3) (t_hts_callbackarg * carg,
|
||||||
httrackp * opt,
|
httrackp * opt,
|
||||||
const char *question);
|
const char *question);
|
||||||
|
|
||||||
|
/* Per-tick progress hook: 'back' is the transfer slot array of 'back_max'
|
||||||
|
entries, back_index the active one; lien_tot/lien_ntot and stats report
|
||||||
|
queue size and running totals, stat_time the elapsed time. */
|
||||||
typedef int (*t_hts_htmlcheck_loop) (t_hts_callbackarg * carg, httrackp * opt,
|
typedef int (*t_hts_htmlcheck_loop) (t_hts_callbackarg * carg, httrackp * opt,
|
||||||
lien_back * back, int back_max,
|
lien_back * back, int back_max,
|
||||||
int back_index, int lien_tot,
|
int back_index, int lien_tot,
|
||||||
int lien_ntot, int stat_time,
|
int lien_ntot, int stat_time,
|
||||||
hts_stat_struct * stats);
|
hts_stat_struct * stats);
|
||||||
|
|
||||||
|
/* Veto a link (adr host, fil path) after its transfer; status is the result.
|
||||||
|
Return 0 to drop the link. */
|
||||||
typedef int (*t_hts_htmlcheck_check_link) (t_hts_callbackarg * carg,
|
typedef int (*t_hts_htmlcheck_check_link) (t_hts_callbackarg * carg,
|
||||||
httrackp * opt, const char *adr,
|
httrackp * opt, const char *adr,
|
||||||
const char *fil, int status);
|
const char *fil, int status);
|
||||||
|
|
||||||
|
/* Veto a link by its MIME type before download; return 0 to skip it. */
|
||||||
typedef int (*t_hts_htmlcheck_check_mime) (t_hts_callbackarg * carg,
|
typedef int (*t_hts_htmlcheck_check_mime) (t_hts_callbackarg * carg,
|
||||||
httrackp * opt, const char *adr,
|
httrackp * opt, const char *adr,
|
||||||
const char *fil, const char *mime,
|
const char *fil, const char *mime,
|
||||||
int status);
|
int status);
|
||||||
|
|
||||||
|
/* Fired when the mirror pauses, waiting on 'lockfile' to be removed. */
|
||||||
typedef void (*t_hts_htmlcheck_pause) (t_hts_callbackarg * carg, httrackp * opt,
|
typedef void (*t_hts_htmlcheck_pause) (t_hts_callbackarg * carg, httrackp * opt,
|
||||||
const char *lockfile);
|
const char *lockfile);
|
||||||
|
|
||||||
|
/* Fired after a file is written to disk; 'file' is the local path. */
|
||||||
typedef void (*t_hts_htmlcheck_filesave) (t_hts_callbackarg * carg,
|
typedef void (*t_hts_htmlcheck_filesave) (t_hts_callbackarg * carg,
|
||||||
httrackp * opt, const char *file);
|
httrackp * opt, const char *file);
|
||||||
|
|
||||||
|
/* Richer file-saved notification: source host/filename, local path, and flags
|
||||||
|
telling whether the file is new, modified, or left unchanged. */
|
||||||
typedef void (*t_hts_htmlcheck_filesave2) (t_hts_callbackarg * carg,
|
typedef void (*t_hts_htmlcheck_filesave2) (t_hts_callbackarg * carg,
|
||||||
httrackp * opt, const char *hostname,
|
httrackp * opt, const char *hostname,
|
||||||
const char *filename,
|
const char *filename,
|
||||||
const char *localfile, int is_new,
|
const char *localfile, int is_new,
|
||||||
int is_modified, int not_updated);
|
int is_modified, int not_updated);
|
||||||
|
|
||||||
|
/* Fired for each link parsed out of a page; 'link' may be edited in place. */
|
||||||
typedef int (*t_hts_htmlcheck_linkdetected) (t_hts_callbackarg * carg,
|
typedef int (*t_hts_htmlcheck_linkdetected) (t_hts_callbackarg * carg,
|
||||||
httrackp * opt, char *link);
|
httrackp * opt, char *link);
|
||||||
|
|
||||||
|
/* As linkdetected, plus tag_start, the markup the link was found in. */
|
||||||
typedef int (*t_hts_htmlcheck_linkdetected2) (t_hts_callbackarg * carg,
|
typedef int (*t_hts_htmlcheck_linkdetected2) (t_hts_callbackarg * carg,
|
||||||
httrackp * opt, char *link,
|
httrackp * opt, char *link,
|
||||||
const char *tag_start);
|
const char *tag_start);
|
||||||
|
|
||||||
|
/* Fired on each transfer-status change of slot 'back'. */
|
||||||
typedef int (*t_hts_htmlcheck_xfrstatus) (t_hts_callbackarg * carg,
|
typedef int (*t_hts_htmlcheck_xfrstatus) (t_hts_callbackarg * carg,
|
||||||
httrackp * opt, lien_back * back);
|
httrackp * opt, lien_back * back);
|
||||||
|
|
||||||
|
/* Choose the local save path for a URL; write it into 'save'. adr/fil name the
|
||||||
|
target, referer_adr/referer_fil the page that linked it. */
|
||||||
typedef int (*t_hts_htmlcheck_savename) (t_hts_callbackarg * carg,
|
typedef int (*t_hts_htmlcheck_savename) (t_hts_callbackarg * carg,
|
||||||
httrackp * opt,
|
httrackp * opt,
|
||||||
const char *adr_complete,
|
const char *adr_complete,
|
||||||
const char *fil_complete,
|
const char *fil_complete,
|
||||||
const char *referer_adr,
|
const char *referer_adr,
|
||||||
const char *referer_fil, char *save);
|
const char *referer_fil, char *save);
|
||||||
|
|
||||||
|
/* Extended save-name hook, same signature as savename. */
|
||||||
typedef t_hts_htmlcheck_savename t_hts_htmlcheck_extsavename;
|
typedef t_hts_htmlcheck_savename t_hts_htmlcheck_extsavename;
|
||||||
|
|
||||||
|
/* Inspect or edit the outgoing request headers in 'buff' before they are sent.
|
||||||
|
*/
|
||||||
typedef int (*t_hts_htmlcheck_sendhead) (t_hts_callbackarg * carg,
|
typedef int (*t_hts_htmlcheck_sendhead) (t_hts_callbackarg * carg,
|
||||||
httrackp * opt, char *buff,
|
httrackp * opt, char *buff,
|
||||||
const char *adr, const char *fil,
|
const char *adr, const char *fil,
|
||||||
const char *referer_adr,
|
const char *referer_adr,
|
||||||
const char *referer_fil,
|
const char *referer_fil,
|
||||||
htsblk * outgoing);
|
htsblk * outgoing);
|
||||||
|
|
||||||
|
/* Inspect the incoming response headers in 'buff' after they are received. */
|
||||||
typedef int (*t_hts_htmlcheck_receivehead) (t_hts_callbackarg * carg,
|
typedef int (*t_hts_htmlcheck_receivehead) (t_hts_callbackarg * carg,
|
||||||
httrackp * opt, char *buff,
|
httrackp * opt, char *buff,
|
||||||
const char *adr, const char *fil,
|
const char *adr, const char *fil,
|
||||||
@@ -152,9 +221,11 @@ typedef int (*t_hts_htmlcheck_receivehead) (t_hts_callbackarg * carg,
|
|||||||
const char *referer_fil,
|
const char *referer_fil,
|
||||||
htsblk * incoming);
|
htsblk * incoming);
|
||||||
|
|
||||||
/* External additional parsing module(s) */
|
/* External parser module hooks: detect claims a document type (return 1 to
|
||||||
|
take it), parse then extracts its links. 'str' carries the document. */
|
||||||
typedef int (*t_hts_htmlcheck_detect) (t_hts_callbackarg * carg, httrackp * opt,
|
typedef int (*t_hts_htmlcheck_detect) (t_hts_callbackarg * carg, httrackp * opt,
|
||||||
htsmoduleStruct * str);
|
htsmoduleStruct * str);
|
||||||
|
|
||||||
typedef int (*t_hts_htmlcheck_parse) (t_hts_callbackarg * carg, httrackp * opt,
|
typedef int (*t_hts_htmlcheck_parse) (t_hts_callbackarg * carg, httrackp * opt,
|
||||||
htsmoduleStruct * str);
|
htsmoduleStruct * str);
|
||||||
|
|
||||||
@@ -164,20 +235,24 @@ typedef int (*t_hts_htmlcheck_parse) (t_hts_callbackarg * carg, httrackp * opt,
|
|||||||
typedef struct t_hts_htmlcheck_callbacks t_hts_htmlcheck_callbacks;
|
typedef struct t_hts_htmlcheck_callbacks t_hts_htmlcheck_callbacks;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Callabck array */
|
/* Declares one named callback slot: its function pointer (typed
|
||||||
|
t_hts_htmlcheck_<NAME>) paired with the carg passed to it. */
|
||||||
#define DEFCALLBACK(NAME) \
|
#define DEFCALLBACK(NAME) \
|
||||||
struct NAME { \
|
struct NAME { \
|
||||||
t_hts_htmlcheck_ ##NAME fun; \
|
t_hts_htmlcheck_ ##NAME fun; \
|
||||||
t_hts_callbackarg *carg; \
|
t_hts_callbackarg *carg; \
|
||||||
} NAME
|
} NAME
|
||||||
|
|
||||||
/* Callback items */
|
/* Generic, type-erased callback slot used where the hook type is opaque. */
|
||||||
typedef void *t_hts_htmlcheck_t_hts_htmlcheck_callbacks_item;
|
typedef void *t_hts_htmlcheck_t_hts_htmlcheck_callbacks_item;
|
||||||
|
|
||||||
typedef DEFCALLBACK(t_hts_htmlcheck_callbacks_item);
|
typedef DEFCALLBACK(t_hts_htmlcheck_callbacks_item);
|
||||||
|
|
||||||
/* Linked list, which should be used for the 'arg' user-defined argument */
|
/* Per-callback argument node. Wrappers chain these so a new hook can wrap an
|
||||||
|
existing one: userdef is the wrapper's own data, prev points back to the
|
||||||
|
function and carg it displaced (call it to keep the previous behavior). */
|
||||||
struct t_hts_callbackarg {
|
struct t_hts_callbackarg {
|
||||||
/* User-defined agument for the called function */
|
/* User-defined argument for the called function */
|
||||||
void *userdef;
|
void *userdef;
|
||||||
|
|
||||||
/* Previous function, if any (fun != NULL) */
|
/* Previous function, if any (fun != NULL) */
|
||||||
@@ -187,7 +262,9 @@ struct t_hts_callbackarg {
|
|||||||
} prev;
|
} prev;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Callback structure */
|
/* The full callback table, one slot per hook; installed in httrackp options
|
||||||
|
and dispatched by the engine. The trailing comments mark the API version a
|
||||||
|
slot first appeared in. */
|
||||||
struct t_hts_htmlcheck_callbacks {
|
struct t_hts_htmlcheck_callbacks {
|
||||||
/* v3.41 */
|
/* v3.41 */
|
||||||
DEFCALLBACK(init);
|
DEFCALLBACK(init);
|
||||||
@@ -219,9 +296,11 @@ struct t_hts_htmlcheck_callbacks {
|
|||||||
DEFCALLBACK(extsavename);
|
DEFCALLBACK(extsavename);
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Library internal definitions */
|
/* Library-internal helpers, compiled only inside the engine. */
|
||||||
#ifdef HTS_INTERNAL_BYTECODE
|
#ifdef HTS_INTERNAL_BYTECODE
|
||||||
|
|
||||||
|
/* Maps a callback slot's name to its byte offset in the callback table, so a
|
||||||
|
slot can be installed by name. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_t_hts_callback_ref
|
#ifndef HTS_DEF_FWSTRUCT_t_hts_callback_ref
|
||||||
#define HTS_DEF_FWSTRUCT_t_hts_callback_ref
|
#define HTS_DEF_FWSTRUCT_t_hts_callback_ref
|
||||||
typedef struct t_hts_callback_ref t_hts_callback_ref;
|
typedef struct t_hts_callback_ref t_hts_callback_ref;
|
||||||
@@ -235,18 +314,26 @@ struct t_hts_callback_ref {
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Default (no-op) callback table the engine starts from. */
|
||||||
extern const t_hts_htmlcheck_callbacks default_callbacks;
|
extern const t_hts_htmlcheck_callbacks default_callbacks;
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Internal helpers for building an HTTP request/response into the engine's
|
||||||
|
scratch buffer (opt->state.HTbuff): START resets it, PRINT appends; the
|
||||||
|
PANIC variant records a fatal error message. */
|
||||||
#define HT_PRINT(A) strcatbuff(opt->state.HTbuff,A);
|
#define HT_PRINT(A) strcatbuff(opt->state.HTbuff,A);
|
||||||
|
|
||||||
#define HT_REQUEST_START opt->state.HTbuff[0]='\0';
|
#define HT_REQUEST_START opt->state.HTbuff[0]='\0';
|
||||||
|
|
||||||
#define HT_REQUEST_END
|
#define HT_REQUEST_END
|
||||||
#define HTT_REQUEST_START opt->state.HTbuff[0]='\0';
|
#define HTT_REQUEST_START opt->state.HTbuff[0]='\0';
|
||||||
|
|
||||||
#define HTT_REQUEST_END
|
#define HTT_REQUEST_END
|
||||||
#define HTS_REQUEST_START opt->state.HTbuff[0]='\0';
|
#define HTS_REQUEST_START opt->state.HTbuff[0]='\0';
|
||||||
|
|
||||||
#define HTS_REQUEST_END
|
#define HTS_REQUEST_END
|
||||||
#define HTS_PANIC_PRINTF(S) strcpybuff(opt->state._hts_errmsg,S);
|
#define HTS_PANIC_PRINTF(S) strcpybuff(opt->state._hts_errmsg,S);
|
||||||
|
|
||||||
|
|||||||
105
src/htsglobal.h
105
src/htsglobal.h
@@ -30,12 +30,19 @@ Please visit our Website: http://www.httrack.com
|
|||||||
/* Author: Xavier Roche */
|
/* Author: Xavier Roche */
|
||||||
/* ------------------------------------------------------------ */
|
/* ------------------------------------------------------------ */
|
||||||
|
|
||||||
// Fichier réunissant l'ensemble des defines
|
/** @file htsglobal.h
|
||||||
|
* Foundational portability layer included by every other public header:
|
||||||
|
* version strings, platform/feature switches, the HTSEXT_API export marker,
|
||||||
|
* the integer/time/socket typedefs (LLint, TStamp, INTsys, T_SOC), printf
|
||||||
|
* format helpers, and the file-access mode constants. */
|
||||||
|
|
||||||
#ifndef HTTRACK_GLOBAL_DEFH
|
#ifndef HTTRACK_GLOBAL_DEFH
|
||||||
#define HTTRACK_GLOBAL_DEFH
|
#define HTTRACK_GLOBAL_DEFH
|
||||||
|
|
||||||
// Version (also check external version information)
|
/* Package version strings (the library ABI version is VERSION_INFO in
|
||||||
|
configure.ac, decoupled from these). VERSION is the display form, VERSIONID
|
||||||
|
the dotted numeric form, AFF_VERSION the short form shown in footers,
|
||||||
|
LIB_VERSION the data/cache format generation. */
|
||||||
#define HTTRACK_VERSION "3.49-8"
|
#define HTTRACK_VERSION "3.49-8"
|
||||||
#define HTTRACK_VERSIONID "3.49.8"
|
#define HTTRACK_VERSIONID "3.49.8"
|
||||||
#define HTTRACK_AFF_VERSION "3.x"
|
#define HTTRACK_AFF_VERSION "3.x"
|
||||||
@@ -46,7 +53,7 @@ Please visit our Website: http://www.httrack.com
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Définition plate-forme
|
// Platform detection (sizes, feature macros)
|
||||||
#include "htsconfig.h"
|
#include "htsconfig.h"
|
||||||
|
|
||||||
// WIN32 types
|
// WIN32 types
|
||||||
@@ -57,11 +64,17 @@ Please visit our Website: http://www.httrack.com
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* GCC extension */
|
/* Compiler-attribute helpers, no-ops where unsupported.
|
||||||
|
HTS_UNUSED: suppress unused-symbol warnings. HTS_STATIC: an unused-safe
|
||||||
|
static. HTS_PRINTF_FUN(fmt, arg): mark a printf-like function so the
|
||||||
|
compiler type-checks the format string at argument index fmt against the
|
||||||
|
varargs starting at arg. */
|
||||||
#ifndef HTS_UNUSED
|
#ifndef HTS_UNUSED
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
#define HTS_UNUSED __attribute__ ((unused))
|
#define HTS_UNUSED __attribute__ ((unused))
|
||||||
|
|
||||||
#define HTS_STATIC static __attribute__ ((unused))
|
#define HTS_STATIC static __attribute__ ((unused))
|
||||||
|
|
||||||
#define HTS_PRINTF_FUN(fmt, arg) __attribute__ ((format (printf, fmt, arg)))
|
#define HTS_PRINTF_FUN(fmt, arg) __attribute__ ((format (printf, fmt, arg)))
|
||||||
#else
|
#else
|
||||||
#define HTS_UNUSED
|
#define HTS_UNUSED
|
||||||
@@ -86,6 +99,7 @@ Please visit our Website: http://www.httrack.com
|
|||||||
#endif
|
#endif
|
||||||
#ifndef S_ISREG
|
#ifndef S_ISREG
|
||||||
#define S_ISREG(m) ((m) & _S_IFREG)
|
#define S_ISREG(m) ((m) & _S_IFREG)
|
||||||
|
|
||||||
#define S_ISDIR(m) ((m) & _S_IFDIR)
|
#define S_ISDIR(m) ((m) & _S_IFDIR)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -132,7 +146,7 @@ Please visit our Website: http://www.httrack.com
|
|||||||
#define BIGSTK
|
#define BIGSTK
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// compatibilité DOS
|
// DOS-style 8.3 filenames? 1 on Windows, 0 elsewhere
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#define HTS_DOSNAME 1
|
#define HTS_DOSNAME 1
|
||||||
#else
|
#else
|
||||||
@@ -168,7 +182,10 @@ Please visit our Website: http://www.httrack.com
|
|||||||
#define __cdecl
|
#define __cdecl
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* rc file */
|
/* Install paths and config-file names. HTTRACKRC is the per-user rc filename,
|
||||||
|
HTTRACKCNF the system-wide config, HTTRACKDIR the shared data directory; the
|
||||||
|
ETC/BIN/LIB/PREFIX paths are the defaults these derive from when not set by
|
||||||
|
the build. */
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#define HTS_HTTRACKRC "httrackrc"
|
#define HTS_HTTRACKRC "httrackrc"
|
||||||
#else
|
#else
|
||||||
@@ -197,9 +214,11 @@ Please visit our Website: http://www.httrack.com
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Max URL length */
|
/* Maximum URL length, in bytes. Callers size URL/path string buffers to this;
|
||||||
|
anything longer is rejected. */
|
||||||
#define HTS_URLMAXSIZE 1024
|
#define HTS_URLMAXSIZE 1024
|
||||||
/* Max command-line length (>=HTS_URLMAXSIZE*2) */
|
/* Maximum command-line argument length, in bytes (kept >= HTS_URLMAXSIZE*2 so
|
||||||
|
an addr+path pair always fits). */
|
||||||
#define HTS_CDLMAXSIZE 1024
|
#define HTS_CDLMAXSIZE 1024
|
||||||
/* MIME-type buffer contract (htsblk.contenttype/charset/contentencoding); holds
|
/* MIME-type buffer contract (htsblk.contenttype/charset/contentencoding); holds
|
||||||
the longest registered MIME type, the Office OOXML ones reaching 73 chars */
|
the longest registered MIME type, the Office OOXML ones reaching 73 chars */
|
||||||
@@ -219,24 +238,30 @@ Please visit our Website: http://www.httrack.com
|
|||||||
#define LF "\x0a"
|
#define LF "\x0a"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* équivaut à "paramètre vide", par exemple -F (none) */
|
/* Sentinel meaning "empty parameter", e.g. -F (none) */
|
||||||
#define HTS_NOPARAM "(none)"
|
#define HTS_NOPARAM "(none)"
|
||||||
#define HTS_NOPARAM2 "\"(none)\""
|
#define HTS_NOPARAM2 "\"(none)\""
|
||||||
|
|
||||||
/* maximum et minimum */
|
/* Larger/smaller of two values. Macros: arguments are evaluated twice. */
|
||||||
#define maximum(A,B) ( (A) > (B) ? (A) : (B) )
|
#define maximum(A,B) ( (A) > (B) ? (A) : (B) )
|
||||||
|
|
||||||
#define minimum(A,B) ( (A) < (B) ? (A) : (B) )
|
#define minimum(A,B) ( (A) < (B) ? (A) : (B) )
|
||||||
|
|
||||||
/* chaine no empty ? (and not null) */
|
/* True when A is a non-NULL, non-empty string. */
|
||||||
#define strnotempty(A) (((A) != NULL && (A)[0] != '\0'))
|
#define strnotempty(A) (((A) != NULL && (A)[0] != '\0'))
|
||||||
|
|
||||||
/* optimisation inline si possible */
|
/* 'inline' where the dialect supports it (C++), nothing in plain C. */
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
#define HTS_INLINE inline
|
#define HTS_INLINE inline
|
||||||
#else
|
#else
|
||||||
#define HTS_INLINE
|
#define HTS_INLINE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Marks a symbol as part of the library's public ABI: exported from
|
||||||
|
libhttrack and visible to callers. Symbols without it stay internal (hidden
|
||||||
|
under -fvisibility=hidden). Expands to dllexport when building the library,
|
||||||
|
dllimport when consuming it, and the visibility("default") attribute on
|
||||||
|
ELF. */
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#ifdef LIBHTTRACK_EXPORTS
|
#ifdef LIBHTTRACK_EXPORTS
|
||||||
#define HTSEXT_API __declspec(dllexport)
|
#define HTSEXT_API __declspec(dllexport)
|
||||||
@@ -247,6 +272,7 @@ Please visit our Website: http://www.httrack.com
|
|||||||
/* See <http://gcc.gnu.org/wiki/Visibility> */
|
/* See <http://gcc.gnu.org/wiki/Visibility> */
|
||||||
#if ( ( defined(__GNUC__) && ( __GNUC__ >= 4 ) ) \
|
#if ( ( defined(__GNUC__) && ( __GNUC__ >= 4 ) ) \
|
||||||
|| ( defined(HAVE_VISIBILITY) && HAVE_VISIBILITY ) )
|
|| ( defined(HAVE_VISIBILITY) && HAVE_VISIBILITY ) )
|
||||||
|
|
||||||
#define HTSEXT_API __attribute__ ((visibility ("default")))
|
#define HTSEXT_API __attribute__ ((visibility ("default")))
|
||||||
#else
|
#else
|
||||||
#define HTSEXT_API
|
#define HTSEXT_API
|
||||||
@@ -260,10 +286,13 @@ Please visit our Website: http://www.httrack.com
|
|||||||
*/
|
*/
|
||||||
#if defined(__GNUC__) && \
|
#if defined(__GNUC__) && \
|
||||||
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
|
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
|
||||||
|
|
||||||
#define HTS_DEPRECATED(msg) __attribute__((deprecated(msg)))
|
#define HTS_DEPRECATED(msg) __attribute__((deprecated(msg)))
|
||||||
#elif defined(__GNUC__)
|
#elif defined(__GNUC__)
|
||||||
|
|
||||||
#define HTS_DEPRECATED(msg) __attribute__((deprecated))
|
#define HTS_DEPRECATED(msg) __attribute__((deprecated))
|
||||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
|
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
|
||||||
|
|
||||||
#define HTS_DEPRECATED(msg) __declspec(deprecated(msg))
|
#define HTS_DEPRECATED(msg) __declspec(deprecated(msg))
|
||||||
#else
|
#else
|
||||||
#define HTS_DEPRECATED(msg)
|
#define HTS_DEPRECATED(msg)
|
||||||
@@ -277,12 +306,16 @@ Please visit our Website: http://www.httrack.com
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// long long int? (or int)
|
/* Wide integer types, chosen per platform.
|
||||||
// (and int cast for system functions like malloc() )
|
LLint: signed 64-bit counter for byte counts and large sizes (falls back to
|
||||||
|
plain int where 64-bit is unavailable).
|
||||||
|
TStamp: timestamp/duration in the same width (a double in the no-64-bit
|
||||||
|
fallback).
|
||||||
|
LLintP: the printf conversion for an LLint. */
|
||||||
#if HTS_LONGLONG
|
#if HTS_LONGLONG
|
||||||
#ifdef LLINT_FORMAT
|
#ifdef LLINT_FORMAT
|
||||||
typedef LLINT_TYPE LLint;
|
typedef LLINT_TYPE LLint;
|
||||||
|
|
||||||
typedef LLINT_TYPE TStamp;
|
typedef LLINT_TYPE TStamp;
|
||||||
|
|
||||||
#define LLintP LLINT_FORMAT
|
#define LLintP LLINT_FORMAT
|
||||||
@@ -290,17 +323,21 @@ typedef LLINT_TYPE TStamp;
|
|||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
typedef __int64 LLint;
|
typedef __int64 LLint;
|
||||||
|
|
||||||
typedef __int64 TStamp;
|
typedef __int64 TStamp;
|
||||||
|
|
||||||
#define LLintP "%I64d"
|
#define LLintP "%I64d"
|
||||||
#elif (defined(_LP64) || defined(__x86_64__) \
|
#elif (defined(_LP64) || defined(__x86_64__) \
|
||||||
|| defined(__powerpc64__) || defined(__64BIT__))
|
|| defined(__powerpc64__) || defined(__64BIT__))
|
||||||
|
|
||||||
typedef long int LLint;
|
typedef long int LLint;
|
||||||
|
|
||||||
typedef long int TStamp;
|
typedef long int TStamp;
|
||||||
|
|
||||||
#define LLintP "%ld"
|
#define LLintP "%ld"
|
||||||
#else
|
#else
|
||||||
typedef long long int LLint;
|
typedef long long int LLint;
|
||||||
|
|
||||||
typedef long long int TStamp;
|
typedef long long int TStamp;
|
||||||
|
|
||||||
#define LLintP "%lld"
|
#define LLintP "%lld"
|
||||||
@@ -315,6 +352,9 @@ typedef int LLint;
|
|||||||
typedef double TStamp;
|
typedef double TStamp;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Integer type for file offsets/sizes passed to the C library. Widens to
|
||||||
|
LLint (with HTS_FSEEKO for fseeko/ftello) under large-file support, plain
|
||||||
|
int otherwise; INTsysP is its printf conversion. */
|
||||||
#ifdef LFS_FLAG
|
#ifdef LFS_FLAG
|
||||||
typedef LLint INTsys;
|
typedef LLint INTsys;
|
||||||
|
|
||||||
@@ -328,8 +368,11 @@ typedef int INTsys;
|
|||||||
#define INTsysP "%d"
|
#define INTsysP "%d"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Socket-handle type. An unsigned integer wide enough for a Windows SOCKET;
|
||||||
|
a plain int file descriptor on POSIX. */
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#if defined(_WIN64)
|
#if defined(_WIN64)
|
||||||
|
|
||||||
typedef unsigned __int64 T_SOC;
|
typedef unsigned __int64 T_SOC;
|
||||||
#else
|
#else
|
||||||
typedef unsigned __int32 T_SOC;
|
typedef unsigned __int32 T_SOC;
|
||||||
@@ -338,7 +381,7 @@ typedef unsigned __int32 T_SOC;
|
|||||||
typedef int T_SOC;
|
typedef int T_SOC;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* IPV4, IPV6 and various unified structures */
|
/* Buffer size for a printed network address (IPv4 or IPv6, NUL included). */
|
||||||
#define HTS_MAXADDRLEN 64
|
#define HTS_MAXADDRLEN 64
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
@@ -346,17 +389,22 @@ typedef int T_SOC;
|
|||||||
#define __cdecl
|
#define __cdecl
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* mode pour mkdir ET chmod (accès aux fichiers) */
|
/* Permission bits for created folders and files (mkdir and chmod).
|
||||||
|
PROTECT_FOLDER is owner-only. With HTS_ACCESS set (the default) the ACCESS_
|
||||||
|
modes also grant group/other read; otherwise they stay owner-only. */
|
||||||
#define HTS_PROTECT_FOLDER (S_IRUSR|S_IWUSR|S_IXUSR)
|
#define HTS_PROTECT_FOLDER (S_IRUSR|S_IWUSR|S_IXUSR)
|
||||||
|
|
||||||
#if HTS_ACCESS
|
#if HTS_ACCESS
|
||||||
#define HTS_ACCESS_FILE (S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)
|
#define HTS_ACCESS_FILE (S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)
|
||||||
|
|
||||||
#define HTS_ACCESS_FOLDER (S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
|
#define HTS_ACCESS_FOLDER (S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
|
||||||
#else
|
#else
|
||||||
#define HTS_ACCESS_FILE (S_IRUSR|S_IWUSR)
|
#define HTS_ACCESS_FILE (S_IRUSR|S_IWUSR)
|
||||||
|
|
||||||
#define HTS_ACCESS_FOLDER (S_IRUSR|S_IWUSR|S_IXUSR)
|
#define HTS_ACCESS_FOLDER (S_IRUSR|S_IWUSR|S_IXUSR)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* vérifier la déclaration des variables préprocesseur */
|
/* Sanity-check that the required preprocessor switches are defined */
|
||||||
#ifndef HTS_DOSNAME
|
#ifndef HTS_DOSNAME
|
||||||
#error | HTS_DOSNAME Has not been defined.
|
#error | HTS_DOSNAME Has not been defined.
|
||||||
#error | Set it to 1 if you are under DOS, 0 under Unix.
|
#error | Set it to 1 if you are under DOS, 0 under Unix.
|
||||||
@@ -366,7 +414,7 @@ typedef int T_SOC;
|
|||||||
#error
|
#error
|
||||||
#endif
|
#endif
|
||||||
#ifndef HTS_ACCESS
|
#ifndef HTS_ACCESS
|
||||||
/* Par défaut, accès à tous les utilisateurs */
|
/* Default: files readable by all users */
|
||||||
#define HTS_ACCESS 1
|
#define HTS_ACCESS 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -375,13 +423,13 @@ typedef int T_SOC;
|
|||||||
|
|
||||||
/* HTSLib */
|
/* HTSLib */
|
||||||
|
|
||||||
// Cache DNS, accélère les résolution d'adresses
|
// Enable the DNS cache (speeds up address resolution)
|
||||||
#define HTS_DNSCACHE 1
|
#define HTS_DNSCACHE 1
|
||||||
|
|
||||||
// ID d'une pseudo-socket locale pour les file://
|
// Pseudo-socket id standing in for a local file:// transfer
|
||||||
#define LOCAL_SOCKET_ID -2
|
#define LOCAL_SOCKET_ID -2
|
||||||
|
|
||||||
// taille de chaque buffer (10 sockets 650 ko)
|
// Per-connection transfer buffer size, in bytes
|
||||||
#define TAILLE_BUFFER 65536
|
#define TAILLE_BUFFER 65536
|
||||||
|
|
||||||
#ifdef HTS_DO_NOT_USE_PTHREAD
|
#ifdef HTS_DO_NOT_USE_PTHREAD
|
||||||
@@ -405,6 +453,7 @@ struct mlink {
|
|||||||
int id;
|
int id;
|
||||||
struct mlink *next;
|
struct mlink *next;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const t_htsboundary htsboundary = 0xDEADBEEF;
|
static const t_htsboundary htsboundary = 0xDEADBEEF;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
@@ -418,7 +467,7 @@ static const t_htsboundary htsboundary = 0xDEADBEEF;
|
|||||||
/* Debugging */
|
/* Debugging */
|
||||||
/* ------------------------------------------------------------ */
|
/* ------------------------------------------------------------ */
|
||||||
|
|
||||||
// débuggage types
|
// type-detection debug
|
||||||
#define DEBUG_SHOWTYPES 0
|
#define DEBUG_SHOWTYPES 0
|
||||||
// backing debug
|
// backing debug
|
||||||
#define BDEBUG 0
|
#define BDEBUG 0
|
||||||
@@ -436,28 +485,28 @@ static const t_htsboundary htsboundary = 0xDEADBEEF;
|
|||||||
#define DEBUG_ROBOTS 0
|
#define DEBUG_ROBOTS 0
|
||||||
// debug hash
|
// debug hash
|
||||||
#define DEBUG_HASH 0
|
#define DEBUG_HASH 0
|
||||||
// Vérification d'intégrité
|
// integrity-check debug
|
||||||
#define DEBUG_CHECKINT 0
|
#define DEBUG_CHECKINT 0
|
||||||
// nbr sockets debug
|
// nbr sockets debug
|
||||||
#define NSDEBUG 0
|
#define NSDEBUG 0
|
||||||
|
|
||||||
// débuggage HTSLib
|
// HTSLib debug
|
||||||
#define HDEBUG 0
|
#define HDEBUG 0
|
||||||
// connection monitoring debug
|
// connection monitoring debug
|
||||||
#define CNXDEBUG 0
|
#define CNXDEBUG 0
|
||||||
// cookies debug
|
// cookies debug
|
||||||
#define DEBUG_COOK 0
|
#define DEBUG_COOK 0
|
||||||
// débuggage hard..
|
// heavy/low-level debug
|
||||||
#define HTS_WIDE_DEBUG 0
|
#define HTS_WIDE_DEBUG 0
|
||||||
// deletehttp & co. debug
|
// deletehttp & co. debug
|
||||||
#define HTS_DEBUG_CLOSESOCK 0
|
#define HTS_DEBUG_CLOSESOCK 0
|
||||||
// debug tracage mémoire
|
// memory-tracing debug
|
||||||
#define MEMDEBUG 0
|
#define MEMDEBUG 0
|
||||||
|
|
||||||
// htsmain
|
// htsmain
|
||||||
#define DEBUG_STEPS 0
|
#define DEBUG_STEPS 0
|
||||||
|
|
||||||
// Débuggage de contrôle
|
// Derived debug control switches
|
||||||
#if HTS_DEBUG_CLOSESOCK
|
#if HTS_DEBUG_CLOSESOCK
|
||||||
#define _HTS_WIDE 1
|
#define _HTS_WIDE 1
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -31,10 +31,15 @@ Please visit our Website: http://www.httrack.com
|
|||||||
/* Author: Xavier Roche */
|
/* Author: Xavier Roche */
|
||||||
/* ------------------------------------------------------------ */
|
/* ------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/** @file htsmodules.h
|
||||||
|
Loadable-parser (external module) interface. The engine hands a downloaded
|
||||||
|
object to a module via htsmoduleStruct; the module reports discovered links
|
||||||
|
back through the addLink callback. */
|
||||||
|
|
||||||
#ifndef HTS_MODULES
|
#ifndef HTS_MODULES
|
||||||
#define HTS_MODULES
|
#define HTS_MODULES
|
||||||
|
|
||||||
/* Forware definitions */
|
/* Forward definitions */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_lien_url
|
#ifndef HTS_DEF_FWSTRUCT_lien_url
|
||||||
#define HTS_DEF_FWSTRUCT_lien_url
|
#define HTS_DEF_FWSTRUCT_lien_url
|
||||||
typedef struct lien_url lien_url;
|
typedef struct lien_url lien_url;
|
||||||
@@ -56,18 +61,18 @@ typedef struct cache_back cache_back;
|
|||||||
typedef struct hash_struct hash_struct;
|
typedef struct hash_struct hash_struct;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Function type to add links inside the module
|
/** Callback a module invokes to report a discovered link.
|
||||||
link : link to add (absolute or relative)
|
str: the per-object context the module was called with.
|
||||||
str : structure defined below
|
link: link to add (absolute or relative); the engine copies it.
|
||||||
Returns 1 if the link was added, 0 if not
|
Returns 1 if the engine accepted/queued the link, 0 if it was rejected. */
|
||||||
*/
|
|
||||||
#ifndef HTS_DEF_FWSTRUCT_htsmoduleStruct
|
#ifndef HTS_DEF_FWSTRUCT_htsmoduleStruct
|
||||||
#define HTS_DEF_FWSTRUCT_htsmoduleStruct
|
#define HTS_DEF_FWSTRUCT_htsmoduleStruct
|
||||||
typedef struct htsmoduleStruct htsmoduleStruct;
|
typedef struct htsmoduleStruct htsmoduleStruct;
|
||||||
#endif
|
#endif
|
||||||
typedef int (*t_htsAddLink) (htsmoduleStruct * str, char *link);
|
typedef int (*t_htsAddLink) (htsmoduleStruct * str, char *link);
|
||||||
|
|
||||||
/* Structure passed to the module */
|
/** Per-object context passed to a parser module for one downloaded file.
|
||||||
|
Field access classes are noted; engine owns all pointers unless stated. */
|
||||||
struct htsmoduleStruct {
|
struct htsmoduleStruct {
|
||||||
/* Read-only elements */
|
/* Read-only elements */
|
||||||
const char *filename; /* filename (C:\My Web Sites\...) */
|
const char *filename; /* filename (C:\My Web Sites\...) */
|
||||||
@@ -119,21 +124,39 @@ struct htsmoduleStruct {
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Used to wrap module initialization */
|
/** Module lifecycle hooks. Init/PlugInit return 1 on success, 0 on failure;
|
||||||
/* return 1 if init was ok */
|
Exit returns its own status (ignored by the engine). */
|
||||||
typedef int (*t_htsWrapperInit) (char *fn, char *args);
|
typedef int (*t_htsWrapperInit) (char *fn, char *args);
|
||||||
|
|
||||||
typedef int (*t_htsWrapperExit) (void);
|
typedef int (*t_htsWrapperExit) (void);
|
||||||
|
|
||||||
typedef int (*t_htsWrapperPlugInit) (char *args);
|
typedef int (*t_htsWrapperPlugInit) (char *args);
|
||||||
|
|
||||||
/* Library internal definitions */
|
/* Library internal definitions */
|
||||||
#ifdef HTS_INTERNAL_BYTECODE
|
#ifdef HTS_INTERNAL_BYTECODE
|
||||||
|
|
||||||
|
/** Capabilities string ("-noV6", "-nossl", ...) followed by "+name" for each
|
||||||
|
loaded module. Returned pointer aliases opt->state.HTbuff; do not free, and
|
||||||
|
it is overwritten by the next call. */
|
||||||
HTSEXT_API const char *hts_get_version_info(httrackp * opt);
|
HTSEXT_API const char *hts_get_version_info(httrackp * opt);
|
||||||
|
|
||||||
|
/** Static capabilities string set by htspe_init(); valid for the process
|
||||||
|
lifetime, do not free. */
|
||||||
HTSEXT_API const char *hts_is_available(void);
|
HTSEXT_API const char *hts_is_available(void);
|
||||||
|
|
||||||
|
/** Initialize the module subsystem (idempotent): builds the capabilities
|
||||||
|
string and, on Windows, hardens the DLL search path. */
|
||||||
extern void htspe_init(void);
|
extern void htspe_init(void);
|
||||||
|
|
||||||
|
/** Tear-down counterpart of htspe_init(); currently a no-op. */
|
||||||
extern void htspe_uninit(void);
|
extern void htspe_uninit(void);
|
||||||
|
|
||||||
|
/** Run the external-parser callbacks for the object described by str.
|
||||||
|
Returns the parse callback result (>=0) on a handled object, or -1 if no
|
||||||
|
module claimed it or its wrapper_name is blacklisted. */
|
||||||
extern int hts_parse_externals(htsmoduleStruct * str);
|
extern int hts_parse_externals(htsmoduleStruct * str);
|
||||||
|
|
||||||
/*extern int swf_is_available;*/
|
/** Nonzero if IPv6 support was compiled in (== HTS_INET6). */
|
||||||
extern int V6_is_available;
|
extern int V6_is_available;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
89
src/htsnet.h
89
src/htsnet.h
@@ -32,6 +32,11 @@ Please visit our Website: http://www.httrack.com
|
|||||||
/* Author: Xavier Roche */
|
/* Author: Xavier Roche */
|
||||||
/* ------------------------------------------------------------ */
|
/* ------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/** @file htsnet.h
|
||||||
|
Socket/connection layer. Provides SOCaddr, an opaque IPv4/IPv6
|
||||||
|
socket-address wrapper, plus accessor macros so callers never branch on
|
||||||
|
address family. Builds on htsbasenet.h. */
|
||||||
|
|
||||||
#ifndef HTS_DEFNETH
|
#ifndef HTS_DEFNETH
|
||||||
#define HTS_DEFNETH
|
#define HTS_DEFNETH
|
||||||
|
|
||||||
@@ -43,32 +48,32 @@ Please visit our Website: http://www.httrack.com
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
// pour read
|
// for read
|
||||||
#include <io.h>
|
#include <io.h>
|
||||||
// pour FindFirstFile
|
// for FindFirstFile
|
||||||
#include <winbase.h>
|
#include <winbase.h>
|
||||||
typedef USHORT in_port_t;
|
typedef USHORT in_port_t;
|
||||||
|
|
||||||
typedef ADDRESS_FAMILY sa_family_t;
|
typedef ADDRESS_FAMILY sa_family_t;
|
||||||
#else
|
#else
|
||||||
//typedef int T_SOC;
|
|
||||||
#define INVALID_SOCKET -1
|
#define INVALID_SOCKET -1
|
||||||
#include <netdb.h>
|
#include <netdb.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
#include <netinet/in.h>
|
#include <netinet/in.h>
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
/* Force for sun env. */
|
/* Force BSD_COMP for Sun environments. */
|
||||||
#ifndef BSD_COMP
|
#ifndef BSD_COMP
|
||||||
#define BSD_COMP
|
#define BSD_COMP
|
||||||
#endif
|
#endif
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
/* gethostname & co */
|
/* gethostname & co */
|
||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
/* inet_addr */
|
/* inet_addr */
|
||||||
#include <arpa/inet.h>
|
#include <arpa/inet.h>
|
||||||
// pas la peine normalement..
|
/* normally not needed; provide in_addr_t where the platform lacks it */
|
||||||
#ifndef HTS_DO_NOT_REDEFINE_in_addr_t
|
#ifndef HTS_DO_NOT_REDEFINE_in_addr_t
|
||||||
typedef unsigned long in_addr_t;
|
typedef unsigned long in_addr_t;
|
||||||
#endif
|
#endif
|
||||||
@@ -78,14 +83,16 @@ typedef unsigned long in_addr_t;
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Ipv4 structures */
|
/** Raw IP address type: in6_addr when IPv6 is enabled, else in_addr. */
|
||||||
#if HTS_INET6 != 0
|
#if HTS_INET6 != 0
|
||||||
typedef struct in6_addr INaddr;
|
typedef struct in6_addr INaddr;
|
||||||
#else
|
#else
|
||||||
typedef struct in_addr INaddr;
|
typedef struct in_addr INaddr;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* This should handle all cases */
|
/** Opaque socket address holding either an IPv4 or IPv6 endpoint. Use the
|
||||||
|
SOCaddr_* accessors rather than touching m_addr; sa_family selects the
|
||||||
|
active union member. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_SOCaddr
|
#ifndef HTS_DEF_FWSTRUCT_SOCaddr
|
||||||
#define HTS_DEF_FWSTRUCT_SOCaddr
|
#define HTS_DEF_FWSTRUCT_SOCaddr
|
||||||
typedef struct SOCaddr SOCaddr;
|
typedef struct SOCaddr SOCaddr;
|
||||||
@@ -103,6 +110,8 @@ struct SOCaddr {
|
|||||||
} m_addr;
|
} m_addr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Pointer to the port field (network byte order) for the active family.
|
||||||
|
Asserts on NULL or an unset/unknown family. */
|
||||||
static HTS_INLINE HTS_UNUSED in_port_t* SOCaddr_sinport_(SOCaddr *const addr,
|
static HTS_INLINE HTS_UNUSED in_port_t* SOCaddr_sinport_(SOCaddr *const addr,
|
||||||
const char *file, const int line) {
|
const char *file, const int line) {
|
||||||
assertf_(addr != NULL, file, line);
|
assertf_(addr != NULL, file, line);
|
||||||
@@ -122,6 +131,8 @@ static HTS_INLINE HTS_UNUSED in_port_t* SOCaddr_sinport_(SOCaddr *const addr,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Length of the active sockaddr (sockaddr_in or sockaddr_in6), or 0 if the
|
||||||
|
family is unset/unknown. The 0 case doubles as the "not valid" test. */
|
||||||
static HTS_INLINE HTS_UNUSED socklen_t SOCaddr_size_(const SOCaddr*const addr,
|
static HTS_INLINE HTS_UNUSED socklen_t SOCaddr_size_(const SOCaddr*const addr,
|
||||||
const char *file, const int line) {
|
const char *file, const int line) {
|
||||||
assertf_(addr != NULL, file, line);
|
assertf_(addr != NULL, file, line);
|
||||||
@@ -140,33 +151,52 @@ static HTS_INLINE HTS_UNUSED socklen_t SOCaddr_size_(const SOCaddr*const addr,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Reset to the unset state (family AF_UNSPEC), making the address invalid. */
|
||||||
static HTS_INLINE HTS_UNUSED void SOCaddr_clear_(SOCaddr*const addr,
|
static HTS_INLINE HTS_UNUSED void SOCaddr_clear_(SOCaddr*const addr,
|
||||||
const char *file, const int line) {
|
const char *file, const int line) {
|
||||||
assertf_(addr != NULL, file, line);
|
assertf_(addr != NULL, file, line);
|
||||||
addr->m_addr.sa.sa_family = AF_UNSPEC;
|
addr->m_addr.sa.sa_family = AF_UNSPEC;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Ipv4/6 structure members */
|
/* SOCaddr accessors; server is an lvalue SOCaddr, not a pointer. */
|
||||||
#define SOCaddr_sinfamily(server) ((server).m_addr.sa.sa_family)
|
#define SOCaddr_sinfamily(server) \
|
||||||
#define SOCaddr_sinport(server) (*SOCaddr_sinport_(&(server), __FILE__, __LINE__))
|
((server).m_addr.sa.sa_family) /* AF_INET / AF_INET6 */
|
||||||
#define SOCaddr_size(server) (SOCaddr_size_(&(server), __FILE__, __LINE__))
|
|
||||||
#define SOCaddr_is_valid(server) (SOCaddr_size_(&(server), __FILE__, __LINE__) != 0 )
|
|
||||||
#define SOCaddr_clear(server) SOCaddr_clear_(&(server), __FILE__, __LINE__)
|
|
||||||
#define SOCaddr_sockaddr(server) ((server).m_addr.sa)
|
|
||||||
#define SOCaddr_capacity(server) sizeof((server).m_addr)
|
|
||||||
|
|
||||||
/* AF_xx */
|
#define SOCaddr_sinport(server) \
|
||||||
|
(*SOCaddr_sinport_(&(server), __FILE__, \
|
||||||
|
__LINE__)) /* port lvalue (network order) */
|
||||||
|
|
||||||
|
#define SOCaddr_size(server) \
|
||||||
|
(SOCaddr_size_(&(server), __FILE__, __LINE__)) /* active sockaddr length */
|
||||||
|
|
||||||
|
#define SOCaddr_is_valid(server) \
|
||||||
|
(SOCaddr_size_(&(server), __FILE__, __LINE__) != \
|
||||||
|
0) /* nonzero if family is set */
|
||||||
|
|
||||||
|
#define SOCaddr_clear(server) SOCaddr_clear_(&(server), __FILE__, __LINE__)
|
||||||
|
|
||||||
|
#define SOCaddr_sockaddr(server) \
|
||||||
|
((server).m_addr.sa) /* generic struct sockaddr view */
|
||||||
|
|
||||||
|
#define SOCaddr_capacity(server) \
|
||||||
|
sizeof((server).m_addr) /* full union size, for recvfrom() etc. */
|
||||||
|
|
||||||
|
/** Address family to bind/listen with: AF_INET6 when IPv6 is enabled (dual
|
||||||
|
stack), else AF_INET. */
|
||||||
#if HTS_INET6 != 0
|
#if HTS_INET6 != 0
|
||||||
#define AFinet AF_INET6
|
#define AFinet AF_INET6
|
||||||
#else
|
#else
|
||||||
#define AFinet AF_INET
|
#define AFinet AF_INET
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Set port to sockaddr structure */
|
/** Set the port (host-order argument, stored network-order) on the active
|
||||||
|
* family. */
|
||||||
#define SOCaddr_initport(server, port) do { \
|
#define SOCaddr_initport(server, port) do { \
|
||||||
SOCaddr_sinport(server) = htons((in_port_t) (port)); \
|
SOCaddr_sinport(server) = htons((in_port_t) (port)); \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
|
/** Initialize as an all-zero IPv4 wildcard (INADDR_ANY) address; returns its
|
||||||
|
sockaddr length. */
|
||||||
static HTS_INLINE HTS_UNUSED socklen_t SOCaddr_initany_(SOCaddr*const addr,
|
static HTS_INLINE HTS_UNUSED socklen_t SOCaddr_initany_(SOCaddr*const addr,
|
||||||
const char *file, const int line) {
|
const char *file, const int line) {
|
||||||
assertf_(addr != NULL, file, line);
|
assertf_(addr != NULL, file, line);
|
||||||
@@ -175,13 +205,15 @@ static HTS_INLINE HTS_UNUSED socklen_t SOCaddr_initany_(SOCaddr*const addr,
|
|||||||
return SOCaddr_size_(addr, file, line);
|
return SOCaddr_size_(addr, file, line);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Initialize server as an IPv4 wildcard (INADDR_ANY) address. */
|
||||||
#define SOCaddr_initany(server) do { \
|
#define SOCaddr_initany(server) do { \
|
||||||
SOCaddr_initany_(&(server), __FILE__, __LINE__); \
|
SOCaddr_initany_(&(server), __FILE__, __LINE__); \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
/*
|
/** Populate server from data. data_size selects the source form: a full
|
||||||
Copy sockaddr_in/sockaddr_in6/raw IPv4/raw IPv6 to our opaque SOCaddr
|
sockaddr_in / sockaddr_in6, or a raw 4-byte (IPv4) / 16-byte (IPv6) address
|
||||||
*/
|
with port zeroed. Any other size leaves an AF_INET shell. Returns the
|
||||||
|
resulting sockaddr length. */
|
||||||
static HTS_UNUSED socklen_t SOCaddr_copyaddr_(SOCaddr*const server,
|
static HTS_UNUSED socklen_t SOCaddr_copyaddr_(SOCaddr*const server,
|
||||||
const void *data, const size_t data_size,
|
const void *data, const size_t data_size,
|
||||||
const char *file, const int line) {
|
const char *file, const int line) {
|
||||||
@@ -214,20 +246,24 @@ static HTS_UNUSED socklen_t SOCaddr_copyaddr_(SOCaddr*const server,
|
|||||||
return SOCaddr_size_(server, file, line);
|
return SOCaddr_size_(server, file, line);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Copy hpaddr (length hpsize) into server, writing the result length into the
|
||||||
|
lvalue server_len (int). See SOCaddr_copyaddr_ for accepted forms. */
|
||||||
#define SOCaddr_copyaddr(server, server_len, hpaddr, hpsize) do { \
|
#define SOCaddr_copyaddr(server, server_len, hpaddr, hpsize) do { \
|
||||||
server_len = (int) SOCaddr_copyaddr_(&(server), hpaddr, hpsize, __FILE__, __LINE__); \
|
server_len = (int) SOCaddr_copyaddr_(&(server), hpaddr, hpsize, __FILE__, __LINE__); \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
|
/** Like SOCaddr_copyaddr but discards the result length. */
|
||||||
#define SOCaddr_copyaddr2(server, hpaddr, hpsize) do { \
|
#define SOCaddr_copyaddr2(server, hpaddr, hpsize) do { \
|
||||||
(void) SOCaddr_copyaddr_(&(server), hpaddr, hpsize, __FILE__, __LINE__); \
|
(void) SOCaddr_copyaddr_(&(server), hpaddr, hpsize, __FILE__, __LINE__); \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
|
/** Copy one SOCaddr (src) into another (dest), preserving family and port. */
|
||||||
#define SOCaddr_copy_SOCaddr(dest, src) do { \
|
#define SOCaddr_copy_SOCaddr(dest, src) do { \
|
||||||
SOCaddr_copyaddr_(&(dest), &(src).m_addr.sa, SOCaddr_size(src), __FILE__, __LINE__); \
|
SOCaddr_copyaddr_(&(dest), &(src).m_addr.sa, SOCaddr_size(src), __FILE__, __LINE__); \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
/* Get dotted address */
|
/** Write the numeric (dotted/colon) host of ss into namebuf (capacity
|
||||||
|
namebuflen), scope id stripped. On failure namebuf becomes "". */
|
||||||
static HTS_UNUSED void SOCaddr_inetntoa_(char *namebuf, size_t namebuflen,
|
static HTS_UNUSED void SOCaddr_inetntoa_(char *namebuf, size_t namebuflen,
|
||||||
SOCaddr *const ss,
|
SOCaddr *const ss,
|
||||||
const char *file, const int line) {
|
const char *file, const int line) {
|
||||||
@@ -248,13 +284,14 @@ static HTS_UNUSED void SOCaddr_inetntoa_(char *namebuf, size_t namebuflen,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Numeric host of ss into namebuf (capacity namebuflen); "" on failure. */
|
||||||
#define SOCaddr_inetntoa(namebuf, namebuflen, ss) \
|
#define SOCaddr_inetntoa(namebuf, namebuflen, ss) \
|
||||||
SOCaddr_inetntoa_(namebuf, namebuflen, &(ss), __FILE__, __LINE__)
|
SOCaddr_inetntoa_(namebuf, namebuflen, &(ss), __FILE__, __LINE__)
|
||||||
|
|
||||||
/* Get protocol ID */
|
/** Single-char family tag: '1' for IPv4, '2' otherwise (used in the cache). */
|
||||||
#define SOCaddr_getproto(ss) ( SOCaddr_size(ss) == sizeof(struct sockaddr_in) ? '1' : '2')
|
#define SOCaddr_getproto(ss) ( SOCaddr_size(ss) == sizeof(struct sockaddr_in) ? '1' : '2')
|
||||||
|
|
||||||
/* Socket length type */
|
/** Length type for socket APIs (getsockname, accept, ...). */
|
||||||
typedef socklen_t SOClen;
|
typedef socklen_t SOClen;
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
533
src/htsopt.h
533
src/htsopt.h
@@ -81,38 +81,41 @@ struct String {
|
|||||||
|
|
||||||
/* Defines */
|
/* Defines */
|
||||||
#define CATBUFF_SIZE (STRING_SIZE*2*2)
|
#define CATBUFF_SIZE (STRING_SIZE*2*2)
|
||||||
|
|
||||||
#define STRING_SIZE 2048
|
#define STRING_SIZE 2048
|
||||||
|
|
||||||
/* Proxy structure */
|
/* Proxy configuration. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_t_proxy
|
#ifndef HTS_DEF_FWSTRUCT_t_proxy
|
||||||
#define HTS_DEF_FWSTRUCT_t_proxy
|
#define HTS_DEF_FWSTRUCT_t_proxy
|
||||||
typedef struct t_proxy t_proxy;
|
typedef struct t_proxy t_proxy;
|
||||||
#endif
|
#endif
|
||||||
struct t_proxy {
|
struct t_proxy {
|
||||||
int active;
|
int active; /**< nonzero if a proxy is configured */
|
||||||
String name;
|
String name; /**< proxy host name */
|
||||||
int port;
|
int port; /**< proxy port */
|
||||||
String bindhost; // bind this host
|
String bindhost; /**< local address to bind the outgoing socket to */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Structure utile pour copier en bloc les paramètres */
|
/* Bundle of filter pointers, kept together for bulk copy. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_htsfilters
|
#ifndef HTS_DEF_FWSTRUCT_htsfilters
|
||||||
#define HTS_DEF_FWSTRUCT_htsfilters
|
#define HTS_DEF_FWSTRUCT_htsfilters
|
||||||
typedef struct htsfilters htsfilters;
|
typedef struct htsfilters htsfilters;
|
||||||
#endif
|
#endif
|
||||||
struct htsfilters {
|
struct htsfilters {
|
||||||
char ***filters;
|
char ***filters; /**< pointer to the +/-pattern filter array */
|
||||||
int *filptr;
|
int *filptr; /**< pointer to the current filter count */
|
||||||
//int* filter_max;
|
// int* filter_max;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* User callbacks chain */
|
/* User callbacks chain */
|
||||||
typedef int (*htscallbacksfncptr) (void);
|
typedef int (*htscallbacksfncptr) (void);
|
||||||
|
|
||||||
typedef struct htscallbacks htscallbacks;
|
typedef struct htscallbacks htscallbacks;
|
||||||
|
|
||||||
struct htscallbacks {
|
struct htscallbacks {
|
||||||
void *moduleHandle;
|
void *moduleHandle; /**< handle of the module that registered the callback */
|
||||||
htscallbacksfncptr exitFnc;
|
htscallbacksfncptr exitFnc; /**< function to run on engine exit */
|
||||||
htscallbacks *next;
|
htscallbacks *next; /**< next entry in the callback chain */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* filenote() internal file structure */
|
/* filenote() internal file structure */
|
||||||
@@ -188,14 +191,14 @@ typedef enum hts_log_type {
|
|||||||
} hts_log_type;
|
} hts_log_type;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Structure état du miroir */
|
/* Mirror cancellation list node. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_htsoptstatecancel
|
#ifndef HTS_DEF_FWSTRUCT_htsoptstatecancel
|
||||||
#define HTS_DEF_FWSTRUCT_htsoptstatecancel
|
#define HTS_DEF_FWSTRUCT_htsoptstatecancel
|
||||||
typedef struct htsoptstatecancel htsoptstatecancel;
|
typedef struct htsoptstatecancel htsoptstatecancel;
|
||||||
#endif
|
#endif
|
||||||
struct htsoptstatecancel {
|
struct htsoptstatecancel {
|
||||||
char *url;
|
char *url; /**< URL flagged to be cancelled */
|
||||||
htsoptstatecancel *next;
|
htsoptstatecancel *next; /**< next cancellation entry */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Mutexes */
|
/* Mutexes */
|
||||||
@@ -210,48 +213,48 @@ typedef struct htsmutex_s htsmutex_s, *htsmutex;
|
|||||||
typedef struct struct_coucal struct_coucal, *coucal;
|
typedef struct struct_coucal struct_coucal, *coucal;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Structure état du miroir */
|
/* Mirror runtime state (mutable engine state, not user options). */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_htsoptstate
|
#ifndef HTS_DEF_FWSTRUCT_htsoptstate
|
||||||
#define HTS_DEF_FWSTRUCT_htsoptstate
|
#define HTS_DEF_FWSTRUCT_htsoptstate
|
||||||
typedef struct htsoptstate htsoptstate;
|
typedef struct htsoptstate htsoptstate;
|
||||||
#endif
|
#endif
|
||||||
struct htsoptstate {
|
struct htsoptstate {
|
||||||
htsmutex lock; /* 3.41 */
|
htsmutex lock; /**< guards this state block */
|
||||||
/* */
|
/* */
|
||||||
int stop;
|
int stop; /**< set to request the mirror to stop */
|
||||||
int exit_xh;
|
int exit_xh;
|
||||||
int back_add_stats;
|
int back_add_stats;
|
||||||
/* */
|
/* */
|
||||||
int mimehtml_created;
|
int mimehtml_created; /**< MIME/MHTML output already started */
|
||||||
String mimemid;
|
String mimemid; /**< MIME multipart boundary id */
|
||||||
FILE *mimefp;
|
FILE *mimefp; /**< MIME/MHTML output file */
|
||||||
int delayedId;
|
int delayedId; /**< counter for delayed-type-check ids */
|
||||||
/* */
|
/* */
|
||||||
filenote_strc strc;
|
filenote_strc strc; /**< filenote() listing state */
|
||||||
/* Functions context (avoir thread variables!) */
|
/* Per-call function contexts (thread-local scratch, avoids globals) */
|
||||||
htscallbacks callbacks;
|
htscallbacks callbacks; /**< user callback chain head */
|
||||||
concat_strc concat;
|
concat_strc concat; /**< concat() rotating buffers */
|
||||||
usercommand_strc usercmd;
|
usercommand_strc usercmd; /**< pending user shell command */
|
||||||
fspc_strc fspc;
|
fspc_strc fspc; /**< error/warning/info counters */
|
||||||
char *userhttptype;
|
char *userhttptype;
|
||||||
int verif_backblue_done;
|
int verif_backblue_done; /**< backblue.gif/fade.gif already emitted */
|
||||||
int verif_external_status;
|
int verif_external_status;
|
||||||
t_dnscache *dns_cache;
|
t_dnscache *dns_cache; /**< DNS resolution cache */
|
||||||
int dns_cache_nthreads;
|
int dns_cache_nthreads; /**< number of in-flight DNS resolver threads */
|
||||||
/* HTML parsing state */
|
/* HTML parsing state */
|
||||||
char _hts_errmsg[HTS_CDLMAXSIZE + 256];
|
char _hts_errmsg[HTS_CDLMAXSIZE + 256]; /**< last engine error message */
|
||||||
int _hts_in_html_parsing;
|
int _hts_in_html_parsing;
|
||||||
int _hts_in_html_done;
|
int _hts_in_html_done;
|
||||||
int _hts_in_html_poll;
|
int _hts_in_html_poll;
|
||||||
int _hts_setpause;
|
int _hts_setpause;
|
||||||
int _hts_in_mirror;
|
int _hts_in_mirror; /**< nonzero while a mirror is running */
|
||||||
char **_hts_addurl;
|
char **_hts_addurl; /**< extra URLs to inject at runtime */
|
||||||
int _hts_cancel;
|
int _hts_cancel;
|
||||||
htsoptstatecancel *cancel; /* 3.41 */
|
htsoptstatecancel *cancel; /**< list of URLs flagged for cancellation */
|
||||||
char HTbuff[2048];
|
char HTbuff[2048];
|
||||||
unsigned int debug_state;
|
unsigned int debug_state;
|
||||||
unsigned int tmpnameid; /* 3.41 */
|
unsigned int tmpnameid; /**< counter for temporary file names */
|
||||||
int is_ended; /* 3.48-14 */
|
int is_ended; /**< mirror has finished */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Library handles */
|
/* Library handles */
|
||||||
@@ -264,12 +267,13 @@ typedef struct htslibhandles htslibhandles;
|
|||||||
typedef struct htslibhandle htslibhandle;
|
typedef struct htslibhandle htslibhandle;
|
||||||
#endif
|
#endif
|
||||||
struct htslibhandle {
|
struct htslibhandle {
|
||||||
char *moduleName;
|
char *moduleName; /**< name of a loaded external module */
|
||||||
void *handle;
|
void *handle; /**< dlopen() handle for it */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct htslibhandles {
|
struct htslibhandles {
|
||||||
int count;
|
int count; /**< number of loaded module handles */
|
||||||
htslibhandle *handles;
|
htslibhandle *handles; /**< array of loaded module handles */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Javascript parser flags */
|
/* Javascript parser flags */
|
||||||
@@ -286,176 +290,192 @@ typedef enum htsparsejava_flags {
|
|||||||
typedef struct lien_buffers lien_buffers;
|
typedef struct lien_buffers lien_buffers;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// paramètres httrack (options)
|
/*
|
||||||
|
* Per-mirror options and state block. This is the central HTTrack parameters
|
||||||
|
* structure: created by hts_create_opt(), it carries every tunable option for
|
||||||
|
* one mirror and embeds the live engine state, and is then consumed by
|
||||||
|
* hts_main2().
|
||||||
|
*
|
||||||
|
* Callers normally configure it through the command-line argv vector (the
|
||||||
|
* option parser), not by writing fields directly. The only fields real
|
||||||
|
* consumers poke directly are 'log' and 'errlog' (set either to NULL to
|
||||||
|
* silence logging).
|
||||||
|
*/
|
||||||
#ifndef HTS_DEF_FWSTRUCT_httrackp
|
#ifndef HTS_DEF_FWSTRUCT_httrackp
|
||||||
#define HTS_DEF_FWSTRUCT_httrackp
|
#define HTS_DEF_FWSTRUCT_httrackp
|
||||||
typedef struct httrackp httrackp;
|
typedef struct httrackp httrackp;
|
||||||
#endif
|
#endif
|
||||||
struct httrackp {
|
struct httrackp {
|
||||||
size_t size_httrackp; // size of this structure
|
size_t size_httrackp; /**< size of this structure (version/ABI guard) */
|
||||||
/* */
|
/* */
|
||||||
int wizard; // wizard aucun/grand/petit
|
int wizard; /**< interactive wizard level (none/full/light) */
|
||||||
int flush; // fflush sur les fichiers log
|
int flush; /**< fflush() log files after each write */
|
||||||
int travel; // type de déplacements (same domain etc)
|
int travel; /**< link-following scope (same domain, etc.) */
|
||||||
int seeker; // up & down
|
int seeker; /**< allowed direction: go up and/or down the tree */
|
||||||
int depth; // nombre de niveaux de récursion
|
int depth; /**< maximum recursion depth (-rN) */
|
||||||
int extdepth; // nombre de niveaux de récursion à l'éxtérieur
|
int extdepth; /**< maximum recursion depth outside the start domain */
|
||||||
int urlmode; // liens relatifs etc
|
int urlmode; /**< saved-link rewriting style (relative, absolute, etc.) */
|
||||||
int no_type_change; // do not change file type according to MIME
|
int no_type_change; // do not change file type according to MIME
|
||||||
int debug; // mode débug log
|
int debug; /**< debug logging level */
|
||||||
int getmode; // sauver html, images..
|
int getmode; /**< what to fetch (HTML, images, ...) bitmask */
|
||||||
FILE *log; // fichier log
|
FILE *log; /**< informational log stream; NULL mutes it */
|
||||||
FILE *errlog; // et erreur
|
FILE *errlog; /**< error log stream; NULL mutes it */
|
||||||
LLint maxsite; // taille max site
|
LLint maxsite; /**< max total bytes for the whole mirror */
|
||||||
LLint maxfile_nonhtml; // taille max non html
|
LLint maxfile_nonhtml; /**< max bytes per non-HTML file */
|
||||||
LLint maxfile_html; // taille max html
|
LLint maxfile_html; /**< max bytes per HTML file */
|
||||||
int maxsoc; // nbre sockets
|
int maxsoc; /**< max simultaneous sockets (-cN) */
|
||||||
LLint fragment; // fragmentation d'un site
|
LLint fragment; /**< split site after this many bytes */
|
||||||
int nearlink; // prendre les images/data proche d'une page mais à l'extérieur
|
int nearlink; /**< also fetch images/data adjacent to a page but off-site */
|
||||||
int makeindex; // faire un index
|
int makeindex; /**< build a top-level index.html */
|
||||||
int kindex; // et un index 'keyword'
|
int kindex; /**< build a keyword index */
|
||||||
int delete_old; // effacer anciens fichiers
|
int delete_old; /**< delete locally obsolete files after update */
|
||||||
int timeout; // nombre de secondes de timeout
|
int timeout; /**< connection timeout in seconds */
|
||||||
int rateout; // nombre d'octets minium pour le transfert
|
int rateout; /**< minimum transfer rate (bytes/s) before abort */
|
||||||
int maxtime; // temps max en secondes
|
int maxtime; /**< max total mirror duration in seconds */
|
||||||
int maxrate; // taux de transfert max
|
int maxrate; /**< max transfer rate cap (bytes/s) */
|
||||||
float maxconn; // nombre max de connexions/s
|
float maxconn; /**< max connections per second */
|
||||||
int waittime; // démarrage programmé
|
int waittime; /**< scheduled start time (wall-clock seconds) */
|
||||||
int cache; // génération d'un cache
|
int cache; /**< cache generation mode */
|
||||||
//int aff_progress; // barre de progression
|
// int aff_progress; // progress bar
|
||||||
int shell; // gestion d'un shell par pipe stdin/stdout
|
int shell; /**< driven by a shell over stdin/stdout pipes */
|
||||||
t_proxy proxy; // configuration du proxy
|
t_proxy proxy; /**< proxy configuration */
|
||||||
int savename_83; // conversion 8-3 pour les noms de fichiers
|
int savename_83; /**< force 8.3 (DOS) file names */
|
||||||
int savename_type; // type de noms: structure originale/html-images en un seul niveau
|
int savename_type; /**< saved-name layout (original tree, flat, ...) */
|
||||||
String savename_userdef; // structure userdef (ex: %h%p/%n%q.%t)
|
String
|
||||||
|
savename_userdef; /**< user-defined name template (e.g. %h%p/%n%q.%t) */
|
||||||
int savename_delayed; // delayed type check
|
int savename_delayed; // delayed type check
|
||||||
int delayed_cached; // delayed type check can be cached to speedup updates
|
int delayed_cached; // delayed type check can be cached to speedup updates
|
||||||
int mimehtml; // MIME-html
|
int mimehtml; /**< produce a single MIME/MHTML archive */
|
||||||
int user_agent_send; // user agent (ex: httrack/1.0 [sun])
|
int user_agent_send; /**< send a User-Agent header */
|
||||||
String user_agent; //
|
String user_agent; /**< User-Agent value (e.g. httrack/1.0) */
|
||||||
String referer; // referer
|
String referer; /**< Referer value to send */
|
||||||
String from; // from
|
String from; /**< From value to send */
|
||||||
String path_log; // chemin pour cache et log
|
String path_log; /**< directory for cache and logs */
|
||||||
String path_html; // chemin pour miroir
|
String path_html; /**< output directory for the mirror */
|
||||||
String path_html_utf8; // chemin pour miroir, UTF-8
|
String path_html_utf8; /**< output directory for the mirror, UTF-8 form */
|
||||||
String path_bin; // chemin pour templates
|
String path_bin; /**< directory for HTML templates */
|
||||||
int retry; // nombre d'essais supplémentaires en cas d'échec
|
int retry; /**< extra retries on a failed transfer */
|
||||||
int makestat; // mettre à jour un fichier log de statistiques de transfert
|
int makestat; /**< maintain a transfer-statistics log */
|
||||||
int maketrack; // mettre à jour un fichier log de statistiques d'opérations
|
int maketrack; /**< maintain an operations-statistics log */
|
||||||
int parsejava; // parsing des classes java pour récupérer les class, gif & cie ; see htsparsejava_flags
|
int parsejava; /**< Java/JS parsing mode; see htsparsejava_flags */
|
||||||
int hostcontrol; // abandon d'un host trop lent etc.
|
int hostcontrol; /**< drop hosts that are too slow, etc. */
|
||||||
int errpage; // générer une page d'erreur en cas de 404 etc.
|
int errpage; /**< generate an error page on 404 and similar */
|
||||||
int check_type; // si type inconnu (cgi,asp,/) alors tester lien (et gérer moved éventuellement)
|
int check_type; /**< probe unknown-type links (cgi/asp/dir) and follow moves
|
||||||
int all_in_cache; // tout mettre en cache!
|
*/
|
||||||
int robots; // traitement des robots
|
int all_in_cache; /**< keep all retrieved data in the cache */
|
||||||
int external; // pages externes->pages d'erreur
|
int robots; /**< robots.txt handling level */
|
||||||
int passprivacy; // pas de mot de pass dans les liens externes?
|
int external; /**< render external links as error pages */
|
||||||
int includequery; // include la query-string
|
int passprivacy; /**< strip passwords from external links */
|
||||||
int mirror_first_page; // miroir des liens
|
int includequery; /**< include the query string in saved names */
|
||||||
String sys_com; // commande système
|
int mirror_first_page; /**< only mirror the links of the first page */
|
||||||
int sys_com_exec; // executer commande
|
String sys_com; /**< system command to run */
|
||||||
int accept_cookie; // gestion des cookies
|
int sys_com_exec; /**< actually execute sys_com */
|
||||||
t_cookie *cookie;
|
int accept_cookie; /**< accept and send cookies */
|
||||||
int http10; // forcer http 1.0
|
t_cookie *cookie; /**< cookie store */
|
||||||
int nokeepalive; // pas de keep-alive
|
int http10; /**< force HTTP/1.0 */
|
||||||
int nocompression; // pas de compression
|
int nokeepalive; /**< disable keep-alive */
|
||||||
int sizehack; // forcer réponse "mis à jour" si taille identique
|
int nocompression; /**< disable content compression */
|
||||||
|
int sizehack; /**< treat same-size response as "updated" */
|
||||||
int urlhack; // force "url normalization" to avoid loops
|
int urlhack; // force "url normalization" to avoid loops
|
||||||
int tolerant; // accepter content-length incorrect
|
int tolerant; /**< accept an incorrect Content-Length */
|
||||||
int parseall; // essayer de tout parser (tags inconnus contenant des liens, par exemple)
|
int parseall; /**< parse aggressively, including unknown tags with links */
|
||||||
int parsedebug; // débugger parser (debug!)
|
int parsedebug; /**< parser debug mode */
|
||||||
int norecatch; // ne pas reprendre les fichiers effacés localement par l'utilisateur
|
int norecatch; /**< do not re-fetch files the user deleted locally */
|
||||||
int verbosedisplay; // animation textuelle
|
int verbosedisplay; /**< animated text progress display */
|
||||||
String footer; // ligne d'infos
|
String footer; /**< footer/info line injected into pages */
|
||||||
int maxcache; // maximum en mémoire au niveau du cache (backing)
|
int maxcache; /**< in-memory cache backing limit (bytes) */
|
||||||
//int maxcache_anticipate; // maximum de liens à anticiper (majorant)
|
// int maxcache_anticipate; // maximum links to anticipate (upper bound)
|
||||||
int ftp_proxy; // proxy http pour ftp
|
int ftp_proxy; /**< use the HTTP proxy for FTP too */
|
||||||
String filelist; // fichier liste URL à inclure
|
String filelist; /**< file listing URLs to include */
|
||||||
String urllist; // fichier liste de filtres à inclure
|
String urllist; /**< file listing filters to include */
|
||||||
htsfilters filters; // contient les pointeurs pour les filtres
|
htsfilters filters; /**< filter pointers (+/-pattern rules) */
|
||||||
hash_struct *hash; // hash structure
|
hash_struct *hash; // hash structure
|
||||||
lien_url **liens; // links
|
lien_url **liens; // links
|
||||||
int lien_tot; // top index of "links" heap (always out-of-range)
|
int lien_tot; // top index of "links" heap (always out-of-range)
|
||||||
lien_buffers *liensbuf; // links buffers
|
lien_buffers *liensbuf; // links buffers
|
||||||
robots_wizard *robotsptr; // robots ptr
|
robots_wizard *robotsptr; // robots ptr
|
||||||
String lang_iso; // en, fr ..
|
String lang_iso; /**< Accept-Language value (en, fr, ...) */
|
||||||
String accept; // Accept:
|
String accept; // Accept:
|
||||||
String headers; // Additional headers
|
String headers; // Additional headers
|
||||||
String mimedefs; // ext1=mimetype1\next2=mimetype2..
|
String mimedefs; // ext1=mimetype1\next2=mimetype2..
|
||||||
String mod_blacklist; // (3.41)
|
String mod_blacklist; /**< blacklisted modules */
|
||||||
int convert_utf8; // filenames UTF-8 conversion (3.46)
|
int convert_utf8; // filenames UTF-8 conversion (3.46)
|
||||||
//
|
//
|
||||||
int maxlink; // nombre max de liens
|
int maxlink; /**< max number of links */
|
||||||
int maxfilter; // nombre max de filtres
|
int maxfilter; /**< max number of filters */
|
||||||
//
|
//
|
||||||
const char *exec; // adresse du nom de l'éxecutable
|
const char *exec; /**< path of the running executable */
|
||||||
//
|
//
|
||||||
int quiet; // poser des questions autres que wizard?
|
int quiet; /**< suppress non-wizard questions */
|
||||||
int keyboard; // vérifier stdin
|
int keyboard; /**< poll stdin for keyboard input */
|
||||||
int bypass_limits; // bypass built-in limits
|
int bypass_limits; // bypass built-in limits
|
||||||
int background_on_suspend; // background process on suspend signal
|
int background_on_suspend; // background process on suspend signal
|
||||||
//
|
//
|
||||||
int is_update; // c'est une update (afficher "File updated...")
|
int is_update; /**< this run is an update (show "File updated...") */
|
||||||
int dir_topindex; // reconstruire top index par la suite
|
int dir_topindex; /**< rebuild the top index afterwards */
|
||||||
//
|
//
|
||||||
// callbacks
|
// callbacks
|
||||||
t_hts_htmlcheck_callbacks *callbacks_fun;
|
t_hts_htmlcheck_callbacks
|
||||||
|
*callbacks_fun; /**< user HTML/parsing callback table */
|
||||||
// store library handles
|
// store library handles
|
||||||
htslibhandles libHandles;
|
htslibhandles libHandles; /**< loaded external module handles */
|
||||||
//
|
//
|
||||||
htsoptstate state; // state
|
htsoptstate state; /**< embedded live engine state */
|
||||||
};
|
};
|
||||||
|
|
||||||
// stats for httrack
|
/* Running statistics for a mirror. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_hts_stat_struct
|
#ifndef HTS_DEF_FWSTRUCT_hts_stat_struct
|
||||||
#define HTS_DEF_FWSTRUCT_hts_stat_struct
|
#define HTS_DEF_FWSTRUCT_hts_stat_struct
|
||||||
typedef struct hts_stat_struct hts_stat_struct;
|
typedef struct hts_stat_struct hts_stat_struct;
|
||||||
#endif
|
#endif
|
||||||
struct hts_stat_struct {
|
struct hts_stat_struct {
|
||||||
LLint HTS_TOTAL_RECV; // flux entrant reçu
|
LLint HTS_TOTAL_RECV; /**< total bytes received from the network */
|
||||||
LLint stat_bytes; // octets écrits sur disque
|
LLint stat_bytes; /**< total bytes written to disk */
|
||||||
// int HTS_TOTAL_RECV_STATE; // status: 0 tout va bien 1: ralentir un peu 2: ralentir 3: beaucoup
|
// int HTS_TOTAL_RECV_STATE; // status: 0 ok 1: slow down a little 2: slow
|
||||||
TStamp stat_timestart; // départ
|
// down 3: a lot
|
||||||
|
TStamp stat_timestart; /**< mirror start time */
|
||||||
//
|
//
|
||||||
LLint total_packed; // flux entrant compressé reçu
|
LLint total_packed; /**< compressed bytes received (on the wire) */
|
||||||
LLint total_unpacked; // flux entrant compressé reçu
|
LLint total_unpacked; /**< bytes after decompression */
|
||||||
int total_packedfiles; // fichiers compressés
|
int total_packedfiles; /**< number of compressed files */
|
||||||
//
|
//
|
||||||
TStamp istat_timestart[2]; // départ pour calcul instantanné
|
TStamp
|
||||||
LLint istat_bytes[2]; // calcul pour instantanné
|
istat_timestart[2]; /**< window start times for the instantaneous rate */
|
||||||
TStamp istat_reference01; // top départ donné par #0 à #1
|
LLint istat_bytes[2]; /**< window byte counts for the instantaneous rate */
|
||||||
int istat_idlasttimer; // id du timer qui a récemment donné une stat
|
TStamp
|
||||||
|
istat_reference01; /**< reference timestamp handed from window #0 to #1 */
|
||||||
|
int istat_idlasttimer; /**< id of the timer that last produced a stat */
|
||||||
//
|
//
|
||||||
int stat_files; // nombre de fichiers écrits
|
int stat_files; /**< number of files written */
|
||||||
int stat_updated_files; // nombre de fichiers mis à jour
|
int stat_updated_files; /**< number of files updated */
|
||||||
int stat_background; // nombre de fichiers écrits en arrière plan
|
int stat_background; /**< number of files written in the background */
|
||||||
//
|
//
|
||||||
int stat_nrequests; // nombre de requêtes sur socket
|
int stat_nrequests; /**< number of requests issued on sockets */
|
||||||
int stat_sockid; // nombre de sockets allouées au total
|
int stat_sockid; /**< total number of sockets ever allocated */
|
||||||
int stat_nsocket; // nombre de sockets
|
int stat_nsocket; /**< current number of open sockets */
|
||||||
int stat_errors; // nombre d'erreurs
|
int stat_errors; /**< number of errors */
|
||||||
int stat_errors_front; // idem, mais au tout premier niveau
|
int stat_errors_front; /**< errors at the very first level */
|
||||||
int stat_warnings; // '' warnings
|
int stat_warnings; /**< number of warnings */
|
||||||
int stat_infos; // '' infos
|
int stat_infos; /**< number of info messages */
|
||||||
int nbk; // fichiers anticipés en arrière plan et terminés
|
int nbk; /**< background-anticipated files now completed */
|
||||||
LLint nb; // données transférées actuellement (estimation)
|
LLint nb; /**< bytes currently being transferred (estimate) */
|
||||||
//
|
//
|
||||||
LLint rate;
|
LLint rate; /**< current transfer rate */
|
||||||
//
|
//
|
||||||
TStamp last_connect; // last connect() call
|
TStamp last_connect; /**< time of the last connect() call */
|
||||||
TStamp last_request; // last request issued
|
TStamp last_request; /**< time of the last request issued */
|
||||||
};
|
};
|
||||||
|
|
||||||
// structure pour paramètres supplémentaires lors de la requête
|
/* Extra per-request parameters (mirrors httrackp request options). */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_htsrequest_proxy
|
#ifndef HTS_DEF_FWSTRUCT_htsrequest_proxy
|
||||||
#define HTS_DEF_FWSTRUCT_htsrequest_proxy
|
#define HTS_DEF_FWSTRUCT_htsrequest_proxy
|
||||||
typedef struct htsrequest_proxy htsrequest_proxy;
|
typedef struct htsrequest_proxy htsrequest_proxy;
|
||||||
#endif
|
#endif
|
||||||
struct htsrequest_proxy {
|
struct htsrequest_proxy {
|
||||||
int active;
|
int active; /**< nonzero if a proxy is used for this request */
|
||||||
const char* name;
|
const char *name; /**< proxy host name */
|
||||||
int port;
|
int port; /**< proxy port */
|
||||||
const char* bindhost; // bind this host
|
const char *bindhost; /**< local address to bind the outgoing socket to */
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef HTS_DEF_FWSTRUCT_htsrequest
|
#ifndef HTS_DEF_FWSTRUCT_htsrequest
|
||||||
@@ -463,93 +483,93 @@ struct htsrequest_proxy {
|
|||||||
typedef struct htsrequest htsrequest;
|
typedef struct htsrequest htsrequest;
|
||||||
#endif
|
#endif
|
||||||
struct htsrequest {
|
struct htsrequest {
|
||||||
short int user_agent_send; // user agent (ex: httrack/1.0 [sun])
|
short int user_agent_send; /**< send a User-Agent header */
|
||||||
short int http11; // l'en tête peut (doit) être signé HTTP/1.1 et non HTTP/1.0
|
short int http11; /**< sign the request as HTTP/1.1 rather than HTTP/1.0 */
|
||||||
short int nokeepalive; // pas de keep-alive
|
short int nokeepalive; /**< disable keep-alive */
|
||||||
short int range_used; // Range utilisé
|
short int range_used; /**< a Range header is in use */
|
||||||
short int nocompression; // Pas de compression
|
short int nocompression; /**< disable compression */
|
||||||
short int flush_garbage; // recycled
|
short int flush_garbage; // recycled
|
||||||
const char* user_agent;
|
const char *user_agent; /**< User-Agent value */
|
||||||
const char* referer;
|
const char *referer; /**< Referer value */
|
||||||
const char* from;
|
const char *from; /**< From value */
|
||||||
const char* lang_iso;
|
const char *lang_iso; /**< Accept-Language value */
|
||||||
const char* accept;
|
const char *accept; /**< Accept value */
|
||||||
const char* headers;
|
const char *headers; /**< extra request headers */
|
||||||
htsrequest_proxy proxy; // proxy
|
htsrequest_proxy proxy; /**< proxy for this request */
|
||||||
};
|
};
|
||||||
|
|
||||||
// structure pour retour d'une connexion/prise d'en tête
|
/* Result of a connection / header fetch. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_htsblk
|
#ifndef HTS_DEF_FWSTRUCT_htsblk
|
||||||
#define HTS_DEF_FWSTRUCT_htsblk
|
#define HTS_DEF_FWSTRUCT_htsblk
|
||||||
typedef struct htsblk htsblk;
|
typedef struct htsblk htsblk;
|
||||||
#endif
|
#endif
|
||||||
struct htsblk {
|
struct htsblk {
|
||||||
int statuscode; // status-code, -1=erreur, 200=OK,201=..etc (cf RFC1945)
|
int statuscode; /**< HTTP status code; -1=error, 200=OK, ... (RFC1945) */
|
||||||
short int notmodified; // page ou fichier NON modifié (transféré)
|
short int notmodified; /**< page/file was not modified (not transferred) */
|
||||||
short int is_write; // sortie sur disque (out) ou en mémoire (adr)
|
short int is_write; /**< output goes to disk (out) vs memory (adr) */
|
||||||
short int is_chunk; // mode chunk
|
short int is_chunk; /**< chunked transfer encoding */
|
||||||
short int compressed; // compressé?
|
short int compressed; /**< body is compressed */
|
||||||
short int empty; // vide?
|
short int empty; /**< body is empty */
|
||||||
short int keep_alive; // Keep-Alive?
|
short int keep_alive; /**< connection is keep-alive */
|
||||||
short int keep_alive_trailers; // ..with trailers extension
|
short int keep_alive_trailers; /**< keep-alive with trailers extension */
|
||||||
int keep_alive_t; // KA timeout
|
int keep_alive_t; /**< keep-alive timeout (seconds) */
|
||||||
int keep_alive_max; // KA number of requests
|
int keep_alive_max; /**< keep-alive max number of requests */
|
||||||
char *adr; // adresse du bloc de mémoire, NULL=vide
|
char *adr; /**< in-memory body buffer; NULL if empty */
|
||||||
char *headers; // adresse des en têtes si présents
|
char *headers; /**< received headers, if any */
|
||||||
FILE *out; // écriture directe sur disque (si is_write=1)
|
FILE *out; /**< destination file when is_write=1 */
|
||||||
LLint size; // taille fichier
|
LLint size; /**< body size */
|
||||||
char msg[80]; // message éventuel si échec ("\0"=non précisé)
|
char msg[80]; /**< failure message ("" if none) */
|
||||||
char contenttype[HTS_MIMETYPE_SIZE]; // content-type (e.g. "text/html")
|
char contenttype[HTS_MIMETYPE_SIZE]; // content-type (e.g. "text/html")
|
||||||
char charset[HTS_MIMETYPE_SIZE]; // charset (e.g. "iso-8859-1")
|
char charset[HTS_MIMETYPE_SIZE]; // charset (e.g. "iso-8859-1")
|
||||||
char contentencoding[HTS_MIMETYPE_SIZE]; // content-encoding (e.g. "gzip")
|
char contentencoding[HTS_MIMETYPE_SIZE]; // content-encoding (e.g. "gzip")
|
||||||
char *location; // on copie dedans éventuellement la véritable 'location'
|
char *location; /**< resolved Location target, if any */
|
||||||
LLint totalsize; // taille totale à télécharger (-1=inconnue)
|
LLint totalsize; /**< total size to download (-1=unknown) */
|
||||||
short int is_file; // ce n'est pas une socket mais un descripteur de fichier si 1
|
short int is_file; /**< 1 if a file descriptor rather than a socket */
|
||||||
T_SOC soc; // ID socket
|
T_SOC soc; /**< socket id */
|
||||||
SOCaddr address; // IP address
|
SOCaddr address; /**< peer IP address */
|
||||||
int address_size; // IP address structure length (unused internally)
|
int address_size; // IP address structure length (unused internally)
|
||||||
FILE *fp; // fichier pour file://
|
FILE *fp; /**< file handle for file:// */
|
||||||
#if HTS_USEOPENSSL
|
#if HTS_USEOPENSSL
|
||||||
short int ssl; // is this connection a SSL one? (https)
|
short int ssl; /**< nonzero if this is an SSL connection (https) */
|
||||||
// BIO* ssl_soc; // SSL structure
|
// BIO* ssl_soc; // SSL structure
|
||||||
SSL *ssl_con; // connection structure
|
SSL *ssl_con; /**< SSL connection structure */
|
||||||
#endif
|
#endif
|
||||||
char lastmodified[64]; // Last-Modified
|
char lastmodified[64]; /**< Last-Modified value */
|
||||||
char etag[256]; // Etag
|
char etag[256]; /**< ETag value */
|
||||||
char cdispo[256]; // Content-Disposition coupé
|
char cdispo[256]; /**< Content-Disposition filename (truncated) */
|
||||||
LLint crange; // Content-Range
|
LLint crange; /**< Content-Range length */
|
||||||
LLint crange_start; // Content-Range
|
LLint crange_start; /**< Content-Range start offset */
|
||||||
LLint crange_end; // Content-Range
|
LLint crange_end; /**< Content-Range end offset */
|
||||||
int debugid; // debug connection
|
int debugid; /**< connection debug id */
|
||||||
/* */
|
/* */
|
||||||
htsrequest req; // paramètres pour la requête
|
htsrequest req; /**< parameters used for the request */
|
||||||
/*char digest[32+2]; // digest md5 généré par le moteur ("" si non généré) */
|
/*char digest[32+2]; // md5 digest generated by the engine ("" if none) */
|
||||||
};
|
};
|
||||||
|
|
||||||
// structure d'un lien
|
/* A single link in the crawl. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_lien_url
|
#ifndef HTS_DEF_FWSTRUCT_lien_url
|
||||||
#define HTS_DEF_FWSTRUCT_lien_url
|
#define HTS_DEF_FWSTRUCT_lien_url
|
||||||
typedef struct lien_url lien_url;
|
typedef struct lien_url lien_url;
|
||||||
#endif
|
#endif
|
||||||
struct lien_url {
|
struct lien_url {
|
||||||
char *adr; // adresse
|
char *adr; /**< host/address part of the URL */
|
||||||
char *fil; // nom du fichier distant
|
char *fil; /**< remote file path */
|
||||||
char *sav; // nom à sauver sur disque (avec chemin éventuel)
|
char *sav; /**< local save name (with any path) */
|
||||||
char *cod; // chemin codebase éventuel si classe java
|
char *cod; /**< codebase path for a Java class, if any */
|
||||||
char *former_adr; // adresse initiale (avant éventuel moved), peut être nulle
|
char *former_adr; /**< original address before a move; may be NULL */
|
||||||
char *former_fil; // nom du fichier distant initial (avant éventuel moved), peut être nul
|
char *former_fil; /**< original remote file before a move; may be NULL */
|
||||||
|
|
||||||
int premier; // pointeur sur le premier lien qui a donné lieu aux autres liens du domaine
|
int premier; /**< index of the first link that seeded this domain */
|
||||||
int precedent; // pointeur sur le lien qui a donné lieu à ce lien précis
|
int precedent; /**< index of the link that referenced this one */
|
||||||
int depth; // profondeur autorisée lien ; >0 forte 0=faible
|
int depth; /**< remaining allowed depth; >0 strong, 0 weak */
|
||||||
int pass2; // traiter après les autres, seconde passe. si == -1, lien traité en background
|
int pass2; /**< second-pass marker; -1 means handled in background */
|
||||||
char link_import; // lien importé à la suite d'un moved - ne pas appliquer les règles classiques up/down
|
char link_import; /**< imported after a move; skip the usual up/down rules */
|
||||||
//int moved; // pointeur sur moved
|
// int moved; // pointer to moved
|
||||||
int retry; // nombre de retry restants
|
int retry; /**< remaining retries */
|
||||||
int testmode; // mode test uniquement, envoyer juste un head!
|
int testmode; /**< test only: send just a HEAD */
|
||||||
};
|
};
|
||||||
|
|
||||||
// chargement de fichiers en 'arrière plan'
|
/* A file being fetched in the background. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_lien_back
|
#ifndef HTS_DEF_FWSTRUCT_lien_back
|
||||||
#define HTS_DEF_FWSTRUCT_lien_back
|
#define HTS_DEF_FWSTRUCT_lien_back
|
||||||
typedef struct lien_back lien_back;
|
typedef struct lien_back lien_back;
|
||||||
@@ -558,43 +578,44 @@ struct lien_back {
|
|||||||
#if DEBUG_CHECKINT
|
#if DEBUG_CHECKINT
|
||||||
char magic;
|
char magic;
|
||||||
#endif
|
#endif
|
||||||
char url_adr[HTS_URLMAXSIZE * 2]; // adresse
|
char url_adr[HTS_URLMAXSIZE * 2]; /**< host/address part of the URL */
|
||||||
char url_fil[HTS_URLMAXSIZE * 2]; // nom du fichier distant
|
char url_fil[HTS_URLMAXSIZE * 2]; /**< remote file path */
|
||||||
char url_sav[HTS_URLMAXSIZE * 2]; // nom à sauver sur disque (avec chemin éventuel)
|
char url_sav[HTS_URLMAXSIZE * 2]; /**< local save name (with any path) */
|
||||||
char referer_adr[HTS_URLMAXSIZE * 2]; // adresse host page referer
|
char referer_adr[HTS_URLMAXSIZE * 2]; /**< referer page host/address */
|
||||||
char referer_fil[HTS_URLMAXSIZE * 2]; // fichier page referer
|
char referer_fil[HTS_URLMAXSIZE * 2]; /**< referer page file */
|
||||||
char location_buffer[HTS_URLMAXSIZE * 2]; // "location" en cas de "moved" (302,..)
|
char
|
||||||
char *tmpfile; // nom à sauver temporairement (compressé)
|
location_buffer[HTS_URLMAXSIZE * 2]; /**< Location on a move (302, ...) */
|
||||||
char tmpfile_buffer[HTS_URLMAXSIZE * 2]; // buffer pour le nom à sauver temporairement
|
char *tmpfile; /**< temporary save name (compressed) */
|
||||||
char send_too[1024]; // données à envoyer en même temps que le header
|
char tmpfile_buffer[HTS_URLMAXSIZE * 2]; /**< storage for tmpfile */
|
||||||
int status; // status (-1=non utilisé, 0: prêt, >0: opération en cours)
|
char send_too[1024]; /**< data to send together with the header */
|
||||||
int locked; // locked (to be used soon)
|
int status; /**< -1=unused, 0=ready, >0=operation in progress */
|
||||||
int testmode; // mode de test
|
int locked; /**< locked (reserved) */
|
||||||
int timeout; // gérer des timeouts? (!=0 : nombre de secondes)
|
int testmode; /**< test mode */
|
||||||
TStamp timeout_refresh; // si oui, time refresh
|
int timeout; /**< timeout in seconds (0=none) */
|
||||||
int rateout; // timeout refresh? (!=0 : taux minimum toléré en octets/s)
|
TStamp timeout_refresh; /**< last activity time, for timeout tracking */
|
||||||
TStamp rateout_time; // si oui, date de départ
|
int rateout; /**< minimum tolerated rate in bytes/s (0=none) */
|
||||||
LLint maxfile_nonhtml; // taille max d'un fichier non html
|
TStamp rateout_time; /**< start time for the rate window */
|
||||||
LLint maxfile_html; // idem pour un ficheir html
|
LLint maxfile_nonhtml; /**< max bytes for a non-HTML file */
|
||||||
htsblk r; // structure htsblk de chaque objet en background
|
LLint maxfile_html; /**< max bytes for an HTML file */
|
||||||
int is_update; // mode update
|
htsblk r; /**< per-object result block */
|
||||||
int head_request; // requète HEAD?
|
int is_update; /**< update mode */
|
||||||
LLint range_req_size; // range utilisé
|
int head_request; /**< this is a HEAD request */
|
||||||
TStamp ka_time_start; // refresh time for KA
|
LLint range_req_size; /**< Range request size used */
|
||||||
|
TStamp ka_time_start; /**< keep-alive refresh start time */
|
||||||
//
|
//
|
||||||
int http11; // L'en tête doit être signé HTTP/1.1 et non HTTP/1.0
|
int http11; /**< sign the request as HTTP/1.1 rather than HTTP/1.0 */
|
||||||
int is_chunk; // chunk?
|
int is_chunk; /**< chunked transfer */
|
||||||
char *chunk_adr; // adresse chunk en cours de chargement
|
char *chunk_adr; /**< buffer for the chunk being loaded */
|
||||||
LLint chunk_size; // taille chunk en cours de chargement
|
LLint chunk_size; /**< size of the chunk being loaded */
|
||||||
LLint chunk_blocksize; // taille data declaree par le chunk
|
LLint chunk_blocksize; /**< data size declared by the chunk */
|
||||||
LLint compressed_size; // taille compressés (stats uniquement)
|
LLint compressed_size; /**< compressed size (stats only) */
|
||||||
//
|
//
|
||||||
//int links_index; // to access liens[links_index]
|
//int links_index; // to access liens[links_index]
|
||||||
//
|
//
|
||||||
char info[256]; // éventuel status pour le ftp
|
char info[256]; /**< status text, e.g. for FTP */
|
||||||
int stop_ftp; // flag stop pour ftp
|
int stop_ftp; /**< stop flag for FTP */
|
||||||
int finalized; // finalized (optim memory)
|
int finalized; /**< finalized (memory optimization) */
|
||||||
int early_add; // was added before link heap saw it
|
int early_add; /**< was added before the link heap saw it */
|
||||||
#if DEBUG_CHECKINT
|
#if DEBUG_CHECKINT
|
||||||
char magic2;
|
char magic2;
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -104,6 +104,7 @@ static HTS_UNUSED void abortf_(const char *exp, const char *file, int line) {
|
|||||||
* Check whether 'VAR' is of type char[].
|
* Check whether 'VAR' is of type char[].
|
||||||
*/
|
*/
|
||||||
#if (defined(__GNUC__) && !defined(__cplusplus))
|
#if (defined(__GNUC__) && !defined(__cplusplus))
|
||||||
|
|
||||||
/* Note: char[] and const char[] are compatible */
|
/* Note: char[] and const char[] are compatible */
|
||||||
#define HTS_IS_CHAR_BUFFER(VAR) ( __builtin_types_compatible_p ( typeof (VAR), char[] ) )
|
#define HTS_IS_CHAR_BUFFER(VAR) ( __builtin_types_compatible_p ( typeof (VAR), char[] ) )
|
||||||
#else
|
#else
|
||||||
@@ -139,8 +140,11 @@ static HTS_UNUSED void htssafe_compile_time_check_(void) {
|
|||||||
* (MSVC, ...) keep the previous behavior via the #else branches.
|
* (MSVC, ...) keep the previous behavior via the #else branches.
|
||||||
*/
|
*/
|
||||||
#if (defined(__GNUC__) && !defined(__cplusplus))
|
#if (defined(__GNUC__) && !defined(__cplusplus))
|
||||||
|
|
||||||
#if defined(__has_attribute)
|
#if defined(__has_attribute)
|
||||||
|
|
||||||
#if __has_attribute(warning)
|
#if __has_attribute(warning)
|
||||||
|
|
||||||
#define HTS_BUFF_PTR_ATTR(msg) __attribute__((unused, noinline, warning(msg)))
|
#define HTS_BUFF_PTR_ATTR(msg) __attribute__((unused, noinline, warning(msg)))
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
@@ -152,29 +156,51 @@ static HTS_UNUSED void htssafe_compile_time_check_(void) {
|
|||||||
|
|
||||||
HTS_BUFF_PTR_ATTR("strcpybuff() destination is a pointer (capacity unknown): "
|
HTS_BUFF_PTR_ATTR("strcpybuff() destination is a pointer (capacity unknown): "
|
||||||
"NOT bounds-checked; use strlcpybuff(dst, src, size)")
|
"NOT bounds-checked; use strlcpybuff(dst, src, size)")
|
||||||
|
|
||||||
static char *strcpybuff_ptr_(char *dest, const char *src) {
|
static char *strcpybuff_ptr_(char *dest, const char *src) {
|
||||||
return strcpy(dest, src);
|
return strcpy(dest, src);
|
||||||
}
|
}
|
||||||
|
|
||||||
HTS_BUFF_PTR_ATTR("strcatbuff() destination is a pointer (capacity unknown): "
|
HTS_BUFF_PTR_ATTR("strcatbuff() destination is a pointer (capacity unknown): "
|
||||||
"NOT bounds-checked; use strlcatbuff(dst, src, size)")
|
"NOT bounds-checked; use strlcatbuff(dst, src, size)")
|
||||||
|
|
||||||
static char *strcatbuff_ptr_(char *dest, const char *src) {
|
static char *strcatbuff_ptr_(char *dest, const char *src) {
|
||||||
return strcat(dest, src);
|
return strcat(dest, src);
|
||||||
}
|
}
|
||||||
|
|
||||||
HTS_BUFF_PTR_ATTR("strncatbuff() destination is a pointer (capacity unknown): "
|
HTS_BUFF_PTR_ATTR("strncatbuff() destination is a pointer (capacity unknown): "
|
||||||
"NOT bounds-checked; use strlcatbuff(dst, src, size)")
|
"NOT bounds-checked; use strlcatbuff(dst, src, size)")
|
||||||
|
|
||||||
static char *strncatbuff_ptr_(char *dest, const char *src, size_t n) {
|
static char *strncatbuff_ptr_(char *dest, const char *src, size_t n) {
|
||||||
return strncat(dest, src, n);
|
return strncat(dest, src, n);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SIZE CONTRACT shared by strcpybuff/strcatbuff/strncatbuff (the "buff"
|
||||||
|
* family): the destination bound is taken from sizeof(A), so A MUST be a real
|
||||||
|
* char[] array in scope. The bound is the full array size in bytes, INCLUDING
|
||||||
|
* the terminating NUL. On overflow the *_safe_ helpers do NOT truncate: they
|
||||||
|
* abort() (assertf). On success the result is always NUL-terminated.
|
||||||
|
*
|
||||||
|
* CRITICAL CAVEAT: if A is a bare char* pointer (not an array), sizeof(A) is
|
||||||
|
* the pointer size, not the buffer capacity. There is no way to recover the
|
||||||
|
* real capacity, so these macros SILENTLY DEGRADE to the unbounded raw
|
||||||
|
* strcpy()/strcat()/strncat() while still looking like a checked call. The
|
||||||
|
* bound is lost. On GCC/Clang (C) the pointer case routes through the
|
||||||
|
* *_ptr_ stubs above, which carry a 'warning' attribute to flag the site at
|
||||||
|
* compile time; on other compilers it is silent. When the destination is a
|
||||||
|
* pointer of known capacity, call the explicit-size strlcpybuff/strlcatbuff
|
||||||
|
* (passing the capacity, NUL included) instead.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Append at most N characters from "B" to "A".
|
* Append at most N characters from "B" to "A".
|
||||||
* If "A" is a char[] variable whose size is not sizeof(char*), then the size
|
* If "A" is a char[] variable whose size is not sizeof(char*), then the size
|
||||||
* is assumed to be the capacity of this array.
|
* is assumed to be the capacity of this array.
|
||||||
*/
|
*/
|
||||||
#if (defined(__GNUC__) && !defined(__cplusplus))
|
#if (defined(__GNUC__) && !defined(__cplusplus))
|
||||||
|
|
||||||
#define strncatbuff(A, B, N) __builtin_choose_expr( HTS_IS_CHAR_BUFFER(A), \
|
#define strncatbuff(A, B, N) __builtin_choose_expr( HTS_IS_CHAR_BUFFER(A), \
|
||||||
strncat_safe_(A, sizeof(A), B, \
|
strncat_safe_(A, sizeof(A), B, \
|
||||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), N, \
|
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), N, \
|
||||||
@@ -195,6 +221,7 @@ static char *strncatbuff_ptr_(char *dest, const char *src, size_t n) {
|
|||||||
* is assumed to be the capacity of this array.
|
* is assumed to be the capacity of this array.
|
||||||
*/
|
*/
|
||||||
#if (defined(__GNUC__) && !defined(__cplusplus))
|
#if (defined(__GNUC__) && !defined(__cplusplus))
|
||||||
|
|
||||||
#define strcatbuff(A, B) __builtin_choose_expr( HTS_IS_CHAR_BUFFER(A), \
|
#define strcatbuff(A, B) __builtin_choose_expr( HTS_IS_CHAR_BUFFER(A), \
|
||||||
strncat_safe_(A, sizeof(A), B, \
|
strncat_safe_(A, sizeof(A), B, \
|
||||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), (size_t) -1, \
|
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), (size_t) -1, \
|
||||||
@@ -215,6 +242,7 @@ static char *strncatbuff_ptr_(char *dest, const char *src, size_t n) {
|
|||||||
* is assumed to be the capacity of this array.
|
* is assumed to be the capacity of this array.
|
||||||
*/
|
*/
|
||||||
#if (defined(__GNUC__) && !defined(__cplusplus))
|
#if (defined(__GNUC__) && !defined(__cplusplus))
|
||||||
|
|
||||||
#define strcpybuff(A, B) __builtin_choose_expr( HTS_IS_CHAR_BUFFER(A), \
|
#define strcpybuff(A, B) __builtin_choose_expr( HTS_IS_CHAR_BUFFER(A), \
|
||||||
strcpy_safe_(A, sizeof(A), B, \
|
strcpy_safe_(A, sizeof(A), B, \
|
||||||
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), \
|
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), \
|
||||||
@@ -229,6 +257,14 @@ static char *strncatbuff_ptr_(char *dest, const char *src, size_t n) {
|
|||||||
"overflow while copying '" #B "' to '"#A"'", __FILE__, __LINE__) )
|
"overflow while copying '" #B "' to '"#A"'", __FILE__, __LINE__) )
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Explicit-size variants (strlcatbuff/strlncatbuff/strlcpybuff): the
|
||||||
|
* destination capacity is the caller-supplied S (total bytes, NUL included),
|
||||||
|
* NOT derived from sizeof(A). Use these when A is a pointer or its capacity is
|
||||||
|
* not its sizeof. Same abort-on-overflow, always-NUL-terminated contract; no
|
||||||
|
* silent pointer degradation since the bound is passed in.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Append characters of "B" to "A", "A" having a maximum capacity of "S".
|
* Append characters of "B" to "A", "A" having a maximum capacity of "S".
|
||||||
*/
|
*/
|
||||||
@@ -256,6 +292,7 @@ static char *strncatbuff_ptr_(char *dest, const char *src, size_t n) {
|
|||||||
|
|
||||||
/** strnlen replacement (autotools). **/
|
/** strnlen replacement (autotools). **/
|
||||||
#if ( ! defined(_WIN32) && ! defined(HAVE_STRNLEN) )
|
#if ( ! defined(_WIN32) && ! defined(HAVE_STRNLEN) )
|
||||||
|
|
||||||
static HTS_UNUSED size_t strnlen(const char *s, size_t maxlen) {
|
static HTS_UNUSED size_t strnlen(const char *s, size_t maxlen) {
|
||||||
size_t i;
|
size_t i;
|
||||||
for(i = 0 ; i < maxlen && s[i] != '\0' ; i++) ;
|
for(i = 0 ; i < maxlen && s[i] != '\0' ; i++) ;
|
||||||
@@ -263,6 +300,10 @@ static HTS_UNUSED size_t strnlen(const char *s, size_t maxlen) {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* strlen of source, but bounded by sizeof_source (its capacity, NUL included).
|
||||||
|
Aborts if source is NULL or has no NUL within that capacity. The sentinel
|
||||||
|
sizeof_source == (size_t)-1 means "capacity unknown", and falls back to the
|
||||||
|
unbounded strlen (used when the source is a pointer rather than an array). */
|
||||||
static HTS_INLINE HTS_UNUSED size_t strlen_safe_(const char *source, const size_t sizeof_source,
|
static HTS_INLINE HTS_UNUSED size_t strlen_safe_(const char *source, const size_t sizeof_source,
|
||||||
const char *file, int line) {
|
const char *file, int line) {
|
||||||
size_t size;
|
size_t size;
|
||||||
@@ -273,6 +314,11 @@ static HTS_INLINE HTS_UNUSED size_t strlen_safe_(const char *source, const size_
|
|||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Core bounded append. Appends min(strlen(source), n) bytes of source onto
|
||||||
|
dest. sizeof_dest is dest's total capacity (NUL included); sizeof_source is
|
||||||
|
source's capacity or (size_t)-1 if unknown. Aborts if the result (existing
|
||||||
|
dest length + appended bytes + NUL) would not fit sizeof_dest: this NEVER
|
||||||
|
truncates. Always NUL-terminates on success. */
|
||||||
static HTS_INLINE HTS_UNUSED char* strncat_safe_(char *const dest, const size_t sizeof_dest,
|
static HTS_INLINE HTS_UNUSED char* strncat_safe_(char *const dest, const size_t sizeof_dest,
|
||||||
const char *const source, const size_t sizeof_source,
|
const char *const source, const size_t sizeof_source,
|
||||||
const size_t n,
|
const size_t n,
|
||||||
@@ -288,6 +334,9 @@ static HTS_INLINE HTS_UNUSED char* strncat_safe_(char *const dest, const size_t
|
|||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Core bounded copy: empties dest then appends all of source via
|
||||||
|
strncat_safe_. sizeof_dest is dest's total capacity (NUL included). Aborts
|
||||||
|
(no truncation) if source plus its NUL would not fit. */
|
||||||
static HTS_INLINE HTS_UNUSED char* strcpy_safe_(char *const dest, const size_t sizeof_dest,
|
static HTS_INLINE HTS_UNUSED char* strcpy_safe_(char *const dest, const size_t sizeof_dest,
|
||||||
const char *const source, const size_t sizeof_source,
|
const char *const source, const size_t sizeof_source,
|
||||||
const char *exp, const char *file, int line) {
|
const char *exp, const char *file, int line) {
|
||||||
@@ -333,9 +382,11 @@ static HTS_INLINE HTS_UNUSED htsbuff htsbuff_ptr_(char *buf, size_t cap) {
|
|||||||
* On other compilers there is no such guard, so pass only true arrays there.
|
* On other compilers there is no such guard, so pass only true arrays there.
|
||||||
*/
|
*/
|
||||||
#if (defined(__GNUC__) && !defined(__cplusplus))
|
#if (defined(__GNUC__) && !defined(__cplusplus))
|
||||||
|
|
||||||
/* 0 for an array, a -1 array-size compile error for a pointer. */
|
/* 0 for an array, a -1 array-size compile error for a pointer. */
|
||||||
#define htsbuff_must_be_array_(A) \
|
#define htsbuff_must_be_array_(A) \
|
||||||
(sizeof(char[1 - 2 * !!__builtin_types_compatible_p(typeof(A), typeof(&(A)[0]))]) - 1)
|
(sizeof(char[1 - 2 * !!__builtin_types_compatible_p(typeof(A), typeof(&(A)[0]))]) - 1)
|
||||||
|
|
||||||
#define htsbuff_array(ARR) htsbuff_ptr_((ARR), sizeof(ARR) + htsbuff_must_be_array_(ARR))
|
#define htsbuff_array(ARR) htsbuff_ptr_((ARR), sizeof(ARR) + htsbuff_must_be_array_(ARR))
|
||||||
#else
|
#else
|
||||||
#define htsbuff_array(ARR) htsbuff_ptr_((ARR), sizeof(ARR))
|
#define htsbuff_array(ARR) htsbuff_ptr_((ARR), sizeof(ARR))
|
||||||
@@ -378,11 +429,20 @@ static HTS_INLINE HTS_UNUSED const char *htsbuff_str(const htsbuff *b) {
|
|||||||
return b->buf;
|
return b->buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Thin aliases over the libc allocator/memcpy (historical "t" suffix); no
|
||||||
|
added bounds checking. freet() also NULLs the freed pointer and tolerates
|
||||||
|
NULL. memcpybuff() despite the name is a raw memcpy: the caller owns the
|
||||||
|
bounds. */
|
||||||
#define malloct(A) malloc(A)
|
#define malloct(A) malloc(A)
|
||||||
|
|
||||||
#define calloct(A,B) calloc((A), (B))
|
#define calloct(A,B) calloc((A), (B))
|
||||||
|
|
||||||
#define freet(A) do { if ((A) != NULL) { free(A); (A) = NULL; } } while(0)
|
#define freet(A) do { if ((A) != NULL) { free(A); (A) = NULL; } } while(0)
|
||||||
|
|
||||||
#define strdupt(A) strdup(A)
|
#define strdupt(A) strdup(A)
|
||||||
|
|
||||||
#define realloct(A,B) realloc(A, B)
|
#define realloct(A,B) realloc(A, B)
|
||||||
|
|
||||||
#define memcpybuff(A, B, N) memcpy((A), (B), (N))
|
#define memcpybuff(A, B, N) memcpy((A), (B), (N))
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
112
src/htsstrings.h
112
src/htsstrings.h
@@ -42,7 +42,9 @@ Please visit our Website: http://www.httrack.com
|
|||||||
#ifndef HTS_UNUSED
|
#ifndef HTS_UNUSED
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
#define HTS_UNUSED __attribute__ ((unused))
|
#define HTS_UNUSED __attribute__ ((unused))
|
||||||
|
|
||||||
#define HTS_STATIC static __attribute__ ((unused))
|
#define HTS_STATIC static __attribute__ ((unused))
|
||||||
|
|
||||||
#define HTS_PRINTF_FUN(fmt, arg) __attribute__ ((format (printf, fmt, arg)))
|
#define HTS_PRINTF_FUN(fmt, arg) __attribute__ ((format (printf, fmt, arg)))
|
||||||
#else
|
#else
|
||||||
#define HTS_UNUSED
|
#define HTS_UNUSED
|
||||||
@@ -58,6 +60,23 @@ typedef struct String String;
|
|||||||
#endif
|
#endif
|
||||||
#ifndef HTS_DEF_STRUCT_String
|
#ifndef HTS_DEF_STRUCT_String
|
||||||
#define HTS_DEF_STRUCT_String
|
#define HTS_DEF_STRUCT_String
|
||||||
|
/**
|
||||||
|
* Growable owned string.
|
||||||
|
*
|
||||||
|
* Ownership/lifetime: the String owns buffer_ and frees it (StringFree).
|
||||||
|
* buffer_ is allocated lazily, so a freshly STRING_EMPTY/StringInit'd String,
|
||||||
|
* or one just StringFree'd/StringAcquire'd, has buffer_ == NULL and
|
||||||
|
* length_ == capacity_ == 0. Any growing operation may realloc, so a pointer
|
||||||
|
* obtained from StringBuff/StringBuffRW is invalidated by the next append,
|
||||||
|
* copy, or room request; do not cache it across such calls.
|
||||||
|
*
|
||||||
|
* Invariants when buffer_ != NULL: length_ < capacity_, and buffer_[length_]
|
||||||
|
* is a NUL (the content is always NUL-terminated). length_ excludes that NUL;
|
||||||
|
* capacity_ counts it. The empty state (buffer_ == NULL) has no readable NUL,
|
||||||
|
* so callers must not treat StringBuff() of an untouched String as "".
|
||||||
|
*
|
||||||
|
* Direct field access is internal (trailing underscore); use the macros below.
|
||||||
|
*/
|
||||||
struct String {
|
struct String {
|
||||||
char *buffer_;
|
char *buffer_;
|
||||||
size_t length_;
|
size_t length_;
|
||||||
@@ -68,6 +87,7 @@ struct String {
|
|||||||
/** Allocator **/
|
/** Allocator **/
|
||||||
#ifndef STRING_REALLOC
|
#ifndef STRING_REALLOC
|
||||||
#define STRING_REALLOC(BUFF, SIZE) ( (char*) realloc(BUFF, SIZE) )
|
#define STRING_REALLOC(BUFF, SIZE) ( (char*) realloc(BUFF, SIZE) )
|
||||||
|
|
||||||
#define STRING_FREE(BUFF) free(BUFF)
|
#define STRING_FREE(BUFF) free(BUFF)
|
||||||
#endif
|
#endif
|
||||||
#ifndef STRING_ASSERT
|
#ifndef STRING_ASSERT
|
||||||
@@ -75,45 +95,49 @@ struct String {
|
|||||||
#define STRING_ASSERT(EXP) assert(EXP)
|
#define STRING_ASSERT(EXP) assert(EXP)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** An empty string **/
|
/** Initializer for an empty String (NULL buffer). Use to declare or reset. **/
|
||||||
#define STRING_EMPTY { (char*) NULL, 0, 0 }
|
#define STRING_EMPTY { (char*) NULL, 0, 0 }
|
||||||
|
|
||||||
/** String buffer **/
|
/** Read-only buffer pointer. NULL until the String has been written to.
|
||||||
|
Invalidated by any subsequent growing operation. **/
|
||||||
#define StringBuff(BLK) ( (const char*) ((BLK).buffer_) )
|
#define StringBuff(BLK) ( (const char*) ((BLK).buffer_) )
|
||||||
|
|
||||||
/** String buffer (read/write) **/
|
/** Read/write buffer pointer. Same NULL/invalidation rules as StringBuff. **/
|
||||||
#define StringBuffRW(BLK) ((BLK).buffer_)
|
#define StringBuffRW(BLK) ((BLK).buffer_)
|
||||||
|
|
||||||
/** String length **/
|
/** Current length in bytes, excluding the terminating NUL. **/
|
||||||
#define StringLength(BLK) ((BLK).length_)
|
#define StringLength(BLK) ((BLK).length_)
|
||||||
|
|
||||||
/** String not empty ? **/
|
/** Non-zero if the String holds at least one byte. **/
|
||||||
#define StringNotEmpty(BLK) ( StringLength(BLK) > 0 )
|
#define StringNotEmpty(BLK) ( StringLength(BLK) > 0 )
|
||||||
|
|
||||||
/** String capacity **/
|
/** Allocated capacity in bytes, including room for the terminating NUL. **/
|
||||||
#define StringCapacity(BLK) ((BLK).capacity_)
|
#define StringCapacity(BLK) ((BLK).capacity_)
|
||||||
|
|
||||||
/** Subcharacter **/
|
/** Byte at POS (read). No bounds check; POS must be < StringLength. **/
|
||||||
#define StringSub(BLK, POS) ( StringBuff(BLK)[POS] )
|
#define StringSub(BLK, POS) ( StringBuff(BLK)[POS] )
|
||||||
|
|
||||||
/** Subcharacter (read/write) **/
|
/** Byte at POS (read/write). No bounds check; POS must be < StringLength. **/
|
||||||
#define StringSubRW(BLK, POS) ( StringBuffRW(BLK)[POS] )
|
#define StringSubRW(BLK, POS) ( StringBuffRW(BLK)[POS] )
|
||||||
|
|
||||||
/** Subcharacter (read/write) **/
|
/** Subcharacter (read/write) **/
|
||||||
#define StringSubRW(BLK, POS) ( StringBuffRW(BLK)[POS] )
|
#define StringSubRW(BLK, POS) ( StringBuffRW(BLK)[POS] )
|
||||||
|
|
||||||
/** Right subcharacter **/
|
/** Byte POS positions from the end (read). POS==1 is the last byte. **/
|
||||||
#define StringRight(BLK, POS) ( StringBuff(BLK)[StringLength(BLK) - POS] )
|
#define StringRight(BLK, POS) ( StringBuff(BLK)[StringLength(BLK) - POS] )
|
||||||
|
|
||||||
/** Right subcharacter (read/write) **/
|
/** Byte POS positions from the end (read/write). POS==1 is the last byte. **/
|
||||||
#define StringRightRW(BLK, POS) ( StringBuffRW(BLK)[StringLength(BLK) - POS] )
|
#define StringRightRW(BLK, POS) ( StringBuffRW(BLK)[StringLength(BLK) - POS] )
|
||||||
|
|
||||||
/** Remove the utter right character from the string. **/
|
/** Drop the last byte and re-terminate. Undefined if the String is empty
|
||||||
|
(no length check; would underflow). **/
|
||||||
#define StringPopRight(BLK) do { \
|
#define StringPopRight(BLK) do { \
|
||||||
StringBuffRW(BLK)[--StringLength(BLK)] = '\0'; \
|
StringBuffRW(BLK)[--StringLength(BLK)] = '\0'; \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
/** Ensure the string is large enough for exactly CAPACITY bytes overall (including \0). **/
|
/** Grow so capacity_ >= CAPACITY (total bytes, including the NUL). May realloc
|
||||||
|
(invalidating prior buffer pointers); aborts via STRING_ASSERT on OOM.
|
||||||
|
Never shrinks. **/
|
||||||
#define StringRoomTotal(BLK, CAPACITY) do { \
|
#define StringRoomTotal(BLK, CAPACITY) do { \
|
||||||
const size_t capacity_ = (size_t) (CAPACITY); \
|
const size_t capacity_ = (size_t) (CAPACITY); \
|
||||||
while ((BLK).capacity_ < capacity_) { \
|
while ((BLK).capacity_ < capacity_) { \
|
||||||
@@ -127,31 +151,37 @@ struct String {
|
|||||||
} \
|
} \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
/** Ensure the string is large enough for exactly SIZE more characters (not including \0). **/
|
/** Reserve room for SIZE more bytes beyond the current length (plus the NUL).
|
||||||
|
May realloc, invalidating prior buffer pointers. **/
|
||||||
#define StringRoom(BLK, SIZE) StringRoomTotal(BLK, StringLength(BLK) + (SIZE) + 1)
|
#define StringRoom(BLK, SIZE) StringRoomTotal(BLK, StringLength(BLK) + (SIZE) + 1)
|
||||||
|
|
||||||
/** Return the RW buffer for a strcat() operation of at most SIZE characters. **/
|
/** Reserve room for SIZE more bytes and return the (post-realloc) RW buffer,
|
||||||
|
for appending in place. Does not update length_; the caller must. **/
|
||||||
#define StringBuffN(BLK, SIZE) StringBuffN_(&(BLK), SIZE)
|
#define StringBuffN(BLK, SIZE) StringBuffN_(&(BLK), SIZE)
|
||||||
HTS_STATIC char *StringBuffN_(String * blk, int size) {
|
HTS_STATIC char *StringBuffN_(String * blk, int size) {
|
||||||
StringRoom(*blk, size);
|
StringRoom(*blk, size);
|
||||||
return StringBuffRW(*blk);
|
return StringBuffRW(*blk);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Initialize a string. **/
|
/** Zero the fields (NULL buffer, no allocation). Use on an uninitialized
|
||||||
|
String only; does NOT free an existing buffer (use StringFree to reset
|
||||||
|
an owned one), so calling it on a live String leaks. **/
|
||||||
#define StringInit(BLK) do { \
|
#define StringInit(BLK) do { \
|
||||||
(BLK).buffer_ = NULL; \
|
(BLK).buffer_ = NULL; \
|
||||||
(BLK).capacity_ = 0; \
|
(BLK).capacity_ = 0; \
|
||||||
(BLK).length_ = 0; \
|
(BLK).length_ = 0; \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
/** Clear a string (set its length to 0) **/
|
/** Truncate to length 0, keeping the allocation. Forces a non-NULL buffer
|
||||||
|
(allocates if empty) and writes the leading NUL, so StringBuff is "". **/
|
||||||
#define StringClear(BLK) do { \
|
#define StringClear(BLK) do { \
|
||||||
(BLK).length_ = 0; \
|
(BLK).length_ = 0; \
|
||||||
StringRoom(BLK, 0); \
|
StringRoom(BLK, 0); \
|
||||||
(BLK).buffer_[0] = '\0'; \
|
(BLK).buffer_[0] = '\0'; \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
/** Set the length of a string to 'SIZE'. If SIZE is negative, check the size using strlen(). **/
|
/** Set length_ to SIZE, or to strlen(buffer_) if SIZE is negative. Caller
|
||||||
|
asserts SIZE fits the existing content; does not (re)allocate. **/
|
||||||
#define StringSetLength(BLK, SIZE) do { \
|
#define StringSetLength(BLK, SIZE) do { \
|
||||||
if (SIZE >= 0) { \
|
if (SIZE >= 0) { \
|
||||||
(BLK).length_ = SIZE; \
|
(BLK).length_ = SIZE; \
|
||||||
@@ -160,7 +190,8 @@ HTS_STATIC char *StringBuffN_(String * blk, int size) {
|
|||||||
} \
|
} \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
/** Free a string (release memory) **/
|
/** Release the owned buffer and reset to the empty state (NULL buffer).
|
||||||
|
Idempotent; safe on an already-empty String. **/
|
||||||
#define StringFree(BLK) do { \
|
#define StringFree(BLK) do { \
|
||||||
if ((BLK).buffer_ != NULL) { \
|
if ((BLK).buffer_ != NULL) { \
|
||||||
STRING_FREE((BLK).buffer_); \
|
STRING_FREE((BLK).buffer_); \
|
||||||
@@ -170,8 +201,12 @@ HTS_STATIC char *StringBuffN_(String * blk, int size) {
|
|||||||
(BLK).length_ = 0; \
|
(BLK).length_ = 0; \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
/** Assign an allocated pointer to a a string.
|
/** Take ownership of a NUL-terminated heap string STR (the String will free
|
||||||
The pointer _MUST_ be compatible with STRING_REALLOC() and STRING_FREE() **/
|
it). Frees any current buffer first. STR MUST have been allocated by an
|
||||||
|
allocator compatible with STRING_REALLOC()/STRING_FREE(), and must not be
|
||||||
|
freed or used by the caller afterwards. length_/capacity_ are set to
|
||||||
|
strlen(STR) (capacity_ here excludes the NUL, so the next append reallocs).
|
||||||
|
**/
|
||||||
#define StringSetBuffer(BLK, STR) do { \
|
#define StringSetBuffer(BLK, STR) do { \
|
||||||
size_t len__ = strlen( STR ); \
|
size_t len__ = strlen( STR ); \
|
||||||
StringFree(BLK); \
|
StringFree(BLK); \
|
||||||
@@ -180,7 +215,9 @@ The pointer _MUST_ be compatible with STRING_REALLOC() and STRING_FREE() **/
|
|||||||
(BLK).length_ = len__; \
|
(BLK).length_ = len__; \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
/** Append a memory block to a string **/
|
/** Append SIZE raw bytes from STR (NULs allowed as data). Grows as needed and
|
||||||
|
re-terminates with a NUL after the appended bytes. STR must not alias
|
||||||
|
BLK's buffer (a realloc would invalidate it). **/
|
||||||
#define StringMemcat(BLK, STR, SIZE) do { \
|
#define StringMemcat(BLK, STR, SIZE) do { \
|
||||||
const char* str_mc_ = (STR); \
|
const char* str_mc_ = (STR); \
|
||||||
const size_t size_mc_ = (size_t) (SIZE); \
|
const size_t size_mc_ = (size_t) (SIZE); \
|
||||||
@@ -192,13 +229,14 @@ The pointer _MUST_ be compatible with STRING_REALLOC() and STRING_FREE() **/
|
|||||||
*((BLK).buffer_ + (BLK).length_) = '\0'; \
|
*((BLK).buffer_ + (BLK).length_) = '\0'; \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
/** Copy a memory block to a string **/
|
/** Replace content with SIZE raw bytes from STR (NULs allowed as data).
|
||||||
|
Same non-aliasing requirement as StringMemcat. **/
|
||||||
#define StringMemcpy(BLK, STR, SIZE) do { \
|
#define StringMemcpy(BLK, STR, SIZE) do { \
|
||||||
(BLK).length_ = 0; \
|
(BLK).length_ = 0; \
|
||||||
StringMemcat(BLK, STR, SIZE); \
|
StringMemcat(BLK, STR, SIZE); \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
/** Add a character **/
|
/** Append one byte and re-terminate. Grows as needed. **/
|
||||||
#define StringAddchar(BLK, c) do { \
|
#define StringAddchar(BLK, c) do { \
|
||||||
String * const s__ = &(BLK); \
|
String * const s__ = &(BLK); \
|
||||||
char c__ = (c); \
|
char c__ = (c); \
|
||||||
@@ -207,7 +245,9 @@ The pointer _MUST_ be compatible with STRING_REALLOC() and STRING_FREE() **/
|
|||||||
StringBuffRW(*s__)[StringLength(*s__) ] = 0; \
|
StringBuffRW(*s__)[StringLength(*s__) ] = 0; \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
/** Acquire a string ; it's the client's responsability to free() it **/
|
/** Hand the buffer to the caller and reset the String to empty (NULL buffer).
|
||||||
|
The returned pointer is now owned by the caller, who must STRING_FREE() it.
|
||||||
|
Returns NULL if the String was empty. **/
|
||||||
HTS_STATIC char *StringAcquire(String * blk) {
|
HTS_STATIC char *StringAcquire(String * blk) {
|
||||||
char *buff = StringBuffRW(*blk);
|
char *buff = StringBuffRW(*blk);
|
||||||
|
|
||||||
@@ -217,7 +257,8 @@ HTS_STATIC char *StringAcquire(String * blk) {
|
|||||||
return buff;
|
return buff;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Clone a string. **/
|
/** Return an independent deep copy of *src (its own allocation). The caller
|
||||||
|
owns the result and must StringFree it. **/
|
||||||
HTS_STATIC String StringDup(const String * src) {
|
HTS_STATIC String StringDup(const String * src) {
|
||||||
String s = STRING_EMPTY;
|
String s = STRING_EMPTY;
|
||||||
|
|
||||||
@@ -225,7 +266,10 @@ HTS_STATIC String StringDup(const String * src) {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Attach a string using a pointer. **/
|
/** Take ownership of *str (a NUL-terminated heap string) and NULL it out, so
|
||||||
|
ownership transfers and the caller keeps no dangling alias. Frees any
|
||||||
|
current buffer first. *str MUST be allocator-compatible (see
|
||||||
|
StringSetBuffer). No-op if str or *str is NULL. **/
|
||||||
HTS_STATIC void StringAttach(String * blk, char **str) {
|
HTS_STATIC void StringAttach(String * blk, char **str) {
|
||||||
StringFree(*blk);
|
StringFree(*blk);
|
||||||
if (str != NULL && *str != NULL) {
|
if (str != NULL && *str != NULL) {
|
||||||
@@ -235,7 +279,8 @@ HTS_STATIC void StringAttach(String * blk, char **str) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Append a string to another one. **/
|
/** Append the C string STR (up to its NUL). No-op if STR is NULL. STR must not
|
||||||
|
alias BLK's buffer. **/
|
||||||
#define StringCat(BLK, STR) do { \
|
#define StringCat(BLK, STR) do { \
|
||||||
const char *const str__ = ( STR ); \
|
const char *const str__ = ( STR ); \
|
||||||
if (str__ != NULL) { \
|
if (str__ != NULL) { \
|
||||||
@@ -244,6 +289,8 @@ HTS_STATIC void StringAttach(String * blk, char **str) {
|
|||||||
} \
|
} \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
|
/** Append at most SIZE leading bytes of the C string STR. No-op if STR is
|
||||||
|
NULL. STR must not alias BLK's buffer. **/
|
||||||
#define StringCatN(BLK, STR, SIZE) do { \
|
#define StringCatN(BLK, STR, SIZE) do { \
|
||||||
const char *str__ = ( STR ); \
|
const char *str__ = ( STR ); \
|
||||||
if (str__ != NULL) { \
|
if (str__ != NULL) { \
|
||||||
@@ -255,6 +302,8 @@ HTS_STATIC void StringAttach(String * blk, char **str) {
|
|||||||
} \
|
} \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
|
/** Replace content with at most SIZE leading bytes of the C string STR.
|
||||||
|
If STR is NULL, clears to "". STR must not alias BLK's buffer. **/
|
||||||
#define StringCopyN(BLK, STR, SIZE) do { \
|
#define StringCopyN(BLK, STR, SIZE) do { \
|
||||||
const char *str__ = ( STR ); \
|
const char *str__ = ( STR ); \
|
||||||
const size_t usize__ = (SIZE); \
|
const size_t usize__ = (SIZE); \
|
||||||
@@ -270,9 +319,13 @@ HTS_STATIC void StringAttach(String * blk, char **str) {
|
|||||||
} \
|
} \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
|
/** Replace blk's content with a copy of String blk2. blk and blk2 must be
|
||||||
|
distinct Strings (use StringCopyOverlapped if they may be the same). **/
|
||||||
#define StringCopyS(blk, blk2) StringCopyN(blk, (blk2).buffer_, (blk2).length_)
|
#define StringCopyS(blk, blk2) StringCopyN(blk, (blk2).buffer_, (blk2).length_)
|
||||||
|
|
||||||
/** Copy a string to another one. **/
|
/** Replace content with a copy of the C string STR. If STR is NULL, clears to
|
||||||
|
"". STR must not alias BLK's buffer (use StringCopyOverlapped if it might).
|
||||||
|
**/
|
||||||
#define StringCopy(BLK, STR) do { \
|
#define StringCopy(BLK, STR) do { \
|
||||||
const char *str__ = ( STR ); \
|
const char *str__ = ( STR ); \
|
||||||
if (str__ != NULL) { \
|
if (str__ != NULL) { \
|
||||||
@@ -283,7 +336,8 @@ HTS_STATIC void StringAttach(String * blk, char **str) {
|
|||||||
} \
|
} \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
/** Copy a (potentially overlapping) string to another one. **/
|
/** Like StringCopy but safe when STR aliases BLK's own buffer: copies via a
|
||||||
|
temporary, so a self-copy or overlap is well-defined. **/
|
||||||
#define StringCopyOverlapped(BLK, STR) do { \
|
#define StringCopyOverlapped(BLK, STR) do { \
|
||||||
String s__ = STRING_EMPTY; \
|
String s__ = STRING_EMPTY; \
|
||||||
StringCopy(s__, STR); \
|
StringCopy(s__, STR); \
|
||||||
|
|||||||
@@ -31,6 +31,12 @@ Please visit our Website: http://www.httrack.com
|
|||||||
/* Author: Xavier Roche */
|
/* Author: Xavier Roche */
|
||||||
/* ------------------------------------------------------------ */
|
/* ------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/** @file htswrap.h
|
||||||
|
Legacy entry points of the callback-wrapper subsystem. The live callback
|
||||||
|
registration API now lives on the httrackp options block (hts_set_callback);
|
||||||
|
only the no-op init/free stubs remain exported here for ABI compatibility.
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef HTSWRAP_DEFH
|
#ifndef HTSWRAP_DEFH
|
||||||
#define HTSWRAP_DEFH
|
#define HTSWRAP_DEFH
|
||||||
|
|
||||||
@@ -50,7 +56,10 @@ typedef struct httrackp httrackp;
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/** Legacy no-op retained for ABI compatibility; always returns 1. */
|
||||||
HTSEXT_API int htswrap_init(void); // LEGACY
|
HTSEXT_API int htswrap_init(void); // LEGACY
|
||||||
|
|
||||||
|
/** Legacy no-op retained for ABI compatibility; always returns 1. */
|
||||||
HTSEXT_API int htswrap_free(void); // LEGACY
|
HTSEXT_API int htswrap_free(void); // LEGACY
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
@@ -30,6 +30,25 @@ Please visit our Website: http://www.httrack.com
|
|||||||
/* Author: Xavier Roche */
|
/* Author: Xavier Roche */
|
||||||
/* ------------------------------------------------------------ */
|
/* ------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file httrack-library.h
|
||||||
|
* @brief Public C API for embedding the HTTrack mirroring engine.
|
||||||
|
*
|
||||||
|
* Two ways to drive the engine, both supported and used by real consumers:
|
||||||
|
* - argv path: build an argv vector and call hts_main()/hts_main2(), exactly
|
||||||
|
* as the command-line tool is configured.
|
||||||
|
* - struct/callback path: hts_create_opt(), install callbacks with
|
||||||
|
* CHAIN_FUNCTION(), then hts_main2(), then hts_free_opt().
|
||||||
|
*
|
||||||
|
* Typical lifecycle: hts_init() once per process, then per mirror
|
||||||
|
* hts_create_opt() -> CHAIN_FUNCTION() -> hts_main2() (blocking) ->
|
||||||
|
* hts_get_stats()/hts_errmsg() -> hts_free_opt().
|
||||||
|
*
|
||||||
|
* Threading: hts_main2() blocks the calling thread. hts_request_stop() and
|
||||||
|
* hts_has_stopped() are safe to call for the same opt from another thread while
|
||||||
|
* the mirror runs. hts_free_opt() must not run until hts_has_stopped() is true.
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef HTTRACK_DEFLIB
|
#ifndef HTTRACK_DEFLIB
|
||||||
#define HTTRACK_DEFLIB
|
#define HTTRACK_DEFLIB
|
||||||
|
|
||||||
@@ -54,15 +73,18 @@ typedef struct strc_int2bytes2 strc_int2bytes2;
|
|||||||
#endif
|
#endif
|
||||||
#ifndef HTS_DEF_DEFSTRUCT_hts_log_type
|
#ifndef HTS_DEF_DEFSTRUCT_hts_log_type
|
||||||
#define HTS_DEF_DEFSTRUCT_hts_log_type
|
#define HTS_DEF_DEFSTRUCT_hts_log_type
|
||||||
|
/** Log severity levels, most to least severe. A message is emitted only if its
|
||||||
|
level is <= opt->debug. LOG_ERRNO is a flag OR'd into the level to append
|
||||||
|
": <strerror(errno)>" to the message. */
|
||||||
typedef enum hts_log_type {
|
typedef enum hts_log_type {
|
||||||
LOG_PANIC,
|
LOG_PANIC, /**< Fatal condition. */
|
||||||
LOG_ERROR,
|
LOG_ERROR, /**< Error. */
|
||||||
LOG_WARNING,
|
LOG_WARNING, /**< Warning. */
|
||||||
LOG_NOTICE,
|
LOG_NOTICE, /**< Notice; the default opt->debug level. */
|
||||||
LOG_INFO,
|
LOG_INFO, /**< Informational. */
|
||||||
LOG_DEBUG,
|
LOG_DEBUG, /**< Debug detail. */
|
||||||
LOG_TRACE,
|
LOG_TRACE, /**< Most verbose tracing. */
|
||||||
LOG_ERRNO = 1 << 8
|
LOG_ERRNO = 1 << 8 /**< Flag: append strerror(errno) to the message. */
|
||||||
} hts_log_type;
|
} hts_log_type;
|
||||||
#endif
|
#endif
|
||||||
#ifndef HTS_DEF_FWSTRUCT_hts_stat_struct
|
#ifndef HTS_DEF_FWSTRUCT_hts_stat_struct
|
||||||
@@ -70,7 +92,9 @@ typedef enum hts_log_type {
|
|||||||
typedef struct hts_stat_struct hts_stat_struct;
|
typedef struct hts_stat_struct hts_stat_struct;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** Assert error callback. **/
|
/** Assertion/error handler. Receives the failed expression text, source file,
|
||||||
|
and line. The strings are valid only for the duration of the call; do not
|
||||||
|
retain them. */
|
||||||
#ifndef HTS_DEF_FWSTRUCT_htsErrorCallback
|
#ifndef HTS_DEF_FWSTRUCT_htsErrorCallback
|
||||||
#define HTS_DEF_FWSTRUCT_htsErrorCallback
|
#define HTS_DEF_FWSTRUCT_htsErrorCallback
|
||||||
typedef void (*htsErrorCallback) (const char *msg, const char *file, int line);
|
typedef void (*htsErrorCallback) (const char *msg, const char *file, int line);
|
||||||
@@ -79,10 +103,14 @@ typedef void (*htsErrorCallback) (const char *msg, const char *file, int line);
|
|||||||
/* Helpers for plugging callbacks
|
/* Helpers for plugging callbacks
|
||||||
requires: htsdefines.h */
|
requires: htsdefines.h */
|
||||||
|
|
||||||
/*
|
/**
|
||||||
Add a function callback 'FUNCTION' to the option structure 'OPT' callback member 'MEMBER',
|
* Install callback FUNCTION into OPT->callbacks_fun->MEMBER, chaining it ahead
|
||||||
with an optional (may be NULL) argument 'ARGUMENT'
|
* of any callback already there (whose function and carg are saved for
|
||||||
*/
|
* CALLBACKARG_PREV_FUN/CALLBACKARG_PREV_CARG). ARGUMENT is an optional (may be
|
||||||
|
* NULL) user pointer, later read inside the callback with
|
||||||
|
* CALLBACKARG_USERDEF(). Allocates a t_hts_callbackarg with hts_malloc (not
|
||||||
|
* checked for OOM); it is freed by hts_free_opt().
|
||||||
|
*/
|
||||||
#define CHAIN_FUNCTION(OPT, MEMBER, FUNCTION, ARGUMENT) do { \
|
#define CHAIN_FUNCTION(OPT, MEMBER, FUNCTION, ARGUMENT) do { \
|
||||||
t_hts_callbackarg *carg = (t_hts_callbackarg*) hts_malloc(sizeof(t_hts_callbackarg)); \
|
t_hts_callbackarg *carg = (t_hts_callbackarg*) hts_malloc(sizeof(t_hts_callbackarg)); \
|
||||||
carg->userdef = ( ARGUMENT ); \
|
carg->userdef = ( ARGUMENT ); \
|
||||||
@@ -95,180 +123,544 @@ with an optional (may be NULL) argument 'ARGUMENT'
|
|||||||
/* The following helpers are useful only if you know that a callback might already be installed before the call to CHAIN_FUNCTION()
|
/* The following helpers are useful only if you know that a callback might already be installed before the call to CHAIN_FUNCTION()
|
||||||
If your functions were added just after hts_create_opt(), no need to make the previous function check */
|
If your functions were added just after hts_create_opt(), no need to make the previous function check */
|
||||||
|
|
||||||
/* Get the user-defined pointer initially passed to CHAIN_FUNCTION(), given the callback's carg argument */
|
/** Inside a chained callback, return the ARGUMENT pointer originally passed to
|
||||||
|
CHAIN_FUNCTION(), or NULL when CARG is NULL. */
|
||||||
#define CALLBACKARG_USERDEF(CARG) ( ( (CARG) != NULL ) ? (CARG)->userdef : NULL )
|
#define CALLBACKARG_USERDEF(CARG) ( ( (CARG) != NULL ) ? (CARG)->userdef : NULL )
|
||||||
|
|
||||||
/* Get the previously existing function before the call to CHAIN_FUNCTION(), given the callback's carg argument */
|
/** Return the callback of type NAME that this one chained over, cast to its
|
||||||
|
function-pointer type, or NULL. Call it to forward to the prior handler. */
|
||||||
#define CALLBACKARG_PREV_FUN(CARG, NAME) ( (t_hts_htmlcheck_ ##NAME) ( ( (CARG) != NULL ) ? (CARG)->prev.fun : NULL ) )
|
#define CALLBACKARG_PREV_FUN(CARG, NAME) ( (t_hts_htmlcheck_ ##NAME) ( ( (CARG) != NULL ) ? (CARG)->prev.fun : NULL ) )
|
||||||
|
|
||||||
/* Get the previously existing function argument before the call to CHAIN_FUNCTION(), given the callback's carg argument */
|
/** Return the carg of the callback this one chained over (pass it when
|
||||||
|
forwarding to the CALLBACKARG_PREV_FUN result), or NULL. */
|
||||||
#define CALLBACKARG_PREV_CARG(CARG) ( ( (CARG) != NULL ) ? (CARG)->prev.carg : NULL )
|
#define CALLBACKARG_PREV_CARG(CARG) ( ( (CARG) != NULL ) ? (CARG)->prev.carg : NULL )
|
||||||
|
|
||||||
/* Functions */
|
/* Functions */
|
||||||
|
|
||||||
/* Initialization */
|
/* Initialization */
|
||||||
|
/** Initialize the engine (lazy, idempotent, process-global): threading, the
|
||||||
|
hashtable assert handler, modules, the MD5 self-test, and TLS when built
|
||||||
|
with it. Only the first call does work. Honors $HTS_LOG for the debug level.
|
||||||
|
Always returns 1. Call before hts_create_opt() or hts_main(). */
|
||||||
HTSEXT_API int hts_init(void);
|
HTSEXT_API int hts_init(void);
|
||||||
|
|
||||||
|
/** No-op kept for API compatibility. Frees nothing (the process-global mutexes
|
||||||
|
set up by hts_init() are never released) and always returns 1. */
|
||||||
HTSEXT_API int hts_uninit(void);
|
HTSEXT_API int hts_uninit(void);
|
||||||
|
|
||||||
|
/** Block until all background mirror threads have finished. No-op unless built
|
||||||
|
with threaded fetching. */
|
||||||
HTSEXT_API void htsthread_wait(void);
|
HTSEXT_API void htsthread_wait(void);
|
||||||
|
|
||||||
/* Main functions */
|
/* Main functions */
|
||||||
|
/** Run a full mirror from a command-line argv (argv[0] is ignored, as in
|
||||||
|
main()). Creates a fresh option set, runs the engine, and frees it. Returns
|
||||||
|
the engine exit code. Call hts_init() first. */
|
||||||
HTSEXT_API int hts_main(int argc, char **argv);
|
HTSEXT_API int hts_main(int argc, char **argv);
|
||||||
|
|
||||||
|
/** Run a full mirror using a caller-supplied option set. Use this instead of
|
||||||
|
hts_main() to set options or plug callbacks on opt first. Blocks until the
|
||||||
|
mirror ends and returns the engine exit code. The caller keeps ownership of
|
||||||
|
opt and must release it with hts_free_opt(). */
|
||||||
HTSEXT_API int hts_main2(int argc, char **argv, httrackp * opt);
|
HTSEXT_API int hts_main2(int argc, char **argv, httrackp * opt);
|
||||||
|
|
||||||
/* Options handling */
|
/* Options handling */
|
||||||
|
/** Allocate and default-initialize an option set, preloading the bundled parser
|
||||||
|
modules. Returns a heap object the caller owns and must release with
|
||||||
|
hts_free_opt(). Does not return NULL on allocation failure. */
|
||||||
HTSEXT_API httrackp *hts_create_opt(void);
|
HTSEXT_API httrackp *hts_create_opt(void);
|
||||||
|
|
||||||
|
/** Free an option set created by hts_create_opt() (callback chains, plugged
|
||||||
|
modules, DNS cache, owned strings, and the structure). NULL is accepted. The
|
||||||
|
pointer is invalid afterward. Do not call while a mirror is running on that
|
||||||
|
opt; wait until hts_has_stopped() is true. */
|
||||||
HTSEXT_API void hts_free_opt(httrackp * opt);
|
HTSEXT_API void hts_free_opt(httrackp * opt);
|
||||||
|
|
||||||
|
/** Return sizeof(httrackp) as the library sees it, for caller-vs-library struct
|
||||||
|
ABI mismatch checks. */
|
||||||
HTSEXT_API size_t hts_sizeof_opt(void);
|
HTSEXT_API size_t hts_sizeof_opt(void);
|
||||||
|
|
||||||
|
/** Snapshot opt's error/warning/info counters and return a pointer to them.
|
||||||
|
Returns NULL if opt is NULL. The result aliases a single process-global
|
||||||
|
static: it is not thread-safe and is overwritten by the next call, so copy
|
||||||
|
out the fields you need. */
|
||||||
HTSEXT_API const hts_stat_struct* hts_get_stats(httrackp * opt);
|
HTSEXT_API const hts_stat_struct* hts_get_stats(httrackp * opt);
|
||||||
|
|
||||||
|
/** Legacy no-op retained for API compatibility. */
|
||||||
HTSEXT_API void set_wrappers(httrackp * opt); /* LEGACY */
|
HTSEXT_API void set_wrappers(httrackp * opt); /* LEGACY */
|
||||||
|
|
||||||
|
/** Load a plugin shared library and run its hts_plug(opt, argv) entry point. On
|
||||||
|
success the handle is recorded in opt and unloaded by hts_free_opt().
|
||||||
|
@return 1 if loaded and hts_plug succeeded; 0 if loaded but hts_plug was
|
||||||
|
missing or refused; -1 if the library could not be loaded. */
|
||||||
HTSEXT_API int plug_wrapper(httrackp * opt, const char *moduleName,
|
HTSEXT_API int plug_wrapper(httrackp * opt, const char *moduleName,
|
||||||
const char *argv);
|
const char *argv);
|
||||||
|
|
||||||
|
/** Install the process-global assertion/error callback (NULL clears it). Not
|
||||||
|
per-opt, and not safe to change while a mirror runs. */
|
||||||
HTSEXT_API void hts_set_error_callback(htsErrorCallback handler);
|
HTSEXT_API void hts_set_error_callback(htsErrorCallback handler);
|
||||||
|
|
||||||
|
/** Return the current process-global error callback, or NULL. */
|
||||||
HTSEXT_API htsErrorCallback hts_get_error_callback(void);
|
HTSEXT_API htsErrorCallback hts_get_error_callback(void);
|
||||||
|
|
||||||
/* Logging */
|
/* Logging */
|
||||||
|
/** Legacy: write prefix then msg to opt->log. Returns 0 if written, 1 if
|
||||||
|
opt->log is NULL. Prefer hts_log_print(). */
|
||||||
HTSEXT_API int hts_log(httrackp * opt, const char *prefix, const char *msg);
|
HTSEXT_API int hts_log(httrackp * opt, const char *prefix, const char *msg);
|
||||||
|
|
||||||
|
/** printf-style log at level @p type (an hts_log_type, optionally |LOG_ERRNO).
|
||||||
|
Forwards to the registered log callback, and when the level is <= opt->debug
|
||||||
|
also to opt->log. @p format must be non-NULL. */
|
||||||
HTSEXT_API void hts_log_print(httrackp * opt, int type, const char *format,
|
HTSEXT_API void hts_log_print(httrackp * opt, int type, const char *format,
|
||||||
...) HTS_PRINTF_FUN(3, 4);
|
...) HTS_PRINTF_FUN(3, 4);
|
||||||
|
|
||||||
|
/** va_list form of hts_log_print(). @p opt may be NULL (only the callback
|
||||||
|
runs). Preserves errno. @p format must be non-NULL. */
|
||||||
HTSEXT_API void hts_log_vprint(httrackp * opt, int type, const char *format,
|
HTSEXT_API void hts_log_vprint(httrackp * opt, int type, const char *format,
|
||||||
va_list args);
|
va_list args);
|
||||||
HTSEXT_API void hts_set_log_vprint_callback(void (*callback)(httrackp * opt,
|
|
||||||
int type,
|
/** Install the process-global log callback invoked by hts_log_vprint() for
|
||||||
const char *format, va_list args));
|
every message, regardless of opt->debug (NULL clears it). Not per-opt. */
|
||||||
|
HTSEXT_API void
|
||||||
|
hts_set_log_vprint_callback(void (*callback)(httrackp *opt, int type,
|
||||||
|
const char *format, va_list args));
|
||||||
|
|
||||||
/* Infos */
|
/* Infos */
|
||||||
|
/** Human-readable build/feature string plus the names of plugged modules. The
|
||||||
|
result is written into and aliases a 2048-byte scratch buffer inside opt: it
|
||||||
|
is valid until that buffer is next used, and must not be freed. opt must be
|
||||||
|
non-NULL. */
|
||||||
HTSEXT_API const char *hts_get_version_info(httrackp * opt);
|
HTSEXT_API const char *hts_get_version_info(httrackp * opt);
|
||||||
|
|
||||||
|
/** Static build-features string (TLS, zlib, ipv6, and so on). Process-global
|
||||||
|
storage; do not free or modify. */
|
||||||
HTSEXT_API const char *hts_is_available(void);
|
HTSEXT_API const char *hts_is_available(void);
|
||||||
HTSEXT_API const char* hts_version(void);
|
|
||||||
HTSEXT_API const hts_stat_struct* hts_get_stats(httrackp * opt);
|
/** HTTrack version id string. Static storage; do not free. */
|
||||||
|
HTSEXT_API const char *hts_version(void);
|
||||||
|
|
||||||
/* Wrapper functions */
|
/* Wrapper functions */
|
||||||
HTSEXT_API int htswrap_init(void); // DEPRECATED - DUMMY FUNCTION
|
HTSEXT_API int htswrap_init(void); // DEPRECATED - DUMMY FUNCTION
|
||||||
|
|
||||||
HTSEXT_API int htswrap_free(void); // DEPRECATED - DUMMY FUNCTION
|
HTSEXT_API int htswrap_free(void); // DEPRECATED - DUMMY FUNCTION
|
||||||
|
|
||||||
|
/** Register callback @p fct under @p name in opt's callback table (for example
|
||||||
|
"start", "check-html", "linkdetected"). Returns 1 on success, 0 if @p name
|
||||||
|
is not a known slot. Prefer CHAIN_FUNCTION(), which preserves any prior
|
||||||
|
callback. */
|
||||||
HTSEXT_API int htswrap_add(httrackp * opt, const char *name, void *fct);
|
HTSEXT_API int htswrap_add(httrackp * opt, const char *name, void *fct);
|
||||||
|
|
||||||
|
/** Return the function pointer registered under @p name in opt as a uintptr_t,
|
||||||
|
or 0 if none or unknown. */
|
||||||
HTSEXT_API uintptr_t htswrap_read(httrackp * opt, const char *name);
|
HTSEXT_API uintptr_t htswrap_read(httrackp * opt, const char *name);
|
||||||
|
|
||||||
|
/** @warning No implementation is linked into the library; calling this fails to
|
||||||
|
link. For per-callback user data use the CHAIN_FUNCTION() ARGUMENT and
|
||||||
|
CALLBACKARG_USERDEF() instead. */
|
||||||
HTSEXT_API int htswrap_set_userdef(httrackp * opt, void *userdef);
|
HTSEXT_API int htswrap_set_userdef(httrackp * opt, void *userdef);
|
||||||
|
|
||||||
|
/** @warning No implementation is linked into the library; calling this fails to
|
||||||
|
link. Read per-callback user data with CALLBACKARG_USERDEF() instead. */
|
||||||
HTSEXT_API void *htswrap_get_userdef(httrackp * opt);
|
HTSEXT_API void *htswrap_get_userdef(httrackp * opt);
|
||||||
|
|
||||||
/* Internal library allocators, if a different libc is being used by the client */
|
/* Internal library allocators, if a different libc is being used by the client */
|
||||||
|
/** strdup() through the library allocator. Returns a heap copy freed with
|
||||||
|
hts_free(), or NULL on failure. */
|
||||||
HTSEXT_API char *hts_strdup(const char *string);
|
HTSEXT_API char *hts_strdup(const char *string);
|
||||||
|
|
||||||
|
/** malloc() through the library allocator. Free with hts_free(). NULL on OOM.
|
||||||
|
*/
|
||||||
HTSEXT_API void *hts_malloc(size_t size);
|
HTSEXT_API void *hts_malloc(size_t size);
|
||||||
|
|
||||||
|
/** realloc() through the library allocator. NULL on failure, leaving the
|
||||||
|
original block unchanged. */
|
||||||
HTSEXT_API void *hts_realloc(void *const data, const size_t size);
|
HTSEXT_API void *hts_realloc(void *const data, const size_t size);
|
||||||
|
|
||||||
|
/** free() through the library allocator. NULL is accepted. */
|
||||||
HTSEXT_API void hts_free(void *data);
|
HTSEXT_API void hts_free(void *data);
|
||||||
|
|
||||||
/* Other functions */
|
/* Other functions */
|
||||||
HTSEXT_API int hts_resetvar(void); // DEPRECATED - DUMMY FUNCTION
|
HTSEXT_API int hts_resetvar(void); // DEPRECATED - DUMMY FUNCTION
|
||||||
|
|
||||||
|
/** (Re)build the top-level index.html aggregating every mirror project found
|
||||||
|
under @p path. @p binpath is the data root used to locate the
|
||||||
|
templates/topindex-*.html files, falling back to built-in templates. Writes
|
||||||
|
<path>/index.html. @return 1 on success, 0 on failure. */
|
||||||
HTSEXT_API int hts_buildtopindex(httrackp * opt, const char *path,
|
HTSEXT_API int hts_buildtopindex(httrackp * opt, const char *path,
|
||||||
const char *binpath);
|
const char *binpath);
|
||||||
|
|
||||||
|
/** Scan every mirror project under @p path and return a CRLF-separated list:
|
||||||
|
@p type==1 gives the distinct category names, any other value gives the
|
||||||
|
project directory names. The result is heap-allocated and owned by the
|
||||||
|
caller (free with freet()); it may be NULL. Not UTF-8. @p path is modified in
|
||||||
|
place (a trailing '/' is stripped). */
|
||||||
HTSEXT_API char *hts_getcategories(char *path, int type);
|
HTSEXT_API char *hts_getcategories(char *path, int type);
|
||||||
|
|
||||||
|
/** Read the `category=` value from a winprofile.ini file. The result is
|
||||||
|
heap-allocated and owned by the caller (free with freet()), or NULL when the
|
||||||
|
file is missing or has no category line. Not UTF-8. */
|
||||||
HTSEXT_API char *hts_getcategory(const char *filename);
|
HTSEXT_API char *hts_getcategory(const char *filename);
|
||||||
|
|
||||||
/* Catch-URL */
|
/* Catch-URL */
|
||||||
|
/** Open a local capture socket (a mini-proxy), trying a list of standard ports
|
||||||
|
until one binds. Writes the chosen port to *port_prox and the local host
|
||||||
|
address into adr_prox (a caller buffer of at least 128 bytes), and returns
|
||||||
|
the listening socket. Returns INVALID_SOCKET if no port could be bound. */
|
||||||
HTSEXT_API T_SOC catch_url_init_std(int *port_prox, char *adr_prox);
|
HTSEXT_API T_SOC catch_url_init_std(int *port_prox, char *adr_prox);
|
||||||
|
|
||||||
|
/** Open a local capture socket bound to *port (0 picks a free port). Writes the
|
||||||
|
effective port back to *port and the local dotted address into @p adr (a
|
||||||
|
caller buffer of at least 128 bytes), and returns the listening socket.
|
||||||
|
Returns INVALID_SOCKET on failure. */
|
||||||
HTSEXT_API T_SOC catch_url_init(int *port, char *adr);
|
HTSEXT_API T_SOC catch_url_init(int *port, char *adr);
|
||||||
|
|
||||||
|
/** Block on capture socket @p soc, accept one browser connection, and capture
|
||||||
|
the proxied HTTP request: write the absolute URL to @p url, the upper-cased
|
||||||
|
method to @p method, and the rebuilt request (request line, headers, and any
|
||||||
|
POST body) to @p data, then send a canned response and close.
|
||||||
|
@return 1 on success, 0 on error; on error @p url instead holds the peer's
|
||||||
|
"ip:port". The buffers are caller-allocated and not bounds-checked: @p data
|
||||||
|
must be CATCH_URL_DATA_SIZE bytes, and @p url / @p method must fit the
|
||||||
|
captured request line. */
|
||||||
HTSEXT_API int catch_url(T_SOC soc, char *url, char *method, char *data);
|
HTSEXT_API int catch_url(T_SOC soc, char *url, char *method, char *data);
|
||||||
|
|
||||||
/* State */
|
/* State */
|
||||||
|
/** Whether the engine is parsing HTML. Returns 0 if not, otherwise the percent
|
||||||
|
done (at least 1). @p flag >= 0 also requests a progress refresh; pass a
|
||||||
|
negative value to query without side effects. */
|
||||||
HTSEXT_API int hts_is_parsing(httrackp * opt, int flag);
|
HTSEXT_API int hts_is_parsing(httrackp * opt, int flag);
|
||||||
|
|
||||||
|
/** Current background phase: 0 none, 1 testing links, 2 purge, 3, 4 scheduling,
|
||||||
|
5 waiting for a slot. */
|
||||||
HTSEXT_API int hts_is_testing(httrackp * opt);
|
HTSEXT_API int hts_is_testing(httrackp * opt);
|
||||||
|
|
||||||
|
/** Nonzero once the engine has begun its exit sequence. */
|
||||||
HTSEXT_API int hts_is_exiting(httrackp * opt);
|
HTSEXT_API int hts_is_exiting(httrackp * opt);
|
||||||
|
|
||||||
/*HTSEXT_API int hts_setopt(httrackp* opt); DEPRECATED ; see copy_htsopt() */
|
/*HTSEXT_API int hts_setopt(httrackp* opt); DEPRECATED ; see copy_htsopt() */
|
||||||
|
|
||||||
|
/** Queue extra start URLs to inject into a running mirror. @p url is a
|
||||||
|
caller-owned, NULL-terminated array of strings; the engine stores the
|
||||||
|
pointer without copying, so the array and its strings must stay valid until
|
||||||
|
the engine consumes them. @return nonzero if a list is now set. */
|
||||||
HTSEXT_API int hts_addurl(httrackp * opt, char **url);
|
HTSEXT_API int hts_addurl(httrackp * opt, char **url);
|
||||||
|
|
||||||
|
/** Clear any pending add-URL list set by hts_addurl(). Always returns 0. */
|
||||||
HTSEXT_API int hts_resetaddurl(httrackp * opt);
|
HTSEXT_API int hts_resetaddurl(httrackp * opt);
|
||||||
|
|
||||||
|
/** Apply the runtime-tunable options from @p from onto @p to, to adjust a live
|
||||||
|
mirror. Only fields set to a non-sentinel value are copied; the rest of @p
|
||||||
|
to is left untouched. The user-agent string is deep-copied. @return 0. */
|
||||||
HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to);
|
HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to);
|
||||||
|
|
||||||
|
/** Return the engine's last error message, or NULL. The string is owned by
|
||||||
|
@p opt; do not free it, and use it only while @p opt lives. */
|
||||||
HTSEXT_API char *hts_errmsg(httrackp * opt);
|
HTSEXT_API char *hts_errmsg(httrackp * opt);
|
||||||
|
|
||||||
|
/** Get or set the transfer-pause flag. @p p >= 0 sets it (nonzero means
|
||||||
|
paused); a negative value queries. @return the current pause flag. */
|
||||||
HTSEXT_API int hts_setpause(httrackp * opt, int);
|
HTSEXT_API int hts_setpause(httrackp * opt, int);
|
||||||
|
|
||||||
|
/** Ask the running mirror to terminate (sets the stop flag under the state
|
||||||
|
lock, so it is safe to call from another thread). @p force is currently
|
||||||
|
ignored.
|
||||||
|
@return 0; no-op if @p opt is NULL. */
|
||||||
HTSEXT_API int hts_request_stop(httrackp * opt, int force);
|
HTSEXT_API int hts_request_stop(httrackp * opt, int force);
|
||||||
|
|
||||||
|
/** Queue a single in-progress file, by URL, to be cancelled by the engine.
|
||||||
|
@p url is copied internally. Takes the state lock, so it is thread-safe.
|
||||||
|
@return the underlying push result. */
|
||||||
HTSEXT_API int hts_cancel_file_push(httrackp * opt, const char *url);
|
HTSEXT_API int hts_cancel_file_push(httrackp * opt, const char *url);
|
||||||
|
|
||||||
|
/** Cancel the in-progress link-testing phase. Effective only while a test runs.
|
||||||
|
*/
|
||||||
HTSEXT_API void hts_cancel_test(httrackp * opt);
|
HTSEXT_API void hts_cancel_test(httrackp * opt);
|
||||||
|
|
||||||
|
/** Cancel the in-progress HTML parsing. Effective only while parsing is active.
|
||||||
|
*/
|
||||||
HTSEXT_API void hts_cancel_parsing(httrackp * opt);
|
HTSEXT_API void hts_cancel_parsing(httrackp * opt);
|
||||||
HTSEXT_API void hts_cancel_test(httrackp * opt);
|
|
||||||
HTSEXT_API void hts_cancel_parsing(httrackp * opt);
|
/** Nonzero once the mirror has fully ended. Read under the engine state lock,
|
||||||
|
so safe to poll from another thread. Wait for this before hts_free_opt(). */
|
||||||
HTSEXT_API int hts_has_stopped(httrackp * opt);
|
HTSEXT_API int hts_has_stopped(httrackp * opt);
|
||||||
|
|
||||||
/* Tools */
|
/* Tools */
|
||||||
|
/** Ensure the directory chain leading to @p path exists, creating missing
|
||||||
|
directories. @p path ends either with '/' (a directory) or a filename (its
|
||||||
|
basename is ignored). A regular file blocking a needed directory is renamed
|
||||||
|
to "<name>.txt". @p path is NOT UTF-8. @return 0 on success or if it already
|
||||||
|
exists, -1 on error. */
|
||||||
HTSEXT_API int structcheck(const char *path);
|
HTSEXT_API int structcheck(const char *path);
|
||||||
|
|
||||||
|
/** Like structcheck() but @p path is UTF-8. @return 0 on success, -1 on error.
|
||||||
|
*/
|
||||||
HTSEXT_API int structcheck_utf8(const char *path);
|
HTSEXT_API int structcheck_utf8(const char *path);
|
||||||
|
|
||||||
|
/** Whether the directory containing @p path exists. The basename is stripped
|
||||||
|
first, so passing a file path tests its parent directory. @return 1 if it is
|
||||||
|
a directory, 0 otherwise. */
|
||||||
HTSEXT_API int dir_exists(const char *path);
|
HTSEXT_API int dir_exists(const char *path);
|
||||||
|
|
||||||
|
/** Write the HTTP reason phrase for @p statuscode into @p msg, a caller buffer
|
||||||
|
of at least 64 bytes. For an unknown code a non-empty @p msg is kept,
|
||||||
|
otherwise it is set to "Unknown error". */
|
||||||
HTSEXT_API void infostatuscode(char *msg, int statuscode);
|
HTSEXT_API void infostatuscode(char *msg, int statuscode);
|
||||||
|
|
||||||
|
/** Return the static reason-phrase string for @p statuscode, or NULL if
|
||||||
|
unknown. The pointer is a string literal; do not free it. */
|
||||||
HTSEXT_API const char *infostatuscode_const(int statuscode);
|
HTSEXT_API const char *infostatuscode_const(int statuscode);
|
||||||
|
|
||||||
|
/** Current wall-clock time in milliseconds since the Unix epoch. */
|
||||||
HTSEXT_API TStamp mtime_local(void);
|
HTSEXT_API TStamp mtime_local(void);
|
||||||
|
|
||||||
|
/** Format a duration @p t (in seconds) into a compact string in @p st, for
|
||||||
|
example "3d,02h,04min05s". @p st is caller-allocated and not bounds-checked.
|
||||||
|
*/
|
||||||
HTSEXT_API void qsec2str(char *st, TStamp t);
|
HTSEXT_API void qsec2str(char *st, TStamp t);
|
||||||
|
|
||||||
|
/* The int2* helpers below write into the caller-supplied strc and return
|
||||||
|
pointers into it. No allocation happens; the result is valid only until strc
|
||||||
|
is reused, and a given strc is not reentrant. Use one strc per
|
||||||
|
concurrently-live result. */
|
||||||
|
/** Format @p n as a decimal string into @p strc and return it. */
|
||||||
HTSEXT_API char *int2char(strc_int2bytes2 * strc, int n);
|
HTSEXT_API char *int2char(strc_int2bytes2 * strc, int n);
|
||||||
|
|
||||||
|
/** Format byte count @p n as "<num><unit>" (B/KiB/MiB/GiB and so on) into
|
||||||
|
@p strc and return it. */
|
||||||
HTSEXT_API char *int2bytes(strc_int2bytes2 * strc, LLint n);
|
HTSEXT_API char *int2bytes(strc_int2bytes2 * strc, LLint n);
|
||||||
|
|
||||||
|
/** Format a transfer rate @p n as "<num><unit>/s" into @p strc and return it.
|
||||||
|
*/
|
||||||
HTSEXT_API char *int2bytessec(strc_int2bytes2 * strc, long int n);
|
HTSEXT_API char *int2bytessec(strc_int2bytes2 * strc, long int n);
|
||||||
|
|
||||||
|
/** Split byte count @p n into number and unit, returning a 2-element array
|
||||||
|
{number, unit} stored inside @p strc. */
|
||||||
HTSEXT_API char **int2bytes2(strc_int2bytes2 * strc, LLint n);
|
HTSEXT_API char **int2bytes2(strc_int2bytes2 * strc, LLint n);
|
||||||
|
|
||||||
|
/** Skip any "user[:pass]@" identification prefix in a URL, returning a pointer
|
||||||
|
into the argument past it (or past the protocol if none). The result aliases
|
||||||
|
the input string. */
|
||||||
HTSEXT_API char *jump_identification(char *);
|
HTSEXT_API char *jump_identification(char *);
|
||||||
|
|
||||||
HTSEXT_API const char *jump_identification_const(const char *);
|
HTSEXT_API const char *jump_identification_const(const char *);
|
||||||
|
|
||||||
|
/** Like jump_identification() and also strip a leading "www." host prefix,
|
||||||
|
returning a pointer into the input to the normalized host. */
|
||||||
HTSEXT_API char *jump_normalized(char *);
|
HTSEXT_API char *jump_normalized(char *);
|
||||||
|
|
||||||
HTSEXT_API const char *jump_normalized_const(const char *);
|
HTSEXT_API const char *jump_normalized_const(const char *);
|
||||||
|
|
||||||
|
/** Return a pointer (into the input) to the ":port" part of a URL host, or NULL
|
||||||
|
if there is no explicit port. Handles bracketed IPv6 literals. */
|
||||||
HTSEXT_API char *jump_toport(char *);
|
HTSEXT_API char *jump_toport(char *);
|
||||||
|
|
||||||
HTSEXT_API const char *jump_toport_const(const char *);
|
HTSEXT_API const char *jump_toport_const(const char *);
|
||||||
|
|
||||||
|
/** Canonicalize a URL path into @p dest: collapse duplicate '/' and sort the
|
||||||
|
query-string arguments, so "?b=2&a=1" and "?a=1&b=2" compare equal. Returns
|
||||||
|
@p dest, a caller buffer of at least strlen(source)+1 bytes (the output is
|
||||||
|
never longer than the input). */
|
||||||
HTSEXT_API char *fil_normalized(const char *source, char *dest);
|
HTSEXT_API char *fil_normalized(const char *source, char *dest);
|
||||||
|
|
||||||
|
/** Write the normalized host of @p source (identification and "www." stripped)
|
||||||
|
into @p dest, truncated to @p destsize. Returns @p dest. */
|
||||||
HTSEXT_API char *adr_normalized_sized(const char *source, char *dest,
|
HTSEXT_API char *adr_normalized_sized(const char *source, char *dest,
|
||||||
size_t destsize);
|
size_t destsize);
|
||||||
|
|
||||||
|
/** @deprecated Use adr_normalized_sized(). This form has no destination size
|
||||||
|
and assumes @p dest is the engine URL buffer of HTS_URLMAXSIZE*2 bytes; a
|
||||||
|
smaller buffer can overflow. */
|
||||||
HTS_DEPRECATED("use adr_normalized_sized(source, dest, destsize)")
|
HTS_DEPRECATED("use adr_normalized_sized(source, dest, destsize)")
|
||||||
|
|
||||||
HTSEXT_API char *adr_normalized(const char *source, char *dest);
|
HTSEXT_API char *adr_normalized(const char *source, char *dest);
|
||||||
|
|
||||||
|
/** Get or set the process executable root directory (with trailing '/'). The
|
||||||
|
first call with non-NULL @p file initializes it and returns NULL; later
|
||||||
|
initialization calls are ignored. Call with NULL to query: returns the
|
||||||
|
stored directory, or "" if never set. The result is a static internal buffer;
|
||||||
|
do not free it, and do not set it from multiple threads. */
|
||||||
HTSEXT_API const char *hts_rootdir(char *file);
|
HTSEXT_API const char *hts_rootdir(char *file);
|
||||||
|
|
||||||
/* Escaping URLs */
|
/* Escaping URLs */
|
||||||
|
/*
|
||||||
|
* Size contract shared by the escape/unescape family below.
|
||||||
|
* For the escape_* / append_escape_* / inplace_escape_* /
|
||||||
|
* escape_for_html_print* / make_content_id / x_escape_http functions, `size` is
|
||||||
|
* the total capacity of `dest` including the terminating NUL. The size_t return
|
||||||
|
* is the number of bytes written, NOT counting the NUL; on overflow it returns
|
||||||
|
* `size` and `dest` is still NUL-terminated (truncated). Passing sizeof(a
|
||||||
|
* pointer) as the size trips a runtime assert. The unescape_http* functions
|
||||||
|
* instead return `dest` (the catbuff pointer) and truncate to fit `size`.
|
||||||
|
*/
|
||||||
|
/** Decode HTML entities in @p s in place (for example "&amp;" becomes "&"). */
|
||||||
HTSEXT_API void unescape_amp(char *s);
|
HTSEXT_API void unescape_amp(char *s);
|
||||||
|
|
||||||
|
/** Percent-escape only spaces (' ' becomes "%20"); copy everything else
|
||||||
|
* verbatim. */
|
||||||
HTSEXT_API size_t escape_spc_url(const char *const src, char *const dest, const size_t size);
|
HTSEXT_API size_t escape_spc_url(const char *const src, char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** Aggressively percent-escape @p src for use as a single URL path segment
|
||||||
|
(reserved, delimiter, unwise, special, avoid and mark characters). */
|
||||||
HTSEXT_API size_t escape_in_url(const char *const src, char *const dest, const size_t size);
|
HTSEXT_API size_t escape_in_url(const char *const src, char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** Percent-escape @p src as a URI, escaping only what is necessary and keeping
|
||||||
|
'/' and other reserved characters. */
|
||||||
HTSEXT_API size_t escape_uri(const char *const src, char *const dest, const size_t size);
|
HTSEXT_API size_t escape_uri(const char *const src, char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** Like escape_uri() for a UTF-8 URI: also escapes reserved characters other
|
||||||
|
than '/'. */
|
||||||
HTSEXT_API size_t escape_uri_utf(const char *const src, char *const dest, const size_t size);
|
HTSEXT_API size_t escape_uri_utf(const char *const src, char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** Minimal "make safe" escape: percent-escapes only '"', ' ' and control
|
||||||
|
characters, leaving an already-formed URL otherwise intact. */
|
||||||
HTSEXT_API size_t escape_check_url(const char *const src, char *const dest, const size_t size);
|
HTSEXT_API size_t escape_check_url(const char *const src, char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** Append-variant of escape_spc_url(): escapes @p src after the existing
|
||||||
|
NUL-terminated content of @p dest. Returns the bytes appended (excluding the
|
||||||
|
NUL). */
|
||||||
HTSEXT_API size_t append_escape_spc_url(const char *const src, char *const dest, const size_t size);
|
HTSEXT_API size_t append_escape_spc_url(const char *const src, char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** Append-variant of escape_in_url(). See append_escape_spc_url(). */
|
||||||
HTSEXT_API size_t append_escape_in_url(const char *const src, char *const dest, const size_t size);
|
HTSEXT_API size_t append_escape_in_url(const char *const src, char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** Append-variant of escape_uri(). See append_escape_spc_url(). */
|
||||||
HTSEXT_API size_t append_escape_uri(const char *const src, char *const dest, const size_t size);
|
HTSEXT_API size_t append_escape_uri(const char *const src, char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** Append-variant of escape_uri_utf(). See append_escape_spc_url(). */
|
||||||
HTSEXT_API size_t append_escape_uri_utf(const char *const src, char *const dest, const size_t size);
|
HTSEXT_API size_t append_escape_uri_utf(const char *const src, char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** Append-variant of escape_check_url(). See append_escape_spc_url(). */
|
||||||
HTSEXT_API size_t append_escape_check_url(const char *const src, char *const dest, const size_t size);
|
HTSEXT_API size_t append_escape_check_url(const char *const src, char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** In-place variant of escape_spc_url(): escapes the NUL-terminated string in
|
||||||
|
@p dest back into @p dest. */
|
||||||
HTSEXT_API size_t inplace_escape_spc_url(char *const dest, const size_t size);
|
HTSEXT_API size_t inplace_escape_spc_url(char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** In-place variant of escape_in_url(). See inplace_escape_spc_url(). */
|
||||||
HTSEXT_API size_t inplace_escape_in_url(char *const dest, const size_t size);
|
HTSEXT_API size_t inplace_escape_in_url(char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** In-place variant of escape_uri(). See inplace_escape_spc_url(). */
|
||||||
HTSEXT_API size_t inplace_escape_uri(char *const dest, const size_t size);
|
HTSEXT_API size_t inplace_escape_uri(char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** In-place variant of escape_uri_utf(). See inplace_escape_spc_url(). */
|
||||||
HTSEXT_API size_t inplace_escape_uri_utf(char *const dest, const size_t size);
|
HTSEXT_API size_t inplace_escape_uri_utf(char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** In-place variant of escape_check_url(). See inplace_escape_spc_url(). */
|
||||||
HTSEXT_API size_t inplace_escape_check_url(char *const dest, const size_t size);
|
HTSEXT_API size_t inplace_escape_check_url(char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** Same escaping as escape_check_url() but returns @p dest instead of the byte
|
||||||
|
count. */
|
||||||
HTSEXT_API char *escape_check_url_addr(const char *const src, char *const dest, const size_t size);
|
HTSEXT_API char *escape_check_url_addr(const char *const src, char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** Build a MIME/MHTML content-id token in @p dest from @p adr and @p fil:
|
||||||
|
escape_in_url() both, then replace every '%' with 'X' so the result is one
|
||||||
|
opaque token. */
|
||||||
HTSEXT_API size_t make_content_id(const char *const adr, const char *const fil, char *const dest, const size_t size);
|
HTSEXT_API size_t make_content_id(const char *const adr, const char *const fil, char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** Low-level percent-escaper backing the escape_* family. @p mode selects the
|
||||||
|
character class to escape: 0 check_url, 1 in_url, 2 spc_url, 3 uri,
|
||||||
|
30 uri_utf. @p max_size is the dest capacity including the NUL. */
|
||||||
HTSEXT_API size_t x_escape_http(const char *const s, char *const dest, const size_t max_size, const int mode);
|
HTSEXT_API size_t x_escape_http(const char *const s, char *const dest, const size_t max_size, const int mode);
|
||||||
|
|
||||||
|
/** Strip all control characters (byte value < 32) from @p s in place. */
|
||||||
HTSEXT_API void escape_remove_control(char *const s);
|
HTSEXT_API void escape_remove_control(char *const s);
|
||||||
|
|
||||||
|
/** HTML-escape for text output: rewrite '&' to "&amp;" and pass every other
|
||||||
|
byte through unchanged. */
|
||||||
HTSEXT_API size_t escape_for_html_print(const char *const s, char *const dest, const size_t size);
|
HTSEXT_API size_t escape_for_html_print(const char *const s, char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** Like escape_for_html_print() but also convert every high byte (>= 128) to a
|
||||||
|
numeric entity "&#xNN;". */
|
||||||
HTSEXT_API size_t escape_for_html_print_full(const char *const s, char *const dest, const size_t size);
|
HTSEXT_API size_t escape_for_html_print_full(const char *const s, char *const dest, const size_t size);
|
||||||
|
|
||||||
|
/** Percent-decode @p s into @p catbuff (capacity @p size) and return @p
|
||||||
|
catbuff. Decodes every "%xx" hex escape. */
|
||||||
HTSEXT_API char *unescape_http(char *const catbuff, const size_t size, const char *const s);
|
HTSEXT_API char *unescape_http(char *const catbuff, const size_t size, const char *const s);
|
||||||
|
|
||||||
|
/** Percent-decode @p s into @p catbuff, but only the escapes that are safe to
|
||||||
|
decode while keeping a valid URI (reserved, delimiter, unwise, control and
|
||||||
|
must-avoid escapes are kept encoded, and %25 is never decoded). @p no_high &
|
||||||
|
1 also decodes high (>= 128) bytes; @p no_high & 2 also decodes an escaped
|
||||||
|
space. Returns @p catbuff. */
|
||||||
HTSEXT_API char *unescape_http_unharm(char *const catbuff, const size_t size, const char *s, const int no_high);
|
HTSEXT_API char *unescape_http_unharm(char *const catbuff, const size_t size, const char *s, const int no_high);
|
||||||
|
|
||||||
|
/** @warning No implementation is linked into the library; calling this fails to
|
||||||
|
link. */
|
||||||
HTSEXT_API char *antislash_unescaped(char *catbuff, const char *s);
|
HTSEXT_API char *antislash_unescaped(char *catbuff, const char *s);
|
||||||
|
|
||||||
HTSEXT_API void escape_remove_control(char *s);
|
HTSEXT_API void escape_remove_control(char *s);
|
||||||
|
|
||||||
|
/** Determine the MIME type of local file name @p fil into @p s (capacity
|
||||||
|
@p ssize): user --assume rules, then ".html", then the built-in extension
|
||||||
|
table. @p flag != 0 forces a fallback type. @return 1 if a type was written,
|
||||||
|
0 otherwise. */
|
||||||
HTSEXT_API int get_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
HTSEXT_API int get_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
||||||
const char *fil, int flag);
|
const char *fil, int flag);
|
||||||
|
|
||||||
|
/** @deprecated Use get_httptype_sized(). Assumes @p s has at least
|
||||||
|
HTS_MIMETYPE_SIZE capacity. */
|
||||||
HTS_DEPRECATED("use get_httptype_sized(opt, s, ssize, fil, flag)")
|
HTS_DEPRECATED("use get_httptype_sized(opt, s, ssize, fil, flag)")
|
||||||
|
|
||||||
HTSEXT_API void get_httptype(httrackp * opt, char *s, const char *fil,
|
HTSEXT_API void get_httptype(httrackp * opt, char *s, const char *fil,
|
||||||
int flag);
|
int flag);
|
||||||
|
|
||||||
|
/** Classify @p fil by its extension: 0 unknown, 1 known non-HTML, 2 known HTML.
|
||||||
|
Consults the built-in table then user --assume rules. 0 for a NULL @p fil.
|
||||||
|
*/
|
||||||
HTSEXT_API int is_knowntype(httrackp * opt, const char *fil);
|
HTSEXT_API int is_knowntype(httrackp * opt, const char *fil);
|
||||||
|
|
||||||
|
/** Like is_knowntype() but consults only the user --assume rules: 0 no rule,
|
||||||
|
1 non-HTML, 2 HTML. */
|
||||||
HTSEXT_API int is_userknowntype(httrackp * opt, const char *fil);
|
HTSEXT_API int is_userknowntype(httrackp * opt, const char *fil);
|
||||||
|
|
||||||
|
/** 1 if @p fil, an extension such as "asp" or "php" (not a full filename), is a
|
||||||
|
known dynamic-page type, else 0. */
|
||||||
HTSEXT_API int is_dyntype(const char *fil);
|
HTSEXT_API int is_dyntype(const char *fil);
|
||||||
|
|
||||||
|
/** Extract the extension of @p fil (text after the last '.', stopping at '?')
|
||||||
|
into caller scratch @p catbuff (capacity @p size) and return it. Returns ""
|
||||||
|
(a literal, not @p catbuff) when there is no extension or it does not fit.
|
||||||
|
*/
|
||||||
HTSEXT_API const char *get_ext(char *catbuff, size_t size, const char *fil);
|
HTSEXT_API const char *get_ext(char *catbuff, size_t size, const char *fil);
|
||||||
|
|
||||||
|
/** 1 if MIME type @p st must not be reclassified or renamed (hypertext types
|
||||||
|
and a built-in keep-list of commonly mislabeled types), else 0. */
|
||||||
HTSEXT_API int may_unknown(httrackp * opt, const char *st);
|
HTSEXT_API int may_unknown(httrackp * opt, const char *st);
|
||||||
|
|
||||||
|
/** Guess the MIME type of local file @p fil into @p s (capacity @p ssize),
|
||||||
|
always producing a type. @return 1 if a type was written. */
|
||||||
HTSEXT_API int guess_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
HTSEXT_API int guess_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
||||||
const char *fil);
|
const char *fil);
|
||||||
|
|
||||||
|
/** @deprecated Use guess_httptype_sized(). Assumes @p s has at least
|
||||||
|
HTS_MIMETYPE_SIZE capacity. */
|
||||||
HTS_DEPRECATED("use guess_httptype_sized(opt, s, ssize, fil)")
|
HTS_DEPRECATED("use guess_httptype_sized(opt, s, ssize, fil)")
|
||||||
|
|
||||||
HTSEXT_API void guess_httptype(httrackp * opt, char *s, const char *fil);
|
HTSEXT_API void guess_httptype(httrackp * opt, char *s, const char *fil);
|
||||||
|
|
||||||
/* Ugly string tools */
|
/* Ugly string tools */
|
||||||
|
/* These take a caller scratch buffer catbuff of capacity size and return it. On
|
||||||
|
overflow they stop without writing past size and return the truncated buffer.
|
||||||
|
size must be a real array sizeof (the macros below check this at compile
|
||||||
|
time), not a pointer. */
|
||||||
|
/** Concatenate @p a and @p b into @p catbuff (NULL or empty operands are
|
||||||
|
* skipped). */
|
||||||
HTSEXT_API char *concat(char *catbuff, size_t size, const char *a, const char *b);
|
HTSEXT_API char *concat(char *catbuff, size_t size, const char *a, const char *b);
|
||||||
|
|
||||||
|
/** Like concat(a, b) but convert '/' to the platform path separator (Windows).
|
||||||
|
*/
|
||||||
HTSEXT_API char *fconcat(char *catbuff, size_t size, const char *a, const char *b);
|
HTSEXT_API char *fconcat(char *catbuff, size_t size, const char *a, const char *b);
|
||||||
|
|
||||||
|
/** Copy @p a into @p catbuff, converting '/' to the platform path separator
|
||||||
|
(Windows). */
|
||||||
HTSEXT_API char *fconv(char *catbuff, size_t size, const char *a);
|
HTSEXT_API char *fconv(char *catbuff, size_t size, const char *a);
|
||||||
|
|
||||||
|
/** Copy @p a into @p catbuff, converting every '\\' to '/' on all platforms. */
|
||||||
HTSEXT_API char *fslash(char *catbuff, size_t size, const char *a);
|
HTSEXT_API char *fslash(char *catbuff, size_t size, const char *a);
|
||||||
|
|
||||||
/* Debugging */
|
/* Debugging */
|
||||||
|
/** Set the process-global debug verbosity (0 is off); higher levels log more to
|
||||||
|
stderr. Bit 0x80 redirects debug output to "hts-debug.txt". */
|
||||||
HTSEXT_API void hts_debug(int level);
|
HTSEXT_API void hts_debug(int level);
|
||||||
|
|
||||||
/* Portable directory API */
|
/* Portable directory API */
|
||||||
@@ -276,6 +668,7 @@ HTSEXT_API void hts_debug(int level);
|
|||||||
#ifndef HTS_DEF_FWSTRUCT_find_handle_struct
|
#ifndef HTS_DEF_FWSTRUCT_find_handle_struct
|
||||||
#define HTS_DEF_FWSTRUCT_find_handle_struct
|
#define HTS_DEF_FWSTRUCT_find_handle_struct
|
||||||
typedef struct find_handle_struct find_handle_struct;
|
typedef struct find_handle_struct find_handle_struct;
|
||||||
|
|
||||||
typedef find_handle_struct *find_handle;
|
typedef find_handle_struct *find_handle;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -283,22 +676,53 @@ typedef find_handle_struct *find_handle;
|
|||||||
#define HTS_DEF_FWSTRUCT_topindex_chain
|
#define HTS_DEF_FWSTRUCT_topindex_chain
|
||||||
typedef struct topindex_chain topindex_chain;
|
typedef struct topindex_chain topindex_chain;
|
||||||
#endif
|
#endif
|
||||||
|
/** One node of the index/category listing built when generating the top index.
|
||||||
|
*/
|
||||||
struct topindex_chain {
|
struct topindex_chain {
|
||||||
int level; /* sort level */
|
int level; /**< sort level */
|
||||||
char *category; /* category */
|
char *category; /**< category (heap string) */
|
||||||
char name[2048]; /* path */
|
char name[2048]; /**< path */
|
||||||
struct topindex_chain *next; /* next element */
|
struct topindex_chain *next; /**< next element */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Open directory @p path for iteration, positioned on the first entry. Returns
|
||||||
|
an opaque handle to free with hts_findclose(), or NULL on empty path or open
|
||||||
|
failure. */
|
||||||
HTSEXT_API find_handle hts_findfirst(char *path);
|
HTSEXT_API find_handle hts_findfirst(char *path);
|
||||||
|
|
||||||
|
/** Advance to the next directory entry. Returns 1 if an entry is available, 0
|
||||||
|
at end of directory. */
|
||||||
HTSEXT_API int hts_findnext(find_handle find);
|
HTSEXT_API int hts_findnext(find_handle find);
|
||||||
|
|
||||||
|
/** Close the iteration and free @p find. Always returns 0; NULL is accepted. */
|
||||||
HTSEXT_API int hts_findclose(find_handle find);
|
HTSEXT_API int hts_findclose(find_handle find);
|
||||||
|
|
||||||
|
/** Name of the current entry, or NULL. Points into the handle's storage; valid
|
||||||
|
only until the next hts_findnext()/hts_findclose(). */
|
||||||
HTSEXT_API char *hts_findgetname(find_handle find);
|
HTSEXT_API char *hts_findgetname(find_handle find);
|
||||||
|
|
||||||
|
/** Size in bytes of the current entry, or -1. Truncated to int, so unreliable
|
||||||
|
for files larger than 2 GB. */
|
||||||
HTSEXT_API int hts_findgetsize(find_handle find);
|
HTSEXT_API int hts_findgetsize(find_handle find);
|
||||||
|
|
||||||
|
/** 1 if the current entry is a directory, else 0 (a system/special entry, see
|
||||||
|
hts_findissystem(), reports 0). */
|
||||||
HTSEXT_API int hts_findisdir(find_handle find);
|
HTSEXT_API int hts_findisdir(find_handle find);
|
||||||
|
|
||||||
|
/** 1 if the current entry is a regular file, else 0 (a system/special entry,
|
||||||
|
see hts_findissystem(), reports 0). */
|
||||||
HTSEXT_API int hts_findisfile(find_handle find);
|
HTSEXT_API int hts_findisfile(find_handle find);
|
||||||
|
|
||||||
|
/** 1 if the current entry is a special/system entry to skip: "." or "..", on
|
||||||
|
POSIX also device/fifo/socket nodes, on Windows also system, hidden or
|
||||||
|
temporary entries. Else 0. */
|
||||||
HTSEXT_API int hts_findissystem(find_handle find);
|
HTSEXT_API int hts_findissystem(find_handle find);
|
||||||
|
|
||||||
/* UTF-8 aware FILE API */
|
/* UTF-8 aware FILE API */
|
||||||
|
/* On non-Windows these macros resolve directly to the POSIX calls. On Windows
|
||||||
|
they map to the hts_*_utf8 wrappers below, which convert the UTF-8 path to
|
||||||
|
UTF-16 and call the wide CRT, falling back to the narrow CRT if conversion
|
||||||
|
fails. Always pass UTF-8 paths through these. */
|
||||||
#ifndef HTS_DEF_FILEAPI
|
#ifndef HTS_DEF_FILEAPI
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#define FOPEN hts_fopen_utf8
|
#define FOPEN hts_fopen_utf8
|
||||||
@@ -306,6 +730,7 @@ HTSEXT_API FILE *hts_fopen_utf8(const char *path, const char *mode);
|
|||||||
|
|
||||||
#define STAT hts_stat_utf8
|
#define STAT hts_stat_utf8
|
||||||
typedef struct _stat STRUCT_STAT;
|
typedef struct _stat STRUCT_STAT;
|
||||||
|
|
||||||
HTSEXT_API int hts_stat_utf8(const char *path, STRUCT_STAT * buf);
|
HTSEXT_API int hts_stat_utf8(const char *path, STRUCT_STAT * buf);
|
||||||
|
|
||||||
#define UNLINK hts_unlink_utf8
|
#define UNLINK hts_unlink_utf8
|
||||||
@@ -315,10 +740,13 @@ HTSEXT_API int hts_unlink_utf8(const char *pathname);
|
|||||||
HTSEXT_API int hts_rename_utf8(const char *oldpath, const char *newpath);
|
HTSEXT_API int hts_rename_utf8(const char *oldpath, const char *newpath);
|
||||||
|
|
||||||
#define MKDIR(F) hts_mkdir_utf8(F)
|
#define MKDIR(F) hts_mkdir_utf8(F)
|
||||||
|
|
||||||
HTSEXT_API int hts_mkdir_utf8(const char *pathname);
|
HTSEXT_API int hts_mkdir_utf8(const char *pathname);
|
||||||
|
|
||||||
#define UTIME(A,B) hts_utime_utf8(A,B)
|
#define UTIME(A,B) hts_utime_utf8(A,B)
|
||||||
|
|
||||||
typedef struct _utimbuf STRUCT_UTIMBUF;
|
typedef struct _utimbuf STRUCT_UTIMBUF;
|
||||||
|
|
||||||
HTSEXT_API int hts_utime_utf8(const char *filename,
|
HTSEXT_API int hts_utime_utf8(const char *filename,
|
||||||
const STRUCT_UTIMBUF * times);
|
const STRUCT_UTIMBUF * times);
|
||||||
#else
|
#else
|
||||||
@@ -329,6 +757,7 @@ typedef struct stat STRUCT_STAT;
|
|||||||
#define UNLINK unlink
|
#define UNLINK unlink
|
||||||
#define RENAME rename
|
#define RENAME rename
|
||||||
#define MKDIR(F) mkdir(F, HTS_ACCESS_FOLDER)
|
#define MKDIR(F) mkdir(F, HTS_ACCESS_FOLDER)
|
||||||
|
|
||||||
typedef struct utimbuf STRUCT_UTIMBUF;
|
typedef struct utimbuf STRUCT_UTIMBUF;
|
||||||
|
|
||||||
#define UTIME(A,B) utime(A,B)
|
#define UTIME(A,B) utime(A,B)
|
||||||
@@ -336,19 +765,22 @@ typedef struct utimbuf STRUCT_UTIMBUF;
|
|||||||
#define HTS_DEF_FILEAPI
|
#define HTS_DEF_FILEAPI
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** Macro aimed to break at build-time if a size is not a sizeof() strictly
|
/** Macro aimed to break at build-time if a size is not a sizeof() strictly
|
||||||
* greater than sizeof(char*). **/
|
* greater than sizeof(char*). **/
|
||||||
#undef COMPILE_TIME_CHECK_SIZE
|
#undef COMPILE_TIME_CHECK_SIZE
|
||||||
#define COMPILE_TIME_CHECK_SIZE(A) (void) ((void (*)(char[A - sizeof(char*) - 1])) NULL)
|
#define COMPILE_TIME_CHECK_SIZE(A) (void) ((void (*)(char[A - sizeof(char*) - 1])) NULL)
|
||||||
|
|
||||||
/** Macro aimed to break at run-time if a size is equal to sizeof(void*), i.e.
|
/** Macro aimed to break at run-time if a size is equal to sizeof(void*), i.e.
|
||||||
* most likely the sizeof() of a pointer rather than of an array. **/
|
* most likely the sizeof() of a pointer rather than of an array. **/
|
||||||
#undef RUNTIME_TIME_CHECK_SIZE
|
#undef RUNTIME_TIME_CHECK_SIZE
|
||||||
#define RUNTIME_TIME_CHECK_SIZE(A) assertf((A) != sizeof(void*))
|
#define RUNTIME_TIME_CHECK_SIZE(A) assertf((A) != sizeof(void*))
|
||||||
|
|
||||||
#define fconv(A,B,C) (COMPILE_TIME_CHECK_SIZE(B), fconv(A,B,C))
|
#define fconv(A,B,C) (COMPILE_TIME_CHECK_SIZE(B), fconv(A,B,C))
|
||||||
|
|
||||||
#define concat(A,B,C,D) (COMPILE_TIME_CHECK_SIZE(B), concat(A,B,C,D))
|
#define concat(A,B,C,D) (COMPILE_TIME_CHECK_SIZE(B), concat(A,B,C,D))
|
||||||
|
|
||||||
#define fconcat(A,B,C,D) (COMPILE_TIME_CHECK_SIZE(B), fconcat(A,B,C,D))
|
#define fconcat(A,B,C,D) (COMPILE_TIME_CHECK_SIZE(B), fconcat(A,B,C,D))
|
||||||
|
|
||||||
#define fslash(A,B,C) (COMPILE_TIME_CHECK_SIZE(B), fslash(A,B,C))
|
#define fslash(A,B,C) (COMPILE_TIME_CHECK_SIZE(B), fslash(A,B,C))
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
Reference in New Issue
Block a user