Files
httrack/src/proxy/store.c
Fredrik Fornwall e751159a38 Fix Android build
The NDK headers nowadays declare timezone in time.h, so trying to redefine it
causes the build to fail with:

proxy/store.c:34:18: error: static declaration of 'timezone' follows non-static declaration
static long int  timezone = 0;
                 ^
include/time.h:42:17: note: previous declaration is here
extern long int timezone;
2019-12-12 02:05:03 +01:00

2378 lines
75 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/* ------------------------------------------------------------ */
/*
HTTrack Website Copier, Offline Browser for Windows and Unix
Copyright (C) 1998-2017 Xavier Roche and other contributors
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Please visit our Website: http://www.httrack.com
*/
/* Parts (inside ARC format routines) by Lars Clausen (lc@statsbiblioteket.dk) */
/* ------------------------------------------------------------ */
/* File: Cache manager for ProxyTrack */
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/* Locking */
#ifdef _WIN32
#include <process.h> /* _beginthread, _endthread */
#else
#include <pthread.h>
#endif
#define HTSSAFE_ABORT_FUNCTION(A,B,C)
#include "htsglobal.h"
#define HTS_INTERNAL_BYTECODE
#include "coucal.h"
#include "htsmd5.h"
#undef HTS_INTERNAL_BYTECODE
#include "../minizip/mztools.h"
#include "../minizip/zip.h"
#include "htscore.h"
#include "htsback.h"
#include "store.h"
#include "proxystrings.h"
#include "proxytrack.h"
/* Unlocked functions */
/* Forward declarations of the "_u" (unlocked) lookup/read routines, one pair
   per cache format. The "New" wrappers defined in this file take the index
   mutex and then call the matching _u function; presumably the Old and Arc
   wrappers do the same (their definitions are not in this part of the file). */
static int PT_LookupCache__New_u(PT_Index index, const char *url);
static PT_Element PT_ReadCache__New_u(PT_Index index, const char *url,
int flags);
static int PT_LookupCache__Old_u(PT_Index index, const char *url);
static PT_Element PT_ReadCache__Old_u(PT_Index index, const char *url,
int flags);
static int PT_LookupCache__Arc_u(PT_Index index, const char *url);
static PT_Element PT_ReadCache__Arc_u(PT_Index index, const char *url,
int flags);
/* Locking */
#ifdef _WIN32
/* Win32 flavour: PT_Mutex holds a kernel mutex handle. */

/* Create an unnamed, initially unowned mutex. */
void MutexInit(PT_Mutex * pMutex) {
  *pMutex = CreateMutex(NULL, FALSE, NULL);
}

/* Block until the mutex is acquired. */
void MutexLock(PT_Mutex * pMutex) {
  (void) WaitForSingleObject(*pMutex, INFINITE);
}

/* Release a mutex previously acquired with MutexLock(). */
void MutexUnlock(PT_Mutex * pMutex) {
  (void) ReleaseMutex(*pMutex);
}

/* Close the handle and reset the slot. */
void MutexFree(PT_Mutex * pMutex) {
  (void) CloseHandle(*pMutex);
  *pMutex = NULL;
}
#else
/* POSIX flavour: thin wrappers over pthread mutexes. */

/* Initialize the mutex with default attributes. */
void MutexInit(PT_Mutex * pMutex) {
  (void) pthread_mutex_init(pMutex, NULL);
}

/* Block until the mutex is acquired. */
void MutexLock(PT_Mutex * pMutex) {
  (void) pthread_mutex_lock(pMutex);
}

/* Release a mutex previously acquired with MutexLock(). */
void MutexUnlock(PT_Mutex * pMutex) {
  (void) pthread_mutex_unlock(pMutex);
}

/* Destroy the mutex. */
void MutexFree(PT_Mutex * pMutex) {
  (void) pthread_mutex_destroy(pMutex);
}
#endif
/* Indexes */
/* Structure and pointer aliases for the three per-format index layouts and
   for the per-format function table. */
typedef struct _PT_Index__New _PT_Index__New;
typedef struct _PT_Index__Old _PT_Index__Old;
typedef struct _PT_Index__Arc _PT_Index__Arc;
typedef struct _PT_Index_Functions _PT_Index_Functions;
typedef struct _PT_Index__New *PT_Index__New;
typedef struct _PT_Index__Old *PT_Index__Old;
typedef struct _PT_Index__Arc *PT_Index__Arc;
typedef struct _PT_Index_Functions *PT_Index_Functions;
/* Cache format identifiers. Valid values span PT_CACHE_MIN..PT_CACHE_MAX and
   are used directly as subscripts into _IndexFuncts[]. */
enum {
PT_CACHE_UNDEFINED = -1,
PT_CACHE_MIN = 0,
PT_CACHE__NEW = PT_CACHE_MIN,
PT_CACHE__OLD,
PT_CACHE__ARC,
PT_CACHE_MAX = PT_CACHE__ARC
};
/* Per-format service prototypes; these are the functions stored in the
   _IndexFuncts[] dispatch table. */
/* "New" format (selected for .zip files by PT_GetType) */
static int PT_LoadCache__New(PT_Index index, const char *filename);
static void PT_Index_Delete__New(PT_Index * pindex);
static PT_Element PT_ReadCache__New(PT_Index index, const char *url, int flags);
static int PT_LookupCache__New(PT_Index index, const char *url);
static int PT_SaveCache__New(PT_Indexes indexes, const char *filename);
/* "Old" format (selected for .ndx/.dat files); no save service */
/**/ static int PT_LoadCache__Old(PT_Index index, const char *filename);
static void PT_Index_Delete__Old(PT_Index * pindex);
static PT_Element PT_ReadCache__Old(PT_Index index, const char *url, int flags);
static int PT_LookupCache__Old(PT_Index index, const char *url);
/* "Arc" format (selected for .arc files) */
/**/ static int PT_LoadCache__Arc(PT_Index index, const char *filename);
static void PT_Index_Delete__Arc(PT_Index * pindex);
static PT_Element PT_ReadCache__Arc(PT_Index index, const char *url, int flags);
static int PT_LookupCache__Arc(PT_Index index, const char *url);
static int PT_SaveCache__Arc(PT_Indexes indexes, const char *filename);
/* Per-format service table: one entry per cache format. */
struct _PT_Index_Functions {
/* Mandatory services */
int (*PT_LoadCache) (PT_Index index, const char *filename);
void (*PT_Index_Delete) (PT_Index * pindex);
PT_Element(*PT_ReadCache) (PT_Index index, const char *url, int flags);
int (*PT_LookupCache) (PT_Index index, const char *url);
/* Optional services */
/* May be NULL (PT_SaveCache() checks before calling) */
int (*PT_SaveCache) (PT_Indexes indexes, const char *filename);
};
/* Dispatch table, indexed by the PT_CACHE__* value stored in _PT_Index.type.
   The order of the rows must therefore match the enum order (NEW, OLD, ARC).
   The last row is an all-NULL terminator (the unlisted fifth member is
   zero-initialized). */
static _PT_Index_Functions _IndexFuncts[] = {
{PT_LoadCache__New, PT_Index_Delete__New, PT_ReadCache__New,
PT_LookupCache__New, PT_SaveCache__New},
{PT_LoadCache__Old, PT_Index_Delete__Old, PT_ReadCache__Old,
PT_LookupCache__Old, NULL},
{PT_LoadCache__Arc, PT_Index_Delete__Arc, PT_ReadCache__Arc,
PT_LookupCache__Arc, PT_SaveCache__Arc},
{NULL, NULL, NULL, NULL}
};
/* Members shared by every index layout. They are placed first in each
   per-format structure so that they can also be reached through
   _PT_Index.slots.common regardless of the actual format:
   - timestamp: set to time(NULL) by PT_LoadCache(); the "New" loader then
     replaces it with the cache file modification time
   - hash: URL hashtable created by PT_LoadCache()
   - startUrl: first URL of the cache, listed by the root catalog page */
#define PT_INDEX_COMMON_STRUCTURE \
time_t timestamp; \
coucal hash; \
char startUrl[1024]
/* Index state for the "New" (zip) cache format */
struct _PT_Index__New {
PT_INDEX_COMMON_STRUCTURE;
char path[1024]; /* either empty, or must include ending / */
/* offset of the relative part inside bogus absolute X-Save paths,
   guessed once by PT_ReadCache__New_u (0 = not guessed yet) */
int fixedPath;
/* set to 1 once a relative X-Save path has been seen */
int safeCache;
/* minizip handle, opened by PT_LoadCache__New */
unzFile zFile;
/* serializes accesses to zFile */
PT_Mutex zFileLock;
};
/* Index state for the "Old" (.ndx/.dat) cache format; the code using these
   members is outside this part of the file */
struct _PT_Index__Old {
PT_INDEX_COMMON_STRUCTURE;
char filenameDat[1024];
char filenameNdx[1024];
FILE *dat, *ndx;
PT_Mutex fileLock;
int version;
char lastmodified[1024];
char path[1024]; /* either empty, or must include ending / */
int fixedPath;
int safeCache;
};
/* Index state for the "Arc" cache format; the code using these members is
   outside this part of the file */
struct _PT_Index__Arc {
PT_INDEX_COMMON_STRUCTURE;
FILE *file;
PT_Mutex fileLock;
int version;
char lastmodified[1024];
char line[2048];
char filenameIndexBuff[2048];
};
/* Generic index: a format tag plus the format-specific state */
struct _PT_Index {
/* one of PT_CACHE__NEW, PT_CACHE__OLD, PT_CACHE__ARC */
int type;
union {
_PT_Index__New formatNew;
_PT_Index__Old formatOld;
_PT_Index__Arc formatArc;
/* format-independent view of the leading common members */
struct {
PT_INDEX_COMMON_STRUCTURE;
} common;
} slots;
};
/* Collection of merged indexes */
struct _PT_Indexes {
/* central index locator: maps an URL (without "http://") to a position
   in index[] */
coucal cil;
/* array of index_size index pointers, grown by PT_IndexMerge() */
struct _PT_Index **index;
int index_size;
};
/* NOTE(review): _PT_CacheItem and _PT_Cache are not referenced in the visible
   part of this file; the member descriptions below are taken from the names
   only and should be confirmed against their users. */
struct _PT_CacheItem {
time_t lastUsed;
size_t size;
void *data;
};
struct _PT_Cache {
coucal index;
size_t maxSize;
size_t totalSize;
int count;
};
/* Create an empty index collection.
   Returns the new collection (to be released with PT_Delete()), or NULL if
   memory could not be allocated. */
PT_Indexes PT_New(void) {
  PT_Indexes index = (PT_Indexes) calloc(1, sizeof(_PT_Indexes));

  if (index == NULL)
    return NULL;

  /* central index locator (URL -> index position) */
  index->cil = coucal_new(0);
  if (index->cil == NULL) {
    free(index);
    return NULL;
  }
  coucal_set_name(index->cil, "index->cil");

  index->index_size = 0;
  index->index = NULL;
  return index;
}
/* Release an index collection created by PT_New(). NULL is accepted.
   The central locator and the array of index pointers are released.
   NOTE(review): the PT_Index objects referenced by the array are not deleted
   here (unchanged from the previous behavior) -- confirm who owns them at
   shutdown. */
void PT_Delete(PT_Indexes index) {
  if (index != NULL) {
    coucal_delete(&index->cil);
    /* array grown by PT_IndexMerge(); was previously leaked */
    free(index->index);
    index->index = NULL;
    index->index_size = 0;
    free(index);
  }
}
/* Placeholder: removing an index is not implemented; always returns 0 and
   leaves the collection untouched. */
int PT_RemoveIndex(PT_Indexes index, int indexId) {
  (void) index;
  (void) indexId;
  return 0;
}
/* Copy one line from buff into s, dropping every '\r'.
   Copying stops at '\n', at the end of the string, or once max characters
   have been stored; s is always NUL-terminated (so it must hold max + 1
   bytes).
   Returns the number of source characters consumed plus one, i.e. the offset
   to add to buff to skip the line and its terminator. Note that this is 1 for
   an empty string. */
static int binput(char *buff, char *s, int max) {
  int in = 0;
  int out = 0;

  for (; out < max; in++) {
    const char c = buff[in];

    if (c == '\0' || c == '\n')
      break;
    if (c != '\r')
      s[out++] = c;
  }
  s[out] = '\0';
  return in + 1;
}
/* Return the modification time of a file, or 0 when it cannot be stat'ed or
   when the reported time is 0 or (time_t) -1. */
static time_t file_timestamp(const char *file) {
  struct stat info;
  time_t mtime;

  if (stat(file, &info) != 0)
    return (time_t) 0;

  mtime = info.st_mtime;
  if (mtime == (time_t) 0 || mtime == (time_t) - 1)
    return (time_t) 0;
  return mtime;
}
/* Sanity check of an index handle.
   Returns 1 when index is non-NULL and its type tag is a known cache format,
   0 otherwise. A non-NULL index with an unknown tag is reported as a critical
   error together with the caller location (file, line). */
static int PT_Index_Check__(PT_Index index, const char *file, int line) {
  int known;

  if (index == NULL)
    return 0;

  known = (index->type >= PT_CACHE_MIN && index->type <= PT_CACHE_MAX);
  if (!known)
    proxytrack_print_log(CRITICAL, "index corrupted in memory at %s:%d", file,
                         line);
  return known;
}

/* Check an index, recording the current source location for the log */
#define SAFE_INDEX(index) PT_Index_Check__(index, __FILE__, __LINE__)
/* ------------------------------------------------------------ */
/* Generic cache dispatch */
/* ------------------------------------------------------------ */
/* Destroy an index and reset the caller's handle to NULL.
   The format-specific cleanup is only invoked when the type tag is valid; the
   index block itself is released in every case. */
void PT_Index_Delete(PT_Index * pindex) {
  PT_Index doomed;

  if (pindex == NULL || *pindex == NULL)
    return;

  doomed = *pindex;
  if (SAFE_INDEX(doomed))
    _IndexFuncts[doomed->type].PT_Index_Delete(pindex);
  free(doomed);
  *pindex = NULL;
}
/* Release the resources of a "New" (zip) index: zip handle, URL hashtable
   and mutex. The index block itself is freed by PT_Index_Delete(). */
static void PT_Index_Delete__New(PT_Index * pindex) {
  PT_Index__New zindex;

  if (pindex == NULL || *pindex == NULL)
    return;

  zindex = &(*pindex)->slots.formatNew;
  if (zindex->zFile != NULL) {
    unzClose(zindex->zFile);
    zindex->zFile = NULL;
  }
  if (zindex->hash != NULL) {
    coucal_delete(&zindex->hash);
    zindex->hash = NULL;
  }
  MutexFree(&zindex->zFileLock);
}
/* Release the resources of an "Old" (.dat/.ndx) index: both files, the URL
   hashtable and the mutex. The index block itself is freed by
   PT_Index_Delete(). */
static void PT_Index_Delete__Old(PT_Index * pindex) {
  PT_Index__Old oindex;

  if (pindex == NULL || *pindex == NULL)
    return;

  oindex = &(*pindex)->slots.formatOld;
  if (oindex->dat != NULL)
    fclose(oindex->dat);
  if (oindex->ndx != NULL)
    fclose(oindex->ndx);
  if (oindex->hash != NULL) {
    coucal_delete(&oindex->hash);
    oindex->hash = NULL;
  }
  MutexFree(&oindex->fileLock);
}
/* Release the resources of an "Arc" index: the archive file, the URL
   hashtable and the mutex. The index block itself is freed by
   PT_Index_Delete(). */
static void PT_Index_Delete__Arc(PT_Index * pindex) {
  if (pindex != NULL && (*pindex) != NULL) {
    PT_Index__Arc index = &(*pindex)->slots.formatArc;

    if (index->file != NULL) {
      fclose(index->file);
      index->file = NULL;
    }
    /* The hashtable is created by PT_LoadCache() for every format; it was
       released by the New and Old variants but leaked here. */
    if (index->hash != NULL) {
      coucal_delete(&index->hash);
      index->hash = NULL;
    }
    MutexFree(&index->fileLock);
  }
}
/* Load the cache file at path and merge it into indexes.
   Returns the value of PT_IndexMerge(), or -1 when the cache could not be
   loaded. If the merge did not take ownership of the loaded index, it is
   deleted here. */
int PT_AddIndex(PT_Indexes indexes, const char *path) {
  int merged;
  PT_Index loaded = PT_LoadCache(path);

  if (loaded == NULL)
    return -1;

  merged = PT_IndexMerge(indexes, &loaded);
  if (loaded != NULL)           /* not adopted by the collection */
    PT_Index_Delete(&loaded);
  return merged;
}
/* Build the HTML catalog page listing the start URL of every loaded index.
   Returns a new element (status HTTP_OK, text/html, iso-8859-1) whose body is
   the generated page, to be released with PT_Element_Delete(); returns NULL
   when indexes is NULL or when the element cannot be allocated.
   NOTE(review): start URLs are inserted verbatim, without HTML escaping. */
PT_Element PT_Index_HTML_BuildRootInfo(PT_Indexes indexes) {
  if (indexes != NULL) {
    PT_Element elt = PT_ElementNew();
    int i;
    String html = STRING_EMPTY;

    /* PT_ElementNew() returns NULL on memory exhaustion */
    if (elt == NULL)
      return NULL;

    StringClear(html);
    StringCat(html,
              "<html>" PROXYTRACK_COMMENT_HEADER
              DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES "<head>\r\n"
              "<title>ProxyTrack " PROXYTRACK_VERSION " Catalog</title>"
              "</head>\r\n" "<body>\r\n"
              "<h3>Available sites in this cache:</h3><br />" "<br />");
    StringCat(html, "<ul>\r\n");
    for(i = 0; i < indexes->index_size; i++) {
      /* only indexes with a known start URL are listed */
      if (indexes->index[i] != NULL
          && indexes->index[i]->slots.common.startUrl[0] != '\0') {
        const char *url = indexes->index[i]->slots.common.startUrl;

        StringCat(html, "<li>\r\n");
        StringCat(html, "<a href=\"");
        StringCat(html, url);
        StringCat(html, "\">");
        StringCat(html, url);
        StringCat(html, "</a>\r\n");
        StringCat(html, "</li>\r\n");
      }
    }
    StringCat(html, "</ul>\r\n");
    StringCat(html, "</body></html>\r\n");

    /* hand the buffer over to the element */
    elt->size = StringLength(html);
    elt->adr = StringAcquire(&html);
    elt->statuscode = HTTP_OK;
    strcpy(elt->charset, "iso-8859-1");
    strcpy(elt->contenttype, "text/html");
    strcpy(elt->msg, "OK");
    StringFree(html);
    return elt;
  }
  return NULL;
}
/* Like strchr(), but the search also ends at the first occurrence of stop.
   Returns a pointer to the first c found before any stop character (or the
   end of the string), NULL otherwise. */
static char *strchr_stop(char *str, char c, char stop) {
  char *p = str;

  while (*p != 0 && *p != stop && *p != c)
    p++;
  return (*p == c) ? p : NULL;
}
/* List the entries of the central index whose name starts with url (a leading
   "http://" in url is skipped first).
   When subtree is zero, only the first path component following the prefix is
   reported (up to the next '/', the search stopping at '?'), a trailing "/"
   being appended for folders, and duplicates are filtered out through a
   temporary hashtable. When subtree is non-zero the whole remainder of each
   name is reported.
   Returns a single heap block made of a NULL-terminated array of char*
   immediately followed by the strings they point to, or NULL if nothing
   matched or indexes is invalid. The block is released by
   PT_Enumerate_Delete(). */
char **PT_Enumerate(PT_Indexes indexes, const char *url, int subtree) {
// should be cached!
if (indexes != NULL && indexes->cil != NULL) {
unsigned int urlSize;
/* list: concatenated NUL-terminated names */
String list = STRING_EMPTY;
/* listindexes: one pointer-sized slot per name, holding its offset in list */
String listindexes = STRING_EMPTY;
String subitem = STRING_EMPTY;
unsigned int listCount = 0;
struct_coucal_enum en = coucal_enum_new(indexes->cil);
coucal_item *chain;
coucal hdupes = NULL;
/* duplicate filter, only created when listing a single level */
if (!subtree) {
hdupes = coucal_new(0);
coucal_set_name(hdupes, "hdupes");
}
StringClear(list);
StringClear(listindexes);
StringClear(subitem);
if (strncmp(url, "http://", 7) == 0)
url += 7;
urlSize = (unsigned int) strlen(url);
while((chain = coucal_enum_next(&en))) {
long int index = (long int) chain->value.intg;
if (urlSize == 0 || strncmp(chain->name, url, urlSize) == 0) {
if (index >= 0 && index < indexes->index_size) {
/* part of the name following the requested prefix */
char *item = (char*) chain->name + urlSize;
if (*item == '/')
item++;
{
char *pos = subtree ? 0 : strchr_stop(item, '/', '?');
unsigned int len =
pos ? (unsigned int) (pos - item) : (unsigned int) strlen(item);
if (len > 0 /* default document */ || *item == 0) {
int isFolder = (item[len] == '/');
StringClear(subitem);
if (len > 0)
StringMemcat(subitem, item, len);
/* NOTE(review): hdupes is NULL when subtree is non-zero, yet it is passed
   to coucal_exists()/coucal_write() below -- confirm that coucal accepts a
   NULL table. */
if (len == 0 || !coucal_exists(hdupes, StringBuff(subitem))) {
/* the current offset in list is stored disguised as a pointer
   (NULL + offset); it is turned into a real address once the final
   buffer is known */
char *ptr = NULL;
ptr += StringLength(list);
if (len > 0)
StringCat(list, StringBuff(subitem));
if (isFolder)
StringCat(list, "/");
StringMemcat(list, "\0", 1); /* NULL terminated strings */
StringMemcat(listindexes, (char*) &ptr, sizeof(ptr));
listCount++;
coucal_write(hdupes, StringBuff(subitem), 0);
}
}
}
} else {
proxytrack_print_log(CRITICAL,
"PT_Enumerate:Corrupted central index locator");
}
}
}
StringFree(subitem);
coucal_delete(&hdupes);
/* NOTE(review): when listCount is 0, list and listindexes are not passed to
   StringFree() before returning NULL -- check whether StringClear() above
   allocated anything. */
if (listCount > 0) {
unsigned int i;
void *blk;
char *nullPointer = NULL;
char *startStrings;
/* NULL terminated index */
StringMemcat(listindexes, (char*) &nullPointer, sizeof(nullPointer));
/* start of all strings (index) */
startStrings = nullPointer + StringLength(listindexes);
/* copy list of URLs after indexes */
StringMemcat(listindexes, StringBuff(list), StringLength(list));
/* ---- no reallocation beyond this point (fixed addresses) ---- */
/* start of all strings (pointer) */
startStrings = (startStrings - nullPointer) + StringBuffRW(listindexes);
/* transform indexes into references */
for(i = 0; i < listCount; i++) {
char *ptr = NULL;
unsigned int ndx;
memcpy(&ptr, &StringBuff(listindexes)[i * sizeof(char *)],
sizeof(char *));
ndx = (unsigned int) (ptr - nullPointer);
ptr = startStrings + ndx;
memcpy(&StringBuffRW(listindexes)[i * sizeof(char *)], &ptr,
sizeof(char *));
}
/* the caller takes ownership of the combined block */
blk = StringAcquire(&listindexes);
StringFree(list);
StringFree(listindexes);
return (char **) blk;
}
}
return NULL;
}
/* Release a list returned by PT_Enumerate() and reset the caller's pointer.
   The list is a single block, so one free() is enough. */
void PT_Enumerate_Delete(char ***plist) {
  if (plist == NULL || *plist == NULL)
    return;
  free(*plist);
  *plist = NULL;
}
/* Guess the cache format from the file name extension (case-insensitive):
   ".zip" -> PT_CACHE__NEW, ".ndx"/".dat" -> PT_CACHE__OLD,
   ".arc" -> PT_CACHE__ARC, anything else -> PT_CACHE_UNDEFINED. */
static int PT_GetType(const char *filename) {
  char *ext = strrchr(filename, '.');

  if (ext == NULL)
    return PT_CACHE_UNDEFINED;
  if (strcasecmp(ext, ".zip") == 0)
    return PT_CACHE__NEW;
  if (strcasecmp(ext, ".ndx") == 0 || strcasecmp(ext, ".dat") == 0)
    return PT_CACHE__OLD;
  if (strcasecmp(ext, ".arc") == 0)
    return PT_CACHE__ARC;
  return PT_CACHE_UNDEFINED;
}
/* Load a cache file into a new index.
   The format is chosen from the file extension (see PT_GetType()).
   Returns the index, to be released with PT_Index_Delete(), or NULL when the
   name is NULL, the extension is unknown, memory is exhausted or the
   format-specific loader fails. */
PT_Index PT_LoadCache(const char *filename) {
  int type;
  PT_Index index;

  if (filename == NULL)
    return NULL;

  type = PT_GetType(filename);
  if (type == PT_CACHE_UNDEFINED)
    return NULL;

  index = calloc(1, sizeof(_PT_Index));
  if (index == NULL)
    return NULL;

  index->type = type;
  index->slots.common.timestamp = (time_t) time(NULL);
  index->slots.common.startUrl[0] = '\0';
  index->slots.common.hash = coucal_new(0);
  coucal_set_name(index->slots.common.hash, "index->slots.common.hash");

  if (!_IndexFuncts[type].PT_LoadCache(index, filename)) {
    proxytrack_print_log(DEBUG,
                         "reading httrack cache (format #%d) %s : error",
                         type, filename);
    /* The loader may already have released the hashtable (the pointer is
       then NULL, which coucal_delete() accepts); otherwise release it here
       instead of leaking it. */
    coucal_delete(&index->slots.common.hash);
    free(index);
    return NULL;
  }
  proxytrack_print_log(DEBUG,
                       "reading httrack cache (format #%d) %s : success",
                       type, filename);

  /* default starting URL is the first hash entry */
  if (index->slots.common.startUrl[0] == '\0') {
    struct_coucal_enum en = coucal_enum_new(index->slots.common.hash);
    coucal_item *chain;

    chain = coucal_enum_next(&en);
    /* skip robots.txt if it happens to come first */
    if (chain != NULL && strstr(chain->name, "/robots.txt") != NULL) {
      chain = coucal_enum_next(&en);
    }
    if (chain != NULL) {
      const char *const name = (const char *) chain->name;

      /* bounded copy: names can be longer than startUrl */
      snprintf(index->slots.common.startUrl,
               sizeof(index->slots.common.startUrl), "%s%s",
               link_has_authority(name) ? "" : "http://", name);
    }
  }
  return index;
}
/* Return the size of a file in bytes, or -1 when it cannot be stat'ed. */
static long int filesize(const char *filename) {
  struct stat info;

  memset(&info, 0, sizeof(info));
  return (stat(filename, &info) == 0) ? (long int) info.st_size : -1;
}
/* Tell whether url is present in the index, dispatching to the
   format-specific lookup. Returns 0 for a NULL or corrupted index. */
int PT_LookupCache(PT_Index index, const char *url) {
  if (index == NULL || !SAFE_INDEX(index))
    return 0;
  return _IndexFuncts[index->type].PT_LookupCache(index, url);
}
/* Save all indexes into a cache file whose format is chosen from the
   extension of filename. On success the file time is set to the most recent
   index timestamp.
   Returns 0 on success, -1 when the format is unknown, cannot be written, or
   the format-specific writer fails. */
int PT_SaveCache(PT_Indexes indexes, const char *filename) {
  const int type = PT_GetType(filename);

  if (type == PT_CACHE_UNDEFINED)
    return -1;
  if (_IndexFuncts[type].PT_SaveCache == NULL)  /* read-only format */
    return -1;
  if (_IndexFuncts[type].PT_SaveCache(indexes, filename) != 0)
    return -1;

  (void) set_filetime_time_t(filename, PT_GetTimeIndex(indexes));
  return 0;
}
/* Call callback(arg, url, element) for every URL of the central index.
   Each element is read with headers and body, and deleted after the callback
   returns; URLs whose element cannot be read are skipped.
   Returns 0 when the enumeration completed, the first non-zero value returned
   by the callback (which stops the enumeration), or -1 if the central index
   refers to an index position that does not exist. */
int PT_EnumCache(PT_Indexes indexes,
                 int (*callback) (void *, const char *url, PT_Element),
                 void *arg) {
  if (indexes != NULL && indexes->cil != NULL) {
    struct_coucal_enum en = coucal_enum_new(indexes->cil);
    coucal_item *chain;

    while((chain = coucal_enum_next(&en))) {
      const long int index_id = (long int) chain->value.intg;
      const char *const url = chain->name;

      /* valid positions are 0 .. index_size - 1 (the bound used to be
         inclusive, allowing a read one slot past the array) */
      if (index_id >= 0 && index_id < indexes->index_size) {
        PT_Element item =
          PT_ReadCache(indexes->index[index_id], url,
                       FETCH_HEADERS | FETCH_BODY);
        if (item != NULL) {
          int ret = callback(arg, url, item);

          PT_Element_Delete(&item);
          if (ret != 0)
            return ret;
        }
      } else {
        proxytrack_print_log(CRITICAL,
                             "PT_ReadCache:Corrupted central index locator");
        return -1;
      }
    }
  }
  return 0;
}
/* Return the timestamp recorded for this index. The index must not be
   NULL. */
time_t PT_Index_Timestamp(PT_Index index) {
  const time_t stamp = index->slots.common.timestamp;

  return stamp;
}
/* Locked wrapper: look up url in a "New" (zip) index while holding the zip
   file mutex. */
static int PT_LookupCache__New(PT_Index index, const char *url) {
  int found;

  MutexLock(&index->slots.formatNew.zFileLock);
  found = PT_LookupCache__New_u(index, url);
  MutexUnlock(&index->slots.formatNew.zFileLock);
  return found;
}
/* Unlocked lookup in a "New" (zip) index.
   Returns 1 when url (with or without a leading "http://") is present in the
   hashtable, 0 otherwise or when the index is not usable or url is empty. */
static int PT_LookupCache__New_u(PT_Index index_, const char *url) {
  PT_Index__New zindex;

  if (index_ == NULL)
    return 0;

  zindex = &index_->slots.formatNew;
  if (zindex->hash == NULL || zindex->zFile == NULL || url == NULL
      || *url == 0)
    return 0;

  /* names are stored without the scheme */
  if (strncmp(url, "http://", 7) == 0)
    url += 7;
  return coucal_read(zindex->hash, url, NULL) ? 1 : 0;
}
/* Merge an index into the collection.
   On success the collection takes ownership of the index (*pindex is reset to
   NULL) and every URL of the index is registered in the central locator,
   except those already owned by an index with a more recent timestamp.
   Returns the number of URLs registered, or -1 when an argument is invalid.
   If the index array cannot be grown, the collection is left unchanged,
   *pindex is left to the caller and 0 is returned. */
int PT_IndexMerge(PT_Indexes indexes, PT_Index * pindex) {
  if (pindex != NULL && *pindex != NULL && (*pindex)->slots.common.hash != NULL
      && indexes != NULL) {
    PT_Index index = *pindex;
    struct_coucal_enum en = coucal_enum_new(index->slots.common.hash);
    coucal_item *chain;
    const int index_id = indexes->index_size;
    int nMerged = 0;
    /* Grow through a temporary so that the existing array survives a failed
       realloc(); the elements are pointers, not whole index structures. */
    struct _PT_Index **grown =
      realloc(indexes->index, sizeof(*grown) * ((size_t) index_id + 1));

    if (grown == NULL) {
      proxytrack_print_log(CRITICAL, "PT_IndexMerge:Memory exhausted");
      return nMerged;
    }
    indexes->index = grown;
    indexes->index[index_id] = index;
    indexes->index_size = index_id + 1;
    *pindex = NULL;             /* now owned by the collection */

    while((chain = coucal_enum_next(&en)) != NULL) {
      const char *url = chain->name;

      if (url != NULL && url[0] != '\0') {
        intptr_t previous_index_id = 0;

        if (coucal_read(indexes->cil, url, &previous_index_id)) {
          if (previous_index_id >= 0
              && previous_index_id < indexes->index_size) {
            /* existing entry is newer: keep it and go on with the next URL
               (this used to stop the whole merge) */
            if (indexes->index[previous_index_id]->slots.common.timestamp >
                index->slots.common.timestamp)
              continue;
          } else {
            proxytrack_print_log(CRITICAL,
                                 "PT_IndexMerge:Corrupted central index locator");
          }
        }
        coucal_write(indexes->cil, chain->name, index_id);
        nMerged++;
      }
    }
    return nMerged;
  }
  return -1;
}
/* Destroy an element together with its body, headers and location strings,
   then reset the caller's handle to NULL. A NULL pentry is ignored. */
void PT_Element_Delete(PT_Element * pentry) {
  PT_Element doomed;

  if (pentry == NULL)
    return;

  doomed = *pentry;
  if (doomed != NULL) {
    /* free(NULL) is a no-op, so unset members need no special case */
    free(doomed->adr);
    free(doomed->headers);
    free(doomed->location);
    free(doomed);
  }
  *pentry = NULL;
}
/* Read the element stored for url, whichever index holds it.
   A leading "http://" in url is ignored. flags is passed to PT_ReadCache().
   Returns a new element whose indexId is the position of the owning index
   (release it with PT_Element_Delete()), or NULL when the URL is unknown or
   an argument is NULL. */
PT_Element PT_ReadIndex(PT_Indexes indexes, const char *url, int flags) {
  if (indexes != NULL && url != NULL) {
    intptr_t index_id;

    if (strncmp(url, "http://", 7) == 0)
      url += 7;
    if (coucal_read(indexes->cil, url, &index_id)) {
      /* valid positions are 0 .. index_size - 1 (the bound used to be
         inclusive, allowing a read one slot past the array) */
      if (index_id >= 0 && index_id < indexes->index_size) {
        PT_Element item = PT_ReadCache(indexes->index[index_id], url, flags);

        if (item != NULL) {
          item->indexId = (int) index_id;
          return item;
        }
      } else {
        proxytrack_print_log(CRITICAL,
                             "PT_ReadCache:Corrupted central index locator");
      }
    }
  }
  return NULL;
}
/* Tell whether url is known to the collection.
   A leading "http://" in url is ignored.
   Returns 1 when the central locator maps the URL to an existing index,
   0 otherwise (including NULL arguments). */
int PT_LookupIndex(PT_Indexes indexes, const char *url) {
  if (indexes != NULL && url != NULL) {
    intptr_t index_id;

    if (strncmp(url, "http://", 7) == 0)
      url += 7;
    if (coucal_read(indexes->cil, url, &index_id)) {
      /* valid positions are 0 .. index_size - 1 (the bound used to be
         inclusive, so a locator pointing one past the array was accepted) */
      if (index_id >= 0 && index_id < indexes->index_size) {
        return 1;
      } else {
        proxytrack_print_log(CRITICAL,
                             "PT_ReadCache:Corrupted central index locator");
      }
    }
  }
  return 0;
}
/* Return the most recent timestamp among all indexes of the collection, or
   (time_t) -1 when the collection is NULL or empty. */
time_t PT_GetTimeIndex(PT_Indexes indexes) {
  time_t latest;
  int i;

  if (indexes == NULL || indexes->index_size <= 0)
    return (time_t) - 1;

  latest = indexes->index[0]->slots.common.timestamp;
  for(i = 1; i < indexes->index_size; i++) {
    const time_t stamp = indexes->index[i]->slots.common.timestamp;

    if (stamp > latest)
      latest = stamp;
  }
  return latest;
}
/* Return the index stored at position indexId, or NULL when the collection
   is NULL or the position is out of range. The collection keeps ownership. */
PT_Index PT_GetIndex(PT_Indexes indexes, int indexId) {
  if (indexes == NULL)
    return NULL;
  if (indexId < 0 || indexId >= indexes->index_size)
    return NULL;
  return indexes->index[indexId];
}
/* Allocate a zero-filled element marked as invalid (statuscode
   STATUSCODE_INVALID) and not attached to any index (indexId -1).
   Returns NULL on memory exhaustion. */
PT_Element PT_ElementNew(void) {
  PT_Element elt = calloc(sizeof(_PT_Element), 1);

  if (elt != NULL) {
    elt->statuscode = STATUSCODE_INVALID;
    elt->indexId = -1;
  }
  return elt;
}
/* Read the element stored for url in one index, dispatching to the
   format-specific reader. Returns NULL for a NULL or corrupted index. */
PT_Element PT_ReadCache(PT_Index index, const char *url, int flags) {
  if (index == NULL || !SAFE_INDEX(index))
    return NULL;
  return _IndexFuncts[index->type].PT_ReadCache(index, url, flags);
}
/* Locked wrapper: read url from a "New" (zip) index while holding the zip
   file mutex. */
static PT_Element PT_ReadCache__New(PT_Index index, const char *url, int flags) {
  PT_Element element;

  MutexLock(&index->slots.formatNew.zFileLock);
  element = PT_ReadCache__New_u(index, url, flags);
  MutexUnlock(&index->slots.formatNew.zFileLock);
  return element;
}
/* ------------------------------------------------------------ */
/* New HTTrack cache (new.zip) format */
/* ------------------------------------------------------------ */
/* Append "field: value\r\n" at headers + headersSize when value is a
   non-empty string, then advance headersSize by the length written.
   NOTE(review): uses sprintf() without any bound on the headers buffer. */
#define ZIP_FIELD_STRING(headers, headersSize, field, value) do { \
if ( (value != NULL) && (value)[0] != '\0') { \
sprintf(headers + headersSize, "%s: %s\r\n", field, (value != NULL) ? (value) : ""); \
(headersSize) += (int) strlen(headers + headersSize); \
} \
} while(0)
/* Same as ZIP_FIELD_STRING for an integer value (printed as LLint); nothing
   is written when the value is zero. */
#define ZIP_FIELD_INT(headers, headersSize, field, value) do { \
if ( (value != 0) ) { \
sprintf(headers + headersSize, "%s: "LLintP"\r\n", field, (LLint)(value)); \
(headersSize) += (int) strlen(headers + headersSize); \
} \
} while(0)
/* Same as ZIP_FIELD_INT, but the field is written even when the value is
   zero. */
#define ZIP_FIELD_INT_FORCE(headers, headersSize, field, value) do { \
sprintf(headers + headersSize, "%s: "LLintP"\r\n", field, (LLint)(value)); \
(headersSize) += (int) strlen(headers + headersSize); \
} while(0)
/* If line has not been consumed yet and strfield2(line, refline) accepts it
   (presumably a header name comparison -- strfield2 is defined elsewhere),
   copy value into refvalue and mark the line as consumed by emptying it.
   NOTE(review): strcpy() is unbounded; refvalue must be large enough for any
   value found in the cache. */
#define ZIP_READFIELD_STRING(line, value, refline, refvalue) do { \
if (line[0] != '\0' && strfield2(line, refline)) { \
strcpy(refvalue, value); \
line[0] = '\0'; \
} \
} while(0)
/* Integer counterpart of ZIP_READFIELD_STRING: value is parsed with
   sscanf("%d"), refvalue receiving 0 when nothing could be parsed. */
#define ZIP_READFIELD_INT(line, value, refline, refvalue) do { \
if (line[0] != '\0' && strfield2(line, refline)) { \
int intval = 0; \
sscanf(value, "%d", &intval); \
(refvalue) = intval; \
line[0] = '\0'; \
} \
} while(0)
/* Load the directory of a "New" (zip) cache into the index.
   Every zip entry having a local extra field (the stored response headers)
   is registered in the index hashtable under its name without "http://"; the
   value is the entry offset in the zip, negated when the headers do not say
   "X-In-Cache: 1" (data stored outside the zip). The first entry that is not
   a robots.txt becomes the start URL.
   The index timestamp is set to the file modification time, and the base
   path (directory two levels above the file, with '/' separators and a
   trailing '/') is recorded for later relative lookups; it is left empty if
   it cannot be determined or does not fit.
   Returns 1 on success. Returns 0 when an argument is NULL, or when the
   archive cannot be opened or has no first entry; in that case the zip
   handle, the hashtable and the mutex are released here. */
int PT_LoadCache__New(PT_Index index_, const char *filename) {
  if (index_ != NULL && filename != NULL) {
    PT_Index__New index = &index_->slots.formatNew;
    unzFile zFile = index->zFile = unzOpen(filename);

    index->timestamp = file_timestamp(filename);
    MutexInit(&index->zFileLock);

    // Opened ?
    if (zFile != NULL) {
      const char *abpath;
      int slashes;
      coucal hashtable = index->hash;

      /* Compute base path for this index - the filename MUST be absolute! */
      /* (walk backwards until the second path separator from the end) */
      for(slashes = 2, abpath = filename + (int) strlen(filename) - 1;
          abpath > filename && ((*abpath != '/' && *abpath != '\\')
                                || --slashes > 0);
          abpath--) ;
      index->path[0] = '\0';
      if (slashes == 0 && *abpath != 0) {
        const size_t prefixLen = (size_t) (abpath - filename) + 1;

        /* only keep a base path that fits; an over-long one used to
           overflow the buffer */
        if (prefixLen < sizeof(index->path)) {
          size_t i;

          memcpy(index->path, filename, prefixLen);
          index->path[prefixLen] = '\0';
          for(i = 0; index->path[i] != 0; i++) {
            if (index->path[i] == '\\') {
              index->path[i] = '/';
            }
          }
        }
      }

      /* Ready directory entries */
      if (unzGoToFirstFile(zFile) == Z_OK) {
        char comment[128];
        char entryName[HTS_URLMAXSIZE * 4];
        int entries = 0;
        int firstSeen = 0;

        memset(comment, 0, sizeof(comment));    // for truncated reads
        do {
          int readSizeHeader = 0;

          entryName[0] = '\0';
          comment[0] = '\0';
          if (unzOpenCurrentFile(zFile) == Z_OK) {
            if ((readSizeHeader =
                 unzGetLocalExtrafield(zFile, comment, sizeof(comment) - 2)) > 0
                && unzGetCurrentFileInfo(zFile, NULL, entryName,
                                         sizeof(entryName) - 2, NULL, 0, NULL,
                                         0) == Z_OK) {
              long int pos = (long int) unzGetOffset(zFile);

              assertf(readSizeHeader < sizeof(comment));
              comment[readSizeHeader] = '\0';
              entries++;
              if (pos > 0) {
                int dataincache = 0;    // data in cache ?
                char *filenameIndex = entryName;

                if (strncmp(filenameIndex, "http://", 7) == 0) {
                  filenameIndex += 7;
                }
                if (comment[0] != '\0') {
                  int maxLine = 2;
                  char *a = comment;
                  const char *const end = comment + readSizeHeader;

                  /* parse only few first lines, and never beyond the bytes
                     read for this entry (binput() steps over the
                     terminator, which used to expose stale data from a
                     previous, longer entry) */
                  while(a < end && *a && maxLine-- > 0) {
                    char line[1024];

                    line[0] = '\0';
                    a += binput(a, line, sizeof(line) - 2);
                    if (strncmp(line, "X-In-Cache:", 11) == 0) {
                      if (strcmp(line, "X-In-Cache: 1") == 0) {
                        dataincache = 1;
                      } else {
                        dataincache = 0;
                      }
                      break;
                    }
                  }
                }
                /* the sign of the offset tells where the data lives */
                if (dataincache)
                  coucal_add(hashtable, filenameIndex, pos);
                else
                  coucal_add(hashtable, filenameIndex, -pos);

                /* First link as starting URL */
                if (!firstSeen) {
                  if (strstr(filenameIndex, "/robots.txt") == NULL) {
                    firstSeen = 1;
                    /* bounded copy: entry names can be much longer than
                       startUrl */
                    snprintf(index->startUrl, sizeof(index->startUrl),
                             "%s%s",
                             link_has_authority(filenameIndex) ? "" :
                             "http://", filenameIndex);
                  }
                }
              } else {
                fprintf(stderr, "Corrupted cache meta entry #%d" LF,
                        (int) entries);
              }
            } else {
              fprintf(stderr, "Corrupted cache entry #%d" LF, (int) entries);
            }
            unzCloseCurrentFile(zFile);
          } else {
            fprintf(stderr, "Corrupted cache entry #%d" LF, (int) entries);
          }
        } while(unzGoToNextFile(zFile) == Z_OK);
        return 1;
      }

      /* No readable entry: release what was acquired above; the caller only
         frees the index block itself */
      coucal_delete(&index->hash);
      index->hash = NULL;
      unzClose(zFile);
      index->zFile = NULL;
    }
    MutexFree(&index->zFileLock);
  }
  return 0;
}
/* Unlocked read of one entry of a "New" (zip) cache.
   The stored headers (local extra field of the zip entry) are parsed into the
   element; when flags contains FETCH_BODY the body is loaded as well, either
   from the zip entry (X-In-Cache set) or from the file named by the X-Save
   header, resolved against the index base path.
   Returns NULL when the index or url is unusable or no element can be
   allocated. Otherwise a new element is always returned: on failure its
   statuscode is STATUSCODE_INVALID and msg describes the error. */
static PT_Element PT_ReadCache__New_u(PT_Index index_, const char *url,
int flags) {
PT_Index__New index = (PT_Index__New) & index_->slots.formatNew;
char location_default[HTS_URLMAXSIZE * 2];
char previous_save[HTS_URLMAXSIZE * 2];
char previous_save_[HTS_URLMAXSIZE * 2];
char catbuff[CATBUFF_SIZE];
intptr_t hash_pos;
int hash_pos_return;
PT_Element r = NULL;
if (index == NULL || index->hash == NULL || index->zFile == NULL
|| url == NULL || *url == 0)
return NULL;
if ((r = PT_ElementNew()) == NULL)
return NULL;
location_default[0] = '\0';
previous_save[0] = previous_save_[0] = '\0';
/* this also resets the statuscode and indexId set by PT_ElementNew() to 0 */
memset(r, 0, sizeof(_PT_Element));
/* location temporarily points to a stack buffer; it is replaced by a heap
   copy (or NULL) just before returning */
r->location = location_default;
strcpy(r->location, "");
if (strncmp(url, "http://", 7) == 0)
url += 7;
hash_pos_return = coucal_read(index->hash, url, &hash_pos);
if (hash_pos_return) {
uLong posInZip;
/* the loader stores a negated offset for entries whose data is not in the
   zip; only the magnitude matters here */
if (hash_pos > 0) {
posInZip = (uLong) hash_pos;
} else {
posInZip = (uLong) - hash_pos;
}
if (unzSetOffset(index->zFile, posInZip) == Z_OK) {
/* Read header (Max 8KiB) */
if (unzOpenCurrentFile(index->zFile) == Z_OK) {
char headerBuff[8192 + 2];
int readSizeHeader;
//int totalHeader = 0;
int dataincache = 0;
/* For BIG comments */
headerBuff[0]
= headerBuff[sizeof(headerBuff) - 1]
= headerBuff[sizeof(headerBuff) - 2]
= headerBuff[sizeof(headerBuff) - 3] = '\0';
if ((readSizeHeader =
unzGetLocalExtrafield(index->zFile, headerBuff,
sizeof(headerBuff) - 2)) > 0) {
int offset = 0;
char line[HTS_URLMAXSIZE + 2];
int lineEof = 0;
headerBuff[readSizeHeader] = '\0';
/* parse the header block line by line, until an empty line or the end */
do {
char *value;
line[0] = '\0';
offset += binput(headerBuff + offset, line, sizeof(line) - 2);
if (line[0] == '\0') {
lineEof = 1;
}
value = strchr(line, ':');
if (value != NULL) {
/* split "name: value" in place */
*value++ = '\0';
if (*value == ' ' || *value == '\t')
value++;
/* each macro consumes the line (empties it) when the name matches */
/* NOTE(review): the string variants use an unbounded strcpy() into the
   element members -- confirm these are large enough for a line of up to
   HTS_URLMAXSIZE characters */
ZIP_READFIELD_INT(line, value, "X-In-Cache", dataincache);
ZIP_READFIELD_INT(line, value, "X-Statuscode", r->statuscode);
ZIP_READFIELD_STRING(line, value, "X-StatusMessage", r->msg); // msg
ZIP_READFIELD_INT(line, value, "X-Size", r->size); // size
ZIP_READFIELD_STRING(line, value, "Content-Type", r->contenttype); // contenttype
ZIP_READFIELD_STRING(line, value, "X-Charset", r->charset); // charset
ZIP_READFIELD_STRING(line, value, "Last-Modified", r->lastmodified); // last-modified
ZIP_READFIELD_STRING(line, value, "Etag", r->etag); // Etag
ZIP_READFIELD_STRING(line, value, "Location", r->location); // 'location' for moved
ZIP_READFIELD_STRING(line, value, "Content-Disposition", r->cdispo); // Content-disposition
//ZIP_READFIELD_STRING(line, value, "X-Addr", ..); // Original address
//ZIP_READFIELD_STRING(line, value, "X-Fil", ..); // Original URI filename
ZIP_READFIELD_STRING(line, value, "X-Save", previous_save_); // Original save filename
/* any header not consumed above is appended verbatim to r->headers */
if (line[0] != '\0') {
int len = r->headers ? ((int) strlen(r->headers)) : 0;
int nlen =
(int) (strlen(line) + 2 + strlen(value) + sizeof("\r\n") + 1);
/* NOTE(review): the realloc() result is used without a NULL check */
r->headers = realloc(r->headers, len + nlen);
r->headers[len] = '\0';
strcat(r->headers, line);
strcat(r->headers, ": ");
strcat(r->headers, value);
strcat(r->headers, "\r\n");
}
}
} while(offset < readSizeHeader && !lineEof);
//totalHeader = offset;
/* Previous entry */
/* (turn the stored X-Save name into a path usable on this machine) */
if (previous_save_[0] != '\0') {
int pathLen = (int) strlen(index->path);
if (pathLen > 0 && strncmp(previous_save_, index->path, pathLen) == 0) { // old (<3.40) buggy format
strcpy(previous_save, previous_save_);
}
// relative ? (hack)
else if (index->safeCache || (previous_save_[0] != '/' // /home/foo/bar.gif
&& (!isalpha(previous_save_[0]) || previous_save_[1] != ':')) // c:/home/foo/bar.gif
) {
index->safeCache = 1;
sprintf(previous_save, "%s%s", index->path, previous_save_);
}
// bogus format (includes buggy absolute path)
else {
/* guess previous path */
/* (look for the host part of the URL inside the stored path; its position
   is remembered in fixedPath for the following reads) */
if (index->fixedPath == 0) {
const char *start = jump_protocol_and_auth(url);
const char *end = start ? strchr(start, '/') : NULL;
/* NOTE(review): len is computed before start/end are checked against NULL */
int len = (int) (end - start);
if (start != NULL && end != NULL && len > 0 && len < 128) {
char piece[128 + 2];
const char *where;
piece[0] = '\0';
strncat(piece, start, len);
if ((where = strstr(previous_save_, piece)) != NULL) {
index->fixedPath = (int) (where - previous_save_); // offset to relative path
}
}
}
if (index->fixedPath > 0) {
int saveLen = (int) strlen(previous_save_);
if (index->fixedPath < saveLen) {
sprintf(previous_save, "%s%s", index->path,
previous_save_ + index->fixedPath);
} else {
sprintf(r->msg, "Bogus fixePath prefix for %s (prefixLen=%d)",
previous_save_, (int) index->fixedPath);
r->statuscode = STATUSCODE_INVALID;
}
} else {
sprintf(previous_save, "%s%s", index->path, previous_save_);
}
}
}
/* Complete fields */
r->adr = NULL;
if (r->statuscode != STATUSCODE_INVALID) { /* Can continue */
/* ok is never set, so the first branch below is never taken */
int ok = 0;
// Shortcut:
// Can the file be stored directly on disk?
if (ok) {
if (r->msg[0] == '\0') {
strcpy(r->msg, "Cache Read Error : Unexpected error");
}
} else { // read into memory
if (!dataincache) {
/* Read in memory from cache */
/* (the body lives in the saved file on disk, not in the zip) */
if (flags & FETCH_BODY) {
if (strnotempty(previous_save)) {
FILE *fp = fopen(file_convert(catbuff, sizeof(catbuff), previous_save), "rb");
if (fp != NULL) {
r->adr = (char *) malloc(r->size + 4);
if (r->adr != NULL) {
if (r->size > 0
&& fread(r->adr, 1, r->size, fp) != r->size) {
int last_errno = errno;
r->statuscode = STATUSCODE_INVALID;
sprintf(r->msg, "Read error in cache disk data: %s",
strerror(last_errno));
}
} else {
r->statuscode = STATUSCODE_INVALID;
strcpy(r->msg,
"Read error (memory exhausted) from cache");
}
fclose(fp);
} else {
r->statuscode = STATUSCODE_INVALID;
sprintf(r->msg, "Read error (can't open '%s') from cache",
file_convert(catbuff, sizeof(catbuff), previous_save));
}
} else {
r->statuscode = STATUSCODE_INVALID;
strcpy(r->msg, "Cached file name is invalid");
}
}
} else {
// read file (in one go)
if (flags & FETCH_BODY) {
r->adr = (char *) malloc(r->size + 1);
if (r->adr != NULL) {
if (unzReadCurrentFile(index->zFile, r->adr, (unsigned int) r->size) != r->size) { // error
free(r->adr);
r->adr = NULL;
r->statuscode = STATUSCODE_INVALID;
strcpy(r->msg, "Cache Read Error : Read Data");
} else
*(r->adr + r->size) = '\0';
//printf(">%s status %d\n",back[p].r->contenttype,back[p].r->statuscode);
} else { // error
r->statuscode = STATUSCODE_INVALID;
strcpy(r->msg, "Cache Memory Error");
}
}
}
}
} // if save==null, load nothing (headers only)
} else {
r->statuscode = STATUSCODE_INVALID;
strcpy(r->msg, "Cache Read Error : Read Header Data");
}
unzCloseCurrentFile(index->zFile);
} else {
r->statuscode = STATUSCODE_INVALID;
strcpy(r->msg, "Cache Read Error : Open File");
}
} else {
r->statuscode = STATUSCODE_INVALID;
strcpy(r->msg, "Cache Read Error : Bad Offset");
}
} else {
r->statuscode = STATUSCODE_INVALID;
strcpy(r->msg, "File Cache Entry Not Found");
}
/* detach location from the stack buffer: heap copy if set, NULL otherwise */
if (r->location[0] != '\0') {
r->location = strdup(r->location);
} else {
r->location = NULL;
}
return r;
}
/* PT_EnumCache() callback used by PT_SaveCache__New(): store one element as a
   new entry of the ZIP archive passed through 'arg'.
   - arg:     the destination zipFile
   - url:     name given to the ZIP entry
   - element: cached meta-data and (optional) body
   The meta-data are serialized as HTTP-like header lines and handed to
   zipOpenNewFileInZip() as the local extra field; the body, when present, is
   the entry payload.
   Always returns 0; every minizip failure goes through assertf(). */
static int PT_SaveCache__New_Fun(void *arg, const char *url, PT_Element element) {
  zipFile zFileOut = (zipFile) arg;
  char headers[8192];
  int headersSize = 0;
  zip_fileinfo fi;
  int zErr;
  const char *url_adr = "";
  const char *url_fil = "";
  const char *message = element->msg;

  headers[0] = '\0';

  /* Status line: 64 characters MAX for first line, hence long messages are
     replaced by a pointer to the X-StatusMessage field */
  if (strlen(element->msg) >= 32) {
    message = "(See X-StatusMessage)";
  }
  sprintf(headers + headersSize, "HTTP/1.%c %d %s\r\n", '1',
          element->statuscode, message);
  headersSize += (int) strlen(headers + headersSize);

  /* Second line MUST ALWAYS be X-In-Cache */
  ZIP_FIELD_INT_FORCE(headers, headersSize, "X-In-Cache", 1);
  ZIP_FIELD_INT(headers, headersSize, "X-StatusCode", element->statuscode);
  ZIP_FIELD_STRING(headers, headersSize, "X-StatusMessage", element->msg);
  ZIP_FIELD_INT(headers, headersSize, "X-Size", element->size);        // size
  ZIP_FIELD_STRING(headers, headersSize, "Content-Type", element->contenttype);  // content type
  ZIP_FIELD_STRING(headers, headersSize, "X-Charset", element->charset);         // charset
  ZIP_FIELD_STRING(headers, headersSize, "Last-Modified", element->lastmodified);        // last-modified
  ZIP_FIELD_STRING(headers, headersSize, "Etag", element->etag);        // Etag
  ZIP_FIELD_STRING(headers, headersSize, "Location", element->location);        // 'location' for moved documents
  ZIP_FIELD_STRING(headers, headersSize, "Content-Disposition", element->cdispo);        // Content-disposition
  ZIP_FIELD_STRING(headers, headersSize, "X-Addr", url_adr);    // Original address
  ZIP_FIELD_STRING(headers, headersSize, "X-Fil", url_fil);     // Original URI filename
  ZIP_FIELD_STRING(headers, headersSize, "X-Save", "");         // Original save filename

  /* Entry date: taken from Last-Modified when it can be parsed, else zeroed */
  memset(&fi, 0, sizeof(fi));
  if (element->lastmodified[0] != '\0') {
    struct tm buffer;
    struct tm *const parsed =
      convert_time_rfc822(&buffer, element->lastmodified);

    if (parsed != NULL) {
      fi.tmz_date.tm_sec = (uInt) parsed->tm_sec;
      fi.tmz_date.tm_min = (uInt) parsed->tm_min;
      fi.tmz_date.tm_hour = (uInt) parsed->tm_hour;
      fi.tmz_date.tm_mday = (uInt) parsed->tm_mday;
      fi.tmz_date.tm_mon = (uInt) parsed->tm_mon;
      fi.tmz_date.tm_year = (uInt) parsed->tm_year;
    }
  }

  /* Open the entry.
     The headers are stored in realtime in the local file directory as extra
     field: in case of crash, the whole ZIP file can be recovered by
     rescanning it. */
  zErr = zipOpenNewFileInZip(zFileOut, url, &fi,
                             headers, (uInt) strlen(headers), NULL, 0,
                             NULL, /* comment */
                             Z_DEFLATED, Z_DEFAULT_COMPRESSION);
  if (zErr != Z_OK) {
    assertf(! "zip_zipOpenNewFileInZip_failed");
  }

  /* Write data in cache */
  if (element->size > 0 && element->adr != NULL) {
    zErr = zipWriteInFileInZip(zFileOut, element->adr, (int) element->size);
    if (zErr != Z_OK) {
      assertf(! "zip_zipWriteInFileInZip_failed");
    }
  }

  /* Close */
  zErr = zipCloseFileInZip(zFileOut);
  if (zErr != Z_OK) {
    assertf(! "zip_zipCloseFileInZip_failed");
  }

  /* Flush */
  zErr = zipFlush(zFileOut);
  if (zErr != 0) {
    assertf(! "zip_zipFlush_failed");
  }

  return 0;
}
/* Save all elements of 'indexes' into a newly created ZIP cache 'filename'.
   Returns -1 when the archive can not be created, otherwise the value
   returned by PT_EnumCache(); the file is removed when that value is not 0. */
static int PT_SaveCache__New(PT_Indexes indexes, const char *filename) {
  int status;
  zipFile archive = zipOpen(filename, 0);

  if (archive == NULL) {
    return -1;
  }

  status = PT_EnumCache(indexes, PT_SaveCache__New_Fun, (void *) archive);
  zipClose(archive,
           "Created by HTTrack Website Copier/ProxyTrack "
           PROXYTRACK_VERSION);
  archive = NULL;

  /* Do not leave a partial archive behind */
  if (status != 0) {
    (void) unlink(filename);
  }
  return status;
}
/* ------------------------------------------------------------ */
/* Old HTTrack cache (dat/ndx) format */
/* ------------------------------------------------------------ */
/* Read a length-prefixed string from the memory buffer 'adr'.
   The buffer is expected to hold a first line (read through binput())
   containing a decimal length, immediately followed by that many bytes.
   The bytes are copied into 's', which is always NUL-terminated; the caller
   must provide a buffer large enough for the announced length.
   Returns the number of bytes consumed from 'adr'.
   A missing, unparsable or negative length is treated as 0, so that nothing
   is ever written before 's' and no indeterminate value is used. */
static int cache_brstr(char *adr, char *s) {
  int i = 0;
  int off;
  char buff[256 + 1];

  off = binput(adr, buff, 256);
  adr += off;
  if (sscanf(buff, "%d", &i) != 1 || i < 0)
    i = 0;
  if (i > 0)
    strncpy(s, adr, i);
  *(s + i) = '\0';
  off += i;
  return off;
}
/* Read a length-prefixed string from the stream 'fp'.
   A first line (read through linput()) holds the decimal length, followed by
   that many raw bytes which are stored in 's'. 's' is always NUL-terminated.
   Lengths that can not be parsed, are negative, or exceed 32768 are treated
   as 0 (empty string); the caller must provide room for the accepted length
   plus the terminator. */
static void cache_rstr(FILE * fp, char *s) {
  INTsys i = 0;
  char buff[256 + 4];

  linput(fp, buff, 256);
  if (sscanf(buff, INTsysP, &i) != 1
      || i < 0 || i > 32768)    /* error, something nasty happened */
    i = 0;
  if (i > 0) {
    if ((int) fread(s, 1, i, fp) != i) {
      assertf(! "fread_cache_failed");
    }
  }
  *(s + i) = '\0';
}
static char *cache_rstr_addr(FILE * fp) {
INTsys i;
char *addr = NULL;
char buff[256 + 4];
linput(fp, buff, 256);
sscanf(buff, "%d", &i);
if (i < 0 || i > 32768) /* error, something nasty happened */
i = 0;
if (i > 0) {
addr = malloc(i + 1);
if (addr != NULL) {
if ((int) fread(addr, 1, i, fp) != i) {
assertf(! "fread_cache_failed");
}
*(addr + i) = '\0';
}
}
return addr;
}
/* Read a length-prefixed string from 'fp' and parse it as a decimal int.
   '*i' is left untouched when the string does not start with a number. */
static void cache_rint(FILE * fp, int *i) {
  /* cache_rstr() accepts records of up to 32768 bytes and appends a
     terminator: the buffer must be able to hold that much */
  char s[32768 + 1];

  cache_rstr(fp, s);
  sscanf(s, "%d", i);
}
/* Read a length-prefixed string from 'fp', parse it as a decimal int and
   store it, converted to unsigned long, in '*i'.
   '*i' is set to 0 when the string does not start with a number. */
static void cache_rLLint(FILE * fp, unsigned long *i) {
  int l = 0;
  /* cache_rstr() accepts records of up to 32768 bytes and appends a
     terminator: the buffer must be able to hold that much */
  char s[32768 + 1];

  cache_rstr(fp, s);
  sscanf(s, "%d", &l);
  *i = (unsigned long) l;
}
/* Open an old-format (dat/ndx) cache and index its entries.
   - index_:   index whose 'formatOld' slot is filled
   - filename: name of one of the cache files; its extension is replaced by
               ".dat" and ".ndx" to get the data and index file names
   The whole .ndx file is read in memory. Its first record is either a
   "CACHE-1.x" signature (x <= 5 supported), followed by the last-modified
   record, or directly the last-modified record (older format, version 0).
   Each following line yields a key (two strings concatenated) and a position
   which are stored in cache->hash; the first key not containing
   "/robots.txt" is used to build the start URL.
   Returns 1 on success, 0 otherwise. */
static int PT_LoadCache__Old(PT_Index index_, const char *filename) {
  if (index_ != NULL && filename != NULL) {
    const char *ext = strrchr(filename, '.');
    PT_Index__Old cache = &index_->slots.formatOld;
    long int ndxSize;

    cache->filenameDat[0] = '\0';
    cache->filenameNdx[0] = '\0';
    cache->path[0] = '\0';
    {
      PT_Index__Old index = cache;
      const char *abpath;
      int slashes;

      /* -------------------- COPY OF THE __New() CODE -------------------- */
      /* Compute base path for this index - the filename MUST be absolute! */
      for(slashes = 2, abpath = filename + (int) strlen(filename) - 1;
          abpath > filename && ((*abpath != '/' && *abpath != '\\')
                                || --slashes > 0);
          abpath--) ;
      index->path[0] = '\0';
      if (slashes == 0 && *abpath != 0) {
        int i;

        strncat(index->path, filename, (int) (abpath - filename) + 1);
        for(i = 0; index->path[i] != 0; i++) {
          if (index->path[i] == '\\') {
            index->path[i] = '/';
          }
        }
      }
      /* -------------------- END OF COPY OF THE __New() CODE -------------------- */
    }

    /* Index/data filenames */
    if (ext != NULL) {
      int nLen = (int) (ext - filename);

      strncat(cache->filenameDat, filename, nLen);
      strncat(cache->filenameNdx, filename, nLen);
      strcat(cache->filenameDat, ".dat");
      strcat(cache->filenameNdx, ".ndx");
    }
    ndxSize = filesize(cache->filenameNdx);
    cache->timestamp = file_timestamp(cache->filenameDat);
    cache->dat = fopen(cache->filenameDat, "rb");
    cache->ndx = fopen(cache->filenameNdx, "rb");
    if (cache->dat != NULL && cache->ndx != NULL && ndxSize > 0) {
      char *use = malloc(ndxSize + 1);

      if (use != NULL
          && fread(use, 1, ndxSize, cache->ndx) == (size_t) ndxSize) {
        char firstline[256];
        char *a = use;

        use[ndxSize] = '\0';
        a += cache_brstr(a, firstline);
        if (strncmp(firstline, "CACHE-", 6) == 0) {     // New cache version
          if (strncmp(firstline, "CACHE-1.", 8) == 0) { // Version 1.1x
            cache->version = (int) (firstline[8] - '0');        // cache 1.x
            if (cache->version <= 5) {
              a += cache_brstr(a, firstline);
              strcpy(cache->lastmodified, firstline);
            } else {
              /* cache version 1.x not supported: ignore this cache */
              fclose(cache->dat);
              cache->dat = NULL;
              free(use);
              use = NULL;
            }
          } else {              // not supported
            /* unknown cache signature: ignore this cache */
            fclose(cache->dat);
            cache->dat = NULL;
            free(use);
            use = NULL;
          }
        } else {                // Old cache version
          /* importing old cache format */
          cache->version = 0;   // cache 1.0
          strcpy(cache->lastmodified, firstline);
        }

        /* Create hash table for the cache (MUCH FASTER!) */
        if (use) {
          char line[HTS_URLMAXSIZE * 2];
          char linepos[256];
          int firstSeen = 0;

          while((a != NULL) && (a < (use + ndxSize))) {
            a = strchr(a + 1, '\n');    /* start of line */
            if (a) {
              int pos = 0;

              a++;
              /* read "host/file" */
              a += binput(a, line, HTS_URLMAXSIZE);
              a += binput(a, line + strlen(line), HTS_URLMAXSIZE);
              /* read position */
              a += binput(a, linepos, 200);
              /* Entries whose position can not be parsed are skipped rather
                 than being registered with an indeterminate offset */
              if (sscanf(linepos, "%d", &pos) == 1) {
                /* Add entry */
                coucal_add(cache->hash, line, pos);
                /* First link as starting URL */
                if (!firstSeen) {
                  if (strstr(line, "/robots.txt") == NULL) {
                    PT_Index__Old index = cache;

                    firstSeen = 1;
                    if (!link_has_authority(line))
                      strcat(index->startUrl, "http://");
                    strcat(index->startUrl, line);
                  }
                }
              }
            }
          }
          /* Not needed anymore! */
          free(use);
          use = NULL;
          return 1;
        }
      } else {
        /* Allocation failure or short read: release the buffer (if any) */
        free(use);
      }
    }
  }
  return 0;
}
/* Return a decoded copy of 'url':
   - '+' becomes a space
   - "%%" becomes a single '%'
   - '%' followed by two characters is replaced by the character whose code
     is the hexadecimal value they spell (the three characters are dropped if
     they can not be parsed)
   - a '%' with fewer than two characters after it is dropped
   - any other character is copied as is */
static String DecodeUrl(const char *url) {
  String s = STRING_EMPTY;
  int i = 0;

  StringClear(s);
  while(url[i] != '\0') {
    if (url[i] == '+') {
      StringAddchar(s, ' ');
    } else if (url[i] != '%') {
      StringAddchar(s, url[i]);
    } else if (url[i + 1] == '%') {
      StringAddchar(s, '%');
      i++;
    } else if (url[i + 1] != 0 && url[i + 2] != 0) {
      char tmp[3];
      int codepoint = 0;

      tmp[0] = url[i + 1];
      tmp[1] = url[i + 2];
      tmp[2] = 0;
      if (sscanf(tmp, "%x", &codepoint) == 1) {
        StringAddchar(s, (char) codepoint);
      }
      i += 2;
    }
    i++;
  }
  return s;
}
/* Locked front-end of PT_ReadCache__Old_u(): the lookup is performed while
   holding the old-format index file lock. */
static PT_Element PT_ReadCache__Old(PT_Index index, const char *url, int flags) {
  PT_Element element;

  MutexLock(&index->slots.formatOld.fileLock);
  element = PT_ReadCache__Old_u(index, url, flags);
  MutexUnlock(&index->slots.formatOld.fileLock);
  return element;
}
/* Read one entry of an old-format (dat/ndx) cache; the caller is expected to
   hold the index file lock (see PT_ReadCache__Old()).
   - index_: index whose 'formatOld' slot has been loaded
   - url:    entry to find; a leading "http://" is ignored
   - flags:  the body is loaded only when FETCH_BODY is set
   Returns NULL on bad arguments or when the element can not be allocated.
   Otherwise returns a new element; failures are reported through
   r->statuscode == STATUSCODE_INVALID and r->msg.
   The position stored in the hash table is the offset of the entry in the
   .dat file; a negative position means that the body is not in the .dat file
   but in the previously saved file on disk.
   r->location is either NULL or a strdup()ed string on return. */
static PT_Element PT_ReadCache__Old_u(PT_Index index_, const char *url,
                                      int flags) {
  PT_Index__Old cache = (PT_Index__Old) & index_->slots.formatOld;
  intptr_t hash_pos;
  int hash_pos_return;
  char location_default[HTS_URLMAXSIZE * 2];
  char previous_save[HTS_URLMAXSIZE * 2];
  char previous_save_[HTS_URLMAXSIZE * 2];
  PT_Element r;
  int ok = 0;

  if (cache == NULL || cache->hash == NULL || url == NULL || *url == 0)
    return NULL;
  if ((r = PT_ElementNew()) == NULL)
    return NULL;
  location_default[0] = '\0';
  previous_save[0] = previous_save_[0] = '\0';
  memset(r, 0, sizeof(_PT_Element));
  /* The location is read in a local buffer, and duplicated before leaving */
  r->location = location_default;
  strcpy(r->location, "");
  if (strncmp(url, "http://", 7) == 0)
    url += 7;
  hash_pos_return = coucal_read(cache->hash, url, &hash_pos);

  if (hash_pos_return) {
    int pos = (int) hash_pos;   /* simply */

    if (fseek(cache->dat, (pos > 0) ? pos : (-pos), SEEK_SET) == 0) {
      /* Import cache1.0 */
      if (cache->version == 0) {
        OLD_htsblk old_r;

        /* read everything (including statuscode etc.) */
        if (fread((char *) &old_r, 1, sizeof(old_r), cache->dat) == sizeof(old_r)) {
          int i;
          String urlDecoded;

          r->statuscode = old_r.statuscode;
          r->size = old_r.size; // file size
          strcpy(r->msg, old_r.msg);
          strcpy(r->contenttype, old_r.contenttype);

          /* Guess the destination filename.. this sucks, because this method is not reliable.
             Yes, the old 1.0 cache format was *that* bogus. /rx */
#define FORBIDDEN_CHAR(c) (c == '~' \
  || c == '\\' \
  || c == ':' \
  || c == '*' \
  || c == '?' \
  || c == '\"' \
  || c == '<' \
  || c == '>' \
  || c == '|' \
  || c == '@' \
  || ((unsigned char) c ) <= 31 \
  || ((unsigned char) c ) == 127 \
  )
          urlDecoded = DecodeUrl(jump_protocol_and_auth(url));
          strcpy(previous_save_, StringBuff(urlDecoded));
          StringFree(urlDecoded);
          /* Replace forbidden characters, and cut at the query string */
          for(i = 0; previous_save_[i] != '\0' && previous_save_[i] != '?'; i++) {
            if (FORBIDDEN_CHAR(previous_save_[i])) {
              previous_save_[i] = '_';
            }
          }
          previous_save_[i] = '\0';
#undef FORBIDDEN_CHAR
          ok = 1;               /* import ok */
        }
        /* */
        /* Cache 1.1 */
      } else {
        char check[256];
        unsigned long size_read;
        unsigned long int size_;

        check[0] = '\0';
        //
        cache_rint(cache->dat, &r->statuscode);
        cache_rLLint(cache->dat, &size_);
        r->size = (size_t) size_;
        cache_rstr(cache->dat, r->msg);
        cache_rstr(cache->dat, r->contenttype);
        if (cache->version >= 3)
          cache_rstr(cache->dat, r->charset);
        cache_rstr(cache->dat, r->lastmodified);
        cache_rstr(cache->dat, r->etag);
        cache_rstr(cache->dat, r->location);
        if (cache->version >= 2)
          cache_rstr(cache->dat, r->cdispo);
        if (cache->version >= 4) {
          /* Three consecutive records; only the last one is kept */
          cache_rstr(cache->dat, previous_save_);       // adr
          cache_rstr(cache->dat, previous_save_);       // fil
          previous_save[0] = '\0';
          cache_rstr(cache->dat, previous_save_);       // save
        }
        if (cache->version >= 5) {
          r->headers = cache_rstr_addr(cache->dat);
        }
        //
        cache_rstr(cache->dat, check);
        if (strcmp(check, "HTS") == 0) {        /* integrity OK */
          ok = 1;
        }
        /* read the size again, to be sure of the declared size */
        cache_rLLint(cache->dat, &size_read);
        if (size_read > 0) {    /* if recorded here */
          r->size = size_read;
        } else {                /* no data directly in the cache; file present? */
          r->size = 0;
        }
      }

      /* Check destination filename */
      {
        PT_Index__Old index = cache;

        /* -------------------- COPY OF THE __New() CODE -------------------- */
        if (previous_save_[0] != '\0') {
          int pathLen = (int) strlen(index->path);

          if (pathLen > 0 && strncmp(previous_save_, index->path, pathLen) == 0) {      // old (<3.40) buggy format
            strcpy(previous_save, previous_save_);
          }
          // relative ? (hack)
          /* isalpha() requires a value representable as unsigned char */
          else if (index->safeCache || (previous_save_[0] != '/'        // /home/foo/bar.gif
                                        && (!isalpha((unsigned char) previous_save_[0])
                                            || previous_save_[1] != ':'))       // c:/home/foo/bar.gif
            ) {
            index->safeCache = 1;
            sprintf(previous_save, "%s%s", index->path, previous_save_);
          }
          // bogus format (includes buggy absolute path)
          else {
            /* guess previous path */
            if (index->fixedPath == 0) {
              const char *start = jump_protocol_and_auth(url);
              const char *end = start ? strchr(start, '/') : NULL;
              /* Only subtract the pointers when both are valid */
              int len = (start != NULL && end != NULL)
                ? (int) (end - start) : 0;

              if (len > 0 && len < 128) {
                char piece[128 + 2];
                const char *where;

                piece[0] = '\0';
                strncat(piece, start, len);
                if ((where = strstr(previous_save_, piece)) != NULL) {
                  index->fixedPath = (int) (where - previous_save_);    // offset to relative path
                }
              }
            }
            if (index->fixedPath > 0) {
              int saveLen = (int) strlen(previous_save_);

              if (index->fixedPath < saveLen) {
                sprintf(previous_save, "%s%s", index->path,
                        previous_save_ + index->fixedPath);
              } else {
                sprintf(r->msg, "Bogus fixePath prefix for %s (prefixLen=%d)",
                        previous_save_, (int) index->fixedPath);
                r->statuscode = STATUSCODE_INVALID;
              }
            } else {
              sprintf(previous_save, "%s%s", index->path, previous_save_);
            }
          }
        }
        /* -------------------- END OF COPY OF THE __New() CODE -------------------- */
      }

      /* Read data */
      if (ok) {
        r->adr = NULL;
        if ((r->statuscode >= 0) && (r->statuscode <= 999)) {
          r->adr = NULL;
          if (pos < 0) {
            /* Body stored in the previously saved file */
            if (flags & FETCH_BODY) {
              FILE *fp = fopen(previous_save, "rb");

              if (fp != NULL) {
                r->adr = (char *) malloc(r->size + 1);
                if (r->adr != NULL) {
                  if (r->size > 0 && fread(r->adr, 1, r->size, fp) != r->size) {
                    r->statuscode = STATUSCODE_INVALID;
                    strcpy(r->msg, "Read error in cache disk data");
                  }
                  r->adr[r->size] = '\0';
                } else {
                  r->statuscode = STATUSCODE_INVALID;
                  strcpy(r->msg, "Read error (memory exhausted) from cache");
                }
                fclose(fp);
              } else {
                r->statuscode = STATUSCODE_INVALID;
                strcpy(r->msg, "Previous cache file not found (2)");
              }
            }
          } else {
            // read the file (in one go)
            if (flags & FETCH_BODY) {
              r->adr = (char *) malloc(r->size + 1);
              if (r->adr != NULL) {
                if (fread(r->adr, 1, r->size, cache->dat) != r->size) { // error
                  free(r->adr);
                  r->adr = NULL;
                  r->statuscode = STATUSCODE_INVALID;
                  strcpy(r->msg, "Cache Read Error : Read Data");
                } else
                  r->adr[r->size] = '\0';
              } else {          // error
                r->statuscode = STATUSCODE_INVALID;
                strcpy(r->msg, "Cache Memory Error");
              }
            }
          }
        } else {
          r->statuscode = STATUSCODE_INVALID;
          strcpy(r->msg, "Cache Read Error : Bad Data");
        }
      } else {                  // error
        r->statuscode = STATUSCODE_INVALID;
        strcpy(r->msg, "Cache Read Error : Read Header");
      }
    } else {
      r->statuscode = STATUSCODE_INVALID;
      strcpy(r->msg, "Cache Read Error : Seek Failed");
    }
  } else {
    r->statuscode = STATUSCODE_INVALID;
    strcpy(r->msg, "File Cache Entry Not Found");
  }

  /* r->location points to a local buffer: hand over a heap copy, or NULL */
  if (r->location[0] != '\0') {
    r->location = strdup(r->location);
  } else {
    r->location = NULL;
  }
  return r;
}
/* Locked front-end of PT_LookupCache__Old_u(): the lookup is performed while
   holding the old-format index file lock. */
static int PT_LookupCache__Old(PT_Index index, const char *url) {
  int found;

  MutexLock(&index->slots.formatOld.fileLock);
  found = PT_LookupCache__Old_u(index, url);
  MutexUnlock(&index->slots.formatOld.fileLock);
  return found;
}
/* Tell whether 'url' (a leading "http://" is ignored) is a key of the
   old-format index hash table. Returns 1 if found, 0 otherwise (including
   on NULL/empty arguments).
   The hash table is taken from the 'formatOld' slot, i.e. the one filled by
   PT_LoadCache__Old() and read by PT_ReadCache__Old_u(). */
static int PT_LookupCache__Old_u(PT_Index index_, const char *url) {
  if (index_ != NULL) {
    PT_Index__Old cache = (PT_Index__Old) & index_->slots.formatOld;

    if (cache == NULL || cache->hash == NULL || url == NULL || *url == 0)
      return 0;
    if (strncmp(url, "http://", 7) == 0)
      url += 7;
    if (coucal_read(cache->hash, url, NULL))
      return 1;
  }
  return 0;
}
/* ------------------------------------------------------------ */
/* Internet Archive Arc 1.0 (arc) format */
/* Xavier Roche (roche@httrack.com) */
/* Lars Clausen (lc@statsbiblioteket.dk) */
/* ------------------------------------------------------------ */
#define ARC_SP ' '

/* Return a pointer to the field number 'pos' (0-based) of the ARC_SP
   separated record 'line', that is, to the character following the pos-th
   separator. Returns NULL when the line holds fewer separators than that,
   or when 'pos' is negative. */
static const char *getArcField(const char *line, int pos) {
  const char *cursor = line;

  while(pos > 0 && *cursor != '\0') {
    if (*cursor == ARC_SP) {
      pos--;
    }
    cursor++;
  }
  return (pos == 0) ? cursor : NULL;
}
/* Copy the field number 'npos' of 'line' into 'dest'.
   At most destMax - 1 characters are copied; 'dest' is always terminated.
   Returns 'dest', or NULL (with 'dest' set to the empty string) when the
   field does not exist. */
static char *copyArcField(const char *line, int npos, char *dest, int destMax) {
  const char *const field = getArcField(line, npos);
  const int room = destMax - 1;
  int n = 0;

  if (field == NULL) {
    dest[0] = 0;
    return NULL;
  }
  while(n < room && field[n] != '\0' && field[n] != ARC_SP) {
    dest[n] = field[n];
    n++;
  }
  dest[n] = 0;
  return dest;
}
/* Return the length announced by the ARC record 'line', or -1.
   The length is parsed from field 9 when present, else from field 4, else
   from field 2; there is no fallback to another field when the selected one
   is not a number. */
static int getArcLength(const char *line) {
  const char *field = getArcField(line, 9);
  int length;

  if (field == NULL) {
    field = getArcField(line, 4);
  }
  if (field == NULL) {
    field = getArcField(line, 2);
  }
  if (field != NULL && sscanf(field, "%d", &length) == 1) {
    return length;
  }
  return -1;
}
/* Consume one character from 'file'.
   Returns 0 if it was a line feed (0x0a), -1 otherwise (including EOF). */
static int skipArcNl(FILE * file) {
  const int c = fgetc(file);

  return (c == 0x0a) ? 0 : -1;
}
/* Move 'file' forward by the length announced in the ARC record 'line'.
   The newline following the data is NOT consumed here.
   Returns 0 on success, -1 if the length is unknown or the seek fails. */
static int skipArcData(FILE * file, const char *line) {
  const int jump = getArcLength(line);

  if (jump == -1) {
    return -1;
  }
  if (fseek(file, jump, SEEK_CUR) != 0) {
    return -1;
  }
  return 0;
}
/* Value of the decimal digit character 'digit' (no validation is done). */
static int getDigit(const char digit) {
  const int value = digit - '0';

  return value;
}
/* Value of the two decimal digits starting at 'pos' (no validation). */
static int getDigit2(const char *const pos) {
  const int tens = getDigit(pos[0]);
  const int units = getDigit(pos[1]);

  return tens * 10 + units;
}
/* Value of the four decimal digits starting at 'pos' (no validation). */
static int getDigit4(const char *const pos) {
  int value = 0;
  int i;

  for(i = 0; i < 4; i++) {
    value = value * 10 + getDigit(pos[i]);
  }
  return value;
}
/* Convert the broken-down time 'tm', holding a GMT date, to a time_t.
   mktime() takes its argument as local time, hence the result is corrected
   by subtracting the 'timezone' offset.
   Returns (time_t) -1 when mktime() yields -1 or 0. */
static time_t getGMT(struct tm *tm) {   /* hey, time_t is local! */
  const time_t local = mktime(tm);

  if (local == (time_t) - 1 || local == (time_t) 0) {
    return (time_t) - 1;
  } else {
    /* BSD does not have static "timezone" declared */
#if (defined(BSD) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD_kernel__))
    time_t now = time(NULL);
    time_t timezone = -localtime(&now)->tm_gmtoff;
#endif
    return (time_t) (local - timezone);
  }
}
/* Parse the date found in field 2 of the ARC record 'line'.
   The field must start with exactly 14 digits:
   date == YYYYMMDDhhmmss (Greenwich Mean Time), example: 20050405154029
   Returns the matching time_t (see getGMT()), or (time_t) -1 when the field
   is missing or malformed. */
static time_t getArcTimestamp(const char *const line) {
  const char *const date = getArcField(line, 2);
  struct tm tm;

  if (date == NULL || strspn(date, "0123456789") != 14) {
    return (time_t) - 1;
  }

  memset(&tm, 0, sizeof(tm));
  tm.tm_year = getDigit4(date + 0) - 1900;      /* current year minus 1900 */
  tm.tm_mon = getDigit2(date + 4) - 1;          /* 0..11 */
  tm.tm_mday = getDigit2(date + 6);             /* 1..31 */
  tm.tm_hour = getDigit2(date + 8);             /* 0..23 */
  tm.tm_min = getDigit2(date + 10);             /* 0..59 */
  tm.tm_sec = getDigit2(date + 12);             /* 0..59 */
  tm.tm_isdst = 0;
  return getGMT(&tm);
}
/* Read the next line of the ARC file into index->line (emptied first).
   Returns 0 when linput() reports a non-zero result, -1 otherwise. */
static int readArcURLRecord(PT_Index__Arc index) {
  index->line[0] = '\0';
  return linput(index->file, index->line, sizeof(index->line) - 1) ? 0 : -1;
}
#define str_begins(str, sstr) ( strncmp(str, sstr, sizeof(sstr) - 1) == 0 )

/* Return 1 if 'url' starts with one of the schemes that can be served
   ("http:", "https:", "ftp:" or "file:"), 0 otherwise. */
static int PT_CompatibleScheme(const char *url) {
  if (str_begins(url, "http:")) {
    return 1;
  }
  if (str_begins(url, "https:")) {
    return 1;
  }
  if (str_begins(url, "ftp:")) {
    return 1;
  }
  if (str_begins(url, "file:")) {
    return 1;
  }
  return 0;
}
/* Open the ARC file 'filename' and index its entries in the 'formatArc' slot
   of 'index_'.
   The first record must be a "filedesc://" line; its date becomes the index
   timestamp and its data are skipped. Then every URL record is scanned: the
   file position of the record (taken before the newline preceding it) is
   stored in the hash table under the URL (without any leading "http://"),
   provided that the URL scheme is accepted by PT_CompatibleScheme().
   Returns 1 on success, 0 otherwise. On failure, the file is closed and
   index->file is NULL. */
int PT_LoadCache__Arc(PT_Index index_, const char *filename) {
  if (index_ != NULL && filename != NULL) {
    PT_Index__Arc index = &index_->slots.formatArc;

    index->timestamp = file_timestamp(filename);
    MutexInit(&index->fileLock);
    index->file = fopen(filename, "rb");

    // Opened ?
    if (index->file != NULL) {
      coucal hashtable = index->hash;

      if (readArcURLRecord(index) == 0) {
        int entries = 0;

        /* Read first line */
        if (strncmp(index->line, "filedesc://", sizeof("filedesc://") - 1) != 0) {
          fprintf(stderr, "Unexpected bad signature #%s" LF, index->line);
          fclose(index->file);
          index->file = NULL;
          return 0;
        }
        /* Timestamp */
        index->timestamp = getArcTimestamp(index->line);
        /* Skip first entry */
        if (skipArcData(index->file, index->line) != 0
            || skipArcNl(index->file) != 0) {
          fprintf(stderr, "Unexpected bad data offset size first entry" LF);
          fclose(index->file);
          index->file = NULL;
          return 0;
        }
        /* Read all meta-entries (not data) */
        while(!feof(index->file)) {
          unsigned long int fpos = ftell(index->file);

          if (skipArcNl(index->file) == 0 && readArcURLRecord(index) == 0) {
            int length = getArcLength(index->line);

            if (length >= 0) {
              const char *filenameIndex = copyArcField(index->line, 0,
                                                       index->filenameIndexBuff, sizeof(index->filenameIndexBuff) - 1);  /* can not be NULL */

              if (strncmp(filenameIndex, "http://", 7) == 0) {
                filenameIndex += 7;
              }
              if (*filenameIndex != 0) {
                if (skipArcData(index->file, index->line) != 0) {
                  fprintf(stderr,
                          "Corrupted cache data entry #%d (truncated file?), aborting read"
                          LF, (int) entries);
                  /* The data could not be skipped: the stream is not
                     positioned on the next record, so do stop here, as
                     announced by the message */
                  break;
                }
                /*fprintf(stdout, "adding %s [%d]\n", filenameIndex, (int)fpos); */
                if (PT_CompatibleScheme(index->filenameIndexBuff)) {
                  coucal_add(hashtable, filenameIndex, fpos);   /* position of meta-data */
                  entries++;
                }
              } else {
                fprintf(stderr, "Corrupted cache meta entry #%d" LF,
                        (int) entries);
              }
            } else {
              fprintf(stderr,
                      "Corrupted cache meta entry #%d, aborting read" LF,
                      (int) entries);
              break;
            }
          } else {
            break;
          }
        }
        /* OK */
        return 1;
      } else {
        fprintf(stderr, "Bad file (empty ?)" LF);
        /* Same cleanup as the other failure paths */
        fclose(index->file);
        index->file = NULL;
      }
    } else {
      fprintf(stderr, "Unable to open file" LF);
    }
  } else {
    fprintf(stderr, "Bad arguments" LF);
  }
  return 0;
}
/* Header field readers used while parsing "name: value" lines.
   'line' is the (writable) field name and 'value' the field value. If 'line'
   is not empty and strfield2(line, refline) is true (presumably a field name
   match - see strfield2()), the value is stored and line[0] is set to '\0'
   so that the line is marked as consumed for the following tests. */
/* String flavour: 'value' is copied into 'refvalue' with strcpy(), without
   any bound check: 'refvalue' must be large enough. */
#define HTTP_READFIELD_STRING(line, value, refline, refvalue) do { \
if (line[0] != '\0' && strfield2(line, refline)) { \
strcpy(refvalue, value); \
line[0] = '\0'; \
} \
} while(0)
/* Integer flavour: 'value' is parsed as a decimal int (0 if it can not be
   parsed) and assigned to 'refvalue'. */
#define HTTP_READFIELD_INT(line, value, refline, refvalue) do { \
if (line[0] != '\0' && strfield2(line, refline)) { \
int intval = 0; \
sscanf(value, "%d", &intval); \
(refvalue) = intval; \
line[0] = '\0'; \
} \
} while(0)
/* Locked front-end of PT_ReadCache__Arc_u(): the lookup is performed while
   holding the ARC index file lock. */
static PT_Element PT_ReadCache__Arc(PT_Index index, const char *url, int flags) {
  PT_Element element;

  MutexLock(&index->slots.formatArc.fileLock);
  element = PT_ReadCache__Arc_u(index, url, flags);
  MutexUnlock(&index->slots.formatArc.fileLock);
  return element;
}
/* Read one entry of an ARC cache; the caller is expected to hold the index
   file lock (see PT_ReadCache__Arc()).
   - index_: index whose 'formatArc' slot has been loaded
   - url:    entry to find; a leading "http://" is ignored
   - flags:  the body is loaded only when FETCH_BODY is set
   Returns NULL on bad arguments or when the element can not be allocated.
   Otherwise returns a new element; failures are reported through
   r->statuscode == STATUSCODE_INVALID and r->msg.
   The HTTP status line and headers stored in the record are parsed; headers
   that are not recognized are accumulated in r->headers. When loaded, the
   body is stored in r->adr, followed by a terminating NUL that is not
   counted in r->size.
   r->location is either NULL or a strdup()ed string on return. */
static PT_Element PT_ReadCache__Arc_u(PT_Index index_, const char *url,
                                      int flags) {
  PT_Index__Arc index = (PT_Index__Arc) & index_->slots.formatArc;
  char location_default[HTS_URLMAXSIZE * 2];
  intptr_t hash_pos;
  int hash_pos_return;
  PT_Element r = NULL;

  if (index == NULL || index->hash == NULL || url == NULL || *url == 0)
    return NULL;
  if ((r = PT_ElementNew()) == NULL)
    return NULL;
  location_default[0] = '\0';
  memset(r, 0, sizeof(_PT_Element));
  /* The location is read in a local buffer, and duplicated before leaving */
  r->location = location_default;
  strcpy(r->location, "");
  if (strncmp(url, "http://", 7) == 0)
    url += 7;
  hash_pos_return = coucal_read(index->hash, url, &hash_pos);

  if (hash_pos_return) {
    if (fseek(index->file, (long) hash_pos, SEEK_SET) == 0) {
      if (skipArcNl(index->file) == 0 && readArcURLRecord(index) == 0) {
        long int fposMeta = ftell(index->file);
        int dataLength = getArcLength(index->line);
        const char *pos;

        /* Read HTTP headers */
        /* HTTP/1.1 404 Not Found */
        if (linput(index->file, index->line, sizeof(index->line) - 1)) {
          if ((pos = getArcField(index->line, 1)) != NULL) {
            if (sscanf(pos, "%d", &r->statuscode) != 1) {
              r->statuscode = STATUSCODE_INVALID;
            }
          }
          if ((pos = getArcField(index->line, 2)) != NULL) {
            r->msg[0] = '\0';
            /* Bound by the destination buffer (and not by the size of the
               'pos' pointer, which truncated the message) */
            strncat(r->msg, pos, sizeof(r->msg) - 1);
          }
          while(linput(index->file, index->line, sizeof(index->line) - 1)
                && index->line[0] != '\0') {
            char *const line = index->line;
            char *value = strchr(line, ':');

            if (value != NULL) {
              /* Split "name: value", skipping blanks before the value */
              *value = '\0';
              for(value++; *value == ' ' || *value == '\t'; value++) ;
              HTTP_READFIELD_INT(line, value, "Content-Length", r->size);       // size
              HTTP_READFIELD_STRING(line, value, "Content-Type", r->contenttype);       // content type
              HTTP_READFIELD_STRING(line, value, "Last-Modified", r->lastmodified);     // last-modified
              HTTP_READFIELD_STRING(line, value, "Etag", r->etag);      // Etag
              HTTP_READFIELD_STRING(line, value, "Location", r->location);      // 'location' for moved documents
              HTTP_READFIELD_STRING(line, value, "Content-Disposition", r->cdispo);     // Content-disposition
              /* Not consumed above: keep it among the extra headers */
              if (line[0] != '\0') {
                const size_t len = r->headers ? strlen(r->headers) : 0;
                const size_t nlen =
                  strlen(line) + 2 + strlen(value) + sizeof("\r\n") + 1;
                char *const grown = realloc(r->headers, len + nlen);

                if (grown == NULL) {
                  /* r->headers is still valid (and owned by r) */
                  r->statuscode = STATUSCODE_INVALID;
                  strcpy(r->msg, "Read error (memory exhausted) from cache");
                  break;
                }
                r->headers = grown;
                r->headers[len] = '\0';
                strcat(r->headers, line);
                strcat(r->headers, ": ");
                strcat(r->headers, value);
                strcat(r->headers, "\r\n");
              }
            }
          }

          /* FIXME charset */
          /* Strip the parameters (";...") from the content type */
          if (r->contenttype[0] != '\0') {
            char *sep = strchr(r->contenttype, ';');

            if (sep != NULL) {
              *sep = 0;
              if ((sep = strchr(r->contenttype, ' ')) != NULL) {
                *sep = 0;
              }
            }
          }

          /* Read data */
          if (r->statuscode != STATUSCODE_INVALID) {    /* Can continue */
            if (flags & FETCH_BODY) {
              long int fposCurrent = ftell(index->file);
              long int metaSize = fposCurrent - fposMeta;
              long int fetchSize = (long int) r->size;

              if (fetchSize <= 0) {
                /* No Content-Length: everything after the headers */
                fetchSize = dataLength - metaSize;
              } else if (fetchSize > dataLength - metaSize) {
                r->statuscode = STATUSCODE_INVALID;
                strcpy(r->msg, "Cache Read Error : Truncated Data");
              }
              if (fetchSize < 0) {
                /* Record shorter than its own headers (or unknown length) */
                r->statuscode = STATUSCODE_INVALID;
                strcpy(r->msg, "Cache Read Error : Truncated Data");
              }
              r->size = 0;
              if (r->statuscode != STATUSCODE_INVALID) {
                /* One more byte for the terminating NUL; this also avoids a
                   zero-sized allocation for empty bodies */
                r->adr = (char *) malloc((size_t) fetchSize + 1);
                if (r->adr != NULL) {
                  if (fetchSize > 0) {
                    const size_t nRead =
                      fread(r->adr, 1, (size_t) fetchSize, index->file);
                    const int last_errno = errno;

                    r->size = nRead;
                    if (nRead != (size_t) fetchSize) {
                      r->statuscode = STATUSCODE_INVALID;
                      sprintf(r->msg, "Read error in cache disk data: %s",
                              strerror(last_errno));
                    }
                  }
                  r->adr[r->size] = '\0';
                } else {
                  r->statuscode = STATUSCODE_INVALID;
                  strcpy(r->msg, "Read error (memory exhausted) from cache");
                }
              }
            }
          }
        } else {
          r->statuscode = STATUSCODE_INVALID;
          strcpy(r->msg, "Cache Read Error : Read Header Error");
        }
      } else {
        r->statuscode = STATUSCODE_INVALID;
        strcpy(r->msg, "Cache Read Error : Read Header Error");
      }
    } else {
      r->statuscode = STATUSCODE_INVALID;
      strcpy(r->msg, "Cache Read Error : Seek Error");
    }
  } else {
    r->statuscode = STATUSCODE_INVALID;
    strcpy(r->msg, "File Cache Entry Not Found");
  }

  /* r->location points to a local buffer: hand over a heap copy, or NULL */
  if (r->location[0] != '\0') {
    r->location = strdup(r->location);
  } else {
    r->location = NULL;
  }
  return r;
}
/* Locked front-end of PT_LookupCache__Arc_u(): the lookup is performed while
   holding the ARC index file lock. */
static int PT_LookupCache__Arc(PT_Index index, const char *url) {
  int found;

  MutexLock(&index->slots.formatArc.fileLock);
  found = PT_LookupCache__Arc_u(index, url);
  MutexUnlock(&index->slots.formatArc.fileLock);
  return found;
}
/* Tell whether 'url' (a leading "http://" is ignored) is a key of the ARC
   index hash table. Returns 1 if found, 0 otherwise (including on
   NULL/empty arguments).
   The hash table is taken from the 'formatArc' slot, i.e. the one filled by
   PT_LoadCache__Arc() and read by PT_ReadCache__Arc_u(). */
static int PT_LookupCache__Arc_u(PT_Index index_, const char *url) {
  if (index_ != NULL) {
    PT_Index__Arc cache = (PT_Index__Arc) & index_->slots.formatArc;

    if (cache == NULL || cache->hash == NULL || url == NULL || *url == 0)
      return 0;
    if (strncmp(url, "http://", 7) == 0)
      url += 7;
    if (coucal_read(cache->hash, url, NULL))
      return 1;
  }
  return 0;
}
/* State shared between PT_SaveCache__Arc() and its enumeration callback
   PT_SaveCache__Arc_Fun(). */
typedef struct PT_SaveCache__Arc_t {
/* Indexes being saved */
PT_Indexes indexes;
/* Destination ARC file, opened for writing */
FILE *fp;
/* Time obtained from PT_GetTimeIndex() for the indexes */
time_t t;
/* Archive name ("httrack_<t>.arc") written in each URL record */
char filename[64];
/* Storage handed to convert_time_rfc822() for the element date */
struct tm buff;
/* Scratch buffer where the HTTP headers of the element are built */
char headers[8192];
/* Checksum of the element body as produced by domd5mem(), or "-" */
char md5[32 + 2];
} PT_SaveCache__Arc_t;
/* PT_EnumCache() callback used by PT_SaveCache__Arc(): append one element to
   the ARC file.
   - arg:     the PT_SaveCache__Arc_t state
   - url:     URL of the element ("http://" is prepended when
              link_has_authority() is false)
   - element: cached meta-data and (optional) body
   Writes a newline, the URL record, then the network document (rebuilt HTTP
   headers followed by the body).
   Returns 0 on success, 1 on write error. */
static int PT_SaveCache__Arc_Fun(void *arg, const char *url, PT_Element element) {
  PT_SaveCache__Arc_t *st = (PT_SaveCache__Arc_t *) arg;
  FILE *const fp = st->fp;
  struct tm *tm = convert_time_rfc822(&st->buff, element->lastmodified);
  /* Longest header text that still leaves room for the final "\r\n" and the
     terminating NUL */
  const size_t capacity = sizeof(st->headers) - sizeof("\r\n");
  size_t size_headers;

  /* The date may be missing or unparsable (PT_SaveCache__New_Fun() checks
     for that too): fall back to the time of the indexes instead of
     dereferencing a NULL pointer below.
     NOTE(review): assumes PT_GetTime() yields the representation expected in
     the record date, as for the filedesc line - confirm. */
  if (tm == NULL) {
    st->buff = PT_GetTime(st->t);
    tm = &st->buff;
  }

  sprintf(st->headers,
          "HTTP/1.0 %d %s" "\r\n" "X-Server: ProxyTrack " PROXYTRACK_VERSION
          "\r\n" "Content-type: %s%s%s%s" "\r\n" "Last-modified: %s" "\r\n"
          "Content-length: %d" "\r\n", element->statuscode, element->msg,
          /**/ element->contenttype,
          (element->charset[0] ? "; charset=\"" : ""),
          (element->charset[0] ? element->charset : ""),
          (element->charset[0] ? "\"" : ""), /**/ element->lastmodified,
          (int) element->size);
  size_headers = strlen(st->headers);

  /* Optional parts are appended only if they fit in what is left */
  if (element->location != NULL && element->location[0] != '\0') {
    const size_t need =
      (sizeof("Location: ") - 1) + strlen(element->location)
      + (sizeof("\r\n") - 1);

    if (size_headers <= capacity && need <= capacity - size_headers) {
      sprintf(st->headers + size_headers, "Location: %s" "\r\n",
              element->location);
      size_headers += need;
    }
  }
  if (element->headers != NULL) {
    const size_t need = strlen(element->headers);

    if (size_headers <= capacity && need <= capacity - size_headers) {
      strcat(st->headers, element->headers);
      size_headers += need;
    }
  }
  strcat(st->headers, "\r\n");
  size_headers = strlen(st->headers);

  /* doc == <nl><URL-record><nl><network_doc> */

  /* Format: URL IP date mime result checksum location offset filename length */
  if (element->adr != NULL) {
    domd5mem(element->adr, element->size, st->md5, 1);
  } else {
    strcpy(st->md5, "-");
  }
  fprintf(fp,
          /* nl */
          "\n"
          /* URL-record */
          "%s%s %s %04d%02d%02d%02d%02d%02d %s %d %s %s %ld %s %ld"
          /* nl */
          "\n",
          /* args */
          (link_has_authority(url) ? "" : "http://"), url, "0.0.0.0",
          tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour,
          tm->tm_min, tm->tm_sec, element->contenttype, element->statuscode,
          st->md5, (element->location ? element->location : "-"),
          (long int) ftell(fp), st->filename,
          (long int) (size_headers + element->size));

  /* network_doc */
  if (fwrite(st->headers, 1, size_headers, fp) != size_headers
      || (element->size > 0
          && fwrite(element->adr, 1, element->size, fp) != element->size)
    ) {
    return 1;                   /* Error */
  }
  return 0;
}
/* Save all elements of 'indexes' into a newly created ARC file 'filename'.
   The file starts with the version-2 "filedesc://" block, followed by one
   record per element (see PT_SaveCache__Arc_Fun()).
   Returns -1 when the file can not be created, or when closing it fails
   after an otherwise successful enumeration; else the value returned by
   PT_EnumCache(). The file is removed whenever the result is not 0. */
static int PT_SaveCache__Arc(PT_Indexes indexes, const char *filename) {
  FILE *fp = fopen(filename, "wb");

  if (fp != NULL) {
    PT_SaveCache__Arc_t st;
    int ret;
    time_t t = PT_GetTimeIndex(indexes);
    struct tm tm = PT_GetTime(t);

    /* version-2-block ==
       filedesc://<path><sp><ip_address><sp><date><sp>text/plain<sp>200<sp>-<sp>-<sp>0<sp><filename><sp><length><nl>
       2<sp><reserved><sp><origin-code><nl>
       URL<sp>IP-address<sp>Archive-date<sp>Content-type<sp>Result-code<sp>Checksum<sp>Location<sp> Offset<sp>Filename<sp>Archive-length<nl>
       <nl> */
    const char *prefix =
      "2 0 HTTrack Website Copier" "\n"
      "URL IP-address Archive-Date Content-Type Result-code Checksum Location Offset Filename Archive-length"
      "\n" "\n";

    sprintf(st.filename, "httrack_%d.arc", (int) t);
    fprintf(fp,
            "filedesc://%s 0.0.0.0 %04d%02d%02d%02d%02d%02d text/plain 200 - - 0 %s %d"
            "\n" "%s", st.filename, tm.tm_year + 1900, tm.tm_mon + 1,
            tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, st.filename,
            (int) strlen(prefix), prefix);
    st.fp = fp;
    st.indexes = indexes;
    st.t = t;
    ret = PT_EnumCache(indexes, PT_SaveCache__Arc_Fun, (void *) &st);

    /* fclose() flushes the pending data: a failure here means that the
       archive is incomplete */
    if (fclose(fp) != 0 && ret == 0) {
      ret = -1;
    }
    if (ret != 0)
      (void) unlink(filename);
    return ret;
  }
  return -1;
}