9 Commits

Author SHA1 Message Date
Xavier Roche
6d742104a1 3.48.7 tag 2014-05-14 18:42:23 +00:00
Xavier Roche
16aec722bf 3.48.7 2014-05-14 17:57:18 +00:00
Xavier Roche
4ff55249ed 3.48.7 2014-05-14 17:51:09 +00:00
Xavier Roche
fea8122ed3 Fixed hashtable corruption caused by dirty code directly modifying the host address in memory, which left hashtable positions no longer valid.
This issue was especially triggered when a redirect was processed ("Warning moved treated for .." messages)
  * closes: #43
2014-05-14 17:48:04 +00:00
Xavier Roche
7323230eb3 Added debug logging facility. 2014-05-14 17:45:51 +00:00
Xavier Roche
c9f656fdeb Better stdarg.h 2014-05-13 20:12:10 +00:00
Xavier Roche
042525a1db #include <stdarg.h> 2014-05-13 20:11:41 +00:00
Xavier Roche
3fde59c090 #include <stdarg.h> 2014-05-13 20:11:02 +00:00
Xavier Roche
edaaa73328 #include <stdarg.h> 2014-05-13 20:08:53 +00:00
9 changed files with 158 additions and 6 deletions

View File

@@ -2355,7 +2355,7 @@ void host_ban(httrackp * opt, lien_url ** liens, int ptr, int lien_tot,
if (strfield2(jump_identification(liens[i]->adr), host)) { // host
hts_log_print(opt, LOG_DEBUG, "Cancel: %s%s", liens[i]->adr,
liens[i]->fil);
strcpybuff(liens[i]->adr, "!"); // cancel (invalide hash)
hash_invalidate_entry(opt->hash, i); // invalidate hashtable entry
// on efface pas le hash, because si on rencontre le lien, reverif sav..
}
} else {

View File

@@ -36,8 +36,8 @@ Please visit our Website: http://www.httrack.com
#define HTTRACK_GLOBAL_DEFH
// Version (also check external version information)
#define HTTRACK_VERSION "3.48-6"
#define HTTRACK_VERSIONID "3.48.6"
#define HTTRACK_VERSION "3.48-7"
#define HTTRACK_VERSIONID "3.48.7"
#define HTTRACK_AFF_VERSION "3.x"
#define HTTRACK_LIB_VERSION "2.0"

View File

@@ -85,6 +85,14 @@ static int key_sav_equals(void *arg, const char *a, const char *b) {
return strcasecmp(a, b) == 0;
}
/* Debug printer for keys of the "sav" hashtable.
   The key is handed back untouched (the matching equality handler already
   treats it as a C string); 'arg' is not used. */
static const char* key_sav_debug_print(void *arg, const char *a) {
  (void) arg;
  return a;
}
/* Debug printer for values of the "sav" hashtable.
   The opaque value pointer is reinterpreted as a char pointer and returned
   as-is; 'arg' is not used. */
static const char* value_sav_debug_print(void *arg, void *a) {
  const char *const text = (char*) a;

  (void) arg;
  return text;
}
/* Pseudo-key (lien_url structure) hash function */
static inthash_keys key_adrfil_hashes_generic(void *arg, const char *value_,
const int former) {
@@ -151,6 +159,32 @@ static int key_adrfil_equals_generic(void *arg, const char *a_, const char *b_,
}
}
/* Debug printer shared by the "adrfil" and "former_adrfil" hashtables.
   arg:    the owning hash_struct (provides the normfil scratch buffer)
   a_:     the key, which is actually a lien_url pointer
   former: when non-zero, print former_adr/former_fil instead of adr/fil
   Returns hash->normfil, filled with the address immediately followed by
   the file part. The text is overwritten by the next write to normfil. */
static const char* key_adrfil_debug_print_(void *arg, const char *a_, const int former) {
  hash_struct *const hash = (hash_struct*) arg;
  const lien_url*const a = (lien_url*) a_;
  const char *const a_adr = !former ? a->adr : a->former_adr;
  const char *const a_fil = !former ? a->fil : a->former_fil;

  /* output is limited to sizeof(hash->normfil) bytes (truncated if longer) */
  snprintf(hash->normfil, sizeof(hash->normfil), "%s%s", a_adr, a_fil);
  return hash->normfil;
}
/* Key printer for the "adrfil" hashtable: prints the current adr/fil pair
   of the lien_url key (former == 0). */
static const char* key_adrfil_debug_print(void *arg, const char *a_) {
  const int use_former = 0;

  return key_adrfil_debug_print_(arg, a_, use_former);
}
/* Key printer for the "former_adrfil" hashtable: prints the
   former_adr/former_fil pair of the lien_url key (former == 1). */
static const char* key_former_adrfil_debug_print(void *arg, const char *a_) {
  const int use_former = 1;

  return key_adrfil_debug_print_(arg, a_, use_former);
}
/* Value printer for the "adrfil" and "former_adrfil" hashtables.
   arg:   the owning hash_struct (provides the normfil2 scratch buffer)
   value: the opaque hashtable value
   The value is read back through the inthash_value union as an integer and
   formatted in decimal into hash->normfil2, which is returned. normfil2 is
   a different buffer from the one the key printer writes (normfil), so a
   key string and a value string can be valid at the same time. */
static const char* value_adrfil_debug_print(void *arg, void *value) {
  hash_struct *const owner = (hash_struct*) arg;
  inthash_value boxed;

  boxed.ptr = value;
  snprintf(owner->normfil2, sizeof(owner->normfil2), "%d", (int) boxed.intg);
  return owner->normfil2;
}
/* "adr"/"fil" lien_url structure members hashing function */
static inthash_keys key_adrfil_hashes(void *arg, const char *value_) {
return key_adrfil_hashes_generic(arg, value_, 0);
@@ -207,6 +241,20 @@ void hash_init(httrackp *opt, hash_struct * hash, int normalized) {
key_former_adrfil_hashes,
key_former_adrfil_equals,
hash);
/* pretty-printing */
inthash_set_print_handler(hash->sav,
key_sav_debug_print,
value_sav_debug_print,
NULL);
inthash_set_print_handler(hash->adrfil,
key_adrfil_debug_print,
value_adrfil_debug_print,
hash);
inthash_set_print_handler(hash->former_adrfil,
key_former_adrfil_debug_print,
value_adrfil_debug_print,
hash);
}
void hash_free(hash_struct *hash) {
@@ -272,3 +320,14 @@ void hash_write(hash_struct * hash, int lpos) {
inthash_write(hash->former_adrfil, (char*) hash->liens[lpos], lpos);
}
}
/* Invalidate the link at index 'lpos' (its address becomes "!") while
   keeping the "adrfil" hashtable coherent.
   The entry is first removed from the hashtable, then the address is
   overwritten, then the entry is written back under its new key. Changing
   the address while the entry is still stored would leave the hashtable
   positions invalid.
   If the entry cannot be removed, an assertion failure is raised. */
void hash_invalidate_entry(hash_struct * hash, int lpos) {
  /* the entry must be taken out while its key is still the original one */
  if (!inthash_remove(hash->adrfil, (char*) hash->liens[lpos])) {
    assertf(! "error invalidating hash entry");
    return;
  }

  /* safe to rewrite the address now that nothing in the table refers to it */
  strcpybuff(hash->liens[lpos]->adr, "!");

  /* store the entry again, now keyed on the invalidated address */
  inthash_write(hash->adrfil, (char*) hash->liens[lpos], lpos);
}

View File

@@ -56,6 +56,7 @@ void hash_free(hash_struct *hash);
int hash_read(const hash_struct * hash, const char *nom1, const char *nom2,
hash_struct_type type);
void hash_write(hash_struct * hash, int lpos);
void hash_invalidate_entry(hash_struct * hash, int lpos);
int *hash_calc_chaine(hash_struct * hash, hash_struct_type type, int pos);
unsigned long int hash_cle(const char *nom1, const char *nom2);
#endif

View File

@@ -187,6 +187,16 @@ struct struct_inthash {
/** hashtable name for logging **/
const char *name;
} error;
/** How to handle pretty-print (debug) (might be NULL). **/
struct {
/** key print() **/
t_inthash_printkeyhandler key;
/** value print() **/
t_inthash_printvaluehandler value;
/** opaque argument **/
void *arg;
} print;
} custom;
};
@@ -889,10 +899,59 @@ static int inthash_add_item_(inthash hashtable, inthash_item item) {
(int) hashtable->stash.size);
return 1; /* added */
} else {
/* debugging */
if (hashtable->custom.print.key != NULL
&& hashtable->custom.print.value != NULL) {
size_t i;
for(i = 0 ; i < hashtable->stash.size ; i++) {
inthash_item *const item = &hashtable->stash.items[i];
const size_t pos1 = inthash_hash_to_pos(hashtable, item->hashes.hash1);
const size_t pos2 = inthash_hash_to_pos(hashtable, item->hashes.hash2);
inthash_crit(hashtable, "stash[%u]: key='%s' value='%s' pos1=%d pos2=%d hash1=%04x hash2=%04x",
(int) i,
hashtable->custom.print.key(hashtable->custom.print.arg, item->name),
hashtable->custom.print.value(hashtable->custom.print.arg, item->value.ptr),
(int) pos1, (int) pos2,
item->hashes.hash1, item->hashes.hash2);
if (!inthash_is_free(hashtable, pos1)) {
inthash_item *const item = &hashtable->items[pos1];
const size_t pos1 = inthash_hash_to_pos(hashtable, item->hashes.hash1);
const size_t pos2 = inthash_hash_to_pos(hashtable, item->hashes.hash2);
inthash_crit(hashtable, "\t.. collisionning with key='%s' value='%s' pos1=%d pos2=%d hash1=%04x hash2=%04x",
hashtable->custom.print.key(hashtable->custom.print.arg, item->name),
hashtable->custom.print.value(hashtable->custom.print.arg, item->value.ptr),
(int) pos1, (int) pos2,
item->hashes.hash1, item->hashes.hash2);
} else {
inthash_crit(hashtable, "\t.. collisionning with a free slot (%d)!", (int) pos1);
}
if (!inthash_is_free(hashtable, pos2)) {
inthash_item *const item = &hashtable->items[pos2];
const size_t pos1 = inthash_hash_to_pos(hashtable, item->hashes.hash1);
const size_t pos2 = inthash_hash_to_pos(hashtable, item->hashes.hash2);
inthash_crit(hashtable, "\t.. collisionning with key='%s' value='%s' pos1=%d pos2=%d hash1=%04x hash2=%04x",
hashtable->custom.print.key(hashtable->custom.print.arg, item->name),
hashtable->custom.print.value(hashtable->custom.print.arg, item->value.ptr),
(int) pos1, (int) pos2,
item->hashes.hash1, item->hashes.hash2);
} else {
inthash_crit(hashtable, "\t.. collisionning with a free slot (%d)!", (int) pos2);
}
}
//struct_inthash_enum e = inthash_enum_new(hashtable);
//while((item = inthash_enum_next(&e)) != NULL) {
// inthash_crit(hashtable, "element key='%s' value='%s' hash1=%04x hash2=%04x",
// hashtable->custom.print.key(hashtable->custom.print.arg, item->name),
// hashtable->custom.print.value(hashtable->custom.print.arg, item->value.ptr),
// item->hashes.hash1, item->hashes.hash2);
//}
}
/* we are doomed. hopefully the probability is lower than being killed
by a wandering radioactive monkey */
inthash_log_stats(hashtable);
inthash_assert(hashtable, ! "hashtable internal error: cuckoo/stash collision");
/* not reachable code */
return -1;
}
@@ -1235,9 +1294,13 @@ inthash inthash_new(size_t initial_size) {
hashtable->custom.key.hash = NULL;
hashtable->custom.key.equals = NULL;
hashtable->custom.key.arg = NULL;
hashtable->custom.error.log = NULL;
hashtable->custom.error.fatal = NULL;
hashtable->custom.error.name = NULL;
hashtable->custom.error.arg = NULL;
hashtable->custom.print.key = NULL;
hashtable->custom.print.value = NULL;
hashtable->custom.print.arg = NULL;
}
return hashtable;
}
@@ -1288,12 +1351,20 @@ void inthash_set_assert_handler(inthash hashtable,
t_inthash_loghandler log,
t_inthash_asserthandler fatal,
void *arg) {
inthash_assert(hashtable, fatal != NULL);
hashtable->custom.error.log = log;
hashtable->custom.error.fatal = fatal;
hashtable->custom.error.arg = arg;
}
/* Install the debug pretty-printers of a hashtable.
   key:   handler returning a printable string for an item key
   value: handler returning a printable string for an item value
   arg:   opaque pointer passed as first argument to both handlers
   The handlers are stored as-is and may be NULL; the debug dump that uses
   them only runs when both are non-NULL. */
void inthash_set_print_handler(inthash hashtable,
                               t_inthash_printkeyhandler key,
                               t_inthash_printvaluehandler value,
                               void *arg) {
  hashtable->custom.print.arg = arg;
  hashtable->custom.print.value = value;
  hashtable->custom.print.key = key;
}
size_t inthash_nitems(inthash hashtable) {
if (hashtable != NULL)
return hashtable->used;

View File

@@ -67,6 +67,7 @@ typedef unsigned __int64 uint64_t;
#else
#include <stdint.h>
#endif
#include <stdarg.h>
/** Value. **/
typedef union inthash_value {
@@ -140,6 +141,12 @@ typedef void (*t_inthash_loghandler)(void *arg, inthash_loglevel level,
/** Hashtable fatal assertion failure. **/
typedef void (*t_inthash_asserthandler)(void *arg, const char* exp, const char* file, int line);
/**
 * Key printer (debug).
 * arg: the opaque argument registered with inthash_set_print_handler()
 * name: the item key to be printed
 * Returns a printable string representation of the key.
 **/
typedef const char* (*t_inthash_printkeyhandler)(void *arg, const char *name);
/**
 * Value printer (debug).
 * arg: the opaque argument registered with inthash_set_print_handler()
 * value: the item value (pointer form) to be printed
 * Returns a printable string representation of the value.
 **/
typedef const char* (*t_inthash_printvaluehandler)(void *arg, void *value);
/**
* Value comparison handler (returns non-zero value if strings are equal).
**/
@@ -240,6 +247,18 @@ void inthash_set_assert_handler(inthash hashtable,
t_inthash_asserthandler fatal,
void *arg);
/**
* Set pretty print loggers (debug). Both handlers must return a string
* pointer which shall be valid until the next call. Both key and value
* pointers shall be valid at the same time.
* key: handler called to print the string representation of the key (name)
* value: handler called to print the string representation of the value
* arg: opaque argument passed as first parameter to both handlers
**/
void inthash_set_print_handler(inthash hashtable,
t_inthash_printkeyhandler key,
t_inthash_printvaluehandler value,
void *arg);
/**
* Set the hashtable name, for debugging purposes.
* name: the hashtable name (ASCII or UTF-8)

View File

@@ -62,6 +62,7 @@ Please visit our Website: http://www.httrack.com
#include <unistd.h>
#endif
#endif /* _WIN32 */
#include <stdarg.h>
#include <string.h>
#include <time.h>

View File

@@ -3606,7 +3606,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
urladr, urlfil, mov_adr, mov_fil);
// canceller lien actuel
error = 1;
strcpybuff(liens[ptr]->adr, "!"); // caractère bidon (invalide hash)
hash_invalidate_entry(hashptr, ptr); // invalidate hashtable entry
// noter NOUVEAU lien
//xxc xxc
// set_prio_to=0+1; // protection if the moved URL is an html page!!
@@ -3742,7 +3742,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
//
// canceller lien actuel
error = 1;
strcpybuff(liens[ptr]->adr, "!"); // caractère bidon (invalide hash)
hash_invalidate_entry(hashptr, ptr); // invalidate hashtable entry
//
} else { // oups erreur, plus de mémoire!!
printf("PANIC! : Not enough memory [%d]\n", __LINE__);

View File

@@ -38,6 +38,7 @@ Please visit our Website: http://www.httrack.com
#ifndef _WIN32
#include <inttypes.h>
#endif
#include <stdarg.h>
#ifndef HTS_DEF_FWSTRUCT_httrackp
#define HTS_DEF_FWSTRUCT_httrackp