mirror of
https://github.com/xroche/httrack.git
synced 2026-06-14 06:14:23 +03:00
Compare commits
8 Commits
test/expan
...
test/filte
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
794404bba2 | ||
|
|
89b25e418b | ||
|
|
017c634c53 | ||
|
|
f2b36c4b29 | ||
|
|
19947efd74 | ||
|
|
de26ad881a | ||
|
|
106d34d82c | ||
|
|
61e0b3250b |
@@ -2899,7 +2899,9 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
}
|
||||
|
||||
{
|
||||
char n_lock[256];
|
||||
/* Sized to the concat-buffer capacity so it can always hold the lock-file
|
||||
path produced by fconcat(), even with a long log path (issue #183). */
|
||||
char n_lock[OPT_GET_BUFF_SIZE(opt)];
|
||||
|
||||
// on peut pas avoir un affichage ET un fichier log
|
||||
// ca sera pour la version 2
|
||||
|
||||
@@ -274,6 +274,28 @@ Please visit our Website: http://www.httrack.com
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
/* Percent-encode the angle brackets of a string so it can be embedded inside
   an HTML comment (the default footer). A URL holding "-->" would otherwise
   close the footer comment and inject markup (issue #165).
   Raw '<' and '>' are not valid URL characters, so encoding them is harmless.

   Only '<' and '>' are rewritten (to "%3C" and "%3E"); every other byte,
   including quotes and '&', is copied unchanged, so the result must not be
   treated as escaped for attribute values or other HTML contexts.

   src:  NUL-terminated input string.
   dst:  output buffer; receives a NUL-terminated string when size > 0.
   size: capacity of dst in bytes. With size == 0 nothing is written.

   Returns dst. The output is silently truncated when dst is too small:
   copying stops as soon as fewer than five bytes remain, which always leaves
   room for one three-byte escape plus the terminating NUL. */
static const char *html_inline_safe(const char *src, char *dst, size_t size) {
  size_t i, j;

  /* A zero-capacity buffer cannot even hold the terminator: leave it alone
     instead of writing one byte past its end. */
  if (size == 0) {
    return dst;
  }

  for(i = 0, j = 0; src[i] != '\0' && j + 4 < size; i++) {
    const char c = src[i];

    if (c == '<' || c == '>') {
      /* '<' is 0x3C and '>' is 0x3E: only the last hex digit differs. */
      dst[j++] = '%';
      dst[j++] = '3';
      dst[j++] = (c == '<') ? 'C' : 'E';
    } else {
      dst[j++] = c;
    }
  }
  dst[j] = '\0';
  return dst;
}
|
||||
|
||||
/* Main parser */
|
||||
int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
char catbuff[CATBUFF_SIZE];
|
||||
@@ -719,13 +741,16 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
if (StringNotEmpty(opt->footer)) {
|
||||
char BIGSTK tempo[1024 + HTS_URLMAXSIZE * 2];
|
||||
char gmttime[256];
|
||||
char BIGSTK safe_adr[HTS_URLMAXSIZE * 3 + 4];
|
||||
char BIGSTK safe_fil[HTS_URLMAXSIZE * 3 + 4];
|
||||
|
||||
tempo[0] = '\0';
|
||||
time_gmt_rfc822(gmttime);
|
||||
strcatbuff(tempo, eol);
|
||||
hts_template_format_str(tempo + strlen(tempo), sizeof(tempo) - strlen(tempo),
|
||||
StringBuff(opt->footer),
|
||||
jump_identification_const(urladr()), urlfil(), gmttime,
|
||||
html_inline_safe(jump_identification_const(urladr()), safe_adr, sizeof(safe_adr)),
|
||||
html_inline_safe(urlfil(), safe_fil, sizeof(safe_fil)), gmttime,
|
||||
HTTRACK_VERSIONID, /* EOF */ NULL);
|
||||
strcatbuff(tempo, eol);
|
||||
//fwrite(tempo,1,strlen(tempo),fp);
|
||||
|
||||
@@ -193,7 +193,23 @@ HTSEXT_API void hts_mutexfree(htsmutex * mutex) {
|
||||
HTSEXT_API void hts_mutexlock(htsmutex * mutex) {
|
||||
assertf(mutex != NULL);
|
||||
if (*mutex == HTSMUTEX_INIT) { /* must be initialized */
|
||||
hts_mutexinit(mutex);
|
||||
/* Initialize exactly once, even when several threads race to lock the same
|
||||
mutex for the first time. Build our own object, then publish it with a
|
||||
single atomic compare-and-swap; the threads that lose the race free the
|
||||
object they built (issue #297). No static guard is needed, which keeps
|
||||
this safe on Windows 2000 (no statically-initializable lock there). */
|
||||
htsmutex created = HTSMUTEX_INIT;
|
||||
|
||||
hts_mutexinit(&created);
|
||||
#ifdef _WIN32
|
||||
if (InterlockedCompareExchangePointer((PVOID volatile *) mutex, created,
|
||||
HTSMUTEX_INIT) != HTSMUTEX_INIT)
|
||||
#else
|
||||
if (!__sync_bool_compare_and_swap(mutex, HTSMUTEX_INIT, created))
|
||||
#endif
|
||||
{
|
||||
hts_mutexfree(&created);
|
||||
}
|
||||
}
|
||||
assertf(*mutex != NULL);
|
||||
#ifdef _WIN32
|
||||
|
||||
@@ -47,3 +47,25 @@ match '*foo*bar' 'foozbar'
|
||||
|
||||
# '?' is the query-string marker, not a single-char wildcard
|
||||
nomatch 'a?c' 'abc'
|
||||
|
||||
# backslash escapes a metacharacter inside a class so it is matched literally.
|
||||
# Quirk: the decoder also adds the backslash itself to the set, so '\X' matches
|
||||
# both X and '\'. These assertions pin that behavior.
|
||||
match '*[\*]' '*'
|
||||
match '*[\*]' "\\"
|
||||
nomatch '*[\*]' 'a'
|
||||
match '*[\\]' "\\"
|
||||
nomatch '*[\\]' 'a'
|
||||
match '*[\[]' '['
|
||||
match '*[\[]' "\\"
|
||||
nomatch '*[\[]' 'a'
|
||||
|
||||
# A literal ']' cannot be a class member: the class parser stops at the first
|
||||
# ']', escaped or not. So '*[\[\]]' does NOT mean "the [ or ] character" as the
|
||||
# filter guide claims (GitHub #148); it parses as the class {'[','\'} followed
|
||||
# by a trailing literal ']'. These assertions document the current (buggy)
|
||||
# behavior so any future matcher fix is a deliberate, visible change.
|
||||
nomatch '*[\[\]]' '[' # not matched, despite the docs
|
||||
match '*[\[\]]' ']' # only via the empty class-match + trailing ']'
|
||||
match '*[\[\]]' '[]' # one of {'[','\'} then the trailing ']'
|
||||
nomatch '*[\[\]]' '[]x'
|
||||
|
||||
Reference in New Issue
Block a user