mirror of
https://github.com/xroche/httrack.git
synced 2026-06-25 11:37:28 +03:00
Compare commits
2 Commits
cleanup/ht
...
cleanup/ht
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
88bfcff10c | ||
|
|
1df45fc231 |
@@ -3862,7 +3862,8 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
|
||||
opt->savename_83 = b;
|
||||
if (r != -1 && !forbidden_url) {
|
||||
if (savename()) {
|
||||
if (lienrelatif(tempo, afs.save, savename()) == 0) {
|
||||
if (lienrelatif(tempo, sizeof(tempo), afs.save, savename()) ==
|
||||
0) {
|
||||
hts_log_print(opt, LOG_DEBUG,
|
||||
"(module): relative link at %s build with %s and %s: %s",
|
||||
afs.af.adr, afs.save, savename(), tempo);
|
||||
|
||||
@@ -431,6 +431,50 @@ static void basic_selftests(void) {
|
||||
assertf(strcmp(b + len - 4, ".htm") == 0);
|
||||
}
|
||||
}
|
||||
// longfile_to_83(): single-name 8-3 (mode 1) / ISO9660 (mode 2) conversion;
|
||||
// uppercases, clamps the name (8 / 31) and the extension (3). It rewrites
|
||||
// 'save' in place, so pass a mutable array.
|
||||
{
|
||||
char n83[256];
|
||||
|
||||
{
|
||||
char save[] = "longfilename.html";
|
||||
|
||||
longfile_to_83(1, n83, sizeof(n83), save); // 8-3: name->8, ext->3
|
||||
assertf(strcmp(n83, "LONGFILE.HTM") == 0);
|
||||
}
|
||||
{
|
||||
char save[] = "longfilename.html";
|
||||
|
||||
longfile_to_83(2, n83, sizeof(n83), save); // ISO9660: name->31, ext->3
|
||||
assertf(strcmp(n83, "LONGFILENAME.HTM") == 0);
|
||||
}
|
||||
{ // sanitization: leading '.'->'_', interior dots
|
||||
char save[] = ".a b.c.d e"; // collapse to '_', spaces/specials -> '_'
|
||||
// (only the last dot stays as the separator)
|
||||
longfile_to_83(1, n83, sizeof(n83), save);
|
||||
assertf(strcmp(n83, "_A_B_C.D_E") == 0);
|
||||
}
|
||||
}
|
||||
// long_to_83(): per-segment 8-3 conversion of a whole path.
|
||||
{
|
||||
char n83[HTS_URLMAXSIZE * 2];
|
||||
char save[] = "dir/longfilename.html";
|
||||
|
||||
long_to_83(1, n83, sizeof(n83), save);
|
||||
assertf(strcmp(n83, "DIR/LONGFILE.HTM") == 0);
|
||||
}
|
||||
// lienrelatif(): relative path from the directory of curr_fil to link.
|
||||
{
|
||||
char s[HTS_URLMAXSIZE * 2];
|
||||
|
||||
// same directory -> just the basename
|
||||
assertf(lienrelatif(s, sizeof(s), "dir/page.html", "dir/index.html") == 0);
|
||||
assertf(strcmp(s, "page.html") == 0);
|
||||
// link one level up -> a "../" prefix
|
||||
assertf(lienrelatif(s, sizeof(s), "a.html", "dir/index.html") == 0);
|
||||
assertf(strcmp(s, "../a.html") == 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* Self-tests for the htssafe.h bounded string ops (driven by httrack -#8).
|
||||
|
||||
@@ -925,7 +925,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
|
||||
pth[0] = n83[0] = '\0';
|
||||
strncatbuff(pth, fil, (int) (nom_pos - fil) - 1);
|
||||
long_to_83(opt->savename_83, n83, pth);
|
||||
long_to_83(opt->savename_83, n83, sizeof(n83), pth);
|
||||
htsbuff_cat(&sb, n83);
|
||||
}
|
||||
}
|
||||
@@ -1307,7 +1307,7 @@ int url_savename(lien_adrfilsave *const afs,
|
||||
if (opt->savename_83) {
|
||||
char BIGSTK n83[HTS_URLMAXSIZE * 2];
|
||||
|
||||
long_to_83(opt->savename_83, n83, afs->save);
|
||||
long_to_83(opt->savename_83, n83, sizeof(n83), afs->save);
|
||||
strcpybuff(afs->save, n83);
|
||||
}
|
||||
// enforce stricter ISO9660 compliance (bug reported by Steffo Carlsson)
|
||||
|
||||
@@ -610,11 +610,11 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
b = strchr(a, '<'); // prochain tag
|
||||
}
|
||||
}
|
||||
if (lienrelatif
|
||||
(tempo, heap(ptr)->sav,
|
||||
concat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_html_utf8),
|
||||
"index.html")) == 0) {
|
||||
if (lienrelatif(tempo, sizeof(tempo), heap(ptr)->sav,
|
||||
concat(OPT_GET_BUFF(opt),
|
||||
OPT_GET_BUFF_SIZE(opt),
|
||||
StringBuff(opt->path_html_utf8),
|
||||
"index.html")) == 0) {
|
||||
detect_title = 1; // ok détecté pour cette page!
|
||||
makeindex_links++; // un de plus
|
||||
strcpybuff(makeindex_firstlink, tempo);
|
||||
@@ -2720,7 +2720,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
|
||||
strcpybuff(save, StringBuff(opt->path_html_utf8));
|
||||
strcatbuff(save, cat_name);
|
||||
if (lienrelatif(tempo, save, relativesavename()) == 0) {
|
||||
if (lienrelatif(tempo, sizeof(tempo), save,
|
||||
relativesavename()) == 0) {
|
||||
/* Never escape high-chars (we don't know the encoding!!) */
|
||||
inplace_escape_uri_utf(tempo, sizeof(tempo)); // escape with %xx
|
||||
//if (!no_esc_utf)
|
||||
@@ -2950,7 +2951,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
tempo[0] = '\0';
|
||||
// calculer le lien relatif
|
||||
|
||||
if (lienrelatif(tempo, afs.save, relativesavename()) == 0) {
|
||||
if (lienrelatif(tempo, sizeof(tempo), afs.save,
|
||||
relativesavename()) == 0) {
|
||||
if (!in_media) { // In media (such as real audio): don't patch
|
||||
/* Never escape high-chars (we don't know the encoding!!) */
|
||||
inplace_escape_uri_utf(tempo, sizeof(tempo));
|
||||
|
||||
@@ -274,7 +274,9 @@ int ident_url_relatif(const char *lien, const char *origin_adr,
|
||||
char *const idna = hts_convertStringUTF8ToIDNA(a, strlen(a));
|
||||
if (idna != NULL) {
|
||||
if (strlen(idna) < HTS_URLMAXSIZE) {
|
||||
strcpybuff(a, idna);
|
||||
/* a points within adrfil->adr; bound by the remaining capacity */
|
||||
strlcpybuff(a, idna,
|
||||
sizeof(adrfil->adr) - (size_t) (a - adrfil->adr));
|
||||
}
|
||||
free(idna);
|
||||
}
|
||||
@@ -286,7 +288,7 @@ int ident_url_relatif(const char *lien, const char *origin_adr,
|
||||
|
||||
// créer dans s, à partir du chemin courant curr_fil, le lien vers link (absolu)
|
||||
// un ident_url_relatif a déja été fait avant, pour que link ne soit pas un chemin relatif
|
||||
int lienrelatif(char *s, const char *link, const char *curr_fil) {
|
||||
int lienrelatif(char *s, size_t ssize, const char *link, const char *curr_fil) {
|
||||
char BIGSTK _curr[HTS_URLMAXSIZE * 2];
|
||||
char BIGSTK newcurr_fil[HTS_URLMAXSIZE * 2], newlink[HTS_URLMAXSIZE * 2];
|
||||
char *curr;
|
||||
@@ -314,9 +316,9 @@ int lienrelatif(char *s, const char *link, const char *curr_fil) {
|
||||
}
|
||||
}
|
||||
|
||||
// recopier uniquement le chemin courant
|
||||
// copy only the current path
|
||||
curr = _curr;
|
||||
strcpybuff(curr, curr_fil);
|
||||
strlcpybuff(curr, curr_fil, sizeof(_curr));
|
||||
if ((a = strchr(curr, '?')) == NULL) // couper au ? (params)
|
||||
a = curr + strlen(curr) - 1; // pas de params: aller à la fin
|
||||
while((*a != '/') && (a > curr))
|
||||
@@ -359,14 +361,14 @@ int lienrelatif(char *s, const char *link, const char *curr_fil) {
|
||||
a++;
|
||||
while(*a)
|
||||
if (*(a++) == '/')
|
||||
strcatbuff(s, "../");
|
||||
strlcatbuff(s, "../", ssize);
|
||||
//if (strlen(s)==0) strcatbuff(s,"/");
|
||||
|
||||
if (slash)
|
||||
strcatbuff(s, "/"); // garder absolu!!
|
||||
strlcatbuff(s, "/", ssize); // keep it absolute!
|
||||
|
||||
// on est dans le répertoire de départ, copier
|
||||
strcatbuff(s, link + ((*link == '/') ? 1 : 0));
|
||||
// we are in the starting directory, copy
|
||||
strlcatbuff(s, link + ((*link == '/') ? 1 : 0), ssize);
|
||||
|
||||
/* Security check */
|
||||
if (strlen(s) >= HTS_URLMAXSIZE)
|
||||
@@ -410,7 +412,7 @@ int link_has_authorization(const char *lien) {
|
||||
}
|
||||
|
||||
// conversion chemin de fichier/dossier vers 8-3 ou ISO9660
|
||||
void long_to_83(int mode, char *n83, char *save) {
|
||||
void long_to_83(int mode, char *n83, size_t n83size, char *save) {
|
||||
n83[0] = '\0';
|
||||
|
||||
while(*save) {
|
||||
@@ -425,19 +427,19 @@ void long_to_83(int mode, char *n83, char *save) {
|
||||
}
|
||||
fnl[j] = '\0';
|
||||
// conversion
|
||||
longfile_to_83(mode, fn83, fnl);
|
||||
strcatbuff(n83, fn83);
|
||||
longfile_to_83(mode, fn83, sizeof(fn83), fnl);
|
||||
strlcatbuff(n83, fn83, n83size);
|
||||
|
||||
save += i;
|
||||
if (*save == '/') {
|
||||
strcatbuff(n83, "/");
|
||||
strlcatbuff(n83, "/", n83size);
|
||||
save++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// conversion nom de fichier/dossier isolé vers 8-3 ou ISO9660
|
||||
void longfile_to_83(int mode, char *n83, char *save) {
|
||||
void longfile_to_83(int mode, char *n83, size_t n83size, char *save) {
|
||||
int j = 0, max = 0;
|
||||
int i = 0;
|
||||
char nom[256];
|
||||
@@ -526,10 +528,10 @@ void longfile_to_83(int mode, char *n83, char *save) {
|
||||
}
|
||||
// corriger vers 8-3
|
||||
n83[0] = '\0';
|
||||
strncatbuff(n83, nom, max);
|
||||
strlncatbuff(n83, nom, n83size, max);
|
||||
if (strnotempty(ext)) {
|
||||
strcatbuff(n83, ".");
|
||||
strncatbuff(n83, ext, 3);
|
||||
strlcatbuff(n83, ".", n83size);
|
||||
strlncatbuff(n83, ext, n83size, 3);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -61,11 +61,11 @@ typedef struct lien_adrfilsave lien_adrfilsave;
|
||||
int ident_url_relatif(const char *lien, const char *origin_adr,
|
||||
const char *origin_fil,
|
||||
lien_adrfil* const adrfil);
|
||||
int lienrelatif(char *s, const char *link, const char *curr);
|
||||
int lienrelatif(char *s, size_t ssize, const char *link, const char *curr);
|
||||
int link_has_authority(const char *lien);
|
||||
int link_has_authorization(const char *lien);
|
||||
void long_to_83(int mode, char *n83, char *save);
|
||||
void longfile_to_83(int mode, char *n83, char *save);
|
||||
void long_to_83(int mode, char *n83, size_t n83size, char *save);
|
||||
void longfile_to_83(int mode, char *n83, size_t n83size, char *save);
|
||||
HTS_INLINE int __rech_tageq(const char *adr, const char *s);
|
||||
HTS_INLINE int __rech_tageqbegdigits(const char *adr, const char *s);
|
||||
HTS_INLINE int rech_tageq_all(const char *adr, const char *s);
|
||||
|
||||
@@ -223,8 +223,9 @@ static int hts_acceptlink_(httrackp * opt, int ptr,
|
||||
// note (up/down): on calcule à partir du lien primaire, ET du lien précédent.
|
||||
// ex: si on descend 2 fois on peut remonter 1 fois
|
||||
|
||||
if (lienrelatif(tempo, fil, heap(heap(ptr)->premier)->fil) == 0) {
|
||||
if (lienrelatif(tempo2, fil, heap(ptr)->fil) == 0) {
|
||||
if (lienrelatif(tempo, sizeof(tempo), fil,
|
||||
heap(heap(ptr)->premier)->fil) == 0) {
|
||||
if (lienrelatif(tempo2, sizeof(tempo2), fil, heap(ptr)->fil) == 0) {
|
||||
hts_log_print(opt, LOG_DEBUG,
|
||||
"build relative links to test: %s %s (with %s and %s)",
|
||||
tempo, tempo2, heap(heap(ptr)->premier)->fil,
|
||||
@@ -326,8 +327,9 @@ static int hts_acceptlink_(httrackp * opt, int ptr,
|
||||
char BIGSTK tempo[HTS_URLMAXSIZE * 2];
|
||||
char BIGSTK tempo2[HTS_URLMAXSIZE * 2];
|
||||
|
||||
if (lienrelatif(tempo, fil, heap(heap(ptr)->premier)->fil) == 0) {
|
||||
if (lienrelatif(tempo2, fil, heap(ptr)->fil) == 0) {
|
||||
if (lienrelatif(tempo, sizeof(tempo), fil,
|
||||
heap(heap(ptr)->premier)->fil) == 0) {
|
||||
if (lienrelatif(tempo2, sizeof(tempo2), fil, heap(ptr)->fil) == 0) {
|
||||
} else {
|
||||
hts_log_print(opt, LOG_ERROR,
|
||||
"Error building relative link %s and %s", fil,
|
||||
@@ -336,7 +338,6 @@ static int hts_acceptlink_(httrackp * opt, int ptr,
|
||||
} else {
|
||||
hts_log_print(opt, LOG_ERROR, "Error building relative link %s and %s",
|
||||
fil, heap(heap(ptr)->premier)->fil);
|
||||
|
||||
}
|
||||
} // fin tester interdiction de monter
|
||||
|
||||
|
||||
Reference in New Issue
Block a user