mirror of
https://github.com/xroche/httrack.git
synced 2026-07-03 15:43:42 +03:00
Compare commits
1 Commits
phase0-par
...
htsparse-t
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
43478895df |
@@ -441,6 +441,72 @@ void hts_finish_makeindex(httrackp *opt, int *makeindex_done,
|
||||
*makeindex_done = 1;
|
||||
}
|
||||
|
||||
/* Flush the parsed HTML output buffer to disk, skipping the rewrite when the
|
||||
* on-disk MD5 is unchanged. */
|
||||
void hts_finish_html_file(httrackp *opt, cache_back *cache, htsblk *r,
|
||||
FILE **fp, const char *ht_buff, size_t ht_len,
|
||||
const char *adr, const char *fil, const char *save) {
|
||||
char digest[32 + 2];
|
||||
off_t fsize_old =
|
||||
fsize(fconv(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), save));
|
||||
int ok = 0;
|
||||
|
||||
digest[0] = '\0';
|
||||
domd5mem(ht_buff, ht_len, digest, 1);
|
||||
if (fsize_old == (off_t) ht_len) {
|
||||
int mlen = 0;
|
||||
char *mbuff;
|
||||
|
||||
cache_readdata(cache, "//[HTML-MD5]//", save, &mbuff, &mlen);
|
||||
if (mlen)
|
||||
mbuff[mlen] = '\0';
|
||||
if ((mlen == 32) && (strcmp(((mbuff != NULL) ? mbuff : ""), digest) == 0)) {
|
||||
ok = 1;
|
||||
hts_log_print(opt, LOG_DEBUG, "File not re-written (md5): %s", save);
|
||||
}
|
||||
freet(mbuff);
|
||||
}
|
||||
if (!ok) {
|
||||
file_notify(opt, adr, fil, save, 1, 1, r->notmodified);
|
||||
*fp = filecreate(&opt->state.strc, save);
|
||||
if (*fp) {
|
||||
if (ht_len > 0 && fwrite(ht_buff, 1, ht_len, *fp) != ht_len) {
|
||||
int fcheck = check_fatal_io_errno();
|
||||
|
||||
if (fcheck)
|
||||
opt->state.exit_xh = -1;
|
||||
if (opt->log) {
|
||||
hts_log_print(opt, LOG_ERROR | LOG_ERRNO,
|
||||
"Unable to write HTML file %s", save);
|
||||
if (fcheck)
|
||||
hts_log_print(opt, LOG_ERROR, "* * Fatal write error, giving up");
|
||||
}
|
||||
}
|
||||
fclose(*fp);
|
||||
*fp = NULL;
|
||||
if (strnotempty(r->lastmodified))
|
||||
set_filetime_rfc822(save, r->lastmodified);
|
||||
} else {
|
||||
int fcheck = check_fatal_io_errno();
|
||||
|
||||
if (fcheck) {
|
||||
hts_log_print(opt, LOG_ERROR,
|
||||
"Mirror aborted: disk full or filesystem problems");
|
||||
opt->state.exit_xh = -1;
|
||||
}
|
||||
hts_log_print(opt, LOG_ERROR | LOG_ERRNO, "Unable to save file %s", save);
|
||||
if (fcheck)
|
||||
hts_log_print(opt, LOG_ERROR, "* * Fatal write error, giving up");
|
||||
}
|
||||
} else {
|
||||
file_notify(opt, adr, fil, save, 0, 0, r->notmodified);
|
||||
filenote(&opt->state.strc, save, NULL);
|
||||
}
|
||||
if (cache->ndx)
|
||||
cache_writedata(cache->ndx, cache->dat, "//[HTML-MD5]//", save, digest,
|
||||
(int) strlen(digest));
|
||||
}
|
||||
|
||||
/* does it look like XML ? (SVG et al.) */
|
||||
static int look_like_xml(const char *s) {
|
||||
return strncmp(s, "<?xml", 5) == 0
|
||||
|
||||
@@ -370,6 +370,12 @@ void hts_finish_makeindex(httrackp *opt, int *makeindex_done,
|
||||
const char *template_footer, const char *adr,
|
||||
const char *fil);
|
||||
|
||||
// Write the ht_len bytes at ht_buff to the file named by save, skipping the
// rewrite when the on-disk size and cached MD5 are unchanged. On the write
// path *fp is closed and reset to NULL. Precondition: ht_len > 0.
|
||||
void hts_finish_html_file(httrackp *opt, cache_back *cache, htsblk *r,
|
||||
FILE **fp, const char *ht_buff, size_t ht_len,
|
||||
const char *adr, const char *fil, const char *save);
|
||||
|
||||
int filters_init(char ***ptrfilters, int maxfilter, int filterinc);
|
||||
|
||||
int fspc(httrackp * opt, FILE * fp, const char *type);
|
||||
|
||||
@@ -106,73 +106,6 @@ Please visit our Website: http://www.httrack.com
|
||||
// does nothing
|
||||
#define XH_uninit do {} while(0)
|
||||
|
||||
/* clang-format off: an edit realigns all backslashes, churning the macro. */
|
||||
/* clang-format off */
|
||||
/* Flush the pending output_buffer to the current save file, then release it.
 * The size/MD5 comparison, file write and cache update are delegated to
 * hts_finish_html_file() instead of being duplicated here. Nothing is written
 * when the buffer is empty. Expands to a brace block (not do/while) so that
 * existing `HT_ADD_END;` call sites keep compiling unchanged. Relies on opt,
 * cache, r, fp and output_buffer being in scope at the expansion site. */
#define HT_ADD_END { \
  if (TypedArraySize(output_buffer) != 0) { \
    hts_finish_html_file(opt, cache, r, &fp, \
                         TypedArrayElts(output_buffer), \
                         TypedArraySize(output_buffer), \
                         urladr(), urlfil(), savename()); \
  } \
  TypedArrayFree(output_buffer); \
}
|
||||
/* clang-format on */
|
||||
#define HT_ADD_FOP
|
||||
|
||||
#define ENGINE_DEFINE_CONTEXT() \
|
||||
@@ -232,8 +165,6 @@ Please visit our Website: http://www.httrack.com
|
||||
/* */ \
|
||||
*stre->stat_fragment_ = stat_fragment
|
||||
|
||||
#define _FILTERS (*opt->filters.filters)
|
||||
#define _FILTERS_PTR (opt->filters.filptr)
|
||||
#define _ROBOTS ((robots_wizard*)opt->robotsptr)
|
||||
|
||||
/* Apply current *adr character for the script automate */
|
||||
@@ -3528,7 +3459,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
|
||||
}
|
||||
|
||||
/* Flush and save to disk */
|
||||
HT_ADD_END; // achever
|
||||
if (TypedArraySize(output_buffer) != 0) {
|
||||
hts_finish_html_file(
|
||||
opt, cache, r, &fp, TypedArrayElts(output_buffer),
|
||||
TypedArraySize(output_buffer), urladr(), urlfil(), savename());
|
||||
}
|
||||
TypedArrayFree(output_buffer);
|
||||
}
|
||||
//
|
||||
//
|
||||
|
||||
Reference in New Issue
Block a user