Files
httrack/libtest/callbacks-example-contentfilter.c
Xavier Roche 07f4b00367 Indenting cleanup for all *.[ch] files, except htsparse.c (too ugly to be automatically indented for now) and /minizip/, /mmsrip/ (external files)
setup:
  indent -l80 -lc80 -nhnl -nut -bad -bap -bbo -br -brf -bli2 -brs -bls -br -ss -sai -pmt -nsaw -nsaf -nprs -i2 -ce -npsl -npcs -cs -sob -cdw -nbc -lp

logs:
indent: ./src/htsback.c:157: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsback.c:1417: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsback.c:1826: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsback.c:1833: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsback.c:1981: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsback.c:2685: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsback.c:2747: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsback.c:2861: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsback.c:3128: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsback.c:3512: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htswizard.c:140: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htswizard.c:597: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htswizard.c:598: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htswizard.c:611: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htscoremain.c:97: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htscoremain.c:106: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htscoremain.c:106: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htscoremain.c:256: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htscoremain.c:262: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htscoremain.c:935: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/httrack.c:271: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/httrack.c:272: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/httrack.c:273: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/httrack.c:274: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/httrack.c:275: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/httrack.c:276: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/httrack.c:277: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/httrack.c:278: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/httrack.c:279: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/httrack.c:280: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/httrack.c:281: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/httrack.c:662: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/httrack.c:752: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsweb.c:413: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsweb.c:414: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsweb.c:415: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsweb.c:416: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsweb.c:417: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsweb.c:418: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsweb.c:419: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsweb.c:420: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsweb.c:421: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsweb.c:422: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsweb.c:423: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.c:188: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.c:507: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.c:508: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.c:509: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.c:510: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.c:511: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.c:512: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.c:580: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.c:581: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.h:115: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.h:246: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.h:247: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.h:248: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.h:249: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.h:250: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.h:251: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.h:314: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/proxy/proxytrack.h:315: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htstools.c:151: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htstools.c:158: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htstools.c:161: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htstools.c:166: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htstools.c:174: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htstools.c:177: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htstools.c:188: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htstools.c:243: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htstools.c:245: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htstools.c:248: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htstools.c:257: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htstools.c:647: Warning:old style assignment ambiguity in "=*".  Assuming "= *"
indent: ./src/htsbauth.c:364: Warning:old style assignment ambiguity in "=&".  Assuming "= &"
indent: ./src/htsbauth.c:387: Warning:old style assignment ambiguity in "=&".  Assuming "= &"
indent: ./src/htscache.c:473: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsserver.h:112: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsserver.h:187: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsjava.c:312: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsjava.c:379: Warning:old style assignment ambiguity in "=&".  Assuming "= &"
indent: ./src/htsjava.c:407: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsjava.c:472: Warning:old style assignment ambiguity in "=&".  Assuming "= &"
indent: ./src/htsjava.c:483: Warning:old style assignment ambiguity in "=*".  Assuming "= *"
indent: ./src/htslib.c:593: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:809: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:1743: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:1874: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:1896: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:1984: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:2085: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:2604: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:2605: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:2606: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:2607: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:2608: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:2609: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:2672: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:2673: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:2906: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:2928: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:2996: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:4802: Warning:old style assignment ambiguity in "=&".  Assuming "= &"
indent: ./src/htslib.c:5353: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:5354: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:5355: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:5357: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:5429: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htslib.c:5435: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htscore.c:208: Warning:old style assignment ambiguity in "=&".  Assuming "= &"
indent: ./src/htscore.c:277: Warning:old style assignment ambiguity in "=&".  Assuming "= &"
indent: ./src/htscore.c:279: Warning:old style assignment ambiguity in "=&".  Assuming "= &"
indent: ./src/htscore.c:357: Warning:old style assignment ambiguity in "=*".  Assuming "= *"
indent: ./src/htscore.c:394: Warning:old style assignment ambiguity in "=*".  Assuming "= *"
indent: ./src/htscore.c:1544: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htscore.c:3330: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htscore.c:3361: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htshash.c:140: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htshash.c:217: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsname.c:733: Warning:old style assignment ambiguity in "=*".  Assuming "= *"
indent: ./src/htsname.c:749: Warning:old style assignment ambiguity in "=*".  Assuming "= *"
indent: ./src/htsname.c:933: Warning:old style assignment ambiguity in "=*".  Assuming "= *"
indent: ./src/htsname.c:1520: Warning:old style assignment ambiguity in "=*".  Assuming "= *"
indent: ./src/htszlib.c:76: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htszlib.c:81: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htscatchurl.c:268: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsinthash.c:156: Warning:old style assignment ambiguity in "=&".  Assuming "= &"
indent: ./src/htsinthash.c:159: Warning:old style assignment ambiguity in "=&".  Assuming "= &"
indent: ./src/htsinthash.c:176: Warning:old style assignment ambiguity in "=&".  Assuming "= &"
indent: ./src/htsinthash.c:179: Warning:old style assignment ambiguity in "=&".  Assuming "= &"
indent: ./src/htsinthash.c:226: Warning:old style assignment ambiguity in "=&".  Assuming "= &"
indent: ./src/htsinthash.c:253: Warning:old style assignment ambiguity in "=&".  Assuming "= &"
indent: ./src/htsftp.c:169: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsftp.c:177: Warning:old style assignment ambiguity in "=-".  Assuming "= -"
indent: ./src/htsfilters.c:67: Warning:old style assignment ambiguity in "=*".  Assuming "= *"
indent: ./src/htsfilters.c:80: Warning:old style assignment ambiguity in "=*".  Assuming "= *"
2013-05-14 20:30:28 +00:00

168 lines
5.0 KiB
C
Executable File

/*
HTTrack external callbacks example : crawling html pages depending on content
Example of <wrappername>_init and <wrappername>_exit call (httrack >> 3.31)
.c file
How to build: (callback.so or callback.dll)
With GNU-GCC:
gcc -O -g3 -Wall -D_REENTRANT -shared -o mycallback.so callbacks-example-contentfilter.c -lhttrack2
With MS-Visual C++:
cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"mycallback.dll" callbacks-example-contentfilter.c libhttrack.lib
Note: the httrack library linker option is only necessary when using libhttrack's functions inside the callback
How to use:
httrack --wrapper mycallback,stringtofind,stringtofind.. ..
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Standard httrack module includes */
#include "httrack-library.h"
#include "htsopt.h"
#include "htsdefines.h"
/* Local function declarations: the two callbacks that hts_plug() chains
   onto the "check_html" (process) and "end" (end) hooks */
static int process(t_hts_callbackarg * carg, httrackp * opt, char *html,
int len, const char *address, const char *filename);
static int end(t_hts_callbackarg * carg, httrackp * opt);
/* Exported module entry point (defined below) */
EXTERNAL_FUNCTION int hts_plug(httrackp * opt, const char *argv);
/*
TOLOWER: maps the letters 'A'..'Z' to 'a'..'z' and leaves any other value
unchanged.
The expansion and every use of the argument are fully parenthesized so that
both macros can be embedded in larger expressions (e.g. 1 + TOLOWER_(c)).
Note: the argument is evaluated more than once; do not pass expressions with
side effects such as *p++.
*/
#define TOLOWER_(a) (((a) >= 'A' && (a) <= 'Z') ? ((a) + ('a' - 'A')) : (a))
#define TOLOWER(a) ( TOLOWER_( (a) ) )
/*
This sample only crawls pages that contain certain keywords, and skips the others
*/
/* State allocated by hts_plug() and handed to the callbacks as user data */
typedef struct t_my_userdef {
/* Private copy of the keyword list; hts_plug() replaces each ',' with '\0' */
char stringfilter[8192];
/* NULL-terminated array of pointers to the keywords inside stringfilter */
char *stringfilters[128];
} t_my_userdef;
/*
Module entry point.

argv is searched for its first ','; everything after it is taken as a
comma-separated keyword list (see the usage text printed below). The list is
copied into a freshly allocated t_my_userdef, split in place, and registered
as the user data of the "check_html" and "end" callbacks; the "end" callback
releases it.

Empty keywords (as produced by "a,,b" or a trailing comma) are skipped: an
empty needle makes strstr() match every page, which would silently disable
the filter.

Returns 1 on success. Returns 0, with a message on stderr and nothing
registered, when the keyword list is missing or contains no usable keyword,
when it does not fit into the fixed-size buffers of t_my_userdef, or when
memory cannot be allocated.
*/
EXTERNAL_FUNCTION int hts_plug(httrackp * opt, const char *argv) {
  const char *arg = (argv != NULL) ? strchr(argv, ',') : NULL;
  t_my_userdef *userdef;
  size_t max_keywords;
  size_t count = 0;
  char *a;

  if (arg != NULL)
    arg++;

  /* Check args */
  if (arg == NULL || *arg == '\0') {
    fprintf(stderr, "** callback error: arguments expected or bad arguments\n");
    fprintf(stderr,
            "usage: httrack --wrapper callback,stringtofind,stringtofind..\n");
    fprintf(stderr, "example: httrack --wrapper callback,apple,orange,lemon\n");
    return 0;
  }

  userdef = (t_my_userdef *) malloc(sizeof(*userdef));
  if (userdef == NULL) {
    fprintf(stderr, "** callback error: out of memory\n");
    return 0;
  }
  /* One slot of stringfilters[] is reserved for the terminating NULL */
  max_keywords =
    sizeof(userdef->stringfilters) / sizeof(userdef->stringfilters[0]) - 1;

  fprintf(stderr, "** info: wrapper_init(%s) called!\n", arg);
  fprintf(stderr,
          "** callback example: crawling pages only if specific keywords are found\n");

  /* The copy below must fit, including its terminating '\0' */
  if (strlen(arg) >= sizeof(userdef->stringfilter)) {
    fprintf(stderr,
            "** callback error: keyword list too long (max %u characters)\n",
            (unsigned) (sizeof(userdef->stringfilter) - 1));
    free(userdef);
    return 0;
  }
  strcpy(userdef->stringfilter, arg);

  /* stringfilters = split(arg, ','); done in place on the private copy */
  a = userdef->stringfilter;
  while (a != NULL) {
    char *next = strchr(a, ',');

    if (next != NULL) {
      *next = '\0';
      next++;
    }
    if (*a != '\0') {           /* skip empty keywords */
      if (count >= max_keywords) {
        fprintf(stderr, "** callback error: too many keywords (max %u)\n",
                (unsigned) max_keywords);
        free(userdef);
        return 0;
      }
      userdef->stringfilters[count++] = a;
      fprintf(stderr, "** callback info: will crawl pages with '%s' in them\n",
              a);
    }
    a = next;
  }
  if (count == 0) {
    fprintf(stderr, "** callback error: no usable keyword in '%s'\n", arg);
    free(userdef);
    return 0;
  }
  userdef->stringfilters[count] = NULL;

  /* Plug callback functions */
  CHAIN_FUNCTION(opt, check_html, process, userdef);
  CHAIN_FUNCTION(opt, end, end, userdef);

  return 1;                     /* success */
}
/*
"check_html" callback: decides whether a downloaded page is parsed.

Any previously chained check_html handler is consulted first; if it returns
0 the page is rejected immediately. The "primary" page (the initial list of
links) is always accepted. Otherwise html is searched, case-sensitively, for
each configured keyword in order; the first hit is logged to stderr together
with up to 72 characters of context, and the page is accepted.

Returns 1 to let the page be parsed, 0 to skip it.
*/
static int process(t_hts_callbackarg * carg, httrackp * opt, char *html,
                   int len, const char *address, const char *filename) {
  t_my_userdef *const data = (t_my_userdef *) CALLBACKARG_USERDEF(carg);
  char **keyword;

  /* Call parent functions if multiple callbacks are chained. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
      && !CALLBACKARG_PREV_FUN(carg, check_html)
      (CALLBACKARG_PREV_CARG(carg), opt, html, len, address, filename)) {
    return 0;                   /* Abort */
  }

  /* The primary page (list of links) is never filtered */
  if (strcmp(address, "primary") == 0 && strcmp(filename, "/primary") == 0) {
    return 1;
  }

  /* Stop at the first keyword found in the page */
  for (keyword = data->stringfilters; *keyword != NULL; keyword++) {
    const char *const hit = strstr(html, *keyword);
    int k;

    if (hit == NULL) {
      continue;
    }

    fprintf(stderr,
            "** callback info: found '%s' keyword in '%s%s', crawling this page!\n",
            *keyword, address, filename);
    fprintf(stderr, "** details:\n(..)");
    /* Show a short excerpt starting at the match; anything that is not
       greater than 32 (space) is shown as '?' */
    for (k = 0; k < 72 && hit[k]; k++) {
      if (hit[k] <= 32) {
        fprintf(stderr, "?");
      } else {
        fprintf(stderr, "%c", hit[k]);
      }
    }
    fprintf(stderr, "(..)\n");
    return 1;                   /* success */
  }

  fprintf(stderr,
          "** callback info: won't parse '%s%s' (no specified keywords found)\n",
          address, filename);
  return 0;                     /* this page sucks, don't parse it */
}
/*
"end" callback: releases the user data allocated by hts_plug(), then forwards
to the previously chained "end" handler, if any.

Returns the parent handler's result when one is chained, 1 otherwise.
*/
static int end(t_hts_callbackarg * carg, httrackp * opt) {
  t_my_userdef *const data = (t_my_userdef *) CALLBACKARG_USERDEF(carg);

  fprintf(stderr, "** info: wrapper_exit() called!\n");

  /* free(NULL) is a no-op, so no guard is needed */
  free(data);

  /* Call parent functions if multiple callbacks are chained. */
  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
    return 1;                   /* success */
  }
  return CALLBACKARG_PREV_FUN(carg, end) (CALLBACKARG_PREV_CARG(carg), opt);
}