mirror of
https://github.com/xroche/httrack.git
synced 2026-06-25 03:27:22 +03:00
The resolver cache was a hand-rolled singly-linked list with a dummy head node: O(n) lookup, O(n^2) build, and each record carried its own next pointer plus an inline copy of the hostname key. Swap it for coucal, the hashtable already used for the backing cache and the ready slots, keyed by hostname with the address record as the value. coucal owns the records (freed through a value handler on coucal_delete) and dups the key itself, so t_dnscache sheds both its next link and its inline iadr string and becomes a pure address record. The state field keeps the same pointer width (t_dnscache* -> coucal), so the installed htsopt.h layout and the ABI are unchanged. Behaviour is identical: same -1/0/>0 lookup contract, same negative caching, same resolve-once semantics, all under the existing opt->state.lock (coucal is not internally serialized against the FTP/web threads). The DNS self-test exercises the full contract black-box and passes unchanged. Signed-off-by: Xavier Roche <roche@httrack.com> Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
6475 lines
181 KiB
C
6475 lines
181 KiB
C
/* ------------------------------------------------------------ */
|
|
/*
|
|
HTTrack Website Copier, Offline Browser for Windows and Unix
|
|
Copyright (C) 1998 Xavier Roche and other contributors
|
|
|
|
SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
Ethical use: we kindly ask that you NOT use this software to harvest email
|
|
addresses or to collect any other private information about people. Doing so
|
|
would dishonor our work and waste the many hours we have spent on it.
|
|
|
|
Please visit our Website: http://www.httrack.com
|
|
*/
|
|
|
|
/* ------------------------------------------------------------ */
|
|
/* File: Subroutines */
|
|
/* Author: Xavier Roche */
|
|
/* ------------------------------------------------------------ */
|
|
|
|
/* Internal engine bytecode */
|
|
#define HTS_INTERNAL_BYTECODE
|
|
|
|
// Fichier librairie .c
|
|
|
|
#include "htscore.h"
|
|
|
|
/* specific definitions */
|
|
#include "htsbase.h"
|
|
#include "htsnet.h"
|
|
#include "htsbauth.h"
|
|
#include "htsthread.h"
|
|
#include "htsback.h"
|
|
#include "htswrap.h"
|
|
#include "htsmd5.h"
|
|
#include "htsmodules.h"
|
|
#include "htscharset.h"
|
|
#include "htsencoding.h"
|
|
|
|
#ifdef _WIN32
|
|
#include <direct.h>
|
|
#else
|
|
#ifdef HAVE_SYS_TYPES_H
|
|
#include <sys/types.h>
|
|
#endif
|
|
#ifdef HAVE_SYS_STAT_H
|
|
#include <sys/stat.h>
|
|
#endif
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif
|
|
#endif /* _WIN32 */
|
|
#include <stdarg.h>
|
|
|
|
#include <string.h>
|
|
#include <time.h>
|
|
#include <stdarg.h>
|
|
|
|
#ifndef _WIN32
|
|
#include <sys/time.h>
|
|
#else
|
|
#include <sys/timeb.h>
|
|
#endif
|
|
#include <fcntl.h>
|
|
|
|
// pour utimbuf
|
|
#ifdef _WIN32
|
|
#include <sys/utime.h>
|
|
#else
|
|
#include <utime.h>
|
|
#endif /* _WIN32 */
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#ifdef __ANDROID__
|
|
#define timezone 0
|
|
#endif
|
|
/* END specific definitions */
|
|
|
|
/* Windows might be missing va_copy */
#ifdef _WIN32
#ifndef va_copy
/* Fallback used only when the toolchain does not provide va_copy:
   plain assignment of the va_list object. */
#define va_copy(dst, src) ((dst) = (src))
#endif
#endif
|
|
|
|
// Debugging
#if _HTS_WIDE
/* Debug output stream, only defined when _HTS_WIDE is enabled */
FILE *DEBUG_fp = NULL;
#endif

/* global variables */
int _DEBUG_HEAD;
FILE *ioinfo;

#if HTS_USEOPENSSL
/* Shared OpenSSL context, NULL until initialized */
SSL_CTX *openssl_ctx = NULL;
#endif
int IPV6_resolver = 0;
|
|
|
|
/* additional detection
   (the list is terminated by an empty string) */
const char *hts_detect[] = {
  "archive",
  "background",
  "data",                       // OBJECT
  "data-src",
  "data-srcset",
  "dynsrc",
  "lowsrc",
  "profile",                    // META element
  "src",
  "srcset",                     // HTML5 responsive images (<img>, <source>)
  "swurl",
  "url",
  "usemap",
  "longdesc",                   // accessibility
  "xlink:href",                 // xml/svg tag
  "poster",                     // HTML5
  ""
};
|
|
|
|
/* detect by beginning of the name
   (the list is terminated by an empty string) */
const char *hts_detectbeg[] = {
  "hotspot",                    /* hotspot1=..,hotspot2=.. */
  ""
};
|
|
|
|
/* do not detect links inside these
   (the list is terminated by an empty string) */
const char *hts_nodetect[] = {
  "accept-charset",
  "accesskey",
  "action",
  "align",
  "alt",
  "axes",
  "axis",
  "char",
  "charset",
  "cite",
  "class",
  "classid",
  "code",
  "color",
  "datetime",
  "dir",
  "enctype",
  "face",
  "height",
  "id",
  "lang",
  "language",
  "media",
  "method",
  "name",
  "prompt",
  "scheme",
  "size",
  "style",
  "target",
  "title",
  "type",
  "valign",
  "version",
  "width",
  ""
};
|
|
|
|
/* detection of inline javascript snippets */
/* ALSO USED: detection based on the name: onXXX="<tag>" where XXX starts with upper case letter */
/* (the list is terminated by an empty string) */
const char *hts_detect_js[] = {
  "onAbort",
  "onBlur",
  "onChange",
  "onClick",
  "onDblClick",
  "onDragDrop",
  "onError",
  "onFocus",
  "onKeyDown",
  "onKeyPress",
  "onKeyUp",
  "onLoad",
  "onMouseDown",
  "onMouseMove",
  "onMouseOut",
  "onMouseOver",
  "onMouseUp",
  "onMove",
  "onReset",
  "onResize",
  "onSelect",
  "onSubmit",
  "onUnload",
  "style",                      /* hack for CSS code data */
  ""
};
|
|
|
|
/* top-level MIME type names
   (the list is terminated by an empty string) */
const char *hts_main_mime[] = {
  "application",
  "audio",
  "image",
  "message",
  "multipart",
  "text",
  "video",
  ""
};
|
|
|
|
/* detection of "...URL=<url>"
   (the list is terminated by an empty string) */
const char *hts_detectURL[] = {
  "content",
  ""
};
|
|
|
|
/* tags where the URL must be rewritten but not captured
   (the list is terminated by an empty string) */
const char *hts_detectandleave[] = {
  "action",
  ""
};
|
|
|
|
/* do not rename the returned types (often unknown types)
   (the list is terminated by an empty string) */
const char *hts_mime_keep[] = {
  "application/octet-stream",
  "text/plain",
  "application/xml",
  "text/xml",
  ""
};
|
|
|
|
/* bogus servers return these mime types when the extension is seen within the filename
   (the list is terminated by an empty string) */
const char *hts_mime_bogus_multiple[] = {
  "application/x-wais-source",  /* src (src.rpm) */
  ""
};
|
|
|
|
/* no known MIME type, but known extension
   (the list is terminated by an empty string) */
const char *hts_ext_dynamic[] = {
  "php3",
  "php",
  "php4",
  "php2",
  "cgi",
  "asp",
  "jsp",
  "pl",
  /*"exe", */
  "cfm",
  "nsf",                        /* lotus */
  ""
};
|
|
|
|
/* MIME types: {mime type, file extension} pairs.
   The table is terminated by an {"", ""} entry. Some pairs appear more than
   once and several MIME types share an extension (and vice versa).
   note: application/octet-stream should not be used here
*/
const char *hts_mime[][2] = {
  {"application/acad", "dwg"},
  {"application/arj", "arj"},
  {"application/clariscad", "ccad"},
  {"application/drafting", "drw"},
  {"application/dxf", "dxf"},
  {"application/excel", "xls"},
  {"application/i-deas", "unv"},
  {"application/iges", "isg"},
  {"application/iges", "iges"},
  {"application/mac-binhex40", "hqx"},
  {"application/mac-compactpro", "cpt"},
  {"application/msword", "doc"},
  {"application/msword", "w6w"},
  {"application/msword", "word"},
  {"application/mswrite", "wri"},
  /*{"application/octet-stream","dms"}, */
  /*{"application/octet-stream","lzh"}, */
  /*{"application/octet-stream","lha"}, */
  /*{"application/octet-stream","bin"}, */
  {"application/oda", "oda"},
  {"application/pdf", "pdf"},
  {"application/postscript", "ps"},
  {"application/postscript", "ai"},
  {"application/postscript", "eps"},
  {"application/powerpoint", "ppt"},
  {"application/pro_eng", "prt"},
  {"application/pro_eng", "part"},
  {"application/rtf", "rtf"},
  {"application/set", "set"},
  {"application/sla", "stl"},
  {"application/smil", "smi"},
  {"application/smil", "smil"},
  {"application/smil", "sml"},
  {"application/solids", "sol"},
  {"application/STEP", "stp"},
  {"application/STEP", "step"},
  {"application/vda", "vda"},
  {"application/x-authorware-map", "aam"},
  {"application/x-authorware-seg", "aas"},
  {"application/x-authorware-bin", "aab"},
  {"application/x-bzip2", "bz2"},
  {"application/x-cocoa", "cco"},
  {"application/x-csh", "csh"},
  {"application/x-director", "dir"},
  {"application/x-director", "dcr"},
  {"application/x-director", "dxr"},
  {"application/x-mif", "mif"},
  {"application/x-dvi", "dvi"},
  {"application/x-gzip", "gz"},
  {"application/x-gzip", "gzip"},
  {"application/x-hdf", "hdf"},
  {"application/x-javascript", "js"},
  {"application/x-koan", "skp"},
  {"application/x-koan", "skd"},
  {"application/x-koan", "skt"},
  {"application/x-koan", "skm"},
  {"application/x-latex", "latex"},
  {"application/x-netcdf", "nc"},
  {"application/x-netcdf", "cdf"},
  /* {"application/x-sh","sh"}, */
  /* {"application/x-csh","csh"}, */
  /* {"application/x-ksh","ksh"}, */
  {"application/x-shar", "shar"},
  {"application/x-stuffit", "sit"},
  {"application/x-tcl", "tcl"},
  {"application/x-tex", "tex"},
  {"application/x-texinfo", "texinfo"},
  {"application/x-texinfo", "texi"},
  {"application/x-troff", "t"},
  {"application/x-troff", "tr"},
  {"application/x-troff", "roff"},
  {"application/x-troff-man", "man"},
  {"application/x-troff-me", "ms"},
  {"application/x-wais-source", "src"},
  {"application/zip", "zip"},
  {"application/x-zip-compressed", "zip"},
  {"application/x-bcpio", "bcpio"},
  {"application/x-cdlink", "vcd"},
  {"application/x-cpio", "cpio"},
  {"application/x-gtar", "tgz"},
  {"application/x-gtar", "gtar"},
  {"application/x-shar", "shar"},
  {"application/x-shockwave-flash", "swf"},
  {"application/x-sv4cpio", "sv4cpio"},
  {"application/x-sv4crc", "sv4crc"},
  {"application/x-tar", "tar"},
  {"application/x-ustar", "ustar"},
  {"application/x-winhelp", "hlp"},
  {"application/xml", "xml"},
  {"audio/midi", "mid"},
  {"audio/midi", "midi"},
  {"audio/midi", "kar"},
  {"audio/mpeg", "mp3"},
  {"audio/mpeg", "mpga"},
  {"audio/mpeg", "mp2"},
  {"audio/basic", "au"},
  {"audio/basic", "snd"},
  {"audio/x-aiff", "aif"},
  {"audio/x-aiff", "aiff"},
  {"audio/x-aiff", "aifc"},
  {"audio/x-pn-realaudio", "rm"},
  {"audio/x-pn-realaudio", "ram"},
  {"audio/x-pn-realaudio", "ra"},
  {"audio/x-pn-realaudio-plugin", "rpm"},
  {"audio/x-wav", "wav"},
  {"chemical/x-pdb", "pdb"},
  {"chemical/x-pdb", "xyz"},
  {"drawing/x-dwf", "dwf"},
  {"image/gif", "gif"},
  {"image/ief", "ief"},
  {"image/jpeg", "jpg"},
  {"image/jpeg", "jpe"},
  {"image/jpeg", "jpeg"},
  {"image/pict", "pict"},
  {"image/png", "png"},
  {"image/tiff", "tiff"},
  {"image/tiff", "tif"},
  {"image/svg+xml", "svg"},
  {"image/svg-xml", "svg"},
  {"image/x-cmu-raster", "ras"},
  {"image/x-freehand", "fh4"},
  {"image/x-freehand", "fh7"},
  {"image/x-freehand", "fh5"},
  {"image/x-freehand", "fhc"},
  {"image/x-freehand", "fh"},
  {"image/x-portable-anymap", "pnm"},
  {"image/x-portable-bitmap", "pgm"},
  {"image/x-portable-pixmap", "ppm"},
  {"image/x-rgb", "rgb"},
  {"image/x-xbitmap", "xbm"},
  {"image/x-xpixmap", "xpm"},
  {"image/x-xwindowdump", "xwd"},
  {"model/mesh", "msh"},
  {"model/mesh", "mesh"},
  {"model/mesh", "silo"},
  {"multipart/x-zip", "zip"},
  {"multipart/x-gzip", "gzip"},
  {"text/css", "css"},
  {"text/html", "html"},
  {"text/html", "htm"},
  {"text/plain", "txt"},
  {"text/plain", "g"},
  {"text/plain", "h"},
  {"text/plain", "c"},
  {"text/plain", "cc"},
  {"text/plain", "hh"},
  {"text/plain", "m"},
  {"text/plain", "f90"},
  {"text/richtext", "rtx"},
  {"text/tab-separated-values", "tsv"},
  {"text/x-setext", "etx"},
  {"text/x-sgml", "sgml"},
  {"text/x-sgml", "sgm"},
  {"text/xml", "xml"},
  {"text/xml", "dtd"},
  {"video/mpeg", "mpeg"},
  {"video/mpeg", "mpg"},
  {"video/mpeg", "mpe"},
  {"video/quicktime", "qt"},
  {"video/quicktime", "mov"},
  {"video/x-msvideo", "avi"},
  {"video/x-sgi-movie", "movie"},
  {"x-conference/x-cooltalk", "ice"},
  /*{"application/x-httpd-cgi","cgi"}, */
  {"x-world/x-vrml", "wrl"},

  /* More from w3schools.com */
  {"application/envoy", "evy"},
  {"application/fractals", "fif"},
  {"application/futuresplash", "spl"},
  {"application/hta", "hta"},
  {"application/internet-property-stream", "acx"},
  {"application/msword", "dot"},
  {"application/olescript", "axs"},
  {"application/pics-rules", "prf"},
  {"application/pkcs10", "p10"},
  {"application/pkix-crl", "crl"},
  {"application/set-payment-initiation", "setpay"},
  {"application/set-registration-initiation", "setreg"},
  {"application/vnd.ms-excel", "xls"},
  {"application/vnd.ms-excel", "xla"},
  {"application/vnd.ms-excel", "xlc"},
  {"application/vnd.ms-excel", "xlm"},
  {"application/vnd.ms-excel", "xlt"},
  {"application/vnd.ms-excel", "xlw"},
  {"application/vnd.ms-pkicertstore", "sst"},
  {"application/vnd.ms-pkiseccat", "cat"},
  {"application/vnd.ms-powerpoint", "ppt"},
  {"application/vnd.ms-powerpoint", "pot"},
  {"application/vnd.ms-powerpoint", "pps"},
  {"application/vnd.ms-project", "mpp"},
  {"application/vnd.ms-works", "wcm"},
  {"application/vnd.ms-works", "wdb"},
  {"application/vnd.ms-works", "wks"},
  {"application/vnd.ms-works", "wps"},
  {"application/vnd.oasis.opendocument.chart", "odc"},
  {"application/vnd.oasis.opendocument.database", "odb"},
  {"application/vnd.oasis.opendocument.formula", "odf"},
  {"application/vnd.oasis.opendocument.graphics", "odg"},
  {"application/vnd.oasis.opendocument.graphics-template", "otg"},
  {"application/vnd.oasis.opendocument.image", "odi"},
  {"application/vnd.oasis.opendocument.presentation", "odp"},
  {"application/vnd.oasis.opendocument.presentation-template", "otp"},
  {"application/vnd.oasis.opendocument.spreadsheet", "ods"},
  {"application/vnd.oasis.opendocument.spreadsheet-template", "ots"},
  {"application/vnd.oasis.opendocument.text", "odt"},
  {"application/vnd.oasis.opendocument.text-master", "odm"},
  {"application/vnd.oasis.opendocument.text-template", "ott"},
  {"application/vnd.oasis.opendocument.text-web", "oth"},
  {"application/vnd.openxmlformats-officedocument.presentationml.presentation", "pptx"},
  {"application/vnd.openxmlformats-officedocument.presentationml.slide", "sldx"},
  {"application/vnd.openxmlformats-officedocument.presentationml.slideshow", "ppsx"},
  {"application/vnd.openxmlformats-officedocument.presentationml.template", "potx"},
  {"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"},
  {"application/vnd.openxmlformats-officedocument.spreadsheetml.template", "xltx"},
  {"application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"},
  {"application/vnd.openxmlformats-officedocument.wordprocessingml.template", "dotx"},
  {"application/x-compress", "z"},
  {"application/x-compressed", "tgz"},
  {"application/x-internet-signup", "ins"},
  {"application/x-internet-signup", "isp"},
  {"application/x-iphone", "iii"},
  {"application/x-javascript", "js"},
  {"application/x-msaccess", "mdb"},
  {"application/x-mscardfile", "crd"},
  {"application/x-msclip", "clp"},
  {"application/x-msmediaview", "m13"},
  {"application/x-msmediaview", "m14"},
  {"application/x-msmediaview", "mvb"},
  {"application/x-msmetafile", "wmf"},
  {"application/x-msmoney", "mny"},
  {"application/x-mspublisher", "pub"},
  {"application/x-msschedule", "scd"},
  {"application/x-msterminal", "trm"},
  {"application/x-perfmon", "pma"},
  {"application/x-perfmon", "pmc"},
  {"application/x-perfmon", "pml"},
  {"application/x-perfmon", "pmr"},
  {"application/x-perfmon", "pmw"},
  {"application/x-pkcs12", "p12"},
  {"application/x-pkcs12", "pfx"},
  {"application/x-pkcs7-certificates", "p7b"},
  {"application/x-pkcs7-certificates", "spc"},
  {"application/x-pkcs7-certreqresp", "p7r"},
  {"application/x-pkcs7-mime", "p7c"},
  {"application/x-pkcs7-mime", "p7m"},
  {"application/x-pkcs7-signature", "p7s"},
  {"application/x-troff-me", "me"},
  {"application/x-x509-ca-cert", "cer"},
  {"application/x-x509-ca-cert", "crt"},
  {"application/x-x509-ca-cert", "der"},
  {"application/ynd.ms-pkipko", "pko"},
  {"audio/mid", "mid"},
  {"audio/mid", "rmi"},
  {"audio/mpeg", "mp3"},
  {"audio/x-mpegurl", "m3u"},
  {"image/bmp", "bmp"},
  {"image/cis-cod", "cod"},
  {"image/pipeg", "jfif"},
  {"image/x-cmx", "cmx"},
  {"image/x-icon", "ico"},
  {"image/x-portable-bitmap", "pbm"},
  {"message/rfc822", "mht"},
  {"message/rfc822", "mhtml"},
  {"message/rfc822", "nws"},
  {"text/css", "css"},
  {"text/h323", "323"},
  {"text/html", "stm"},
  {"text/iuls", "uls"},
  {"text/plain", "bas"},
  {"text/scriptlet", "sct"},
  {"text/webviewhtml", "htt"},
  {"text/x-component", "htc"},
  {"text/x-vcard", "vcf"},
  {"video/mpeg", "mp2"},
  {"video/mpeg", "mpa"},
  {"video/mpeg", "mpv2"},
  {"video/x-la-asf", "lsf"},
  {"video/x-la-asf", "lsx"},
  {"video/x-ms-asf", "asf"},
  {"video/x-ms-asf", "asr"},
  {"video/x-ms-asf", "asx"},
  {"video/x-ms-wmv", "wmv"},
  {"x-world/x-vrml", "flr"},
  {"x-world/x-vrml", "vrml"},
  {"x-world/x-vrml", "wrz"},
  {"x-world/x-vrml", "xaf"},
  {"x-world/x-vrml", "xof"},

  /* Various */
  {"application/ogg", "ogg"},

  {"application/x-java-vm", "class"},
  {"application/x-bittorrent","torrent"},

  {"", ""}
};
|
|
|
|
// Character classification macros. Each one casts its argument to
// unsigned char before comparing, and evaluates the argument several times
// (do not pass expressions with side effects).

// CIS: true if character c (as unsigned char) equals ch
// Reserved (RFC2396)
#define CIS(c,ch) ( ((unsigned char)(c)) == (ch) )
#define CHAR_RESERVED(c) ( CIS(c,';') \
                           || CIS(c,'/') \
                           || CIS(c,'?') \
                           || CIS(c,':') \
                           || CIS(c,'@') \
                           || CIS(c,'&') \
                           || CIS(c,'=') \
                           || CIS(c,'+') \
                           || CIS(c,'$') \
                           || CIS(c,',') )
//#define CHAR_RESERVED(c) ( strchr(";/?:@&=+$,",(unsigned char)(c)) != 0 )
// Delimiters (RFC2396)
#define CHAR_DELIM(c) ( CIS(c,'<') \
                        || CIS(c,'>') \
                        || CIS(c,'#') \
                        || CIS(c,'%') \
                        || CIS(c,'\"') )
//#define CHAR_DELIM(c) ( strchr("<>#%\"",(unsigned char)(c)) != 0 )
// Unwise (RFC2396)
#define CHAR_UNWISE(c) ( CIS(c,'{') \
                         || CIS(c,'}') \
                         || CIS(c,'|') \
                         || CIS(c,'\\') \
                         || CIS(c,'^') \
                         || CIS(c,'[') \
                         || CIS(c,']') \
                         || CIS(c,'`') )
//#define CHAR_UNWISE(c) ( strchr("{}|\\^[]`",(unsigned char)(c)) != 0 )
// Special (escape chars) (RFC2396 + >= 127)
// CHAR_LOW: control characters 0..31; CHAR_HIG: 127 (DEL) and above
#define CHAR_LOW(c) ( ((unsigned char)(c) <= 31) )
#define CHAR_HIG(c) ( ((unsigned char)(c) >= 127) )
#define CHAR_SPECIAL(c) ( CHAR_LOW(c) || CHAR_HIG(c) )
// We try to avoid them and encode them instead
#define CHAR_XXAVOID(c) ( CIS(c,' ') \
                          || CIS(c,'*') \
                          || CIS(c,'\'') \
                          || CIS(c,'\"') \
                          || CIS(c,'&') \
                          || CIS(c,'!') )
//#define CHAR_XXAVOID(c) ( strchr(" *'\"!",(unsigned char)(c)) != 0 )
// Mark characters: - _ . ! ~ * ' ( )
#define CHAR_MARK(c) ( CIS(c,'-') \
                       || CIS(c,'_') \
                       || CIS(c,'.') \
                       || CIS(c,'!') \
                       || CIS(c,'~') \
                       || CIS(c,'*') \
                       || CIS(c,'\'') \
                       || CIS(c,'(') \
                       || CIS(c,')') )
//#define CHAR_MARK(c) ( strchr("-_.!~*'()",(unsigned char)(c)) != 0 )
|
|
|
|
// Convert every '/' of s into a backslash (Windows only).
// The string s is first copied into catbuff, the copy is converted in place,
// and catbuff is returned. s itself is not modified.
#ifdef _WIN32
char *antislash(char *catbuff, const char *s) {
  char *p;

  strcpybuff(catbuff, s);
  // single pass over the copy (no rescan from the start after each hit)
  for (p = catbuff; *p != '\0'; p++) {
    if (*p == '/')
      *p = '\\';
  }
  return catbuff;
}
#endif
|
|
|
|
// Initialize a htsblk structure
|
|
void hts_init_htsblk(htsblk * r) {
|
|
memset(r, 0, sizeof(htsblk)); // effacer
|
|
r->soc = INVALID_SOCKET;
|
|
r->msg[0] = '\0';
|
|
r->statuscode = STATUSCODE_INVALID;
|
|
r->totalsize = -1;
|
|
}
|
|
|
|
// ouvre une liaison http, envoie une requète GET et réceptionne le header
|
|
// retour: socket
|
|
T_SOC http_fopen(httrackp * opt, const char *adr, const char *fil, htsblk * retour) {
|
|
// / GET, traiter en-tête
|
|
return http_xfopen(opt, 0, 1, 1, NULL, adr, fil, retour);
|
|
}
|
|
|
|
// Read a CRLF line from a non-blocking socket (waits up to timeout per recv).
|
|
// Returns the line length (0 = empty), or -1 on timeout/EOF/error.
|
|
static int proxy_getline(T_SOC soc, char *s, int max, int timeout) {
|
|
int j = 0;
|
|
|
|
for (;;) {
|
|
unsigned char ch;
|
|
int n;
|
|
|
|
if (!check_readinput_t(soc, timeout))
|
|
return -1; // timed out waiting for data
|
|
n = (int) recv(soc, &ch, 1, 0);
|
|
if (n == 1) {
|
|
if (ch == 13) // CR
|
|
continue;
|
|
if (ch == 10) // LF: end of line
|
|
break;
|
|
if (j >= max - 1)
|
|
return -1; // line too long: bound the read against a hostile proxy
|
|
s[j++] = (char) ch;
|
|
} else if (n == 0) {
|
|
return -1; // connection closed
|
|
} else {
|
|
#ifdef _WIN32
|
|
if (WSAGetLastError() == WSAEWOULDBLOCK)
|
|
continue;
|
|
#else
|
|
if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
|
|
continue;
|
|
#endif
|
|
return -1;
|
|
}
|
|
}
|
|
s[j] = '\0';
|
|
return j;
|
|
}
|
|
|
|
int http_proxy_tunnel(httrackp *opt, htsblk *retour, const char *adr,
|
|
int timeout) {
|
|
const T_SOC soc = retour->soc;
|
|
const char *const host = jump_identification_const(adr); // host[:port]
|
|
const char *const portsep = jump_toport_const(adr); // ":port" or NULL
|
|
char BIGSTK authority[HTS_URLMAXSIZE * 2];
|
|
char BIGSTK req[HTS_URLMAXSIZE * 4 + 1100];
|
|
char line[1024];
|
|
int code;
|
|
|
|
if (soc == INVALID_SOCKET)
|
|
return 0;
|
|
|
|
// CONNECT needs an explicit host:port; default the https port
|
|
authority[0] = '\0';
|
|
if (portsep != NULL)
|
|
strlcatbuff(authority, host, sizeof(authority)); // already host:port
|
|
else
|
|
snprintf(authority, sizeof(authority), "%s:%d", host, 443);
|
|
|
|
// backstop: never let a stray CR/LF in the host smuggle a second line into
|
|
// the CONNECT request (the host is already sanitized upstream)
|
|
{
|
|
const char *c;
|
|
|
|
for (c = authority; *c != '\0'; c++) {
|
|
if ((unsigned char) *c < ' ') {
|
|
strcpybuff(retour->msg, "proxy CONNECT: invalid host");
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
snprintf(req, sizeof(req), "CONNECT %s HTTP/1.0" H_CRLF "Host: %s" H_CRLF,
|
|
authority, authority);
|
|
|
|
// creds go on the CONNECT, not the tunneled origin request
|
|
if (link_has_authorization(retour->req.proxy.name)) {
|
|
const char *a = jump_identification_const(retour->req.proxy.name);
|
|
const char *astart = jump_protocol_const(retour->req.proxy.name);
|
|
char autorisation[1100];
|
|
char user_pass[256];
|
|
|
|
autorisation[0] = user_pass[0] = '\0';
|
|
strncatbuff(user_pass, astart, (int) (a - astart) - 1);
|
|
strcpybuff(user_pass, unescape_http(OPT_GET_BUFF(opt),
|
|
OPT_GET_BUFF_SIZE(opt), user_pass));
|
|
code64((unsigned char *) user_pass, (int) strlen(user_pass),
|
|
(unsigned char *) autorisation, 0);
|
|
strlcatbuff(req, "Proxy-Authorization: Basic ", sizeof(req));
|
|
strlcatbuff(req, autorisation, sizeof(req));
|
|
strlcatbuff(req, H_CRLF, sizeof(req));
|
|
}
|
|
strlcatbuff(req, H_CRLF, sizeof(req)); // end of request headers
|
|
|
|
// raw send: ssl is set, so sendc() would route to TLS
|
|
{
|
|
const char *p = req;
|
|
size_t remain = strlen(req);
|
|
int stalls = 0;
|
|
|
|
while (remain > 0) {
|
|
const int n = (int) send(soc, p, (int) remain, 0);
|
|
|
|
if (n > 0) {
|
|
p += n;
|
|
remain -= (size_t) n;
|
|
stalls = 0;
|
|
} else {
|
|
#ifdef _WIN32
|
|
const int wouldblock = (WSAGetLastError() == WSAEWOULDBLOCK);
|
|
#else
|
|
const int wouldblock =
|
|
(errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR);
|
|
#endif
|
|
// don't spin forever on a fatal error or an unwritable socket
|
|
if (!wouldblock || !check_writeinput_t(soc, timeout) ||
|
|
++stalls > 100) {
|
|
strcpybuff(retour->msg, "proxy CONNECT: write error");
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// proxy status line: "HTTP/1.x <code> ..."
|
|
if (proxy_getline(soc, line, sizeof(line), timeout) < 0) {
|
|
strcpybuff(retour->msg, "proxy CONNECT: no response");
|
|
return 0;
|
|
}
|
|
if (sscanf(line, "HTTP/%*d.%*d %d", &code) < 1)
|
|
code = 0;
|
|
if (code < 200 || code >= 300) {
|
|
snprintf(retour->msg, sizeof(retour->msg), "proxy CONNECT refused: %s",
|
|
strnotempty(line) ? line : "(no status)");
|
|
return 0;
|
|
}
|
|
|
|
// drain headers to the blank line; cap the count so a flooding proxy can't
|
|
// stall the crawl
|
|
{
|
|
int headers = 0;
|
|
|
|
for (;;) {
|
|
const int n = proxy_getline(soc, line, sizeof(line), timeout);
|
|
|
|
if (n < 0) {
|
|
strcpybuff(retour->msg, "proxy CONNECT: truncated response");
|
|
return 0;
|
|
}
|
|
if (n == 0)
|
|
break; // blank line: tunnel ready
|
|
if (++headers > 64) {
|
|
strcpybuff(retour->msg, "proxy CONNECT: too many response headers");
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
// ouverture d'une liaison http, envoi d'une requète
|
|
// mode: 0 GET 1 HEAD [2 POST]
|
|
// treat: traiter header?
|
|
// waitconnect: attendre le connect()
|
|
// note: dans retour, on met les params du proxy
|
|
T_SOC http_xfopen(httrackp * opt, int mode, int treat, int waitconnect,
|
|
const char *xsend, const char *adr, const char *fil, htsblk * retour) {
|
|
//htsblk retour;
|
|
//int bufl=TAILLE_BUFFER; // 8Ko de buffer
|
|
T_SOC soc = INVALID_SOCKET;
|
|
char BIGSTK tempo_fil[HTS_URLMAXSIZE * 2];
|
|
|
|
//char *p,*q;
|
|
|
|
// retour prédéfini: erreur
|
|
if (retour) {
|
|
retour->adr = NULL;
|
|
retour->size = 0;
|
|
retour->msg[0] = '\0';
|
|
retour->statuscode = STATUSCODE_NON_FATAL; // a priori erreur non fatale
|
|
}
|
|
#if HDEBUG
|
|
printf("adr=%s\nfichier=%s\n", adr, fil);
|
|
#endif
|
|
|
|
// ouvrir liaison
|
|
#if HDEBUG
|
|
printf("Création d'une socket sur %s\n", adr);
|
|
#endif
|
|
|
|
#if CNXDEBUG
|
|
printf("..newhttp\n");
|
|
#endif
|
|
|
|
/* connexion */
|
|
if (retour) {
|
|
/* no proxy, or proxy not usable here (local file) */
|
|
if ((!(retour->req.proxy.active)) || (strcmp(adr, "file://") == 0)) {
|
|
soc = newhttp(opt, adr, retour, -1, waitconnect);
|
|
} else {
|
|
// to the proxy; https tunnels to the origin via CONNECT in back_wait
|
|
// (#85)
|
|
soc = newhttp(opt, retour->req.proxy.name, retour, retour->req.proxy.port,
|
|
waitconnect);
|
|
}
|
|
} else {
|
|
soc = newhttp(opt, adr, NULL, -1, waitconnect);
|
|
}
|
|
|
|
// copier index socket retour
|
|
if (retour)
|
|
retour->soc = soc;
|
|
|
|
/* Check for errors */
|
|
if (soc == INVALID_SOCKET) {
|
|
if (retour) {
|
|
if (retour->msg) {
|
|
if (!strnotempty(retour->msg)) {
|
|
#ifdef _WIN32
|
|
int last_errno = WSAGetLastError();
|
|
|
|
sprintf(retour->msg, "Connect error: %s", strerror(last_errno));
|
|
#else
|
|
int last_errno = errno;
|
|
|
|
sprintf(retour->msg, "Connect error: %s", strerror(last_errno));
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// --------------------
|
|
// court-circuit (court circuite aussi le proxy..)
|
|
// LOCAL_SOCKET_ID est une pseudo-socket locale
|
|
if (soc == LOCAL_SOCKET_ID) {
|
|
retour->is_file = 1; // fichier local
|
|
if (mode == 0) { // GET
|
|
|
|
// Test en cas de file:///C|...
|
|
if (!fexist
|
|
(fconv(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
|
unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), fil))))
|
|
if (fexist
|
|
(fconv
|
|
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
|
unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), fil + 1)))) {
|
|
strcpybuff(tempo_fil, fil + 1);
|
|
fil = tempo_fil;
|
|
}
|
|
// Ouvrir
|
|
retour->totalsize = fsize(fconv(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
|
unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), fil))); // taille du fichier
|
|
retour->msg[0] = '\0';
|
|
soc = INVALID_SOCKET;
|
|
if (retour->totalsize < 0)
|
|
strcpybuff(retour->msg, "Unable to open local file");
|
|
else {
|
|
// Note: On passe par un FILE* (plus propre)
|
|
//soc=open(fil,O_RDONLY,0); // en lecture seule!
|
|
retour->fp = FOPEN(fconv(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
|
unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), fil)), "rb"); // ouvrir
|
|
if (retour->fp == NULL)
|
|
soc = INVALID_SOCKET;
|
|
else
|
|
soc = LOCAL_SOCKET_ID;
|
|
}
|
|
retour->soc = soc;
|
|
if (soc != INVALID_SOCKET) {
|
|
retour->statuscode = HTTP_OK; // OK
|
|
strcpybuff(retour->msg, "OK");
|
|
guess_httptype_sized(opt, retour->contenttype,
|
|
sizeof(retour->contenttype), fil);
|
|
} else if (strnotempty(retour->msg) == 0)
|
|
strcpybuff(retour->msg, "Unable to open local file");
|
|
return soc; // renvoyer
|
|
} else { // HEAD ou POST : interdit sur un local!!!! (c'est idiot!)
|
|
strcpybuff(retour->msg, "Unexpected Head/Post local request");
|
|
soc = INVALID_SOCKET; // erreur
|
|
retour->soc = soc;
|
|
return soc;
|
|
}
|
|
}
|
|
// --------------------
|
|
|
|
if (soc != INVALID_SOCKET) {
|
|
char rcvd[1100];
|
|
|
|
rcvd[0] = '\0';
|
|
#if HDEBUG
|
|
printf("Ok, connexion réussie, id=%d\n", soc);
|
|
#endif
|
|
|
|
// connecté?
|
|
if (waitconnect) {
|
|
http_sendhead(opt, NULL, mode, xsend, adr, fil, NULL, NULL, retour);
|
|
}
|
|
|
|
if (soc != INVALID_SOCKET) {
|
|
|
|
#if HDEBUG
|
|
printf("Attente de la réponse:\n");
|
|
#endif
|
|
|
|
// si GET (réception d'un fichier), réceptionner en-tête d'abord,
|
|
// et ensuite le corps
|
|
// si POST on ne réceptionne rien du tout, c'est après que l'on fera
|
|
// une réception standard pour récupérer l'en tête
|
|
if ((treat) && (waitconnect)) { // traiter (attendre!) en-tête
|
|
// Réception de la status line et de l'en-tête (norme RFC1945)
|
|
|
|
// status-line à récupérer
|
|
finput(soc, rcvd, 1024);
|
|
if (strnotempty(rcvd) == 0)
|
|
finput(soc, rcvd, 1024); // "certains serveurs buggés envoient un \n au début" (RFC)
|
|
|
|
// traiter status-line
|
|
treatfirstline(retour, rcvd);
|
|
|
|
#if HDEBUG
|
|
printf("Status-Code=%d\n", retour->statuscode);
|
|
#endif
|
|
|
|
// en-tête
|
|
|
|
// header // ** !attention! HTTP/0.9 non supporté
|
|
do {
|
|
finput(soc, rcvd, 1024);
|
|
#if HDEBUG
|
|
printf(">%s\n", rcvd);
|
|
#endif
|
|
if (strnotempty(rcvd))
|
|
treathead(NULL, NULL, NULL, retour, rcvd); // traiter
|
|
|
|
} while(strnotempty(rcvd));
|
|
|
|
//rcvsize=-1; // forCER CHARGEMENT INCONNU
|
|
|
|
//if (retour)
|
|
// retour->totalsize=rcvsize;
|
|
|
|
} else { // si GET, on recevra l'en tête APRES
|
|
//rcvsize=-1; // on ne connait pas la taille de l'en-tête
|
|
if (retour)
|
|
retour->totalsize = -1;
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return soc;
|
|
}
|
|
|
|
/* Buffer printing: descriptor of a fixed-capacity character buffer that is
   filled incrementally (see print_buffer()). */
typedef struct buff_struct {
  /** Buffer **/
  char *buffer;
  /** Buffer capacity in bytes **/
  size_t capacity;
  /** Buffer write position ; MUST point to a valid \0. **/
  size_t pos;
} buff_struct;
|
|
|
|
static void print_buffer(buff_struct*const str, const char *format, ...)
  HTS_PRINTF_FUN(2, 3);

/* printf-style append at the current write position of a fixed buffer.
   The formatted text (terminating \0 included) must fit in the remaining
   room: any overflow trips an assertion instead of truncating.
   On return str->pos has advanced to the new terminating \0. */
static void print_buffer(buff_struct*const str, const char *format, ...) {
  va_list ap;
  char *dest;
  size_t room;
  size_t written;

  /* Sanity checks. */
  assertf(str != NULL);
  assertf(str->pos < str->capacity);

  /* Format into what is left of the buffer. */
  room = str->capacity - str->pos;
  dest = str->buffer + str->pos;
  va_start(ap, format);
  written = (size_t) vsnprintf(dest, room, format, ap);
  va_end(ap);
  assertf(written < room);

  /* Advance to the new end of string. */
  str->pos += strlen(dest);
  assertf(str->pos < str->capacity);
}
|
|
|
|
/* Append the request "Cookie:" header line for every stored cookie matching
|
|
domain/path. RFC 6265 form: bare "name=value" pairs joined by "; ", no
|
|
$Version/$Path attributes (those are RFC 2965 syntax that modern servers
|
|
reject, issue #151). Returns the number of cookies emitted. */
|
|
static int append_cookie_header(buff_struct *bstr, t_cookie *cookie,
|
|
const char *domain, const char *path) {
|
|
char buffer[8192];
|
|
char *b;
|
|
int cook = 0;
|
|
int max_cookies = 8;
|
|
|
|
if (cookie == NULL)
|
|
return 0;
|
|
b = cookie->data;
|
|
do {
|
|
b = cookie_find(b, "", domain, path); // next matching cookie
|
|
if (b != NULL) {
|
|
max_cookies--;
|
|
if (!cook) {
|
|
print_buffer(bstr, "Cookie: ");
|
|
cook = 1;
|
|
} else
|
|
print_buffer(bstr, "; ");
|
|
print_buffer(bstr, "%s", cookie_get(buffer, b, 5));
|
|
print_buffer(bstr, "=%s", cookie_get(buffer, b, 6));
|
|
b = cookie_nextfield(b);
|
|
}
|
|
} while (b != NULL && max_cookies > 0);
|
|
if (cook)
|
|
print_buffer(bstr, H_CRLF);
|
|
return cook;
|
|
}
|
|
|
|
/* Self-test entry for append_cookie_header(): build the request Cookie line
|
|
into dst (always NUL-terminated). Returns the number of cookies emitted. */
|
|
int http_cookie_header_selftest(t_cookie *cookie, const char *domain,
|
|
const char *path, char *dst, size_t dst_size) {
|
|
buff_struct bstr = {dst, dst_size, 0};
|
|
|
|
assertf(dst != NULL && dst_size > 0);
|
|
dst[0] = '\0';
|
|
return append_cookie_header(&bstr, cookie, domain, path);
|
|
}
|
|
|
|
// envoi d'une requète
|
|
int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
|
|
const char *xsend, const char *adr, const char *fil,
|
|
const char *referer_adr, const char *referer_fil,
|
|
htsblk * retour) {
|
|
char BIGSTK buffer_head_request[16384];
|
|
buff_struct bstr = { buffer_head_request, sizeof(buffer_head_request), 0 };
|
|
|
|
//int use_11=0; // HTTP 1.1 utilisé
|
|
int direct_url = 0; // ne pas analyser l'url (exemple: ftp://)
|
|
const char *search_tag = NULL;
|
|
|
|
// Initialize buffer
|
|
buffer_head_request[0] = '\0';
|
|
|
|
// header Date
|
|
//strcatbuff(buff,"Date: ");
|
|
//time_gmt_rfc822(buff); // obtenir l'heure au format rfc822
|
|
//sendc("\n");
|
|
//strcatbuff(buff,buff);
|
|
|
|
// possibilité non documentée: >post: et >postfile:
|
|
// si présence d'un tag >post: alors executer un POST
|
|
// exemple: http://www.example.com/test.cgi?foo>post:posteddata=10&foo=5
|
|
// si présence d'un tag >postfile: alors envoyer en tête brut contenu dans le fichier en question
|
|
// exemple: http://www.example.com/test.cgi?foo>postfile:post0.txt
|
|
search_tag = strstr(fil, POSTTOK ":");
|
|
if (!search_tag) {
|
|
search_tag = strstr(fil, POSTTOK "file:");
|
|
if (search_tag) { // postfile
|
|
if (mode == 0) { // GET!
|
|
FILE *fp =
|
|
FOPEN(unescape_http(OPT_GET_BUFF(opt),
|
|
OPT_GET_BUFF_SIZE(opt), search_tag + strlen(POSTTOK) + 5), "rb");
|
|
if (fp) {
|
|
char BIGSTK line[1100];
|
|
char BIGSTK protocol[256], url[HTS_URLMAXSIZE * 2], method[256];
|
|
|
|
linput(fp, line, 1000);
|
|
if (sscanf(line, "%s %s %s", method, url, protocol) == 3) {
|
|
size_t ret;
|
|
// selon que l'on a ou pas un proxy
|
|
if (retour->req.proxy.active) {
|
|
print_buffer(&bstr,
|
|
"%s http://%s%s %s\r\n", method, adr, url,
|
|
protocol);
|
|
} else {
|
|
print_buffer(&bstr,
|
|
"%s %s %s\r\n", method, url, protocol);
|
|
}
|
|
// lire le reste en brut
|
|
ret = fread(&bstr.buffer[bstr.pos],
|
|
bstr.capacity - bstr.pos, 1, fp);
|
|
if ((int) ret < 0) {
|
|
return -1;
|
|
}
|
|
bstr.pos += strlen(&bstr.buffer[bstr.pos]);
|
|
}
|
|
fclose(fp);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Fin postfile
|
|
|
|
if (bstr.pos == 0) { // PAS POSTFILE
|
|
// Type de requète?
|
|
if ((search_tag) && (mode == 0)) {
|
|
print_buffer(&bstr, "POST ");
|
|
} else if (mode == 0) { // GET
|
|
print_buffer(&bstr, "GET ");
|
|
} else { // if (mode==1) {
|
|
if (!retour->req.http11) // forcer HTTP/1.0
|
|
print_buffer(&bstr, "GET "); // certains serveurs (cgi) buggent avec HEAD
|
|
else
|
|
print_buffer(&bstr, "HEAD ");
|
|
}
|
|
|
|
// si on gère un proxy, il faut une Absolute URI: on ajoute avant http://www.adr.dom
|
|
if (retour->req.proxy.active && (strncmp(adr, "https://", 8) != 0)) {
|
|
if (!link_has_authority(adr)) { // default http
|
|
#if HDEBUG
|
|
printf("Proxy Use: for %s%s proxy %d port %d\n", adr, fil,
|
|
retour->req.proxy.name, retour->req.proxy.port);
|
|
#endif
|
|
print_buffer(&bstr, "http://%s", jump_identification_const(adr));
|
|
} else { // ftp:// en proxy http
|
|
#if HDEBUG
|
|
printf("Proxy Use for ftp: for %s%s proxy %d port %d\n", adr, fil,
|
|
retour->req.proxy.name, retour->req.proxy.port);
|
|
#endif
|
|
direct_url = 1; // ne pas analyser user/pass
|
|
print_buffer(&bstr, "%s", adr);
|
|
}
|
|
}
|
|
// NOM DU FICHIER
|
|
// on slash doit être présent en début, sinon attention aux bad request! (400)
|
|
if (*fil != '/')
|
|
print_buffer(&bstr, "/");
|
|
|
|
{
|
|
char BIGSTK tempo[HTS_URLMAXSIZE * 2];
|
|
|
|
tempo[0] = '\0';
|
|
if (search_tag)
|
|
strncatbuff(tempo, fil, (int) (search_tag - fil));
|
|
else
|
|
strcpybuff(tempo, fil);
|
|
inplace_escape_check_url(tempo, sizeof(tempo));
|
|
print_buffer(&bstr, "%s", tempo); // avec échappement
|
|
}
|
|
|
|
// protocole
|
|
if (!retour->req.http11) { // forcer HTTP/1.0
|
|
//use_11=0;
|
|
print_buffer(&bstr, " HTTP/1.0\x0d\x0a");
|
|
} else { // Requète 1.1
|
|
//use_11=1;
|
|
print_buffer(&bstr, " HTTP/1.1\x0d\x0a");
|
|
}
|
|
|
|
/* supplemental data */
|
|
if (xsend)
|
|
print_buffer(&bstr, "%s", xsend); // éventuelles autres lignes
|
|
|
|
// for https, auth rides the CONNECT (the tunneled GET would leak it)
|
|
if (retour->req.proxy.active && strncmp(adr, "https://", 8) != 0) {
|
|
if (link_has_authorization(retour->req.proxy.name)) { // et hop, authentification proxy!
|
|
const char *a = jump_identification_const(retour->req.proxy.name);
|
|
const char *astart = jump_protocol_const(retour->req.proxy.name);
|
|
char autorisation[1100];
|
|
char user_pass[256];
|
|
|
|
autorisation[0] = user_pass[0] = '\0';
|
|
//
|
|
strncatbuff(user_pass, astart, (int) (a - astart) - 1);
|
|
strcpybuff(user_pass, unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), user_pass));
|
|
code64((unsigned char *) user_pass, (int) strlen(user_pass),
|
|
(unsigned char *) autorisation, 0);
|
|
print_buffer(&bstr, "Proxy-Authorization: Basic %s"H_CRLF,
|
|
autorisation);
|
|
#if HDEBUG
|
|
printf("Proxy-Authenticate, %s (code: %s)\n", user_pass, autorisation);
|
|
#endif
|
|
}
|
|
}
|
|
// Referer?
|
|
if (referer_adr != NULL && referer_fil != NULL && strnotempty(referer_adr)
|
|
&& strnotempty(referer_fil)
|
|
) { // non vide
|
|
if ((strcmp(referer_adr, "file://") != 0)
|
|
&& ( /* no https referer to http urls */
|
|
(strncmp(referer_adr, "https://", 8) != 0) /* referer is not https */
|
|
||(strncmp(adr, "https://", 8) == 0) /* or referer AND addresses are https */
|
|
)
|
|
) { // PAS file://
|
|
print_buffer(&bstr, "Referer: http://%s%s"H_CRLF,
|
|
jump_identification_const(referer_adr), referer_fil);
|
|
}
|
|
}
|
|
// HTTP field: referer
|
|
else if (strnotempty(retour->req.referer)) {
|
|
print_buffer(&bstr, "Referer: %s"H_CRLF, retour->req.referer);
|
|
}
|
|
// POST?
|
|
if (mode == 0) { // GET!
|
|
if (search_tag) {
|
|
print_buffer(&bstr, "Content-length: %d" H_CRLF,
|
|
(int) (strlen
|
|
(unescape_http
|
|
(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
|
search_tag + strlen(POSTTOK) + 1))));
|
|
}
|
|
}
|
|
// send stored cookies matching this host/path
|
|
if (cookie) {
|
|
append_cookie_header(&bstr, cookie, jump_identification_const(adr), fil);
|
|
}
|
|
// gérer le keep-alive (garder socket)
|
|
if (retour->req.http11 && !retour->req.nokeepalive) {
|
|
print_buffer(&bstr, "Connection: keep-alive" H_CRLF);
|
|
} else {
|
|
print_buffer(&bstr, "Connection: close" H_CRLF);
|
|
}
|
|
|
|
{
|
|
const char *real_adr = jump_identification_const(adr);
|
|
|
|
// Mandatory per RFC2616
|
|
if (!direct_url) { // pas ftp:// par exemple
|
|
print_buffer(&bstr, "Host: %s"H_CRLF, real_adr);
|
|
}
|
|
|
|
// HTTP field: from
|
|
if (strnotempty(retour->req.from)) { // HTTP from
|
|
print_buffer(&bstr, "From: %s" H_CRLF, retour->req.from);
|
|
}
|
|
|
|
// Présence d'un user-agent?
|
|
if (retour->req.user_agent_send
|
|
&& strnotempty(retour->req.user_agent)) {
|
|
print_buffer(&bstr, "User-Agent: %s" H_CRLF, retour->req.user_agent);
|
|
}
|
|
|
|
// Accept
|
|
if (strnotempty(retour->req.accept)) {
|
|
print_buffer(&bstr, "Accept: %s" H_CRLF, retour->req.accept);
|
|
}
|
|
|
|
// Accept-language
|
|
if (strnotempty(retour->req.lang_iso)) {
|
|
print_buffer(&bstr, "Accept-Language: %s"H_CRLF, retour->req.lang_iso);
|
|
}
|
|
|
|
// Compression accepted ?
|
|
if (retour->req.http11) {
|
|
#if HTS_USEZLIB
|
|
if ((!retour->req.range_used)
|
|
&& (!retour->req.nocompression))
|
|
print_buffer(&bstr, "Accept-Encoding: " "gzip" /* gzip if the preffered encoding */
|
|
", " "identity;q=0.9" H_CRLF);
|
|
else
|
|
print_buffer(&bstr, "Accept-Encoding: identity" H_CRLF); /* no compression */
|
|
#else
|
|
print_buffer(&bstr, "Accept-Encoding: identity" H_CRLF); /* no compression */
|
|
#endif
|
|
}
|
|
|
|
/* Authentification */
|
|
{
|
|
char autorisation[1100];
|
|
const char *a;
|
|
|
|
autorisation[0] = '\0';
|
|
if (link_has_authorization(adr)) { // ohh une authentification!
|
|
const char *a = jump_identification_const(adr);
|
|
const char *astart = jump_protocol_const(adr);
|
|
|
|
if (!direct_url) { // pas ftp:// par exemple
|
|
char user_pass[256];
|
|
|
|
user_pass[0] = '\0';
|
|
strncatbuff(user_pass, astart, (int) (a - astart) - 1);
|
|
strcpybuff(user_pass,
|
|
unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), user_pass));
|
|
code64((unsigned char *) user_pass, (int) strlen(user_pass),
|
|
(unsigned char *) autorisation, 0);
|
|
if (strcmp(fil, "/robots.txt")) /* pas robots.txt */
|
|
bauth_add(cookie, astart, fil, autorisation);
|
|
}
|
|
} else if ((a = bauth_check(cookie, real_adr, fil)))
|
|
strcpybuff(autorisation, a);
|
|
/* On a une autorisation a donner? */
|
|
if (strnotempty(autorisation)) {
|
|
print_buffer(&bstr, "Authorization: Basic %s"H_CRLF, autorisation);
|
|
}
|
|
}
|
|
|
|
}
|
|
//strcatbuff(buff,"Accept-Charset: iso-8859-1,*,utf-8\n");
|
|
|
|
// Custom header(s)
|
|
if (strnotempty(retour->req.headers)) {
|
|
print_buffer(&bstr, "%s", retour->req.headers);
|
|
}
|
|
|
|
// CRLF de fin d'en tête
|
|
print_buffer(&bstr, H_CRLF);
|
|
|
|
// données complémentaires?
|
|
if (search_tag)
|
|
if (mode == 0) // GET!
|
|
print_buffer(&bstr, "%s",
|
|
unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
|
|
search_tag + strlen(POSTTOK) + 1));
|
|
}
|
|
#if HDEBUG
|
|
#endif
|
|
if (_DEBUG_HEAD) {
|
|
if (ioinfo) {
|
|
fprintf(ioinfo, "[%d] request for %s%s:\r\n", retour->debugid,
|
|
jump_identification_const(adr), fil);
|
|
fprintfio(ioinfo, bstr.buffer, "<<< ");
|
|
fprintf(ioinfo, "\r\n");
|
|
fflush(ioinfo);
|
|
}
|
|
} // Fin test pas postfile
|
|
//
|
|
|
|
// Callback
|
|
{
|
|
int test_head =
|
|
RUN_CALLBACK6(opt, sendhead, bstr.buffer, adr, fil, referer_adr, referer_fil,
|
|
retour);
|
|
if (test_head != 1) {
|
|
deletesoc_r(retour);
|
|
strcpybuff(retour->msg, "Header refused by external wrapper");
|
|
retour->soc = INVALID_SOCKET;
|
|
}
|
|
}
|
|
|
|
// Envoi
|
|
HTS_STAT.last_request = mtime_local();
|
|
if (sendc(retour, bstr.buffer) < 0) { // ERREUR, socket rompue?...
|
|
deletesoc_r(retour); // fermer tout de même
|
|
// et tenter de reconnecter
|
|
|
|
strcpybuff(retour->msg, "Write error");
|
|
retour->soc = INVALID_SOCKET;
|
|
}
|
|
|
|
// RX'98
|
|
return 0;
|
|
}
|
|
|
|
// traiter 1ere ligne d'en tête
|
|
void treatfirstline(htsblk * retour, const char *rcvd) {
|
|
const char *a = rcvd;
|
|
|
|
// exemple:
|
|
// HTTP/1.0 200 OK
|
|
if (*a) {
|
|
// note: certains serveurs buggés renvoient HTTP/1.0\n200 OK ou " HTTP/1.0 200 OK"
|
|
while((*a == ' ') || (*a == 10) || (*a == 13) || (*a == 9))
|
|
a++; // épurer espaces au début
|
|
if (strfield(a, "HTTP/")) {
|
|
// sauter HTTP/1.x
|
|
while((*a != ' ') && (*a != '\0') && (*a != 10) && (*a != 13)
|
|
&& (*a != 9))
|
|
a++;
|
|
if (*a != '\0') {
|
|
while((*a == ' ') || (*a == 10) || (*a == 13) || (*a == 9))
|
|
a++; // épurer espaces
|
|
if ((*a >= '0') && (*a <= '9')) {
|
|
sscanf(a, "%d", &(retour->statuscode));
|
|
// sauter 200
|
|
while((*a != ' ') && (*a != '\0') && (*a != 10) && (*a != 13)
|
|
&& (*a != 9))
|
|
a++;
|
|
while((*a == ' ') || (*a == 10) || (*a == 13) || (*a == 9))
|
|
a++; // épurer espaces
|
|
if ((strlen(a) > 1) && (strlen(a) < 64)) // message retour
|
|
strcpybuff(retour->msg, a);
|
|
else
|
|
infostatuscode(retour->msg, retour->statuscode);
|
|
// type MIME par défaut2
|
|
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
|
} else { // pas de code!
|
|
retour->statuscode = STATUSCODE_INVALID;
|
|
strcpybuff(retour->msg, "Unknown response structure");
|
|
}
|
|
} else { // euhh??
|
|
retour->statuscode = STATUSCODE_INVALID;
|
|
strcpybuff(retour->msg, "Unknown response structure");
|
|
}
|
|
} else {
|
|
if (*a == '<') {
|
|
/* This is dirty .. */
|
|
retour->statuscode = HTTP_OK;
|
|
retour->keep_alive = 0;
|
|
strcpybuff(retour->msg, "Unknown, assuming junky server");
|
|
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
|
} else if (strnotempty(a)) {
|
|
retour->statuscode = STATUSCODE_INVALID;
|
|
strcpybuff(retour->msg, "Unknown (not HTTP/xx) response structure");
|
|
} else {
|
|
/* This is dirty .. */
|
|
retour->statuscode = HTTP_OK;
|
|
retour->keep_alive = 0;
|
|
strcpybuff(retour->msg, "Unknown, assuming junky server");
|
|
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
|
}
|
|
}
|
|
} else { // vide!
|
|
/*
|
|
retour->statuscode=STATUSCODE_INVALID;
|
|
strcpybuff(retour->msg,"Empty reponse or internal error");
|
|
*/
|
|
/* This is dirty .. */
|
|
retour->statuscode = HTTP_OK;
|
|
strcpybuff(retour->msg, "Unknown, assuming junky server");
|
|
strcpybuff(retour->contenttype, HTS_UNKNOWN_MIME);
|
|
}
|
|
}
|
|
|
|
// traiter ligne par ligne l'en tête
|
|
// gestion des cookies
|
|
void treathead(t_cookie * cookie, const char *adr, const char *fil, htsblk * retour,
|
|
char *rcvd) {
|
|
int p;
|
|
|
|
if ((p = strfield(rcvd, "Content-length:")) != 0) {
|
|
#if HDEBUG
|
|
printf("ok, Content-length: détecté\n");
|
|
#endif
|
|
if (sscanf(rcvd + p, LLintP, &(retour->totalsize)) == 1) {
|
|
if (retour->totalsize == 0) {
|
|
retour->empty = 1;
|
|
}
|
|
}
|
|
} else if ((p = strfield(rcvd, "Content-Disposition:")) != 0) {
|
|
while(is_realspace(*(rcvd + p)))
|
|
p++; // sauter espaces
|
|
if ((int) strlen(rcvd + p) < 250) { // pas trop long?
|
|
char tmp[256];
|
|
char *a = NULL, *b = NULL;
|
|
|
|
strcpybuff(tmp, rcvd + p);
|
|
a = strstr(tmp, "filename=");
|
|
if (a) {
|
|
a += strlen("filename=");
|
|
while(is_space(*a))
|
|
a++;
|
|
//a=strchr(a,'"');
|
|
if (a) {
|
|
char *c = NULL;
|
|
|
|
//a++; /* jump " */
|
|
while((c = strchr(a, '/'))) /* skip all / (see RFC2616) */
|
|
a = c + 1;
|
|
//b=strchr(a+1,'"');
|
|
b = a + strlen(a) - 1;
|
|
while(is_space(*b))
|
|
b--;
|
|
b++;
|
|
if (b) {
|
|
*b = '\0';
|
|
if ((int) strlen(a) < 200) { // pas trop long?
|
|
strcpybuff(retour->cdispo, a);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else if ((p = strfield(rcvd, "Last-Modified:")) != 0) {
|
|
while(is_realspace(*(rcvd + p)))
|
|
p++; // sauter espaces
|
|
if ((int) strlen(rcvd + p) < 64) { // pas trop long?
|
|
//struct tm* tm_time=convert_time_rfc822(rcvd+p);
|
|
strcpybuff(retour->lastmodified, rcvd + p);
|
|
}
|
|
} else if ((p = strfield(rcvd, "Date:")) != 0) {
|
|
if (strnotempty(retour->lastmodified) == 0) { /* pas encore de last-modified */
|
|
while(is_realspace(*(rcvd + p)))
|
|
p++; // sauter espaces
|
|
if ((int) strlen(rcvd + p) < 64) { // pas trop long?
|
|
//struct tm* tm_time=convert_time_rfc822(rcvd+p);
|
|
strcpybuff(retour->lastmodified, rcvd + p);
|
|
}
|
|
}
|
|
} else if ((p = strfield(rcvd, "Etag:")) != 0) { /* Etag */
|
|
if (retour) {
|
|
while(is_realspace(*(rcvd + p)))
|
|
p++; // sauter espaces
|
|
if ((int) strlen(rcvd + p) < 64) // pas trop long?
|
|
strcpybuff(retour->etag, rcvd + p);
|
|
else // erreur.. ignorer
|
|
retour->etag[0] = '\0';
|
|
}
|
|
}
|
|
// else if ((p=strfield(rcvd,"Transfer-Encoding: chunked"))!=0) { // chunk!
|
|
else if ((p = strfield(rcvd, "Transfer-Encoding:")) != 0) { // chunk!
|
|
while(is_realspace(*(rcvd + p)))
|
|
p++; // sauter espaces
|
|
if (strfield(rcvd + p, "chunked")) {
|
|
retour->is_chunk = 1; // chunked
|
|
//retour->http11=2; // chunked
|
|
#if HDEBUG
|
|
printf("ok, Transfer-Encoding: détecté\n");
|
|
#endif
|
|
}
|
|
} else if ((p = strfield(rcvd, "Content-type:")) != 0) {
|
|
if (retour) {
|
|
char tempo[1100];
|
|
|
|
// éviter les text/html; charset=foo
|
|
{
|
|
char *a = strchr(rcvd + p, ';');
|
|
|
|
if (a) { // extended information
|
|
*a = '\0';
|
|
a++;
|
|
while(is_space(*a))
|
|
a++;
|
|
if (strfield(a, "charset")) {
|
|
a += 7;
|
|
while(is_space(*a))
|
|
a++;
|
|
if (*a == '=') {
|
|
a++;
|
|
while(is_space(*a))
|
|
a++;
|
|
if (*a == '\"')
|
|
a++;
|
|
while(is_space(*a))
|
|
a++;
|
|
if (*a) {
|
|
char *chs = a;
|
|
|
|
while(*a && !is_space(*a) && *a != '\"' && *a != ';')
|
|
a++;
|
|
*a = '\0';
|
|
if (*chs) {
|
|
if (strlen(chs) < sizeof(retour->charset) - 2) {
|
|
strcpybuff(retour->charset, chs);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// An empty/whitespace Content-Type value yields no token: keep the
|
|
// sentinel default rather than reading an uninitialized tempo.
|
|
if (sscanf(rcvd + p, "%s", tempo) == 1) {
|
|
if (strlen(tempo) < sizeof(retour->contenttype) - 2) // pas trop long!!
|
|
strcpybuff(retour->contenttype, tempo);
|
|
else
|
|
strcpybuff(retour->contenttype,
|
|
"application/octet-stream-unknown"); // erreur
|
|
}
|
|
}
|
|
} else if ((p = strfield(rcvd, "Content-Range:")) != 0) {
|
|
// Content-Range: bytes 0-70870/70871
|
|
const char *a;
|
|
|
|
for(a = rcvd + p; is_space(*a); a++) ;
|
|
if (strncasecmp(a, "bytes ", 6) == 0) {
|
|
for(a += 6; is_space(*a); a++) ;
|
|
if (sscanf
|
|
(a, LLintP "-" LLintP "/" LLintP, &retour->crange_start,
|
|
&retour->crange_end, &retour->crange) != 3) {
|
|
retour->crange_start = 0;
|
|
retour->crange_end = 0;
|
|
retour->crange = 0;
|
|
a = strchr(rcvd + p, '/');
|
|
if (a != NULL) {
|
|
a++;
|
|
if (sscanf(a, LLintP, &retour->crange) == 1) {
|
|
retour->crange_start = 0;
|
|
retour->crange_end = retour->crange - 1;
|
|
} else {
|
|
retour->crange = 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else if ((p = strfield(rcvd, "Connection:")) != 0) {
|
|
char *a = rcvd + p;
|
|
|
|
while(is_space(*a))
|
|
a++;
|
|
if (*a) {
|
|
if (strfield(a, "Keep-Alive")) {
|
|
if (!retour->keep_alive) {
|
|
retour->keep_alive_max = 10;
|
|
retour->keep_alive_t = 15;
|
|
}
|
|
retour->keep_alive = 1;
|
|
} else {
|
|
retour->keep_alive = 0;
|
|
}
|
|
}
|
|
} else if ((p = strfield(rcvd, "Keep-Alive:")) != 0) {
|
|
char *a = rcvd + p;
|
|
|
|
while(is_space(*a))
|
|
a++;
|
|
if (*a) {
|
|
char *p;
|
|
|
|
retour->keep_alive = 1;
|
|
retour->keep_alive_max = 10;
|
|
retour->keep_alive_t = 15;
|
|
if ((p = strstr(a, "timeout="))) {
|
|
p += strlen("timeout=");
|
|
sscanf(p, "%d", &retour->keep_alive_t);
|
|
}
|
|
if ((p = strstr(a, "max="))) {
|
|
p += strlen("max=");
|
|
sscanf(p, "%d", &retour->keep_alive_max);
|
|
}
|
|
if (retour->keep_alive_max <= 1 || retour->keep_alive_t < 1) {
|
|
retour->keep_alive = 0;
|
|
}
|
|
}
|
|
} else if ((p = strfield(rcvd, "TE:")) != 0) {
|
|
char *a = rcvd + p;
|
|
|
|
while(is_space(*a))
|
|
a++;
|
|
if (*a) {
|
|
if (strfield(a, "trailers")) {
|
|
retour->keep_alive_trailers = 1;
|
|
}
|
|
}
|
|
} else if ((p = strfield(rcvd, "Content-Encoding:")) != 0) {
|
|
if (retour) {
|
|
char tempo[1100];
|
|
char *a = rcvd + p;
|
|
|
|
while(is_space(*a))
|
|
a++;
|
|
{
|
|
char *a = strchr(rcvd + p, ';');
|
|
|
|
if (a)
|
|
*a = '\0';
|
|
}
|
|
sscanf(a, "%s", tempo);
|
|
if (strlen(tempo) < 64) // pas trop long!!
|
|
strcpybuff(retour->contentencoding, tempo);
|
|
else
|
|
retour->contentencoding[0] = '\0'; // erreur
|
|
#if HTS_USEZLIB
|
|
/* Check known encodings */
|
|
if (retour->contentencoding[0]) {
|
|
if ((strfield2(retour->contentencoding, "gzip"))
|
|
|| (strfield2(retour->contentencoding, "x-gzip"))
|
|
/*
|
|
|| (strfield2(retour->contentencoding, "compress"))
|
|
|| (strfield2(retour->contentencoding, "x-compress"))
|
|
*/
|
|
|| (strfield2(retour->contentencoding, "deflate"))
|
|
|| (strfield2(retour->contentencoding, "x-deflate"))
|
|
) {
|
|
retour->compressed = 1;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
} else if ((p = strfield(rcvd, "Location:")) != 0) {
|
|
if (retour) {
|
|
if (retour->location) {
|
|
while(is_realspace(*(rcvd + p)))
|
|
p++; // sauter espaces
|
|
if ((int) strlen(rcvd + p) < HTS_URLMAXSIZE) // not too long?
|
|
/* location aliases location_buffer[HTS_URLMAXSIZE * 2] */
|
|
strlcpybuff(retour->location, rcvd + p, HTS_URLMAXSIZE * 2);
|
|
else // erreur.. ignorer
|
|
retour->location[0] = '\0';
|
|
}
|
|
}
|
|
} else if (((p = strfield(rcvd, "Set-Cookie:")) != 0) && (cookie)) { // ohh un cookie
|
|
char *a = rcvd + p; // pointeur
|
|
char domain[256]; // domaine cookie (.netscape.com)
|
|
char path[256]; // chemin (/)
|
|
char cook_name[256]; // nom cookie (MYCOOK)
|
|
char BIGSTK cook_value[8192]; // valeur (ID=toto,S=1234)
|
|
|
|
#if DEBUG_COOK
|
|
printf("set-cookie detected\n");
|
|
#endif
|
|
while(*a) {
|
|
char *token_st, *token_end;
|
|
char *value_st, *value_end;
|
|
char name[256];
|
|
char BIGSTK value[8192];
|
|
int next = 0;
|
|
|
|
name[0] = value[0] = '\0';
|
|
//
|
|
|
|
// initialiser cookie lu actuellement
|
|
if (adr)
|
|
strcpybuff(domain, jump_identification_const(adr)); // domaine
|
|
strcpybuff(path, "/"); // chemin (/)
|
|
strcpybuff(cook_name, ""); // nom cookie (MYCOOK)
|
|
strcpybuff(cook_value, ""); // valeur (ID=toto,S=1234)
|
|
// boucler jusqu'au prochain cookie ou la fin
|
|
do {
|
|
char *start_loop = a;
|
|
|
|
while(is_space(*a))
|
|
a++; // sauter espaces
|
|
token_st = a; // départ token
|
|
while((!is_space(*a)) && (*a) && (*a != ';') && (*a != '='))
|
|
a++; // arrêter si espace, point virgule
|
|
token_end = a;
|
|
while(is_space(*a))
|
|
a++; // sauter espaces
|
|
if (*a == '=') { // name=value
|
|
a++;
|
|
while(is_space(*a))
|
|
a++; // sauter espaces
|
|
value_st = a;
|
|
while((*a != ';') && (*a))
|
|
a++; // prochain ;
|
|
//while( ((*a!='"') || (*(a-1)=='\\')) && (*a)) a++; // prochain " (et pas \")
|
|
value_end = a;
|
|
//if (*a==';') { // finit par un ;
|
|
// vérifier débordements
|
|
if ((((int) (token_end - token_st)) < 200)
|
|
&& (((int) (value_end - value_st)) < 8000)
|
|
&& (((int) (token_end - token_st)) > 0)
|
|
&& (((int) (value_end - value_st)) > 0)) {
|
|
int name_len = (int) (token_end - token_st);
|
|
int value_len = (int) (value_end - value_st);
|
|
|
|
name[0] = '\0';
|
|
value[0] = '\0';
|
|
strncatbuff(name, token_st, name_len);
|
|
strncatbuff(value, value_st, value_len);
|
|
#if DEBUG_COOK
|
|
printf("detected cookie-av: name=\"%s\" value=\"%s\"\n", name,
|
|
value);
|
|
#endif
|
|
if (strfield2(name, "domain")) {
|
|
if (value_len < sizeof(domain) - 1) {
|
|
strcpybuff(domain, value);
|
|
} else {
|
|
cook_name[0] = 0;
|
|
break;
|
|
}
|
|
} else if (strfield2(name, "path")) {
|
|
if (value_len < sizeof(path) - 1) {
|
|
strcpybuff(path, value);
|
|
} else {
|
|
cook_name[0] = 0;
|
|
break;
|
|
}
|
|
} else if (strfield2(name, "max-age")) {
|
|
// ignoré..
|
|
} else if (strfield2(name, "expires")) {
|
|
// ignoré..
|
|
} else if (strfield2(name, "version")) {
|
|
// ignoré..
|
|
} else if (strfield2(name, "comment")) {
|
|
// ignoré
|
|
} else if (strfield2(name, "secure")) { // ne devrait pas arriver ici
|
|
// ignoré
|
|
} else {
|
|
if (value_len < sizeof(cook_value) - 1
|
|
&& name_len < sizeof(cook_name) - 1) {
|
|
if (strnotempty(cook_name) == 0) { // noter premier: nom et valeur cookie
|
|
strcpybuff(cook_name, name);
|
|
strcpybuff(cook_value, value);
|
|
} else { // prochain cookie
|
|
a = start_loop; // on devra recommencer à cette position
|
|
next = 1; // enregistrer
|
|
}
|
|
} else {
|
|
cook_name[0] = 0;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (!next) {
|
|
while((*a != ';') && (*a))
|
|
a++; // prochain
|
|
while(*a == ';')
|
|
a++; // sauter ;
|
|
}
|
|
} while((*a) && (!next));
|
|
if (strnotempty(cook_name)) { // cookie?
|
|
#if DEBUG_COOK
|
|
printf
|
|
("new cookie: name=\"%s\" value=\"%s\" domain=\"%s\" path=\"%s\"\n",
|
|
cook_name, cook_value, domain, path);
|
|
#endif
|
|
cookie_add(cookie, cook_name, cook_value, domain, path);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// HTTP status code -> reason phrase (per RFC), or NULL if unknown.
|
|
HTSEXT_API const char *infostatuscode_const(int statuscode) {
|
|
// O(1) dispatch (the compiler builds a jump table); the phrases are static.
|
|
switch (statuscode) {
|
|
case 100:
|
|
return "Continue";
|
|
case 101:
|
|
return "Switching Protocols";
|
|
case 200:
|
|
return "OK";
|
|
case 201:
|
|
return "Created";
|
|
case 202:
|
|
return "Accepted";
|
|
case 203:
|
|
return "Non-Authoritative Information";
|
|
case 204:
|
|
return "No Content";
|
|
case 205:
|
|
return "Reset Content";
|
|
case 206:
|
|
return "Partial Content";
|
|
case 300:
|
|
return "Multiple Choices";
|
|
case 301:
|
|
return "Moved Permanently";
|
|
case 302:
|
|
return "Moved Temporarily";
|
|
case 303:
|
|
return "See Other";
|
|
case 304:
|
|
return "Not Modified";
|
|
case 305:
|
|
return "Use Proxy";
|
|
case 306:
|
|
return "Undefined 306 error";
|
|
case 307:
|
|
return "Temporary Redirect";
|
|
case 400:
|
|
return "Bad Request";
|
|
case 401:
|
|
return "Unauthorized";
|
|
case 402:
|
|
return "Payment Required";
|
|
case 403:
|
|
return "Forbidden";
|
|
case 404:
|
|
return "Not Found";
|
|
case 405:
|
|
return "Method Not Allowed";
|
|
case 406:
|
|
return "Not Acceptable";
|
|
case 407:
|
|
return "Proxy Authentication Required";
|
|
case 408:
|
|
return "Request Time-out";
|
|
case 409:
|
|
return "Conflict";
|
|
case 410:
|
|
return "Gone";
|
|
case 411:
|
|
return "Length Required";
|
|
case 412:
|
|
return "Precondition Failed";
|
|
case 413:
|
|
return "Request Entity Too Large";
|
|
case 414:
|
|
return "Request-URI Too Large";
|
|
case 415:
|
|
return "Unsupported Media Type";
|
|
case 416:
|
|
return "Requested Range Not Satisfiable";
|
|
case 417:
|
|
return "Expectation Failed";
|
|
case 500:
|
|
return "Internal Server Error";
|
|
case 501:
|
|
return "Not Implemented";
|
|
case 502:
|
|
return "Bad Gateway";
|
|
case 503:
|
|
return "Service Unavailable";
|
|
case 504:
|
|
return "Gateway Time-out";
|
|
case 505:
|
|
return "HTTP Version Not Supported";
|
|
default:
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
// Write the status code's reason phrase into msg. For an unknown code, keep any
|
|
// caller-provided message, otherwise fall back to a default. Callers provide a
|
|
// buffer of at least 64 bytes (the longest reason phrase is 31).
|
|
HTSEXT_API void infostatuscode(char *msg, int statuscode) {
|
|
const char *const text = infostatuscode_const(statuscode);
|
|
|
|
if (text != NULL) {
|
|
strlcpybuff(msg, text, 64);
|
|
} else if (strnotempty(msg) == 0) {
|
|
strlcpybuff(msg, "Unknown error", 64);
|
|
}
|
|
}
|
|
|
|
// check if data is available
|
|
int check_readinput(htsblk * r) {
|
|
if (r->soc != INVALID_SOCKET) {
|
|
fd_set fds; // poll structures
|
|
struct timeval tv; // structure for select
|
|
const int soc = (int) r->soc;
|
|
|
|
assertf(soc == r->soc);
|
|
FD_ZERO(&fds);
|
|
FD_SET(soc, &fds);
|
|
tv.tv_sec = 0;
|
|
tv.tv_usec = 0;
|
|
select(soc + 1, &fds, NULL, NULL, &tv);
|
|
if (FD_ISSET(soc, &fds))
|
|
return 1;
|
|
else
|
|
return 0;
|
|
} else
|
|
return 0;
|
|
}
|
|
|
|
// check if data is available
|
|
int check_readinput_t(T_SOC soc, int timeout) {
|
|
if (soc != INVALID_SOCKET) {
|
|
fd_set fds; // poll structures
|
|
struct timeval tv; // structure for select
|
|
const int isoc = (int) soc;
|
|
|
|
assertf(isoc == soc);
|
|
FD_ZERO(&fds);
|
|
FD_SET(isoc, &fds);
|
|
tv.tv_sec = timeout;
|
|
tv.tv_usec = 0;
|
|
select(isoc + 1, &fds, NULL, NULL, &tv);
|
|
if (FD_ISSET(isoc, &fds))
|
|
return 1;
|
|
else
|
|
return 0;
|
|
} else
|
|
return 0;
|
|
}
|
|
|
|
// wait until the socket is writable, up to timeout seconds
|
|
int check_writeinput_t(T_SOC soc, int timeout) {
|
|
if (soc != INVALID_SOCKET) {
|
|
fd_set fds;
|
|
struct timeval tv;
|
|
const int isoc = (int) soc;
|
|
|
|
assertf(isoc == soc);
|
|
FD_ZERO(&fds);
|
|
FD_SET(isoc, &fds);
|
|
tv.tv_sec = timeout;
|
|
tv.tv_usec = 0;
|
|
select(isoc + 1, NULL, &fds, NULL, &tv);
|
|
return FD_ISSET(isoc, &fds) ? 1 : 0;
|
|
} else
|
|
return 0;
|
|
}
|
|
|
|
// idem, sauf qu'ici on peut choisir la taille max de données à recevoir
|
|
// SI bufl==0 alors le buffer est censé être de 8kos, et on recoit par bloc de lignes
|
|
// en éliminant les cr (ex: header), arrêt si double-lf
|
|
// SI bufl==-1 alors le buffer est censé être de 8kos, et on recoit ligne par ligne
|
|
// en éliminant les cr (ex: header), arrêt si double-lf
|
|
// Note: les +1 dans les malloc sont dûs à l'octet nul rajouté en fin de fichier
|
|
LLint http_xfread1(htsblk * r, int bufl) {
|
|
int nl = -1;
|
|
|
|
// EOF
|
|
if (r->totalsize >= 0 && r->size == r->totalsize) {
|
|
return READ_EOF;
|
|
}
|
|
|
|
if (bufl > 0) {
|
|
if (!r->is_write) { // stocker en mémoire
|
|
if (r->totalsize >= 0) { // totalsize déterminé ET ALLOUE
|
|
if (r->adr == NULL) {
|
|
r->adr = (char *) malloct((size_t) r->totalsize + 1);
|
|
r->size = 0;
|
|
}
|
|
if (r->adr != NULL) {
|
|
// lecture
|
|
const size_t req_size = r->totalsize - r->size;
|
|
|
|
nl = req_size > 0 ? hts_read(r, r->adr + ((int) r->size), (int) req_size) : 0; /* NO 32 bit overlow possible here (no 4GB html!) */
|
|
// nouvelle taille
|
|
if (nl >= 0)
|
|
r->size += nl;
|
|
|
|
/*
|
|
if (r->size >= r->totalsize)
|
|
nl = -1; // break
|
|
*/
|
|
|
|
r->adr[r->size] = '\0'; // caractère NULL en fin au cas où l'on traite des HTML
|
|
}
|
|
|
|
} else { // inconnu..
|
|
// réserver de la mémoire?
|
|
if (r->adr == NULL) {
|
|
#if HDEBUG
|
|
printf("..alloc xfread\n");
|
|
#endif
|
|
r->adr = (char *) malloct(bufl + 1);
|
|
r->size = 0;
|
|
} else {
|
|
#if HDEBUG
|
|
printf("..realloc xfread1\n");
|
|
#endif
|
|
r->adr = (char *) realloct(r->adr, (int) r->size + bufl + 1);
|
|
}
|
|
|
|
if (r->adr != NULL) {
|
|
// lecture
|
|
nl = hts_read(r, r->adr + (int) r->size, bufl);
|
|
if (nl > 0) {
|
|
// resize
|
|
r->adr = (char *) realloct(r->adr, (int) r->size + nl + 1);
|
|
// nouvelle taille
|
|
r->size += nl;
|
|
// octet nul
|
|
if (r->adr)
|
|
r->adr[r->size] = '\0';
|
|
|
|
} // sinon on a fini
|
|
#if HDEBUG
|
|
else if (nl < 0)
|
|
printf("..end read (%d)\n", nl);
|
|
#endif
|
|
}
|
|
#if HDEBUG
|
|
else
|
|
printf("..-> error\n");
|
|
#endif
|
|
}
|
|
|
|
// pas de adr=erreur
|
|
if (r->adr == NULL)
|
|
nl = READ_ERROR;
|
|
|
|
} else { // stocker sur disque
|
|
char *buff;
|
|
|
|
buff = (char *) malloct(bufl);
|
|
if (buff != NULL) {
|
|
// lecture
|
|
nl = hts_read(r, buff, bufl);
|
|
// nouvelle taille
|
|
if (nl > 0) {
|
|
r->size += nl;
|
|
if (fwrite(buff, 1, nl, r->out) != nl) {
|
|
r->statuscode = STATUSCODE_INVALID;
|
|
strcpybuff(r->msg, "Write error on disk");
|
|
nl = READ_ERROR;
|
|
}
|
|
}
|
|
//if ((nl < 0) || ((r->totalsize>0) && (r->size >= r->totalsize)))
|
|
// nl=-1; // break
|
|
|
|
// libérer bloc tempo
|
|
freet(buff);
|
|
} else
|
|
nl = READ_ERROR;
|
|
|
|
if ((nl < 0) && (r->out != NULL)) {
|
|
fflush(r->out);
|
|
}
|
|
|
|
} // stockage disque ou mémoire
|
|
|
|
} else if (bufl == -2) { // force reserve
|
|
if (r->adr == NULL) {
|
|
r->adr = (char *) malloct(8192);
|
|
r->size = 0;
|
|
return 0;
|
|
}
|
|
return -1;
|
|
} else { // réception d'un en-tête octet par octet
|
|
int count = 256;
|
|
int tot_nl = 0;
|
|
int lf_detected = 0;
|
|
int at_beginning = 1;
|
|
|
|
do {
|
|
nl = READ_INTERNAL_ERROR;
|
|
count--;
|
|
if (r->adr == NULL) {
|
|
r->adr = (char *) malloct(8192);
|
|
r->size = 0;
|
|
}
|
|
if (r->adr != NULL) {
|
|
if (r->size < 8190) {
|
|
// lecture
|
|
nl = hts_read(r, r->adr + r->size, 1);
|
|
if (nl > 0) {
|
|
// exit if:
|
|
// lf detected AND already detected before
|
|
// or
|
|
// lf detected AND first character read
|
|
if (*(r->adr + r->size) == 10) {
|
|
if (lf_detected || (at_beginning) || (bufl < 0))
|
|
count = -1;
|
|
lf_detected = 1;
|
|
}
|
|
if (*(r->adr + r->size) != 13) { // sauter caractères 13
|
|
if ((*(r->adr + r->size) != 10)
|
|
&& (*(r->adr + r->size) != 13)
|
|
) {
|
|
// restart for new line
|
|
lf_detected = 0;
|
|
}
|
|
(r->size)++;
|
|
at_beginning = 0;
|
|
}
|
|
*(r->adr + r->size) = '\0'; // terminer par octet nul
|
|
}
|
|
}
|
|
}
|
|
if (nl >= 0) {
|
|
tot_nl += nl;
|
|
if (!check_readinput(r))
|
|
count = -1;
|
|
}
|
|
} while((nl >= 0) && (count > 0));
|
|
if (nl >= 0) {
|
|
nl = tot_nl;
|
|
}
|
|
}
|
|
// EOF
|
|
if (r->totalsize >= 0 && r->size == r->totalsize) {
|
|
return READ_EOF;
|
|
} else {
|
|
return nl;
|
|
}
|
|
}
|
|
|
|
// teste si une URL (validité, header, taille)
|
|
// retourne 200 ou le code d'erreur (404=NOT FOUND, etc)
|
|
// en cas de moved xx, dans location
|
|
// abandonne désormais au bout de 30 secondes (aurevoir les sites
|
|
// qui nous font poireauter 5 heures..) -> -2=timeout
|
|
htsblk http_test(httrackp * opt, const char *adr, const char *fil, char *loc) {
|
|
T_SOC soc;
|
|
htsblk retour;
|
|
|
|
//int rcvsize=-1;
|
|
//char* rcv=NULL; // adresse de retour
|
|
//int bufl=TAILLE_BUFFER; // 8Ko de buffer
|
|
TStamp tl;
|
|
int timeout = 30; // timeout pour un check (arbitraire) // **
|
|
|
|
// pour abandonner un site trop lent
|
|
tl = time_local();
|
|
|
|
loc[0] = '\0';
|
|
hts_init_htsblk(&retour);
|
|
//memset(&retour, 0, sizeof(htsblk)); // effacer
|
|
retour.location = loc; // si non nul, contiendra l'adresse véritable en cas de moved xx
|
|
|
|
//soc=http_fopen(adr,fil,&retour,NULL); // ouvrir, + header
|
|
|
|
// on ouvre en head, et on traite l'en tête
|
|
soc = http_xfopen(opt, 1, 0, 1, NULL, adr, fil, &retour); // ouvrir HEAD, + envoi header
|
|
|
|
if (soc != INVALID_SOCKET) {
|
|
int e = 0;
|
|
|
|
// tant qu'on a des données, et qu'on ne recoit pas deux LF, et que le timeout n'arrie pas
|
|
do {
|
|
if (http_xfread1(&retour, 0) < 0)
|
|
e = 1;
|
|
else {
|
|
if (retour.adr != NULL) {
|
|
if ((retour.adr[retour.size - 1] != 10)
|
|
|| (retour.adr[retour.size - 2] != 10))
|
|
e = 1;
|
|
}
|
|
}
|
|
|
|
if (!e) {
|
|
if ((time_local() - tl) >= timeout) {
|
|
e = -1;
|
|
}
|
|
}
|
|
|
|
} while(!e);
|
|
|
|
if (e == 1) {
|
|
if (adr != NULL) {
|
|
int ptr = 0;
|
|
char rcvd[1100];
|
|
|
|
// note: en gros recopie du traitement de back_wait()
|
|
//
|
|
|
|
// ----------------------------------------
|
|
// traiter en-tête!
|
|
// status-line à récupérer
|
|
ptr += binput(retour.adr + ptr, rcvd, 1024);
|
|
if (strnotempty(rcvd) == 0)
|
|
ptr += binput(retour.adr + ptr, rcvd, 1024); // "certains serveurs buggés envoient un \n au début" (RFC)
|
|
|
|
// traiter status-line
|
|
treatfirstline(&retour, rcvd);
|
|
|
|
#if HDEBUG
|
|
printf("(Buffer) Status-Code=%d\n", retour.statuscode);
|
|
#endif
|
|
|
|
// en-tête
|
|
|
|
// header // ** !attention! HTTP/0.9 non supporté
|
|
do {
|
|
ptr += binput(retour.adr + ptr, rcvd, 1024);
|
|
#if HDEBUG
|
|
printf("(buffer)>%s\n", rcvd);
|
|
#endif
|
|
if (strnotempty(rcvd))
|
|
treathead(NULL, NULL, NULL, &retour, rcvd); // traiter
|
|
|
|
} while(strnotempty(rcvd));
|
|
// ----------------------------------------
|
|
|
|
// libérer mémoire
|
|
if (retour.adr != NULL) {
|
|
freet(retour.adr);
|
|
retour.adr = NULL;
|
|
}
|
|
}
|
|
} else {
|
|
retour.statuscode = STATUSCODE_TIMEOUT;
|
|
strcpybuff(retour.msg, "Timeout While Testing");
|
|
}
|
|
|
|
#if HTS_DEBUG_CLOSESOCK
|
|
DEBUG_W("http_test: deletehttp\n");
|
|
#endif
|
|
deletehttp(&retour);
|
|
retour.soc = INVALID_SOCKET;
|
|
}
|
|
return retour;
|
|
}
|
|
|
|
// Crée un lien (http) vers une adresse internet iadr
|
|
// retour: structure (adresse, taille, message si erreur (si !adr))
|
|
// peut ouvrir avec des connect() non bloquants: waitconnect=0/1
|
|
T_SOC newhttp(httrackp * opt, const char *_iadr, htsblk * retour, int port,
|
|
int waitconnect) {
|
|
return newhttp_addr(opt, _iadr, retour, port, waitconnect, 0, NULL);
|
|
}
|
|
|
|
/* Create a socket towards host _iadr and connect it.
   opt:         options handle, passed to the resolver.
   _iadr:       host, optionally prefixed by "id:pass@" and, when port is -1,
                optionally suffixed by ":port". The exact string "file://"
                yields LOCAL_SOCKET_ID without any network activity.
   retour:      receives address/address_size, debugid, and msg on failure.
                Must not be NULL (it is dereferenced unconditionally).
   port:        port to use, or -1 to derive it (":port" suffix if present,
                else 443 when retour->ssl is set in OpenSSL builds, else 80).
   waitconnect: non-zero for a blocking connect; zero switches the socket to
                non-blocking mode and a non-zero connect() result is not
                reported as an error here.
   addr_index:  which resolved address to use.
   addr_count:  if non-NULL, receives the number of resolved addresses
                (0 for "file://").
   Returns the socket, or INVALID_SOCKET on failure. */
T_SOC newhttp_addr(httrackp *opt, const char *_iadr, htsblk *retour, int port,
                   int waitconnect, int addr_index, int *addr_count) {
  T_SOC soc;                    // socket descriptor

  if (addr_count != NULL) {
    *addr_count = 0;
  }

  if (strcmp(_iadr, "file://") != 0) {  /* not a local file */
    SOCaddr server;
    SOCaddr addrs[HTS_MAXADDRNUM];
    int naddr;
    const char *error = "unknown error";

    // skip a possible id:pass@ prefix
    const char *const iadr = jump_identification_const(_iadr);
    const char *resolve_host = iadr;
    char BIGSTK iadr2[HTS_URLMAXSIZE * 2];

    SOCaddr_clear(server);

#if HDEBUG
    printf("gethostbyname\n");
#endif

    // look for a possible port
    if (port == -1) {
      const char *a = jump_toport_const(iadr);

#if HTS_USEOPENSSL
      if (retour->ssl)
        port = 443;
      else
        port = 80;              // default port
#else
      port = 80;                // default port
#endif

      if (a != NULL) {
        int i = -1;

        iadr2[0] = '\0';
        sscanf(a + 1, "%d", &i);
        if (i != -1) {
          port = (unsigned short int) i;
        }

        // real address (without :xx)
        strncatbuff(iadr2, iadr, (int) (a - iadr));
        resolve_host = iadr2;
      }
    }

    // resolve the full address list and pick the requested candidate; the
    // scheduler retries the next index when a connect fails (dead IPv6 etc.)
    naddr =
      hts_dns_resolve_all(opt, resolve_host, addrs, HTS_MAXADDRNUM, &error);
    if (addr_count != NULL) {
      *addr_count = naddr;
    }
    if (addr_index >= 0 && addr_index < naddr) {
      SOCaddr_copy_SOCaddr(server, addrs[addr_index]);
    }

    if (!SOCaddr_is_valid(server)) {
#if DEBUG
      printf("erreur gethostbyname\n");
#endif
      if (retour != NULL) {
        snprintf(retour->msg, sizeof(retour->msg),
                 "Unable to get server's address: %s", error);
      }
      return INVALID_SOCKET;
    }

    // make a copy for external clients
    SOCaddr_copy_SOCaddr(retour->address, server);
    retour->address_size = SOCaddr_size(retour->address);

    // create a stream socket for the resolved address family
#if HDEBUG
    printf("socket\n");
#endif
#if HTS_WIDE_DEBUG
    DEBUG_W("socket\n");
#endif
    soc = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0);
    if (retour != NULL) {
      retour->debugid = HTS_STAT.stat_sockid++;
    }
#if HTS_WIDE_DEBUG
    DEBUG_W("socket()=%d\n" _(int) soc);
#endif
    if (soc == INVALID_SOCKET) {
      if (retour != NULL) {
        // capture the error code before any other call can overwrite it
#ifdef _WIN32
        const int last_errno = WSAGetLastError();
#else
        const int last_errno = errno;
#endif

        // bounded write: the message length depends on strerror()
        snprintf(retour->msg, sizeof(retour->msg),
                 "Unable to create a socket: %s", strerror(last_errno));
      }
      return INVALID_SOCKET;    // socket creation failed
    }
    // bind to the requested local address, if any
    if (retour != NULL && strnotempty(retour->req.proxy.bindhost)) {
      const char *bind_error = "unknown error";
      SOCaddr bind_addr;

      if (hts_dns_resolve2(opt, retour->req.proxy.bindhost,
                           &bind_addr, &bind_error) == NULL
          || bind(soc, &SOCaddr_sockaddr(bind_addr),
                  SOCaddr_size(bind_addr)) != 0) {
        snprintf(retour->msg, sizeof(retour->msg),
                 "Unable to bind the specificied server address: %s",
                 bind_error);
        deletesoc(soc);
        return INVALID_SOCKET;
      }
    }
    // set the destination port (80 or other)
    SOCaddr_initport(server, port);
#if HDEBUG
    printf("==%d\n", soc);
#endif

    // non-blocking connect requested?
    if (!waitconnect) {
#ifdef _WIN32
      unsigned long p = 1;      // non-blocking
      if (ioctlsocket(soc, FIONBIO, &p)) {
        const int last_errno = WSAGetLastError();
        snprintf(retour->msg, sizeof(retour->msg),
                 "Non-blocking socket failed: %s", strerror(last_errno));
        deletesoc(soc);
        return INVALID_SOCKET;
      }
#else
      const int flags = fcntl(soc, F_GETFL, 0);
      if (flags == -1 || fcntl(soc, F_SETFL, flags | O_NONBLOCK) == -1) {
        snprintf(retour->msg, sizeof(retour->msg),
                 "Non-blocking socket failed: %s", strerror(errno));
        deletesoc(soc);
        return INVALID_SOCKET;
      }
#endif
    }
    // connect to the server itself
#if HDEBUG
    printf("connect\n");
#endif
    HTS_STAT.last_connect = mtime_local();

#if HTS_WIDE_DEBUG
    DEBUG_W("connect\n");
#endif
    if (connect(soc, &SOCaddr_sockaddr(server), SOCaddr_size(server)) != 0) {
      // only a blocking connect reports the failure here
      if (waitconnect) {
#if HDEBUG
        printf("unable to connect!\n");
#endif
        if (retour != NULL) {
#ifdef _WIN32
          const int last_errno = WSAGetLastError();
#else
          const int last_errno = errno;
#endif

          snprintf(retour->msg, sizeof(retour->msg),
                   "Unable to connect to the server: %s",
                   strerror(last_errno));
        }
        /* Close the socket and notify the error!!! */
        deletesoc(soc);
        return INVALID_SOCKET;
      }
    }
#if HTS_WIDE_DEBUG
    DEBUG_W("connect done\n");
#endif

#if HDEBUG
    printf("connexion établie\n");
#endif

    // from now on data can be sent and received through soc

  } else {                      // a local file must be opened
    // it is handled the same way as a socket
    soc = LOCAL_SOCKET_ID;      // local pseudo-socket
    // soc is replaced by a real handle by a later http_fopen()
  }

  return soc;
}
|
|
|
|
// couper http://www.truc.fr/pub/index.html -> www.truc.fr /pub/index.html
|
|
// retour=-1 si erreur.
|
|
// si file://... alors adresse=file:// (et coupe le ?query dans ce cas)
|
|
int ident_url_absolute(const char *url, lien_adrfil *adrfil) {
|
|
int pos = 0;
|
|
int scheme = 0;
|
|
|
|
// effacer adrfil->adr et adrfil->fil
|
|
adrfil->adr[0] = adrfil->fil[0] = '\0';
|
|
|
|
#if HDEBUG
|
|
printf("protocol: %s\n", url);
|
|
#endif
|
|
|
|
// Scheme?
|
|
{
|
|
const char *a = url;
|
|
|
|
while(isalpha((unsigned char) *a))
|
|
a++;
|
|
if (*a == ':')
|
|
scheme = 1;
|
|
}
|
|
|
|
// 1. optional scheme ":"
|
|
if ((pos = strfield(url, "file:"))) { // fichier local!! (pour les tests)
|
|
//!!p+=3;
|
|
strcpybuff(adrfil->adr, "file://");
|
|
} else if ((pos = strfield(url, "http:"))) { // HTTP
|
|
//!!p+=3;
|
|
} else if ((pos = strfield(url, "ftp:"))) { // FTP
|
|
strcpybuff(adrfil->adr, "ftp://"); // FTP!!
|
|
//!!p+=3;
|
|
#if HTS_USEOPENSSL
|
|
} else if ((pos = strfield(url, "https:"))) { // HTTPS
|
|
strcpybuff(adrfil->adr, "https://");
|
|
#endif
|
|
} else if (scheme) {
|
|
return -1; // erreur non reconnu
|
|
} else
|
|
pos = 0;
|
|
|
|
// 2. optional "//" authority
|
|
if (strncmp(url + pos, "//", 2) == 0)
|
|
pos += 2;
|
|
|
|
// (url+pos) now points to the path (not net path)
|
|
|
|
//## if (adrfil->adr[0]!=lOCAL_CHAR) { // adrfil->adresse normale http
|
|
if (!strfield(adrfil->adr, "file:")) { // PAS adrfil->file://
|
|
const char *p, *q;
|
|
|
|
p = url + pos;
|
|
|
|
// p pointe sur le début de l'adrfil->adresse, ex: www.truc.fr/sommaire/index.html
|
|
q = strchr(jump_identification_const(p), '/');
|
|
if (q == 0)
|
|
q = strchr(jump_identification_const(p), '?'); // http://www.foo.com?bar=1
|
|
if (q == 0)
|
|
q = p + strlen(p); // pointe sur \0
|
|
// q pointe sur le chemin, ex: index.html?query=recherche
|
|
|
|
// chemin www... trop long!!
|
|
if ((((int) (q - p))) > HTS_URLMAXSIZE) {
|
|
//strcpybuff(retour.msg,"Path too long");
|
|
return -1; // erreur
|
|
}
|
|
// recopier adrfil->adresse www..
|
|
strncatbuff(adrfil->adr, p, ((int) (q - p)));
|
|
// *( adrfil->adr+( ((int) q) - ((int) p) ) )=0; // faut arrêter la fumette!
|
|
// recopier chemin /pub/..
|
|
if (q[0] != '/') // page par défaut (/)
|
|
strcatbuff(adrfil->fil, "/");
|
|
strcatbuff(adrfil->fil, q);
|
|
// SECURITE:
|
|
// simplifier url pour les ../
|
|
fil_simplifie(adrfil->fil);
|
|
} else { // localhost adrfil->file://
|
|
const char *p;
|
|
size_t i;
|
|
char *a;
|
|
|
|
p = url + pos;
|
|
if (*p == '/' || *p == '\\') { /* adrfil->file:///.. */
|
|
strcatbuff(adrfil->fil, p); // fichier local ; adrfil->adr="#"
|
|
} else {
|
|
if (p[1] != ':') {
|
|
strcatbuff(adrfil->fil, "//"); /* adrfil->file://server/foo */
|
|
strcatbuff(adrfil->fil, p);
|
|
} else {
|
|
strcatbuff(adrfil->fil, p); // adrfil->file://C:\..
|
|
}
|
|
}
|
|
|
|
a = strchr(adrfil->fil, '?');
|
|
if (a)
|
|
*a = '\0'; /* couper query (inutile pour adrfil->file:// lors de la requête) */
|
|
// adrfil->filtrer les \\ -> / pour les fichiers DOS
|
|
for(i = 0; adrfil->fil[i] != '\0'; i++)
|
|
if (adrfil->fil[i] == '\\')
|
|
adrfil->fil[i] = '/';
|
|
// collapse ../ like the http branch above (path-traversal safety)
|
|
fil_simplifie(adrfil->fil);
|
|
}
|
|
|
|
// no hostname
|
|
if (!strnotempty(adrfil->adr))
|
|
return -1; // erreur non reconnu
|
|
|
|
// nommer au besoin.. (non utilisé normalement)
|
|
if (!strnotempty(adrfil->fil))
|
|
strcpybuff(adrfil->fil, "default-index.html");
|
|
|
|
// case insensitive pour adrfil->adresse
|
|
{
|
|
char *a = jump_identification(adrfil->adr);
|
|
|
|
while(*a) {
|
|
if ((*a >= 'A') && (*a <= 'Z'))
|
|
*a += 'a' - 'A';
|
|
a++;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Simplify "./" and "../" path segments of f, in place.
   Segments are only collapsed before the first '?'; the query part is
   copied untouched. "../" segments that would climb above the start are
   dropped (an absolute path keeps its leading '/'). If the result is empty
   it becomes "/" (absolute input) or "./" (relative input). A path with too
   many '/' for the internal table is treated as an error and emptied. */
void fil_simplifie(char *f) {
  char *marks[128];             /* output position right after each '/' written */
  int depth = 0;                /* number of valid entries in marks[] */
  char prev = '/';              /* last character copied; '/' means segment start */
  int in_query = 0;
  const int absolute = (f[0] == '/');
  char *src = f;
  char *dst = f;

  while(*src != '\0') {
    int at_dot;

    if (*src == '?')
      in_query = 1;
    /* a '.' opening a path segment, outside the query */
    at_dot = (!in_query && prev == '/' && src[0] == '.');

    if (at_dot && src[1] == '/') {      /* foo/./bar or ./foo */
      src += 2;
      continue;
    }
    if (at_dot && src[1] == '.' && (src[2] == '/' || src[2] == '\0')) {
      /* foo/../bar or ../foo or .. */
      src += (src[2] == '\0') ? 2 : 3;
      if (depth > 1) {
        depth--;
        dst = marks[depth - 1];
      } else {                  /* too many ../ */
        depth = 0;
        dst = absolute ? f + 1 : f;     /* keep the leading '/' */
      }
      continue;
    }

    /* plain character: copy it and remember where each segment starts */
    prev = *src;
    *dst++ = prev;
    if (prev == '/') {
      marks[depth++] = dst;
      if (depth >= 127) {
        *f = '\0';              /* ERROR */
        break;
      }
    }
    src++;
  }
  *dst = '\0';

  if (*f == '\0') {
    if (absolute) {
      f[0] = '/';
      f[1] = '\0';
    } else {
      f[0] = '.';
      f[1] = '/';
      f[2] = '\0';
    }
  }
}
|
|
|
|
// fermer liaison fichier ou socket
|
|
void deletehttp(htsblk * r) {
|
|
#if HTS_DEBUG_CLOSESOCK
|
|
DEBUG_W("deletehttp: (htsblk*) 0x%p\n" _(void *)r);
|
|
#endif
|
|
#if HTS_USEOPENSSL
|
|
/* Free OpenSSL structures */
|
|
if (r->ssl_con) {
|
|
SSL_shutdown(r->ssl_con);
|
|
SSL_free(r->ssl_con);
|
|
r->ssl_con = NULL;
|
|
}
|
|
#endif
|
|
if (r->soc != INVALID_SOCKET) {
|
|
if (r->is_file) {
|
|
if (r->fp)
|
|
fclose(r->fp);
|
|
r->fp = NULL;
|
|
} else {
|
|
if (r->soc != LOCAL_SOCKET_ID)
|
|
deletesoc_r(r);
|
|
}
|
|
r->soc = INVALID_SOCKET;
|
|
}
|
|
}
|
|
|
|
// free the addr buffer
|
|
// always returns 1
|
|
int deleteaddr(htsblk * r) {
|
|
if (r->adr != NULL) {
|
|
freet(r->adr);
|
|
r->adr = NULL;
|
|
}
|
|
if (r->headers != NULL) {
|
|
freet(r->headers);
|
|
r->headers = NULL;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
// fermer une socket
|
|
void deletesoc(T_SOC soc) {
|
|
if (soc != INVALID_SOCKET && soc != LOCAL_SOCKET_ID) {
|
|
#if HTS_WIDE_DEBUG
|
|
DEBUG_W("close %d\n" _(int) soc);
|
|
#endif
|
|
#ifdef _WIN32
|
|
if (closesocket(soc) != 0) {
|
|
int err = WSAGetLastError();
|
|
|
|
fprintf(stderr, "* error closing socket %d: %s\n", soc, strerror(err));
|
|
}
|
|
#else
|
|
if (close(soc) != 0) {
|
|
const int err = errno;
|
|
|
|
fprintf(stderr, "* error closing socket %d: %s\n", soc, strerror(err));
|
|
}
|
|
#endif
|
|
#if HTS_WIDE_DEBUG
|
|
DEBUG_W(".. done\n");
|
|
#endif
|
|
}
|
|
}
|
|
|
|
/* Will also clean other things */
|
|
void deletesoc_r(htsblk * r) {
|
|
#if HTS_USEOPENSSL
|
|
if (r->ssl_con) {
|
|
SSL_shutdown(r->ssl_con);
|
|
// SSL_CTX_set_quiet_shutdown(r->ssl_con->ctx, 1);
|
|
SSL_free(r->ssl_con);
|
|
r->ssl_con = NULL;
|
|
}
|
|
#endif
|
|
if (r->soc != INVALID_SOCKET) {
|
|
deletesoc(r->soc);
|
|
r->soc = INVALID_SOCKET;
|
|
}
|
|
}
|
|
|
|
// renvoi le nombre de secondes depuis 1970
|
|
TStamp time_local(void) {
|
|
return ((TStamp) time(NULL));
|
|
}
|
|
|
|
// number of millisec since 1970
|
|
HTSEXT_API TStamp mtime_local(void) {
|
|
#ifndef _WIN32
|
|
struct timeval tv;
|
|
if (gettimeofday(&tv, NULL) != 0) {
|
|
assert(! "gettimeofday");
|
|
}
|
|
|
|
return (TStamp) (((TStamp) tv.tv_sec * (TStamp) 1000) +
|
|
((TStamp) tv.tv_usec / (TStamp) 1000));
|
|
#else
|
|
struct timeb B;
|
|
ftime(&B);
|
|
return (TStamp) (((TStamp) B.time * (TStamp) 1000)
|
|
+ ((TStamp) B.millitm));
|
|
#endif
|
|
}
|
|
|
|
// convertit un nombre de secondes en temps (chaine)
|
|
void sec2str(char *st, TStamp t) {
|
|
int j, h, m, s;
|
|
|
|
j = (int) (t / (3600 * 24));
|
|
t -= ((TStamp) j) * (3600 * 24);
|
|
h = (int) (t / (3600));
|
|
t -= ((TStamp) h) * 3600;
|
|
m = (int) (t / 60);
|
|
t -= ((TStamp) m) * 60;
|
|
s = (int) t;
|
|
|
|
if (j > 0)
|
|
sprintf(st, "%d days, %d hours %d minutes %d seconds", j, h, m, s);
|
|
else if (h > 0)
|
|
sprintf(st, "%d hours %d minutes %d seconds", h, m, s);
|
|
else if (m > 0)
|
|
sprintf(st, "%d minutes %d seconds", m, s);
|
|
else
|
|
sprintf(st, "%d seconds", s);
|
|
}
|
|
|
|
// idem, plus court (chaine)
|
|
HTSEXT_API void qsec2str(char *st, TStamp t) {
|
|
int j, h, m, s;
|
|
|
|
j = (int) (t / (3600 * 24));
|
|
t -= ((TStamp) j) * (3600 * 24);
|
|
h = (int) (t / (3600));
|
|
t -= ((TStamp) h) * 3600;
|
|
m = (int) (t / 60);
|
|
t -= ((TStamp) m) * 60;
|
|
s = (int) t;
|
|
|
|
if (j > 0)
|
|
sprintf(st, "%dd,%02dh,%02dmin%02ds", j, h, m, s);
|
|
else if (h > 0)
|
|
sprintf(st, "%dh,%02dmin%02ds", h, m, s);
|
|
else if (m > 0)
|
|
sprintf(st, "%dmin%02ds", m, s);
|
|
else
|
|
sprintf(st, "%ds", s);
|
|
}
|
|
|
|
// Write the current time, GMT, in RFC format into s (256-byte buffer).
// Falls back to localtime() when gmtime() returns NULL.
void time_gmt_rfc822(char *s) {
  const time_t now = time(NULL);
  struct tm *broken = gmtime(&now);

  if (broken == NULL)
    broken = localtime(&now);
  time_rfc822(s, broken);
}
|
|
|
|
// Write the current local time in RFC format into s (256-byte buffer)
void time_local_rfc822(char *s) {
  const time_t now = time(NULL);

  time_rfc822_local(s, localtime(&now));
}
|
|
|
|
/* Convert a date string into a broken-down time.
   The string is lowercased, '-', ':' and ',' become separators, and each
   token is either a month name (looked up in a fixed table), a number, or
   noise. The first number is the day; the next four are interpreted either
   as "hh mm ss yyyy" (fourth >= 1000, e.g. "Sun Nov  6 08:49:37 1994") or as
   "yy(yy) hh mm ss" (e.g. "Sun, 06 Nov 1994 08:49:37 GMT").
   Returns result on success, NULL if s is longer than 200 characters or if a
   month, a day and four more numbers were not all found. */
struct tm *convert_time_rfc822(struct tm *result, const char *s) {
  static const char months[] =
    "jan feb mar apr may jun jul aug sep oct nov dec";
  char str[256];
  char *cur;
  int month = -1;
  int day = -1;
  int num[4] = { -1, -1, -1, -1 };      /* numbers following the day */

  if ((int) strlen(s) > 200)
    return NULL;
  strcpybuff(str, s);
  hts_lowcase(str);

  /* turn '-', ':' and ',' into spaces */
  for(cur = str; *cur != '\0'; cur++) {
    if (*cur == '-' || *cur == ':' || *cur == ',')
      *cur = ' ';
  }

  /* tokenize */
  cur = str;
  while(*cur) {
    char tok[256];
    const char *start;

    /* isolate the next word */
    while(*cur == ' ')
      cur++;
    start = cur;
    while((*cur) && (*cur != ' '))
      cur++;
    tok[0] = '\0';
    if (cur == start)
      continue;                 /* only trailing spaces were left */

    strncatbuff(tok, start, (int) (cur - start));
    {
      const char *hit = strstr(months, tok);

      if (hit != NULL) {        /* month always in letters */
        month = ((int) (hit - months)) / 4;     /* entries are 4 chars apart */
      } else {
        int number;

        if (sscanf(tok, "%d", &number) == 1) {  /* number token */
          if (day < 0) {        /* day always first number */
            day = number;
          } else {
            int k;

            /* store into the first slot still unset */
            for(k = 0; k < 4; k++) {
              if (num[k] < 0) {
                num[k] = number;
                break;
              }
            }
          }
        }                       /* otherwise: noise (+1GMT for example) */
      }
    }
  }

  if (month < 0 || day < 0 || num[0] < 0 || num[1] < 0 || num[2] < 0
      || num[3] < 0)
    return NULL;

  if (num[3] >= 1000) {         /* Sun Nov  6 08:49:37 1994 */
    result->tm_year = num[3] - 1900;
    result->tm_hour = num[0];
    result->tm_min = num[1];
    result->tm_sec = num[2];    /* known to be >= 0 here */
  } else {                      /* Sun, 06 Nov 1994 08:49:37 GMT or Sunday, 06-Nov-94 08:49:37 GMT */
    result->tm_hour = num[1];
    result->tm_min = num[2];
    result->tm_sec = num[3];    /* known to be >= 0 here */
    if (num[0] <= 50)           /* 00 means 2000 */
      result->tm_year = num[0] + 100;
    else if (num[0] < 1000)     /* 99 means 1999 */
      result->tm_year = num[0];
    else                        /* 2000 */
      result->tm_year = num[0] - 1900;
  }
  result->tm_isdst = 0;         /* assume GMT */
  result->tm_yday = -1;         /* don't know */
  result->tm_wday = -1;         /* don't know */
  result->tm_mon = month;
  result->tm_mday = day;
  return result;
}
|
|
|
|
/* Convert a broken-down GMT time with timegm().
   Returns (time_t) -1 when timegm() fails or yields 0. */
static time_t getGMT(struct tm *tm) {
  const time_t t = timegm(tm);

  if (t == (time_t) -1 || t == (time_t) 0)
    return (time_t) -1;
  return t;
}
|
|
|
|
/* sets file time. -1 if error */
|
|
/* Note: utf-8 */
|
|
int set_filetime(const char *file, struct tm *tm_time) {
|
|
time_t t = getGMT(tm_time);
|
|
|
|
if (t != (time_t) - 1) {
|
|
STRUCT_UTIMBUF tim;
|
|
|
|
memset(&tim, 0, sizeof(tim));
|
|
tim.actime = tim.modtime = t;
|
|
return UTIME(file, &tim);
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
/* Set the file time from an RFC822 date+time string; -1 if error
   (unparsable date or set_filetime() failure). */
/* Note: utf-8 */
int set_filetime_rfc822(const char *file, const char *date) {
  struct tm buffer;
  struct tm *const parsed = convert_time_rfc822(&buffer, date);

  if (parsed == NULL)
    return -1;
  return set_filetime(file, parsed);
}
|
|
|
|
/* Note: utf-8 */
|
|
int get_filetime_rfc822(const char *file, char *date) {
|
|
STRUCT_STAT buf;
|
|
|
|
date[0] = '\0';
|
|
if (STAT(file, &buf) == 0) {
|
|
struct tm *A;
|
|
time_t tt = buf.st_mtime;
|
|
|
|
A = gmtime(&tt);
|
|
if (A == NULL)
|
|
A = localtime(&tt);
|
|
if (A != NULL) {
|
|
time_rfc822(date, A);
|
|
return 1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Format the broken-down time A in RFC format with a "GMT" suffix into s
// (256-byte buffer). A NULL time triggers an assertion failure.
void time_rfc822(char *s, struct tm *A) {
  static const size_t buffer_size = 256;

  if (A == NULL) {
    // variable name kept as is: the assertion macro may report it
    int localtime_returned_null = 0;

    assertf(localtime_returned_null);
  }
  strftime(s, buffer_size, "%a, %d %b %Y %H:%M:%S GMT", A);
}
|
|
|
|
// Format the broken-down local time A in RFC format (no zone suffix) into s
// (256-byte buffer). A NULL time triggers an assertion failure.
void time_rfc822_local(char *s, struct tm *A) {
  static const size_t buffer_size = 256;

  if (A == NULL) {
    // variable name kept as is: the assertion macro may report it
    int localtime_returned_null = 0;

    assertf(localtime_returned_null);
  }
  strftime(s, buffer_size, "%a, %d %b %Y %H:%M:%S", A);
}
|
|
|
|
// conversion en b,Kb,Mb
|
|
HTSEXT_API char *int2bytes(strc_int2bytes2 * strc, LLint n) {
|
|
char **a = int2bytes2(strc, n);
|
|
|
|
strcpybuff(strc->catbuff, a[0]);
|
|
strcatbuff(strc->catbuff, a[1]);
|
|
return strc->catbuff;
|
|
}
|
|
|
|
// conversion en b/s,Kb/s,Mb/s
|
|
HTSEXT_API char *int2bytessec(strc_int2bytes2 * strc, long int n) {
|
|
char buff[256];
|
|
char **a = int2bytes2(strc, n);
|
|
|
|
strcpybuff(buff, a[0]);
|
|
strcatbuff(buff, a[1]);
|
|
return concat(strc->catbuff, sizeof(strc->catbuff), buff, "/s");
|
|
}
|
|
// Format n in decimal into strc->buff2 and return that buffer
HTSEXT_API char *int2char(strc_int2bytes2 * strc, int n) {
  char *const out = strc->buff2;

  sprintf(strc->buff2, "%d", n);
  return out;
}
|
|
|
|
// conversion en b,Kb,Mb, nombre et type séparés
|
|
// limite: 2.10^9.10^6B
|
|
|
|
/* See http://physics.nist.gov/cuu/Units/binary.html */
|
|
#define ToLLint(a) ((LLint)(a))
|
|
#define ToLLintKiB (ToLLint(1024))
|
|
#define ToLLintMiB (ToLLintKiB*ToLLintKiB)
|
|
#ifdef HTS_LONGLONG
|
|
#define ToLLintGiB (ToLLintKiB*ToLLintKiB*ToLLintKiB)
|
|
#define ToLLintTiB (ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB)
|
|
#define ToLLintPiB (ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB)
|
|
#endif
|
|
HTSEXT_API char **int2bytes2(strc_int2bytes2 * strc, LLint n) {
|
|
if (n < ToLLintKiB) {
|
|
sprintf(strc->buff1, "%d", (int) (LLint) n);
|
|
strcpybuff(strc->buff2, "B");
|
|
} else if (n < ToLLintMiB) {
|
|
sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / ToLLintKiB)),
|
|
(int) ((LLint) ((n % ToLLintKiB) * 100) / ToLLintKiB));
|
|
strcpybuff(strc->buff2, "KiB");
|
|
}
|
|
#ifdef HTS_LONGLONG
|
|
else if (n < ToLLintGiB) {
|
|
sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / (ToLLintMiB))),
|
|
(int) ((LLint) (((n % (ToLLintMiB)) * 100) / (ToLLintMiB))));
|
|
strcpybuff(strc->buff2, "MiB");
|
|
} else if (n < ToLLintTiB) {
|
|
sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / (ToLLintGiB))),
|
|
(int) ((LLint) (((n % (ToLLintGiB)) * 100) / (ToLLintGiB))));
|
|
strcpybuff(strc->buff2, "GiB");
|
|
} else if (n < ToLLintPiB) {
|
|
sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / (ToLLintTiB))),
|
|
(int) ((LLint) (((n % (ToLLintTiB)) * 100) / (ToLLintTiB))));
|
|
strcpybuff(strc->buff2, "TiB");
|
|
} else {
|
|
sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / (ToLLintPiB))),
|
|
(int) ((LLint) (((n % (ToLLintPiB)) * 100) / (ToLLintPiB))));
|
|
strcpybuff(strc->buff2, "PiB");
|
|
}
|
|
#else
|
|
else {
|
|
sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / (ToLLintMiB))),
|
|
(int) ((LLint) (((n % (ToLLintMiB)) * 100) / (ToLLintMiB))));
|
|
strcpybuff(strc->buff2, "MiB");
|
|
}
|
|
#endif
|
|
strc->buffadr[0] = strc->buff1;
|
|
strc->buffadr[1] = strc->buff2;
|
|
return strc->buffadr;
|
|
}
|
|
|
|
#ifndef _WIN32
// Accessor for the process-wide "ignore SIGPIPE" flag.
// setflag >= 0 stores the new value; a negative setflag only reads.
// Returns the (possibly updated) flag.
int sig_ignore_flag(int setflag) {
  static int current = 0;       /* YES, this one is true static */

  if (setflag < 0)
    return current;
  current = setflag;
  return current;
}
#endif
|
|
|
|
// envoi de texte (en têtes généralement) sur la socket soc
|
|
int sendc(htsblk * r, const char *s) {
|
|
int n, ssz = (int) strlen(s);
|
|
|
|
#ifdef _WIN32
|
|
#else
|
|
sig_ignore_flag(1);
|
|
#endif
|
|
#if HDEBUG
|
|
write(0, s, ssz);
|
|
#endif
|
|
|
|
#if HTS_USEOPENSSL
|
|
if (r->ssl) {
|
|
n = SSL_write(r->ssl_con, s, ssz);
|
|
} else
|
|
#endif
|
|
n = send(r->soc, s, ssz, 0);
|
|
|
|
#ifdef _WIN32
|
|
#else
|
|
sig_ignore_flag(0);
|
|
#endif
|
|
|
|
return (n == ssz) ? n : -1;
|
|
}
|
|
|
|
// Remplace read
|
|
int finput(T_SOC fd, char *s, int max) {
|
|
char c;
|
|
int j = 0;
|
|
|
|
do {
|
|
//c=fgetc(fp);
|
|
if (read((int) fd, &c, 1) <= 0) {
|
|
c = 0;
|
|
}
|
|
if (c != 0) {
|
|
switch (c) {
|
|
case 10:
|
|
c = 0;
|
|
break;
|
|
case 13:
|
|
break; // sauter ces caractères
|
|
default:
|
|
s[j++] = c;
|
|
break;
|
|
}
|
|
}
|
|
} while((c != 0) && (j < max - 1));
|
|
s[j] = '\0';
|
|
return j;
|
|
}
|
|
|
|
// Like linput, but reading from memory.
// Copies characters from buff to s until LF, NUL, or max characters stored;
// CR characters are skipped. s is always NUL-terminated (so it needs room
// for max + 1 bytes).
// Returns the offset to add to buff to reach the next line: the number of
// source characters consumed, plus one. Note that stopping on NUL (or a NULL
// buff) therefore returns 1 more than what was consumed.
int binput(char *buff, char *s, int max) {
  int consumed = 0;
  int stored = 0;

  if (buff != NULL) {
    for(; stored < max; consumed++) {
      const char ch = buff[consumed];

      if (ch == '\0' || ch == '\n')
        break;
      if (ch != '\r')
        s[stored++] = ch;
    }
  }
  s[stored] = '\0';

  // then return the supplemental jump offset
  return consumed + 1;
}
|
|
|
|
// Read one line from fp into s.
// CR, TAB (9) and form feed (12) characters are dropped; the line ends on LF
// or end of file. At most max - 1 characters are stored, followed by a NUL;
// the rest of a longer line is left in the stream.
// Returns the number of characters stored. With max <= 0 nothing is read or
// written and 0 is returned.
int linput(FILE * fp, char *s, int max) {
  int j = 0;

  if (max <= 0)
    return 0;                   // no room, not even for the terminator

  // the capacity is tested BEFORE reading, so that max == 1 stores only the
  // terminator instead of writing one byte past the buffer
  while(j < max - 1) {
    const int c = fgetc(fp);

    if (c == EOF || c == 10)
      break;                    // end of file or LF: end of line
    switch (c) {
    case 13:                   // skip CR
    case 9:
    case 12:                   // skip these characters
      break;
    default:
      s[j++] = (char) c;
      break;
    }
  }
  s[j] = '\0';
  return j;
}
|
|
// Read one line from socket soc into s, one byte at a time with recv().
// CR, TAB (9) and form feed (12) characters are dropped; the line ends on LF
// or when recv() does not deliver a byte. At most max - 1 characters are
// stored, followed by a NUL.
// Returns the number of characters stored. With max <= 0 nothing is read or
// written and 0 is returned.
int linputsoc(T_SOC soc, char *s, int max) {
  int j = 0;

  if (max <= 0)
    return 0;                   // no room, not even for the terminator

  // the capacity is tested BEFORE reading, so that max == 1 stores only the
  // terminator instead of writing one byte past the buffer
  while(j < max - 1) {
    unsigned char ch;
    int c;

    if (recv(soc, &ch, 1, 0) != 1)
      break;                    // error or connection closed
    c = ch;
    if (c == 10)
      break;                    // LF: end of line
    switch (c) {
    case 13:                   // skip CR
    case 9:
    case 12:                   // skip these characters
      break;
    default:
      s[j++] = (char) c;
      break;
    }
  }
  s[j] = '\0';
  return j;
}
|
|
// Same as linputsoc(), but only reads when check_readinput_t() reports the
// socket as readable for the given timeout; returns -1 otherwise.
int linputsoc_t(T_SOC soc, char *s, int max, int timeout) {
  if (!check_readinput_t(soc, timeout))
    return -1;
  return linputsoc(soc, s, max);
}
|
|
// Read one line with linput() and copy it into s without its leading and
// trailing spaces and tabs.
// Returns the length of the trimmed line; 0 (with s empty) when the line is
// empty or the temporary buffer cannot be allocated.
int linput_trim(FILE * fp, char *s, int max) {
  char *const line = (char *) malloct(max + 1);
  int len = 0;

  s[0] = '\0';
  if (line == NULL)
    return len;

  // read the line
  len = linput(fp, line, max);
  if (len != 0) {
    const char *start = line;

    // drop trailing spaces and tabs
    while(len > 0 && (line[len - 1] == ' ' || line[len - 1] == '\t'))
      line[--len] = '\0';
    // skip leading spaces and tabs
    while(len > 0 && (*start == ' ' || *start == '\t')) {
      start++;
      len--;
    }
    if (len > 0) {
      memcpy(s, start, len);    // can copy \0 chars
      s[len] = '\0';
    }
  }

  freet(line);
  return len;
}
|
|
// Read a logical line made of trimmed physical lines joined by a trailing
// backslash (C-preprocessor style continuation). The continuation backslash
// is removed before the next physical line is appended.
// Returns the total length stored in s.
int linput_cpp(FILE * fp, char *s, int max) {
  int total = 0;
  int more;

  s[0] = '\0';
  do {
    int got;

    // cut the trailing backslash of the previous physical line
    if (total > 0 && s[total - 1] == '\\')
      s[--total] = '\0';

    // read the next physical line right after what is already stored
    got = linput_trim(fp, s + total, max - total);
    if (got > 0)
      total += got;

    // continue while the text ends with a backslash and room remains
    more = (s[total > 0 ? total - 1 : 0] == '\\') && (total < max);
  } while(more);
  return total;
}
|
|
|
|
// Same as linput(), but special characters (TAB, form feed) are kept: only
// CR is dropped. The line ends on LF or end of file. At most max - 1
// characters are stored, followed by a NUL; the rest of a longer line is
// left in the stream. With max <= 0 nothing is read or written.
void rawlinput(FILE * fp, char *s, int max) {
  int j = 0;

  if (max <= 0)
    return;                     // no room, not even for the terminator

  // the capacity is tested BEFORE reading, so that max == 1 stores only the
  // terminator instead of writing one byte past the buffer
  while(j < max - 1) {
    const int c = fgetc(fp);

    if (c == EOF || c == 10)
      break;                    // end of file or LF: end of line
    if (c != 13)                // skip CR
      s[j++] = (char) c;
  }
  s[j] = '\0';
}
|
|
|
|
// Search for string o inside s, using strfield() to test each position
// (case insensitive, per the original comment).
// Returns a pointer to the first position where strfield() matches, or NULL.
const char *strstrcase(const char *s, const char *o) {
  for(; *s != '\0'; s++) {
    if (strfield(s, o) != 0)
      return s;
  }
  return NULL;
}
|
|
|
|
// Unicode detector
// See http://www.unicode.org/unicode/reports/tr28/
// (sect Table 3.1B. Legal UTF-8 Byte Sequences)
typedef struct {
  unsigned int pos;             // index, within the sequence, of the byte just stored
  unsigned char data[4];        // bytes of the sequence being examined
} t_auto_seq;

// The macros below are meant to be used only inside is_unicode_utf8(): they
// read and write its locals seq, inseq, err and ok.

// char between 0x<a> and 0x<b> (hex digits given without prefix); the whole
// expansion is parenthesized so that it can be used in any expression
#define CHAR_BETWEEN(c, a, b) ( ( (c) >= 0x##a ) && ( (c) <= 0x##b ) )
// sequence start: no row of the table has been selected yet
#define SEQBEG ( inseq == 0 )
// byte n of the sequence is available and lies in [a, b]; err records the
// result of the last range test that was actually evaluated
#define BLK(n,a, b) ( (seq.pos >= (n)) && ((err = CHAR_BETWEEN(seq.data[(n)], a, b))) )
#define ELT(n,a) BLK(n,a,a)
// end: the sequence is complete
#define SEQEND ((ok = 1))
// sequence started, character will fail if error
#define IN_SEQ ( (inseq = 1) )
// decoding error: a row was selected, is not complete, and the last range
// test failed
#define BAD_SEQ ( (ok == 0) && (inseq != 0) && (!err) )
// no sequence started
#define NO_SEQ ( inseq == 0 )

// is this block an UTF unicode textfile?
// 0 : no
// 1 : yes
// -1: don't know
int is_unicode_utf8(const char *buffer_, const size_t size) {
  const unsigned char *buffer = (const unsigned char *) buffer_;
  t_auto_seq seq;
  size_t i;
  int is_utf = -1;

  RUNTIME_TIME_CHECK_SIZE(size);

  seq.pos = 0;
  for(i = 0; i < size; i++) {
    unsigned int ok = 0;
    unsigned int inseq = 0;
    unsigned int err = 0;

    // append the byte, then re-test the whole pending sequence against each
    // row of the table; the first row whose lead byte matches sets inseq and
    // thereby disables all following rows
    seq.data[seq.pos] = buffer[i];
    /**/ if (SEQBEG && BLK(0, 00, 7F) && IN_SEQ && SEQEND) {
    } else if (SEQBEG && BLK(0, C2, DF) && IN_SEQ && BLK(1, 80, BF) && SEQEND) {
    } else if (SEQBEG && ELT(0, E0) && IN_SEQ && BLK(1, A0, BF)
               && BLK(2, 80, BF) && SEQEND) {
    } else if (SEQBEG && BLK(0, E1, EC) && IN_SEQ && BLK(1, 80, BF)
               && BLK(2, 80, BF) && SEQEND) {
    } else if (SEQBEG && ELT(0, ED) && IN_SEQ && BLK(1, 80, 9F)
               && BLK(2, 80, BF) && SEQEND) {
    } else if (SEQBEG && BLK(0, EE, EF) && IN_SEQ && BLK(1, 80, BF)
               && BLK(2, 80, BF) && SEQEND) {
    } else if (SEQBEG && ELT(0, F0) && IN_SEQ && BLK(1, 90, BF)
               && BLK(2, 80, BF) && BLK(3, 80, BF) && SEQEND) {
    } else if (SEQBEG && BLK(0, F1, F3) && IN_SEQ && BLK(1, 80, BF)
               && BLK(2, 80, BF) && BLK(3, 80, BF) && SEQEND) {
    } else if (SEQBEG && ELT(0, F4) && IN_SEQ && BLK(1, 80, 8F)
               && BLK(2, 80, BF) && BLK(3, 80, BF) && SEQEND) {
    } else if (NO_SEQ) {        // bad, unknown lead byte
      return 0;
    }

    /* Error */
    if (BAD_SEQ) {
      return 0;
    }

    /* unicode character: a valid continuation byte was seen */
    if (seq.pos > 0)
      is_utf = 1;

    /* Next */
    if (ok)
      seq.pos = 0;
    else
      seq.pos++;

    /* Internal error */
    if (seq.pos >= 4)
      return 0;

  }

  return is_utf;
}
|
|
|
|
/*
 * Build a byte histogram: after the call, map[v] holds the number of bytes
 * equal to v among the first 'size' bytes of buffer.
 * map must provide room for 256 counters; they are all reset first.
 */
void map_characters(unsigned char *buffer, unsigned int size, unsigned int *map) {
  const unsigned char *const end = buffer + size;
  const unsigned char *p;

  memset(map, 0, 256 * sizeof(unsigned int));
  for (p = buffer; p != end; p++) {
    map[*p] += 1;
  }
}
|
|
|
|
// le fichier est-il un fichier html?
|
|
// 0 : non
|
|
// 1 : oui
|
|
// -1 : on sait pas
|
|
// -2 : on sait pas, pas d'extension
|
|
int ishtml(httrackp * opt, const char *fil) {
|
|
/* User-defined MIME types (overrides ishtml()) */
|
|
char BIGSTK fil_noquery[HTS_URLMAXSIZE * 2];
|
|
char mime[256];
|
|
char *a;
|
|
|
|
strcpybuff(fil_noquery, fil);
|
|
if ((a = strchr(fil_noquery, '?')) != NULL) {
|
|
*a = '\0';
|
|
}
|
|
if (get_userhttptype(opt, mime, fil_noquery)) {
|
|
if (is_html_mime_type(mime)) {
|
|
return 1;
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
if (!strnotempty(fil_noquery)) {
|
|
return -2;
|
|
}
|
|
|
|
/* Search for known ext */
|
|
for(a = fil_noquery + strlen(fil_noquery) - 1;
|
|
*a != '.' && *a != '/' && a > fil_noquery; a--) ;
|
|
if (*a == '.') { // a une extension
|
|
char BIGSTK fil_noquery[HTS_URLMAXSIZE * 2];
|
|
char *b;
|
|
int ret;
|
|
char *dotted = a;
|
|
|
|
fil_noquery[0] = '\0';
|
|
a++; // pointer sur extension
|
|
strncatbuff(fil_noquery, a, HTS_URLMAXSIZE);
|
|
b = strchr(fil_noquery, '?');
|
|
if (b)
|
|
*b = '\0';
|
|
ret = ishtml_ext(fil_noquery); // retour
|
|
if (ret == -1) {
|
|
switch (is_knowntype(opt, dotted)) {
|
|
case 1:
|
|
ret = 0; // connu, non html
|
|
break;
|
|
case 2:
|
|
ret = 1; // connu, html
|
|
break;
|
|
default:
|
|
ret = -1; // inconnu..
|
|
break;
|
|
}
|
|
}
|
|
return ret;
|
|
} else
|
|
return -2; // indéterminé, par exemple /truc
|
|
}
|
|
|
|
// same test as ishtml(), but for a bare extension (no dot)
// returns 1 when the extension is one of the html extensions listed below,
// -1 (unknown) otherwise
int ishtml_ext(const char *a) {
  static const char *const html_exts[] = {
    "html", "htm", "shtml", "phtml", "htmlx", "shtm", "phtm", "htmx"
  };
  size_t i;

  for (i = 0; i < sizeof(html_exts) / sizeof(html_exts[0]); i++) {
    if (strfield2(a, html_exts[i])) {
      return 1;
    }
  }
  return -1;                    // unknown
}
|
|
|
|
// error (404,500..)
// returns 1 when err / 100 is 4 or 5 (status codes 400..599), 0 otherwise
int ishttperror(int err) {
  const int status_class = err / 100;

  return (status_class == 4 || status_class == 5) ? 1 : 0;
}
|
|
|
|
/* Declare a non-const version of FUN */
/* Expands to the definition of 'char *FUN(char *source)', which calls
   FUN_const(source) and returns the same position as a non-const pointer
   into 'source' (NULL is passed through). The result is rebuilt as
   source + offset, so no const qualifier is cast away. */
#define DECLARE_NON_CONST_VERSION(FUN) \
char *FUN(char *source) { \
  const char *const ret = FUN ##_const(source); \
  return ret != NULL ? source + ( ret - source ) : NULL; \
}
|
|
|
|
// retourne le pointeur ou le pointeur + offset si il existe dans la chaine un @ signifiant
|
|
// une identification
|
|
HTSEXT_API const char *jump_identification_const(const char *source) {
|
|
const char *a, *trytofind;
|
|
|
|
if (strcmp(source, "file://") == 0)
|
|
return source;
|
|
// rechercher dernier @ (car parfois email transmise dans adresse!)
|
|
// mais sauter ftp:// éventuel
|
|
a = jump_protocol_const(source);
|
|
trytofind = strrchr_limit(a, '@', strchr(a, '/'));
|
|
return trytofind != NULL ? trytofind : a;
|
|
}
|
|
|
|
HTSEXT_API DECLARE_NON_CONST_VERSION(jump_identification)
|
|
|
|
HTSEXT_API const char *jump_normalized_const(const char *source) {
|
|
if (strcmp(source, "file://") == 0)
|
|
return source;
|
|
source = jump_identification_const(source);
|
|
if (strfield(source, "www") && source[3] != '\0') {
|
|
if (source[3] == '.') { // www.foo.com -> foo.com
|
|
source += 4;
|
|
} else { // www-4.foo.com -> foo.com
|
|
const char *a = source + 3;
|
|
|
|
while(*a && (isdigit(*a) || *a == '-'))
|
|
a++;
|
|
if (*a == '.') {
|
|
source = a + 1;
|
|
}
|
|
}
|
|
}
|
|
return source;
|
|
}
|
|
|
|
HTSEXT_API DECLARE_NON_CONST_VERSION(jump_normalized)
|
|
|
|
/* qsort() comparator for an array of strings: orders the elements by their
   text starting at the SECOND character (the leading '?' or '&' separator
   of a query argument is ignored). */
static int sortNormFnc(const void *a_, const void *b_) {
  const char *const lhs = *(const char *const *) a_;
  const char *const rhs = *(const char *const *) b_;

  return strcmp(lhs + 1, rhs + 1);
}
|
|
|
|
HTSEXT_API char *fil_normalized(const char *source, char *dest) {
|
|
char lastc = 0;
|
|
int gotquery = 0;
|
|
int ampargs = 0;
|
|
size_t i, j;
|
|
char *query = NULL;
|
|
|
|
for(i = j = 0; source[i] != '\0'; i++) {
|
|
if (!gotquery && source[i] == '?')
|
|
gotquery = ampargs = 1;
|
|
if ((!gotquery && lastc == '/' && source[i] == '/') // foo//bar -> foo/bar
|
|
) {
|
|
} else {
|
|
if (gotquery && source[i] == '&') {
|
|
ampargs++;
|
|
}
|
|
dest[j++] = source[i];
|
|
}
|
|
lastc = source[i];
|
|
}
|
|
dest[j++] = '\0';
|
|
|
|
/* Sort arguments (&foo=1&bar=2 == &bar=2&foo=1) */
|
|
if (ampargs > 1) {
|
|
char **amps = malloct(ampargs * sizeof(char *));
|
|
char *copyBuff = NULL;
|
|
size_t qLen = 0;
|
|
|
|
assertf(amps != NULL);
|
|
gotquery = 0;
|
|
for(i = j = 0; dest[i] != '\0'; i++) {
|
|
if ((gotquery && dest[i] == '&') || (!gotquery && dest[i] == '?')) {
|
|
if (!gotquery) {
|
|
gotquery = 1;
|
|
query = &dest[i];
|
|
qLen = strlen(query);
|
|
}
|
|
assertf(j < ampargs);
|
|
amps[j++] = &dest[i];
|
|
dest[i] = '\0';
|
|
}
|
|
}
|
|
assertf(gotquery);
|
|
assertf(j == ampargs);
|
|
|
|
/* Sort 'em all */
|
|
qsort(amps, ampargs, sizeof(char *), sortNormFnc);
|
|
|
|
/* Replace query by sorted query */
|
|
copyBuff = malloct(qLen + 1);
|
|
assertf(copyBuff != NULL);
|
|
{
|
|
htsbuff cb = htsbuff_ptr(copyBuff, qLen + 1);
|
|
|
|
for (i = 0; i < ampargs; i++) {
|
|
htsbuff_cat(&cb, i == 0 ? "?" : "&");
|
|
htsbuff_cat(&cb, amps[i] + 1);
|
|
}
|
|
assertf(cb.len == qLen);
|
|
}
|
|
/* query points into dest where the original qLen-byte query was */
|
|
strlcpybuff(query, copyBuff, qLen + 1);
|
|
|
|
/* Cleanup */
|
|
freet(amps);
|
|
freet(copyBuff);
|
|
}
|
|
|
|
return dest;
|
|
}
|
|
|
|
#define endwith(a) ( (len >= (sizeof(a)-1)) ? ( strncmp(dest, a+len-(sizeof(a)-1), sizeof(a)-1) == 0 ) : 0 );
|
|
HTSEXT_API char *adr_normalized_sized(const char *source, char *dest,
|
|
size_t destsize) {
|
|
/* not yet too aggressive (no com<->net<->org checkings) */
|
|
strlcpybuff(dest, jump_normalized_const(source), destsize);
|
|
return dest;
|
|
}
|
|
|
|
// deprecated variant; kept for ABI compatibility. Bounds to the implicit
|
|
// contract the old callers relied on (an HTS_URLMAXSIZE*2 URL buffer).
|
|
HTSEXT_API char *adr_normalized(const char *source, char *dest) {
|
|
return adr_normalized_sized(source, dest, HTS_URLMAXSIZE * 2);
|
|
}
|
|
|
|
#undef endwith
|
|
|
|
// find port (:80) or NULL if not found
|
|
// can handle IPV6 addresses
|
|
HTSEXT_API const char *jump_toport_const(const char *source) {
|
|
const char *a, *trytofind;
|
|
|
|
a = jump_identification_const(source);
|
|
trytofind = strrchr_limit(a, ']', strchr(source, '/')); // find last ] (http://[3ffe:b80:1234::1]:80/foo.html)
|
|
a = strchr((trytofind) ? trytofind : a, ':');
|
|
return a;
|
|
}
|
|
|
|
HTSEXT_API DECLARE_NON_CONST_VERSION(jump_toport)
|
|
|
|
// strrchr, but not too far
// Search the last occurrence of c in s located strictly before 'limit'
// (or in the whole string when limit is NULL).
// Returns a pointer to the character FOLLOWING that occurrence, or NULL when
// there is none.
const char *strrchr_limit(const char *s, char c, const char *limit) {
  const char *after_last = NULL;
  const char *p;

  if (limit == NULL) {
    p = strrchr(s, c);
    return p != NULL ? p + 1 : NULL;
  }
  /* p is tested against NULL before being compared with limit: a relational
     comparison involving a null pointer is undefined */
  for (p = strchr(s, c); p != NULL && p < limit; p = strchr(p + 1, c)) {
    after_last = p + 1;
  }
  return after_last;
}
|
|
|
|
// retourner adr sans ftp://
|
|
const char *jump_protocol_const(const char *source) {
|
|
int p;
|
|
|
|
// scheme
|
|
// "Comparisons of scheme names MUST be case-insensitive" (RFC2616)
|
|
if ((p = strfield(source, "http:")))
|
|
source += p;
|
|
else if ((p = strfield(source, "ftp:")))
|
|
source += p;
|
|
else if ((p = strfield(source, "https:")))
|
|
source += p;
|
|
else if ((p = strfield(source, "file:")))
|
|
source += p;
|
|
// net_path
|
|
if (strncmp(source, "//", 2) == 0)
|
|
source += 2;
|
|
return source;
|
|
}
|
|
|
|
DECLARE_NON_CONST_VERSION(jump_protocol)
|
|
|
|
// base 64 encoding of a into b
// Encodes the size_a bytes of a as a NUL-terminated base64 string in b,
// padded with '='. When crlf is non-zero, "\r\n" is inserted after every
// 60 input bytes (80 output characters).
// b must be large enough to hold the result.
void code64(unsigned char *a, int size_a, unsigned char *b, int crlf) {
  static const char alphabet[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  int consumed = 0;

  while (size_a > 0) {
    /* number of real input bytes in this group of three */
    const int n = size_a < 3 ? size_a : 3;
    unsigned long int group = 0;
    int k;

    /* 24 bits, missing bytes being zero-filled */
    for (k = 0; k < 3; k++) {
      group <<= 8;
      if (k < n) {
        group |= *a++;
      }
    }
    size_a -= n;

    b[0] = alphabet[(group >> 18) & 63];
    b[1] = alphabet[(group >> 12) & 63];
    b[2] = (n >= 2) ? alphabet[(group >> 6) & 63] : '=';
    b[3] = (n >= 3) ? alphabet[group & 63] : '=';
    b += 4;

    consumed += 3;
    if (crlf && (consumed % 60) == 0) {
      *b++ = '\r';
      *b++ = '\n';
    }
  }
  *b = '\0';
}
|
|
|
|
// return the hex character value, or -1 on error.
|
|
static HTS_INLINE int ehexh(const char c) {
|
|
if (c >= '0' && c <= '9')
|
|
return c - '0';
|
|
else if (c >= 'a' && c <= 'f')
|
|
return (c - 'a' + 10);
|
|
else if (c >= 'A' && c <= 'F')
|
|
return (c - 'A' + 10);
|
|
else
|
|
return -1;
|
|
}
|
|
|
|
// return the two-hex character value, or -1 on error.
|
|
static HTS_INLINE int ehex(const char *s) {
|
|
const int c1 = ehexh(s[0]);
|
|
if (c1 >= 0) {
|
|
const int c2 = ehexh(s[1]);
|
|
if (c2 >= 0) {
|
|
return 16*c1 + c2;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
/* Decode the HTML entities of s in place (through hts_unescapeEntities(),
   with s as both source and destination and strlen(s) + 1 as capacity).
   A non-zero status from the decoder triggers an assertion failure. */
void unescape_amp(char *s) {
  const size_t capacity = strlen(s) + 1;
  const int status = hts_unescapeEntities(s, s, capacity);

  if (status != 0) {
    assertf(! "error escaping html entities");
  }
}
|
|
|
|
// remplacer %20 par ' ', etc..
|
|
// buffer MAX 1Ko
|
|
HTSEXT_API char *unescape_http(char *const catbuff, const size_t size, const char *const s) {
|
|
size_t i, j;
|
|
|
|
RUNTIME_TIME_CHECK_SIZE(size);
|
|
|
|
for(i = 0, j = 0; s[i] != '\0' && j + 1 < size ; i++) {
|
|
int h;
|
|
if (s[i] == '%' && (h = ehex(&s[i + 1])) >= 0) {
|
|
catbuff[j++] = (char) h;
|
|
i += 2;
|
|
}
|
|
else
|
|
catbuff[j++] = s[i];
|
|
}
|
|
catbuff[j++] = '\0';
|
|
return catbuff;
|
|
}
|
|
|
|
// unescape in URL/URI ONLY what has to be escaped, to form a standard URL/URI
|
|
// DOES NOT DECODE %25 (part of CHAR_DELIM)
|
|
// no_high & 1: decode high chars
|
|
// no_high & 2: decode space
|
|
HTSEXT_API char *unescape_http_unharm(char *const catbuff, const size_t size,
|
|
const char *s,
|
|
const hts_boolean no_high) {
|
|
size_t i, j;
|
|
|
|
RUNTIME_TIME_CHECK_SIZE(size);
|
|
|
|
for(i = 0, j = 0; s[i] != '\0' && j + 1 < size ; i++) {
|
|
if (s[i] == '%') {
|
|
const int nchar = ehex(&s[i + 1]);
|
|
|
|
const int test =
|
|
( CHAR_RESERVED(nchar) && nchar != '+' ) /* %2B => + (not in query!) */
|
|
|| CHAR_DELIM(nchar)
|
|
|| CHAR_UNWISE(nchar)
|
|
|| CHAR_LOW(nchar) /* CHAR_SPECIAL */
|
|
|| ( CHAR_XXAVOID(nchar) && ( nchar != ' ' || ( no_high & 2) == 0 ) )
|
|
|| ( ( no_high & 1 ) && CHAR_HIG(nchar) )
|
|
;
|
|
|
|
if (!test && nchar >= 0) { /* can safely unescape */
|
|
catbuff[j++] = (char) nchar;
|
|
i += 2;
|
|
} else {
|
|
catbuff[j++] = '%';
|
|
}
|
|
} else {
|
|
catbuff[j++] = s[i];
|
|
}
|
|
}
|
|
catbuff[j++] = '\0';
|
|
return catbuff;
|
|
}
|
|
|
|
// remplacer " par %xx etc..
|
|
// buffer MAX 1Ko
|
|
HTSEXT_API size_t escape_spc_url(const char *const src,
|
|
char *const dest, const size_t size) {
|
|
return x_escape_http(src, dest, size, 2);
|
|
}
|
|
|
|
// smith / john -> smith%20%2f%20john
|
|
HTSEXT_API size_t escape_in_url(const char *const src,
|
|
char *const dest, const size_t size) {
|
|
return x_escape_http(src, dest, size, 1);
|
|
}
|
|
|
|
// smith / john -> smith%20/%20john
|
|
HTSEXT_API size_t escape_uri(const char *const src,
|
|
char *const dest, const size_t size) {
|
|
return x_escape_http(src, dest, size, 3);
|
|
}
|
|
|
|
// escape an URI, keeping '/' unescaped (x_escape_http() mode 30)
HTSEXT_API size_t escape_uri_utf(const char *const src,
                                 char *const dest, const size_t size) {
  enum { MODE_URI_UTF = 30 };

  return x_escape_http(src, dest, size, MODE_URI_UTF);
}
|
|
|
|
// escape double quotes, spaces and special characters
// (x_escape_http() mode 0)
HTSEXT_API size_t escape_check_url(const char *const src,
                                   char *const dest, const size_t size) {
  enum { MODE_CHECK_URL = 0 };

  return x_escape_http(src, dest, size, MODE_CHECK_URL);
}
|
|
|
|
// same as escape_check_url, but returns char*
|
|
HTSEXT_API char *escape_check_url_addr(const char *const src,
|
|
char *const dest, const size_t size) {
|
|
escape_check_url(src, dest, size);
|
|
return dest;
|
|
}
|
|
|
|
// Same as above, but appending to "dest"
// DECLARE_APPEND_ESCAPE_VERSION(NAME) defines append_NAME(src, dest, size):
// the escaped form of src is written after the string already held by dest,
// 'size' being the total capacity of dest. The existing string must be
// terminated within 'size' bytes (asserted). The value returned is the one
// returned by NAME for the remaining room.
#undef DECLARE_APPEND_ESCAPE_VERSION
#define DECLARE_APPEND_ESCAPE_VERSION(NAME) \
HTSEXT_API size_t append_ ##NAME(const char *const src, char *const dest, const size_t size) { \
  const size_t len = strnlen(dest, size); \
  assertf(len < size); \
  return NAME(src, dest + len, size - len); \
}

DECLARE_APPEND_ESCAPE_VERSION(escape_in_url)
DECLARE_APPEND_ESCAPE_VERSION(escape_spc_url)
DECLARE_APPEND_ESCAPE_VERSION(escape_uri_utf)
DECLARE_APPEND_ESCAPE_VERSION(escape_check_url)
DECLARE_APPEND_ESCAPE_VERSION(escape_uri)

#undef DECLARE_APPEND_ESCAPE_VERSION
|
|
|
|
// Same as above, but in-place
// DECLARE_INPLACE_ESCAPE_VERSION(NAME) defines inplace_NAME(dest, size):
// the string held by dest (capacity 'size', must be terminated within it)
// is first copied aside, then escaped back into dest with NAME.
// The copy lives in a 256-byte stack buffer when it fits, in a temporary
// heap block (released before returning) otherwise.
#undef DECLARE_INPLACE_ESCAPE_VERSION
#define DECLARE_INPLACE_ESCAPE_VERSION(NAME) \
HTSEXT_API size_t inplace_ ##NAME(char *const dest, const size_t size) { \
  char buffer[256]; \
  const size_t len = strnlen(dest, size); \
  const int in_buffer = len + 1 < sizeof(buffer); \
  char *src = in_buffer ? buffer : malloct(len + 1); \
  size_t ret; \
  assertf(src != NULL); \
  assertf(len < size); \
  memcpy(src, dest, len + 1); \
  ret = NAME(src, dest, size); \
  if (!in_buffer) { \
    freet(src); \
  } \
  return ret; \
}

DECLARE_INPLACE_ESCAPE_VERSION(escape_in_url)
DECLARE_INPLACE_ESCAPE_VERSION(escape_spc_url)
DECLARE_INPLACE_ESCAPE_VERSION(escape_uri_utf)
DECLARE_INPLACE_ESCAPE_VERSION(escape_check_url)
DECLARE_INPLACE_ESCAPE_VERSION(escape_uri)

#undef DECLARE_INPLACE_ESCAPE_VERSION
|
|
|
|
|
|
/*
 * Build an identifier from an address and a path: both are escaped with
 * escape_in_url() one after the other into dest (capacity: size), then
 * every '%' of the result is replaced by 'X'.
 * Returns the sum of the two escape_in_url() results.
 */
HTSEXT_API size_t make_content_id(const char *const adr, const char *const fil,
                                  char *const dest, const size_t size) {
  size_t written;
  char *percent;

  written = escape_in_url(adr, dest, size);
  written += escape_in_url(fil, dest + written, size - written);
  RUNTIME_TIME_CHECK_SIZE(size);

  percent = strchr(dest, '%');
  while (percent != NULL) {
    *percent = 'X';
    percent = strchr(percent + 1, '%');
  }
  return written;
}
|
|
|
|
// strip all control characters
|
|
HTSEXT_API void escape_remove_control(char *const s) {
|
|
size_t i, j;
|
|
for(i = 0, j = 0 ; s[i] != '\0' ; i++) {
|
|
const unsigned char c = (unsigned char) s[i];
|
|
if (c >= 32) {
|
|
if (i != j) {
|
|
assertf(j < i);
|
|
s[j] = s[i];
|
|
}
|
|
j++;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* ADD_CHAR(C): append character C to 'dest' at index 'j' (both taken from
   the calling function, as well as 'size', the capacity of dest).
   When only the room for the terminating \0 is left, the string is
   terminated and the CALLING FUNCTION RETURNS 'size' (overflow). */
#undef ADD_CHAR
#define ADD_CHAR(C) do {                        \
    assertf(j < size);                          \
    if (j + 1 == size) {                        \
      dest[j] = '\0';                           \
      return size;                              \
    }                                           \
    dest[j++] = (C);                            \
  } while(0)
|
|
|
|
/* Returns the number of characters written (not taking in account the terminating \0), or 'size' upon overflow. */
|
|
HTSEXT_API size_t x_escape_http(const char *const s, char *const dest,
|
|
const size_t size, const int mode) {
|
|
static const char hex[] = "0123456789abcdef";
|
|
size_t i, j;
|
|
|
|
RUNTIME_TIME_CHECK_SIZE(size);
|
|
|
|
// Out-of-bound.
|
|
// Previous character is supposed to be the terminating \0.
|
|
if (size == 0) {
|
|
return 0;
|
|
}
|
|
|
|
for(i = 0, j = 0 ; s[i] != '\0' ; i++) {
|
|
const unsigned char c = (unsigned char) s[i];
|
|
int test = 0;
|
|
|
|
if (mode == 0)
|
|
test = c == '"' || c == ' ' || CHAR_SPECIAL(c);
|
|
else if (mode == 1)
|
|
test = CHAR_RESERVED(c)
|
|
|| CHAR_DELIM(c)
|
|
|| CHAR_UNWISE(c)
|
|
|| CHAR_SPECIAL(c)
|
|
|| CHAR_XXAVOID(c)
|
|
|| CHAR_MARK(c);
|
|
else if (mode == 2)
|
|
test = c == ' '; // n'escaper que espace
|
|
else if (mode == 3) // échapper que ce qui est nécessaire
|
|
test = CHAR_SPECIAL(c)
|
|
|| CHAR_XXAVOID(c);
|
|
else if (mode == 30) // échapper que ce qui est nécessaire
|
|
test = (c != '/' && CHAR_RESERVED(c))
|
|
|| CHAR_DELIM(c)
|
|
|| CHAR_UNWISE(c)
|
|
|| CHAR_SPECIAL(c)
|
|
|| CHAR_XXAVOID(c);
|
|
|
|
if (!test) {
|
|
ADD_CHAR(c);
|
|
} else {
|
|
ADD_CHAR('%');
|
|
ADD_CHAR(hex[c / 16]);
|
|
ADD_CHAR(hex[c % 16]);
|
|
}
|
|
}
|
|
|
|
assertf(j < size);
|
|
dest[j] = '\0';
|
|
return j;
|
|
}
|
|
|
|
/* Copy s into dest (capacity: size bytes, terminator included), replacing
   every '&' by "&amp;". No other character is modified.
   Returns the number of characters written (terminator excluded), or
   'size' upon overflow. */
HTSEXT_API size_t escape_for_html_print(const char *const s, char *const dest, const size_t size) {
  size_t i, j;

  RUNTIME_TIME_CHECK_SIZE(size);

  for(i = 0, j = 0; s[i] != '\0'; i++) {
    const unsigned char c = (unsigned char) s[i];

    if (c != '&') {
      ADD_CHAR(c);
    } else {
      const char *entity;

      for (entity = "&amp;"; *entity != '\0'; entity++) {
        ADD_CHAR(*entity);
      }
    }
  }
  assertf(j < size);
  dest[j] = '\0';
  return j;
}
|
|
|
|
/* Copy s into dest (capacity: size bytes, terminator included), replacing
   every '&' by "&amp;" and every high character (CHAR_HIG) by the numeric
   entity "&#xHH;" (two lowercase hexadecimal digits).
   Returns the number of characters written (terminator excluded), or
   'size' upon overflow. */
HTSEXT_API size_t escape_for_html_print_full(const char *const s, char *const dest, const size_t size) {
  static const char hex[] = "0123456789abcdef";
  size_t i, j;

  RUNTIME_TIME_CHECK_SIZE(size);

  for(i = 0, j = 0; s[i] != '\0'; i++) {
    const unsigned char c = (unsigned char) s[i];
    const char *text;

    if (c == '&') {
      for (text = "&amp;"; *text != '\0'; text++) {
        ADD_CHAR(*text);
      }
    } else if (CHAR_HIG(c)) {
      for (text = "&#x"; *text != '\0'; text++) {
        ADD_CHAR(*text);
      }
      ADD_CHAR(hex[c / 16]);
      ADD_CHAR(hex[c % 16]);
      ADD_CHAR(';');
    } else {
      ADD_CHAR(c);
    }
  }
  assertf(j < size);
  dest[j] = '\0';
  return j;
}
|
|
|
|
#undef ADD_CHAR
|
|
|
|
// lower-case conversion into caller buffer (capacity catbuffsize)
// a is copied into catbuff, the copy is then lower-cased with hts_lowcase();
// returns catbuff
char *convtolower(char *catbuff, size_t catbuffsize, const char *a) {
  char *const lowered = catbuff;

  strlcpybuff(lowered, a, catbuffsize);
  hts_lowcase(lowered);
  return lowered;
}
|
|
|
|
// lower-case conversion, in place
// only the letters 'A'..'Z' are converted; any other byte is left untouched
void hts_lowcase(char *s) {
  char *p;

  for (p = s; *p != '\0'; p++) {
    if (*p >= 'A' && *p <= 'Z') {
      *p = (char) (*p + ('a' - 'A'));
    }
  }
}
|
|
|
|
// replace every occurrence of the character 'from' by 'to' in s, in place
// Single pass over the string: terminates when from == to, and never
// touches the terminating \0 (from == '\0' is a no-op).
void hts_replace(char *s, char from, char to) {
  char *p;

  for (p = s; *p != '\0'; p++) {
    if (*p == from) {
      *p = to;
    }
  }
}
|
|
|
|
// guess a local file's mime type (e.g. fil="toto.gif" -> s="image/gif")
|
|
// returns 1 if a type was written to s, 0 otherwise
|
|
hts_boolean guess_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
|
const char *fil) {
|
|
return get_httptype_sized(opt, s, ssize, fil, 1);
|
|
}
|
|
|
|
// deprecated variant; kept for ABI compatibility. Bounds to the implicit
|
|
// contract the old callers relied on (a contenttype-sized buffer).
|
|
void guess_httptype(httrackp * opt, char *s, const char *fil) {
|
|
(void) get_httptype_sized(opt, s, HTS_MIMETYPE_SIZE, fil, 1);
|
|
}
|
|
|
|
// write the mime type for fil into s (capacity ssize)
|
|
// flag: 1 to always return a type (the "application/..." / octet-stream
|
|
// fallback) returns 1 if a type was written to s, 0 otherwise
|
|
HTSEXT_API hts_boolean get_httptype_sized(httrackp *opt, char *s, size_t ssize,
|
|
const char *fil, hts_boolean flag) {
|
|
// userdef overrides get_httptype (a rule with an empty value, e.g. "--assume
|
|
// cgi=", matches but writes nothing: report it as "no type" like the old
|
|
// code, whose callers tested strnotempty(s))
|
|
if (get_userhttptype(opt, s, fil)) {
|
|
return s[0] != '\0';
|
|
}
|
|
// regular tests
|
|
if (ishtml(opt, fil) == 1) {
|
|
strlcpybuff(s, "text/html", ssize);
|
|
return 1;
|
|
} else {
|
|
/* Check html -> text/html */
|
|
const char *a = fil + strlen(fil) - 1;
|
|
|
|
while((*a != '.') && (*a != '/') && (a > fil))
|
|
a--;
|
|
if (*a == '.' && strlen(a) < 32) {
|
|
int j = 0;
|
|
|
|
a++;
|
|
while(strnotempty(hts_mime[j][1])) {
|
|
if (strfield2(hts_mime[j][1], a)) {
|
|
if (hts_mime[j][0][0] != '*') { // a match exists
|
|
strlcpybuff(s, hts_mime[j][0], ssize);
|
|
return 1;
|
|
}
|
|
}
|
|
j++;
|
|
}
|
|
|
|
if (flag) {
|
|
snprintf(s, ssize, "application/%s", a);
|
|
return 1;
|
|
}
|
|
} else {
|
|
if (flag) {
|
|
strlcpybuff(s, "application/octet-stream", ssize);
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// deprecated variant; kept for ABI compatibility. Bounds to the implicit
|
|
// contract the old callers relied on (a contenttype-sized buffer).
|
|
HTSEXT_API void get_httptype(httrackp *opt, char *s, const char *fil,
|
|
int flag) {
|
|
(void) get_httptype_sized(opt, s, HTS_MIMETYPE_SIZE, fil, flag);
|
|
}
|
|
|
|
// get type of fil (php)
|
|
// s: buffer (text/html) or NULL
|
|
// return: 1 if known by user
|
|
int get_userhttptype(httrackp * opt, char *s, const char *fil) {
|
|
if (s != NULL) {
|
|
if (s)
|
|
s[0] = '\0';
|
|
if (fil == NULL || *fil == '\0')
|
|
return 0;
|
|
#if 1
|
|
if (StringLength(opt->mimedefs) > 0) {
|
|
|
|
/* Check --assume foooo/foo/bar.cgi=text/html, then foo/bar.cgi=text/html, then bar.cgi=text/html */
|
|
/* also: --assume baz,bar,foooo/foo/bar.cgi=text/html */
|
|
/* start from path beginning */
|
|
do {
|
|
const char *next;
|
|
const char *mimedefs = StringBuff(opt->mimedefs); /* loop through mime definitions : \nfoo=bar\nzoo=baz\n.. */
|
|
|
|
while(*mimedefs != '\0') {
|
|
const char *segment = fil + 1;
|
|
|
|
if (*mimedefs == '\n') {
|
|
mimedefs++;
|
|
}
|
|
/* compare current segment with user's definition */
|
|
do {
|
|
int i;
|
|
|
|
/* check current item */
|
|
for(i = 0; mimedefs[i] != '\0' /* end of all defs */
|
|
&& mimedefs[i] != ' ' /* next item in left list */
|
|
&& mimedefs[i] != '=' /* end of left list */
|
|
&& mimedefs[i] != '\n' /* end of this def (?) */
|
|
&& mimedefs[i] == segment[i] /* same item */
|
|
; i++) ;
|
|
/* success */
|
|
if ((mimedefs[i] == '=' || mimedefs[i] == ' ')
|
|
&& segment[i] == '\0') {
|
|
int i2;
|
|
|
|
while(mimedefs[i] != 0 && mimedefs[i] != '\n'
|
|
&& mimedefs[i] != '=')
|
|
i++;
|
|
if (mimedefs[i] == '=') {
|
|
i++;
|
|
for(i2 = 0;
|
|
mimedefs[i + i2] != '\n' && mimedefs[i + i2] != '\0';
|
|
i2++) {
|
|
s[i2] = mimedefs[i + i2];
|
|
}
|
|
s[i2] = '\0';
|
|
return 1; /* SUCCESS! */
|
|
}
|
|
}
|
|
/* next item in list */
|
|
for(mimedefs += i;
|
|
*mimedefs != '\0' && *mimedefs != '\n' && *mimedefs != '='
|
|
&& *mimedefs != ' '; mimedefs++) ;
|
|
if (*mimedefs == ' ') {
|
|
mimedefs++;
|
|
}
|
|
} while(*mimedefs != '\0' && *mimedefs != '\n' && *mimedefs != '=');
|
|
/* next user-def */
|
|
for(; *mimedefs != '\0' && *mimedefs != '\n'; mimedefs++) ;
|
|
}
|
|
/* shorten segment */
|
|
next = strchr(fil + 1, '/');
|
|
if (next == NULL) {
|
|
/* ext tests */
|
|
next = strchr(fil + 1, '.');
|
|
}
|
|
fil = next;
|
|
} while(fil != NULL);
|
|
}
|
|
#else
|
|
if (*buffer) {
|
|
char BIGSTK search[1024];
|
|
char *detect;
|
|
|
|
sprintf(search, "\n%s=", ext); // php=text/html
|
|
detect = strstr(*buffer, search);
|
|
if (!detect) {
|
|
sprintf(search, "\n%s\n", ext); // php\ncgi=text/html
|
|
detect = strstr(*buffer, search);
|
|
}
|
|
if (detect) {
|
|
detect = strchr(detect, '=');
|
|
if (detect) {
|
|
detect++;
|
|
if (s) {
|
|
char *a;
|
|
|
|
a = strchr(detect, '\n');
|
|
if (a) {
|
|
strncatbuff(s, detect, (int) (a - detect));
|
|
}
|
|
}
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// give the file extension for a mime type (e.g. "image/gif" -> "gif")
|
|
// returns 1 if an extension was found (and written to s), 0 otherwise
|
|
int give_mimext(char *s, size_t ssize, const char *st) {
|
|
int ok = 0;
|
|
int j = 0;
|
|
|
|
st = hts_effective_mime(st); /* no declared type: derive an html ext */
|
|
s[0] = '\0';
|
|
while((!ok) && (strnotempty(hts_mime[j][1]))) {
|
|
if (strfield2(hts_mime[j][0], st)) {
|
|
if (hts_mime[j][1][0] != '*') { // a match exists
|
|
strlcpybuff(s, hts_mime[j][1], ssize);
|
|
ok = 1;
|
|
}
|
|
}
|
|
j++;
|
|
}
|
|
// wrap "x" mimetypes, such as:
|
|
// application/x-mp3
|
|
// or
|
|
// application/mp3
|
|
if (!ok) {
|
|
int p;
|
|
const char *a = NULL;
|
|
|
|
if ((p = strfield(st, "application/x-")))
|
|
a = st + p;
|
|
else if ((p = strfield(st, "application/")))
|
|
a = st + p;
|
|
if (a) {
|
|
if ((int) strlen(a) >= 1) {
|
|
if ((int) strlen(a) <= 4) {
|
|
strlcpybuff(s, a, ssize);
|
|
ok = 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return ok;
|
|
}
|
|
|
|
// extension connue?..
|
|
// 0 : non
|
|
// 1 : oui
|
|
// 2 : html
|
|
HTSEXT_API int is_knowntype(httrackp * opt, const char *fil) {
|
|
char catbuff[CATBUFF_SIZE];
|
|
const char *ext;
|
|
int j = 0;
|
|
|
|
if (!fil)
|
|
return 0;
|
|
ext = get_ext(catbuff, sizeof(catbuff), fil);
|
|
while(strnotempty(hts_mime[j][1])) {
|
|
if (strfield2(hts_mime[j][1], ext)) {
|
|
if (is_html_mime_type(hts_mime[j][0]))
|
|
return 2;
|
|
else
|
|
return 1;
|
|
}
|
|
j++;
|
|
}
|
|
|
|
// Known by user?
|
|
return (is_userknowntype(opt, fil));
|
|
}
|
|
|
|
// known type?..
|
|
// 0 : no
|
|
// 1 : yes
|
|
// 2 : html
|
|
// setdefs : set mime buffer:
|
|
// file=(char*) "asp=text/html\nphp=text/html\n"
|
|
HTSEXT_API int is_userknowntype(httrackp * opt, const char *fil) {
|
|
char BIGSTK mime[1024];
|
|
|
|
if (!fil)
|
|
return 0;
|
|
if (!strnotempty(fil))
|
|
return 0;
|
|
mime[0] = '\0';
|
|
get_userhttptype(opt, mime, fil);
|
|
if (!strnotempty(mime))
|
|
return 0;
|
|
else if (is_html_mime_type(mime))
|
|
return 2;
|
|
else
|
|
return 1;
|
|
}
|
|
|
|
// page dynamique?
|
|
// is_dyntype(get_ext("foo.asp"))
|
|
HTSEXT_API hts_boolean is_dyntype(const char *fil) {
|
|
int j = 0;
|
|
|
|
if (!fil)
|
|
return 0;
|
|
if (!strnotempty(fil))
|
|
return 0;
|
|
while(strnotempty(hts_ext_dynamic[j])) {
|
|
if (strfield2(hts_ext_dynamic[j], fil)) {
|
|
return 1;
|
|
}
|
|
j++;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// types critiques qui ne doivent pas être changés car renvoyés par des serveurs qui ne
|
|
// connaissent pas le type
|
|
hts_boolean may_unknown(httrackp *opt, const char *st) {
|
|
int j = 0;
|
|
|
|
// types média
|
|
if (may_be_hypertext_mime(opt, st, "")) {
|
|
return 1;
|
|
}
|
|
while(strnotempty(hts_mime_keep[j])) {
|
|
if (strfield2(hts_mime_keep[j], st)) { // trouvé
|
|
return 1;
|
|
}
|
|
j++;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* returns 1 if the mime/filename seems to be bogus because of badly recognized multiple extension
|
|
; such as "application/x-wais-source" for "httrack-3.42-1.el5.src.rpm"
|
|
reported by Hippy Dave 08/2008 (3.43) */
|
|
int may_bogus_multiple(httrackp * opt, const char *mime, const char *filename) {
|
|
int j;
|
|
|
|
for(j = 0; strnotempty(hts_mime_bogus_multiple[j]); j++) {
|
|
if (strfield2(hts_mime_bogus_multiple[j], mime)) { /* found mime type in suspicious list */
|
|
char ext[64];
|
|
|
|
if (give_mimext(ext, sizeof(ext),
|
|
mime)) { /* we have an extension for that */
|
|
const size_t ext_size = strlen(ext);
|
|
const char *file = strrchr(filename, '/'); /* fetch terminal filename */
|
|
|
|
if (file != NULL) {
|
|
int i;
|
|
|
|
for(i = 0; file[i] != 0; i++) {
|
|
if (i > 0 && file[i - 1] == '.'
|
|
&& strncasecmp(&file[i], ext, ext_size) == 0
|
|
&& (file[i + ext_size] == 0 || file[i + ext_size] == '.'
|
|
|| file[i + ext_size] == '?')) {
|
|
return 1; /* is ambiguous */
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* filename extension should not be changed because potentially bogus ; replaces may_unknown() (3.43) */
|
|
int may_unknown2(httrackp * opt, const char *mime, const char *filename) {
|
|
int ret = may_unknown(opt, mime);
|
|
|
|
if (ret == 0) {
|
|
ret = may_bogus_multiple(opt, mime, filename);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
// -- File utilities
|
|
|
|
// pretty print for i/o
// Writes buff to fp, each line being preceded by prefix and ended by "\r\n":
// CR bytes are dropped, every LF is written as CRLF, and prefix is emitted
// before the first character of each non-empty line.
void fprintfio(FILE * fp, const char *buff, const char *prefix) {
  int at_line_start = 1;
  const char *p;

  for (p = buff; *p != '\0'; p++) {
    if (*p == 13) {
      continue;
    }
    if (*p == 10) {
      fprintf(fp, "\r\n");
      at_line_start = 1;
      continue;
    }
    if (at_line_start) {
      fprintf(fp, "%s", prefix);
      at_line_start = 0;
    }
    fputc(*p, fp);
  }
}
|
|
|
|
/* Le fichier existe-t-il? (ou est-il accessible?) */
|
|
/* Note: NOT utf-8 */
|
|
/* Note: preserve errno */
|
|
int fexist(const char *s) {
|
|
char catbuff[CATBUFF_SIZE];
|
|
const int err = errno;
|
|
struct stat st;
|
|
|
|
memset(&st, 0, sizeof(st));
|
|
if (stat(fconv(catbuff, sizeof(catbuff), s), &st) == 0) {
|
|
if (S_ISREG(st.st_mode)) {
|
|
return 1;
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
errno = err;
|
|
return 0;
|
|
}
|
|
|
|
/* Le fichier existe-t-il? (ou est-il accessible?) */
|
|
/* Note: utf-8 */
|
|
/* Note: preserve errno */
|
|
int fexist_utf8(const char *s) {
|
|
char catbuff[CATBUFF_SIZE];
|
|
const int err = errno;
|
|
STRUCT_STAT st;
|
|
|
|
memset(&st, 0, sizeof(st));
|
|
if (STAT(fconv(catbuff, sizeof(catbuff), s), &st) == 0) {
|
|
if (S_ISREG(st.st_mode)) {
|
|
return 1;
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
errno = err;
|
|
return 0;
|
|
}
|
|
|
|
/* Size of a file, -1 if it does not exist */
/* (-1 is also returned for an empty name, and for anything that is not a regular file) */
/* Note: NOT utf-8 */
off_t fsize(const char *s) {
  struct stat st;

  if (!strnotempty(s)) {        // empty name: error
    return -1;
  }
  if (stat(s, &st) != 0) {
    return -1;
  }
  if (!S_ISREG(st.st_mode)) {
    return -1;
  }
  return st.st_size;
}
|
|
|
|
/* Taille d'un fichier, -1 si n'existe pas */
|
|
/* Note: utf-8 */
|
|
off_t fsize_utf8(const char *s) {
|
|
STRUCT_STAT st;
|
|
|
|
if (!strnotempty(s)) // nom vide: erreur
|
|
return -1;
|
|
if (STAT(s, &st) == 0 && S_ISREG(st.st_mode)) {
|
|
return st.st_size;
|
|
} else {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/*
 * Size of an open stream: seeks to the end of fp to read the offset, then
 * restores the previous position (best effort).
 * Returns -1 when fp is NULL, when the current position cannot be read, or
 * when the stream cannot be positioned at its end (non-seekable stream).
 */
off_t fpsize(FILE * fp) {
  off_t oldpos, size;

  if (!fp)
    return -1;
#ifdef HTS_FSEEKO
  oldpos = ftello(fp);
  /* without a valid position to come back to, do not move the stream */
  if (oldpos < 0 || fseeko(fp, 0, SEEK_END) != 0)
    return -1;
  size = ftello(fp);
  (void) fseeko(fp, oldpos, SEEK_SET);
#else
  oldpos = ftell(fp);
  /* without a valid position to come back to, do not move the stream */
  if (oldpos < 0 || fseek(fp, 0, SEEK_END) != 0)
    return -1;
  size = ftell(fp);
  (void) fseek(fp, oldpos, SEEK_SET);
#endif
  return size;
}
|
|
|
|
/* root dir, with ending / */
|
|
typedef struct {
|
|
char path[1024 + 4];
|
|
int init;
|
|
} hts_rootdir_strc;
|
|
HTSEXT_API const char *hts_rootdir(char *file) {
|
|
static hts_rootdir_strc strc = { "", 0 };
|
|
if (file) {
|
|
if (!strc.init) {
|
|
strc.path[0] = '\0';
|
|
strc.init = 1;
|
|
if (strnotempty(file)) {
|
|
const size_t file_len = strlen(file);
|
|
char *a;
|
|
|
|
assertf(file_len < sizeof(strc.path));
|
|
strcpybuff(strc.path, file);
|
|
while((a = strrchr(strc.path, '\\')))
|
|
*a = '/';
|
|
if ((a = strrchr(strc.path, '/'))) {
|
|
*(a + 1) = '\0';
|
|
} else
|
|
strc.path[0] = '\0';
|
|
}
|
|
if (!strnotempty(strc.path)) {
|
|
if (getcwd(strc.path, sizeof(strc.path)) == NULL)
|
|
strc.path[0] = '\0';
|
|
else
|
|
strcatbuff(strc.path, "/");
|
|
}
|
|
}
|
|
return NULL;
|
|
} else if (strc.init)
|
|
return strc.path;
|
|
else
|
|
return "";
|
|
}
|
|
|
|
HTSEXT_API hts_stat_struct HTS_STAT;
|
|
|
|
//
|
|
// return number of downloadable bytes, depending on rate limiter
|
|
// see engine_stats() routine, too
|
|
// this routine works quite well for big files and regular ones, but apparently the rate limiter has
|
|
// some problems with very small files (rate too high)
|
|
LLint check_downloadable_bytes(int rate) {
|
|
if (rate > 0) {
|
|
TStamp time_now;
|
|
TStamp elapsed_useconds;
|
|
LLint bytes_transferred_during_period;
|
|
LLint left;
|
|
|
|
// get the older timer
|
|
int id_timer = (HTS_STAT.istat_idlasttimer + 1) % 2;
|
|
|
|
time_now = mtime_local();
|
|
elapsed_useconds = time_now - HTS_STAT.istat_timestart[id_timer];
|
|
// NO totally stupid - elapsed_useconds+=1000; // for the next second, too
|
|
bytes_transferred_during_period =
|
|
(HTS_STAT.HTS_TOTAL_RECV - HTS_STAT.istat_bytes[id_timer]);
|
|
|
|
left = ((rate * elapsed_useconds) / 1000) - bytes_transferred_during_period;
|
|
if (left <= 0)
|
|
left = 0;
|
|
|
|
return left;
|
|
} else
|
|
return TAILLE_BUFFER;
|
|
}
|
|
|
|
//
// 0 : OK
// 1 : slow down
#if 0
/* Compiled out. Returns 1 (slow down) when HTS_STAT.HTS_TOTAL_RECV_STATE is
   set, 0 otherwise. */
int HTS_TOTAL_RECV_CHECK(int var) {
  /* Former adaptive throttling, kept for reference:
     {
     if (HTS_STAT.HTS_TOTAL_RECV_STATE==3) {
     var = min(var,32);
     Sleep(250);
     } else if (HTS_STAT.HTS_TOTAL_RECV_STATE==2) {
     var = min(var,256);
     Sleep(100);
     } else {
     var/=2;
     if (var<=0) var=1;
     Sleep(50);
     }
     }
   */
  return HTS_STAT.HTS_TOTAL_RECV_STATE ? 1 : 0;
}
#endif
|
|
|
|
// Lecture dans buff de size octets au maximum en utilisant la socket r (structure htsblk)
|
|
// returns:
|
|
// >0 : data received
|
|
// == 0 : not yet data
|
|
// <0: error or no data: READ_ERROR, READ_EOF or READ_TIMEOUT
|
|
int hts_read(htsblk * r, char *buff, int size) {
|
|
int retour;
|
|
|
|
// return read(soc,buff,size);
|
|
if (r->is_file) {
|
|
#if HTS_WIDE_DEBUG
|
|
DEBUG_W("read(%p, %d, %d)\n" _(void *)buff _(int) size _(int) r->fp);
|
|
#endif
|
|
if (r->fp) {
|
|
retour = (int) fread(buff, 1, size, r->fp);
|
|
if (retour == 0) // can happen with directories (!)
|
|
retour = READ_ERROR;
|
|
} else
|
|
retour = READ_ERROR;
|
|
} else {
|
|
#if HTS_WIDE_DEBUG
|
|
DEBUG_W("recv(%d, %p, %d)\n" _(int) r->soc _(void *)buff _(int) size);
|
|
|
|
if (r->soc == INVALID_SOCKET)
|
|
printf("!!WIDE_DEBUG ERROR, soc==INVALID hts_read\n");
|
|
#endif
|
|
//HTS_TOTAL_RECV_CHECK(size); // Diminuer au besoin si trop de données reçues
|
|
#if HTS_USEOPENSSL
|
|
if (r->ssl) {
|
|
retour = SSL_read(r->ssl_con, buff, size);
|
|
if (retour <= 0) {
|
|
int err_code = SSL_get_error(r->ssl_con, retour);
|
|
|
|
if ((err_code == SSL_ERROR_WANT_READ)
|
|
|| (err_code == SSL_ERROR_WANT_WRITE)
|
|
) {
|
|
retour = 0; /* no data yet (ssl cache) */
|
|
} else if (err_code == SSL_ERROR_ZERO_RETURN) {
|
|
retour = READ_EOF; /* completed */
|
|
} else {
|
|
retour = READ_ERROR; /* eof or error */
|
|
}
|
|
}
|
|
} else {
|
|
#endif
|
|
retour = recv(r->soc, buff, size, 0);
|
|
if (retour == 0) {
|
|
retour = READ_EOF;
|
|
} else if (retour < 0) {
|
|
retour = READ_ERROR;
|
|
}
|
|
}
|
|
if (retour > 0) // compter flux entrant
|
|
HTS_STAT.HTS_TOTAL_RECV += retour;
|
|
#if HTS_USEOPENSSL
|
|
}
|
|
#endif
|
|
#if HTS_WIDE_DEBUG
|
|
DEBUG_W("recv/read done (%d bytes)\n" _(int) retour);
|
|
#endif
|
|
return retour;
|
|
}
|
|
|
|
// -- DNS cache management --
|
|
// 'RX98
|
|
|
|
// Free a DNS cache record (coucal value handler).
|
|
static void hts_cache_value_free(coucal_opaque arg, coucal_value value) {
|
|
void *record = value.ptr;
|
|
|
|
(void) arg;
|
|
freet(record);
|
|
}
|
|
|
|
// opt's DNS cache hashtable, created on first use. Records (t_dnscache*) are
|
|
// owned by the table and freed by hts_cache_value_free on coucal_delete.
|
|
coucal hts_cache(httrackp *opt) {
|
|
assertf(opt != NULL);
|
|
if (opt->state.dns_cache == NULL) {
|
|
coucal cache = coucal_new(0);
|
|
|
|
coucal_set_name(cache, "dns_cache");
|
|
coucal_value_set_value_handler(cache, hts_cache_value_free, NULL);
|
|
opt->state.dns_cache = cache;
|
|
}
|
|
assertf(opt->state.dns_cache != NULL);
|
|
return opt->state.dns_cache;
|
|
}
|
|
|
|
// MUST BE LOCKED (coucal is not internally serialized vs FTP/web threads)
|
|
// Look up iadr in the DNS cache, filling out[0..min(count,max)-1].
|
|
// Returns: -1 not yet tested; 0 negative-cached (not in DNS); >0 address count.
|
|
static int hts_ghbn_all(coucal cache, const char *const iadr,
|
|
SOCaddr *const out, const int max) {
|
|
void *ptr;
|
|
|
|
assertf(out != NULL);
|
|
assertf(iadr != NULL);
|
|
if (*iadr == '\0') {
|
|
return -1;
|
|
}
|
|
if (coucal_read_pvoid(cache, iadr, &ptr)) { // ok trouvé
|
|
const t_dnscache *const record = (const t_dnscache *) ptr;
|
|
int i;
|
|
|
|
assertf(record->host_count <= HTS_MAXADDRNUM);
|
|
for (i = 0; i < record->host_count && i < max; i++) {
|
|
assertf(record->host_length[i] <= sizeof(record->host_addr[i]));
|
|
SOCaddr_copyaddr2(out[i], record->host_addr[i], record->host_length[i]);
|
|
}
|
|
return record->host_count;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
#if HTS_INET6 != 0
|
|
/* Resolver backend currently in use. It starts as the libc pair
   (getaddrinfo/freeaddrinfo); the self-test swaps it to script DNS answers
   in-process through hts_dns_set_resolver_backend(). */
static const hts_resolver_backend hts_resolver_libc = {getaddrinfo,
                                                       freeaddrinfo};
static const hts_resolver_backend *hts_resolver = &hts_resolver_libc;

/* Select the resolver backend; NULL restores the libc one. */
void hts_dns_set_resolver_backend(const hts_resolver_backend *backend) {
  if (backend == NULL) {
    hts_resolver = &hts_resolver_libc;
  } else {
    hts_resolver = backend;
  }
}
|
|
|
|
/* Debug/test hook: HTTRACK_DEBUG_RESOLVE="host:ip[,ip...]" pins the resolution
|
|
of `host` to the listed addresses (curl --resolve style), so the connect
|
|
fallback can be exercised deterministically (a dead address first, a live one
|
|
next). Any other host resolves normally. Below: an addrinfo backend that owns
|
|
its chain (its own freeaddrinfo), so a synthesized and a delegated result
|
|
free the same way. */
|
|
|
|
/* Deep-copy a libc addrinfo chain into our own allocations. */
|
|
static struct addrinfo *resolver_dup_chain(const struct addrinfo *src) {
|
|
struct addrinfo *head = NULL, *tail = NULL;
|
|
|
|
for (; src != NULL; src = src->ai_next) {
|
|
struct addrinfo *const ai = calloct(1, sizeof(*ai));
|
|
|
|
ai->ai_family = src->ai_family;
|
|
ai->ai_socktype = src->ai_socktype;
|
|
ai->ai_protocol = src->ai_protocol;
|
|
ai->ai_addrlen = src->ai_addrlen;
|
|
ai->ai_addr = malloct(src->ai_addrlen);
|
|
memcpy(ai->ai_addr, src->ai_addr, src->ai_addrlen);
|
|
if (head == NULL)
|
|
head = ai;
|
|
else
|
|
tail->ai_next = ai;
|
|
tail = ai;
|
|
}
|
|
return head;
|
|
}
|
|
|
|
/* Build one addrinfo node from an IPv4/IPv6 literal, or NULL if it does not
|
|
parse or is filtered out by want_family (AF_INET/AF_INET6/PF_UNSPEC). */
|
|
static struct addrinfo *resolver_make_ai(const char *ip, int want_family) {
|
|
struct addrinfo *ai;
|
|
|
|
if (strchr(ip, ':') != NULL) { // IPv6 literal
|
|
struct sockaddr_in6 sa6;
|
|
|
|
if (want_family != PF_UNSPEC && want_family != AF_INET6)
|
|
return NULL;
|
|
memset(&sa6, 0, sizeof(sa6));
|
|
if (inet_pton(AF_INET6, ip, &sa6.sin6_addr) != 1)
|
|
return NULL;
|
|
sa6.sin6_family = AF_INET6;
|
|
ai = calloct(1, sizeof(*ai));
|
|
ai->ai_family = AF_INET6;
|
|
ai->ai_addrlen = sizeof(sa6);
|
|
ai->ai_addr = malloct(sizeof(sa6));
|
|
memcpy(ai->ai_addr, &sa6, sizeof(sa6));
|
|
} else { // IPv4 literal
|
|
struct sockaddr_in sa;
|
|
|
|
if (want_family != PF_UNSPEC && want_family != AF_INET)
|
|
return NULL;
|
|
memset(&sa, 0, sizeof(sa));
|
|
if (inet_pton(AF_INET, ip, &sa.sin_addr) != 1)
|
|
return NULL;
|
|
sa.sin_family = AF_INET;
|
|
ai = calloct(1, sizeof(*ai));
|
|
ai->ai_family = AF_INET;
|
|
ai->ai_addrlen = sizeof(sa);
|
|
ai->ai_addr = malloct(sizeof(sa));
|
|
memcpy(ai->ai_addr, &sa, sizeof(sa));
|
|
}
|
|
return ai;
|
|
}
|
|
|
|
static void override_freeaddrinfo(struct addrinfo *res) {
|
|
while (res != NULL) {
|
|
struct addrinfo *const next = res->ai_next;
|
|
|
|
freet(res->ai_addr);
|
|
freet(res);
|
|
res = next;
|
|
}
|
|
}
|
|
|
|
/* getaddrinfo() replacement honouring HTTRACK_DEBUG_RESOLVE="host:ip[,ip...]".
   When `node` equals the host part of the spec, the result is the chain of
   listed literals accepted by hints->ai_family (EAI_NONAME if none is
   usable). Any other node is resolved by libc and deep-copied, so every chain
   handed out is freed by override_freeaddrinfo().
   The address list is split by hand rather than with strtok(), which keeps
   hidden static state and is therefore unsafe when several threads resolve
   at once. At most sizeof(buf)-1 bytes of the list are considered. */
static int override_getaddrinfo(const char *node, const char *service,
                                const struct addrinfo *hints,
                                struct addrinfo **res) {
  const char *const spec = getenv("HTTRACK_DEBUG_RESOLVE");
  const int want = (hints != NULL) ? hints->ai_family : PF_UNSPEC;
  const char *colon;

  *res = NULL;
  if (spec != NULL && node != NULL && (colon = strchr(spec, ':')) != NULL &&
      (size_t) (colon - spec) == strlen(node) &&
      strncmp(spec, node, colon - spec) == 0) {
    struct addrinfo *head = NULL, *tail = NULL;
    char buf[256];
    char *p, *next;

    buf[0] = '\0';
    strncatbuff(buf, colon + 1, sizeof(buf) - 1);
    for (p = buf; p != NULL; p = next) {
      struct addrinfo *ai;
      char *const comma = strchr(p, ',');

      /* cut the current item and remember where the next one starts */
      if (comma != NULL) {
        *comma = '\0';
        next = comma + 1;
      } else {
        next = NULL;
      }
      if (*p == '\0')           /* empty item (",,"): ignore */
        continue;
      ai = resolver_make_ai(p, want);
      if (ai != NULL) {
        if (head == NULL)
          head = ai;
        else
          tail->ai_next = ai;
        tail = ai;
      }
    }
    if (head == NULL)
      return EAI_NONAME;
    *res = head;
    return 0;
  }

  /* not overridden: delegate to libc, copying into our owned format */
  {
    struct addrinfo *sys = NULL;
    int gerr = getaddrinfo(node, service, hints, &sys);

    if (gerr != 0)
      return gerr;
    *res = resolver_dup_chain(sys);
    freeaddrinfo(sys);
    return 0;
  }
}
|
|
|
|
/* Backend implementing the HTTRACK_DEBUG_RESOLVE override. */
static const hts_resolver_backend hts_resolver_override = {
    override_getaddrinfo, override_freeaddrinfo};

/* On the first call only: switch to the override backend when
   HTTRACK_DEBUG_RESOLVE is set, unless another backend was already installed
   (self-test). Later calls do nothing. */
static void hts_resolver_check_env(void) {
  static int checked = 0;

  if (checked)
    return;
  checked = 1;
  if (hts_resolver != &hts_resolver_libc)
    return;                     /* a backend was set explicitly: keep it */
  if (getenv("HTTRACK_DEBUG_RESOLVE") != NULL)
    hts_resolver = &hts_resolver_override;
}
|
|
#endif
|
|
|
|
// Resolve hostname into up to max addresses (resolver/RFC 6724 order), no
|
|
// cache. Returns the count copied into out[0..count-1]; 0 = does not resolve.
|
|
static int hts_dns_resolve_nocache_list_(const char *const hostname,
|
|
SOCaddr *const out, const int max,
|
|
const char **error) {
|
|
int count = 0;
|
|
|
|
#if HTS_INET6==0
|
|
/* IPv4 resolver */
|
|
struct hostent *const hp = gethostbyname(hostname);
|
|
|
|
if (hp != NULL) {
|
|
char **h;
|
|
|
|
for (h = hp->h_addr_list; count < max && h != NULL && *h != NULL; h++) {
|
|
SOCaddr_clear(out[count]);
|
|
SOCaddr_copyaddr2(out[count], *h, hp->h_length);
|
|
if (SOCaddr_is_valid(out[count]))
|
|
count++;
|
|
}
|
|
}
|
|
#else
|
|
/* IPv6 resolver */
|
|
struct addrinfo *res = NULL, *cur;
|
|
struct addrinfo hints;
|
|
int gerr;
|
|
|
|
hts_resolver_check_env();
|
|
memset(&hints, 0, sizeof(hints));
|
|
if (IPV6_resolver == 1) // V4 only (for bogus V6 entries)
|
|
hints.ai_family = PF_INET;
|
|
else if (IPV6_resolver == 2) // V6 only (for testing V6 only)
|
|
hints.ai_family = PF_INET6;
|
|
else // V4 + V6
|
|
hints.ai_family = PF_UNSPEC;
|
|
hints.ai_socktype = SOCK_STREAM;
|
|
hints.ai_protocol = IPPROTO_TCP;
|
|
if ((gerr = hts_resolver->getaddrinfo(hostname, NULL, &hints, &res)) == 0) {
|
|
for (cur = res; cur != NULL && count < max; cur = cur->ai_next) {
|
|
if (cur->ai_addr != NULL && cur->ai_addrlen != 0) {
|
|
SOCaddr_clear(out[count]);
|
|
SOCaddr_copyaddr2(out[count], cur->ai_addr, cur->ai_addrlen);
|
|
if (SOCaddr_is_valid(out[count]))
|
|
count++;
|
|
}
|
|
}
|
|
} else if (error != NULL) {
|
|
*error = gai_strerror(gerr);
|
|
}
|
|
if (res) {
|
|
hts_resolver->freeaddrinfo(res);
|
|
}
|
|
#endif
|
|
|
|
return count;
|
|
}
|
|
|
|
// Strip [] around a literal IPv6 ([3ffe:b80:1234:1::1]) the resolver won't
|
|
// take, then resolve into a list. Returns the count.
|
|
static int hts_dns_resolve_nocache_list(const char *const hostname,
|
|
SOCaddr *const out, const int max,
|
|
const char **error) {
|
|
if (!strnotempty(hostname) || max <= 0) {
|
|
return 0;
|
|
}
|
|
if ((hostname[0] == '[') && (hostname[strlen(hostname) - 1] == ']')) {
|
|
size_t size = strlen(hostname);
|
|
char *copy = malloct(size + 1);
|
|
int count;
|
|
|
|
assertf(copy != NULL);
|
|
copy[0] = '\0';
|
|
strncat(copy, hostname + 1, size - 2);
|
|
count = hts_dns_resolve_nocache_list_(copy, out, max, error);
|
|
freet(copy);
|
|
return count;
|
|
} else {
|
|
return hts_dns_resolve_nocache_list_(hostname, out, max, error);
|
|
}
|
|
}
|
|
|
|
/* Uncached resolution of hostname into a single address.
   *addr is cleared first. Returns addr when one valid address was obtained,
   NULL otherwise; `error` is forwarded to the list resolver. */
HTSEXT_API SOCaddr *hts_dns_resolve_nocache2(const char *const hostname,
                                             SOCaddr *const addr,
                                             const char **error) {
  SOCaddr_clear(*addr);
  if (hts_dns_resolve_nocache_list(hostname, addr, 1, error) <= 0) {
    return NULL;
  }
  if (!SOCaddr_is_valid(*addr)) {
    return NULL;
  }
  return addr;
}
|
|
|
|
/* Same as hts_dns_resolve_nocache2(), without error reporting. */
HTSEXT_API SOCaddr* hts_dns_resolve_nocache(const char *const hostname, SOCaddr *const addr) {
  SOCaddr *const resolved = hts_dns_resolve_nocache2(hostname, addr, NULL);

  return resolved;
}
|
|
|
|
HTSEXT_API int check_hostname_dns(const char *const hostname) {
|
|
SOCaddr buffer;
|
|
return hts_dns_resolve_nocache(hostname, &buffer) != NULL;
|
|
}
|
|
|
|
// Needs locking
|
|
// Internal DNS cache. Fill out[0..count-1] with up to max addresses for _iadr,
|
|
// resolving (and caching the full list) on a miss. Returns the count.
|
|
static int hts_dns_resolve_list_(httrackp *opt, const char *_iadr,
|
|
SOCaddr *const out, const int max,
|
|
const char **error) {
|
|
char BIGSTK iadr[HTS_URLMAXSIZE * 2];
|
|
coucal cache = hts_cache(opt); // le cache dns
|
|
int count;
|
|
|
|
assertf(opt != NULL);
|
|
assertf(_iadr != NULL);
|
|
assertf(out != NULL);
|
|
|
|
strcpybuff(iadr, jump_identification_const(_iadr));
|
|
// couper éventuel :
|
|
{
|
|
char *a;
|
|
|
|
if ((a = jump_toport(iadr)))
|
|
*a = '\0';
|
|
}
|
|
|
|
/* get IP from the dns cache */
|
|
count = hts_ghbn_all(cache, iadr, out, max);
|
|
if (count >= 0) { // cache hit (0 == negative-cached)
|
|
return count;
|
|
} else { // non présent dans le cache dns, tester
|
|
SOCaddr resolved[HTS_MAXADDRNUM];
|
|
t_dnscache *record;
|
|
int i;
|
|
|
|
#if DEBUGDNS
|
|
printf("resolving (not cached) %s\n", iadr);
|
|
#endif
|
|
|
|
count = hts_dns_resolve_nocache_list(iadr, resolved, HTS_MAXADDRNUM, error);
|
|
|
|
#if HTS_WIDE_DEBUG
|
|
DEBUG_W("gethostbyname done\n");
|
|
#endif
|
|
|
|
/* attempt to store new entry (coucal owns it and dups the host key) */
|
|
record = malloct(sizeof(t_dnscache));
|
|
if (record != NULL) {
|
|
memset(record, 0, sizeof(*record));
|
|
record->host_count = count;
|
|
for (i = 0; i < count; i++) {
|
|
record->host_length[i] = SOCaddr_size(resolved[i]);
|
|
assertf(record->host_length[i] <= sizeof(record->host_addr[i]));
|
|
memcpy(record->host_addr[i], &SOCaddr_sockaddr(resolved[i]),
|
|
record->host_length[i]);
|
|
}
|
|
coucal_add_pvoid(cache, iadr, record);
|
|
}
|
|
|
|
/* copy result to caller (cache store may have failed; result still valid)
|
|
*/
|
|
for (i = 0; i < count && i < max; i++) {
|
|
SOCaddr_copy_SOCaddr(out[i], resolved[i]);
|
|
}
|
|
return count;
|
|
} // retour hp du cache
|
|
}
|
|
|
|
/* Cached resolution of iadr into at most max addresses, performed while
   holding opt->state.lock. Returns the address count; 0 for an empty name or
   max <= 0 (the lock is not taken in that case). */
int hts_dns_resolve_all(httrackp *opt, const char *iadr, SOCaddr *out, int max,
                        const char **error) {
  int count = 0;

  if (strnotempty(iadr) && max > 0) {
    hts_mutexlock(&opt->state.lock);
    count = hts_dns_resolve_list_(opt, iadr, out, max, error);
    hts_mutexrelease(&opt->state.lock);
  }
  return count;
}
|
|
|
|
/* Cached resolution of _iadr into a single address.
   *addr is cleared first. Returns addr when one valid address was obtained,
   NULL otherwise; `error` is forwarded to hts_dns_resolve_all(). */
SOCaddr *hts_dns_resolve2(httrackp *opt, const char *_iadr, SOCaddr *const addr,
                          const char **error) {
  SOCaddr_clear(*addr);
  if (hts_dns_resolve_all(opt, _iadr, addr, 1, error) <= 0) {
    return NULL;
  }
  if (!SOCaddr_is_valid(*addr)) {
    return NULL;
  }
  return addr;
}
|
|
|
|
/* Same as hts_dns_resolve2(), without error reporting. */
SOCaddr* hts_dns_resolve(httrackp * opt, const char *_iadr, SOCaddr *const addr) {
  SOCaddr *const resolved = hts_dns_resolve2(opt, _iadr, addr, NULL);

  return resolved;
}
|
|
|
|
// --- malloc() tracing ---
|
|
#ifdef HTS_TRACE_MALLOC
|
|
//#define htsLocker(A, N) htsLocker(A, N)
|
|
#define htsLocker(A, N) do {} while(0)
|
|
static mlink trmalloc = { NULL, 0, 0, NULL };
|
|
|
|
static int trmalloc_id = 0;
|
|
static htsmutex *mallocMutex = NULL;
|
|
static void hts_meminit(void) {
|
|
//if (mallocMutex == NULL) {
|
|
// mallocMutex = calloc(sizeof(*mallocMutex), 1);
|
|
// htsLocker(mallocMutex, -999);
|
|
//}
|
|
}
|
|
void *hts_malloc(size_t len) {
|
|
void *adr;
|
|
|
|
hts_meminit();
|
|
htsLocker(mallocMutex, 1);
|
|
assertf(len > 0);
|
|
adr = hts_xmalloc(len, 0);
|
|
htsLocker(mallocMutex, 0);
|
|
return adr;
|
|
}
|
|
void *hts_calloc(size_t len, size_t len2) {
|
|
void *adr;
|
|
|
|
hts_meminit();
|
|
assertf(len > 0);
|
|
assertf(len2 > 0);
|
|
htsLocker(mallocMutex, 1);
|
|
adr = hts_xmalloc(len, len2);
|
|
htsLocker(mallocMutex, 0);
|
|
memset(adr, 0, len * len2);
|
|
return adr;
|
|
}
|
|
/* Traced strdup(): returns a copy of str allocated with hts_malloc(); a NULL
   str is duplicated as the empty string. */
void *hts_strdup(char *str) {
  const char *const text = (str != NULL) ? str : "";
  char *adr = (char *) hts_malloc(strlen(text) + 1);

  assertf(adr != NULL);
  strcpy(adr, text);
  return adr;
}
|
|
void *hts_xmalloc(size_t len, size_t len2) {
|
|
mlink *lnk = (mlink *) calloc(1, sizeof(mlink));
|
|
|
|
assertf(lnk != NULL);
|
|
assertf(len > 0);
|
|
assertf(len2 >= 0);
|
|
if (lnk) {
|
|
void *r = NULL;
|
|
int size, bsize = sizeof(t_htsboundary);
|
|
|
|
if (len2)
|
|
size = len * len2;
|
|
else
|
|
size = len;
|
|
size += ((bsize - (size % bsize)) % bsize); /* check alignement */
|
|
r = malloc(size + bsize * 2);
|
|
assertf(r != NULL);
|
|
if (r) {
|
|
*((t_htsboundary *) ((char *) r))
|
|
= *((t_htsboundary *) ((char *) r + size + bsize))
|
|
= htsboundary;
|
|
((char *) r) += bsize; /* boundary */
|
|
lnk->adr = r;
|
|
lnk->len = size;
|
|
lnk->id = trmalloc_id++;
|
|
lnk->next = trmalloc.next;
|
|
trmalloc.next = lnk;
|
|
return r;
|
|
} else {
|
|
free(lnk);
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
void hts_free(void *adr) {
|
|
mlink *lnk = &trmalloc;
|
|
int bsize = sizeof(t_htsboundary);
|
|
|
|
assertf(adr != NULL);
|
|
if (!adr) {
|
|
return;
|
|
}
|
|
htsLocker(mallocMutex, 1);
|
|
while(lnk->next != NULL) {
|
|
if (lnk->next->adr == adr) {
|
|
mlink *blk_free = lnk->next;
|
|
|
|
assertf(blk_free->id != -1);
|
|
assertf(*((t_htsboundary *) ((char *) adr - bsize)) == htsboundary);
|
|
assertf(*((t_htsboundary *) ((char *) adr + blk_free->len)) ==
|
|
htsboundary);
|
|
lnk->next = lnk->next->next;
|
|
free((void *) blk_free);
|
|
//blk_free->id=-1;
|
|
free((char *) adr - bsize);
|
|
htsLocker(mallocMutex, 0);
|
|
return;
|
|
}
|
|
lnk = lnk->next;
|
|
assertf(lnk->next != NULL);
|
|
}
|
|
free(adr);
|
|
htsLocker(mallocMutex, 0);
|
|
}
|
|
void *hts_realloc(void *adr, size_t len) {
|
|
int bsize = sizeof(t_htsboundary);
|
|
|
|
len += ((bsize - (len % bsize)) % bsize); /* check alignement */
|
|
if (adr != NULL) {
|
|
mlink *lnk = &trmalloc;
|
|
|
|
htsLocker(mallocMutex, 1);
|
|
while(lnk->next != NULL) {
|
|
if (lnk->next->adr == adr) {
|
|
{
|
|
mlink *blk_free = lnk->next;
|
|
|
|
assertf(blk_free->id != -1);
|
|
assertf(*((t_htsboundary *) ((char *) adr - bsize)) == htsboundary);
|
|
assertf(*((t_htsboundary *) ((char *) adr + blk_free->len)) ==
|
|
htsboundary);
|
|
}
|
|
adr = realloc((char *) adr - bsize, len + bsize * 2);
|
|
assertf(adr != NULL);
|
|
lnk->next->adr = (char *) adr + bsize;
|
|
lnk->next->len = len;
|
|
*((t_htsboundary *) ((char *) adr))
|
|
= *((t_htsboundary *) ((char *) adr + len + bsize))
|
|
= htsboundary;
|
|
htsLocker(mallocMutex, 0);
|
|
return (char *) adr + bsize;
|
|
}
|
|
lnk = lnk->next;
|
|
assertf(lnk->next != NULL);
|
|
}
|
|
htsLocker(mallocMutex, 0);
|
|
}
|
|
return hts_malloc(len);
|
|
}
|
|
mlink *hts_find(char *adr) {
|
|
char *stkframe = (char *) &stkframe;
|
|
mlink *lnk = &trmalloc;
|
|
int bsize = sizeof(t_htsboundary);
|
|
|
|
assertf(adr != NULL);
|
|
if (!adr) {
|
|
return NULL;
|
|
}
|
|
htsLocker(mallocMutex, 1);
|
|
while(lnk->next != NULL) {
|
|
if (adr >= lnk->next->adr && adr <= lnk->next->adr + lnk->next->len) { /* found */
|
|
htsLocker(mallocMutex, 0);
|
|
return lnk->next;
|
|
}
|
|
lnk = lnk->next;
|
|
}
|
|
htsLocker(mallocMutex, 0);
|
|
{
|
|
int depl = (int) (adr - stkframe);
|
|
|
|
if (depl < 0)
|
|
depl = -depl;
|
|
//assertf(depl < 512000); /* near the stack frame.. doesn't look like malloc but stack variable */
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
// check the malloct() and calloct() trace stack
|
|
void hts_freeall(void) {
|
|
int bsize = sizeof(t_htsboundary);
|
|
|
|
while(trmalloc.next) {
|
|
#if MEMDEBUG
|
|
printf("* block %d\t not released: at %d\t (%d\t bytes)\n",
|
|
trmalloc.next->id, trmalloc.next->adr, trmalloc.next->len);
|
|
#endif
|
|
if (trmalloc.next->id != -1) {
|
|
free((char *) trmalloc.next->adr - bsize);
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// -- miscellaneous --
|
|
|
|
// Split fullpath into the parent path (with its ending '/') and the project
// name (last component).
// fullpath is patched in place too: one trailing '/' or '\' is removed and,
// when more than one character remains, every '\' becomes '/'.
// path and pname are left empty when fullpath is empty or at most one
// character long after the trailing separator is removed.
void cut_path(char *fullpath, char *path, size_t path_size, char *pname,
              size_t pname_size) {
  size_t len;
  char *cursor;

  path[0] = pname[0] = '\0';
  if (!strnotempty(fullpath))
    return;

  len = strlen(fullpath);
  if (fullpath[len - 1] == '/' || fullpath[len - 1] == '\\')
    fullpath[--len] = '\0';
  if (len <= 1)
    return;

  // replace every backslash by /
  for (cursor = fullpath; (cursor = strchr(cursor, '\\')) != NULL;)
    *cursor = '/';

  // walk back from the last but one character to the previous separator
  cursor = fullpath + len - 2;
  while (cursor > fullpath && *cursor != '/')
    cursor--;
  if (*cursor == '/')
    cursor++;
  strlcpybuff(pname, cursor, pname_size);
  strlncatbuff(path, fullpath, path_size, (size_t) (cursor - fullpath));
}
|
|
|
|
// -- FTP protocol support --
|
|
|
|
/* FTP support is reported as available (1) on every platform; the Windows
   and non-Windows variants were identical. */
int ftp_available(void) {
  const int available = 1;      // ok!

  return available;
}
|
|
|
|
static void hts_debug_log_print(const char *format, ...);
|
|
|
|
static int hts_dgb_init = 0;
|
|
static FILE *hts_dgb_init_fp = NULL;
|
|
HTSEXT_API void hts_debug(int level) {
|
|
hts_dgb_init = level;
|
|
if (hts_dgb_init > 0) {
|
|
hts_debug_log_print("hts_debug() called");
|
|
}
|
|
}
|
|
|
|
static FILE *hts_dgb_(void) {
|
|
if (hts_dgb_init_fp == NULL) {
|
|
if ((hts_dgb_init & 0x80) == 0) {
|
|
hts_dgb_init_fp = stderr;
|
|
} else {
|
|
hts_dgb_init_fp = FOPEN("hts-debug.txt", "wb");
|
|
if (hts_dgb_init_fp != NULL) {
|
|
fprintf(hts_dgb_init_fp, "* Creating file\r\n");
|
|
}
|
|
}
|
|
}
|
|
return hts_dgb_init_fp;
|
|
}
|
|
|
|
static void hts_debug_log_print(const char *format, ...) {
|
|
if (hts_dgb_init > 0) {
|
|
const int error = errno;
|
|
FILE *const fp = hts_dgb_();
|
|
va_list args;
|
|
|
|
assertf(format != NULL);
|
|
va_start(args, format);
|
|
(void) vfprintf(fp, format, args);
|
|
va_end(args);
|
|
fputs("\n", fp);
|
|
fflush(fp);
|
|
errno = error;
|
|
}
|
|
}
|
|
|
|
HTSEXT_API const char* hts_version(void) {
|
|
return HTTRACK_VERSIONID;
|
|
}
|
|
|
|
/* Windows builds only: returns 1 when the OpenSSL version string denotes
   1.0.1 or 1.0.1a..1.0.1f (CVE-2014-0160; "OpenSSL 1.0.1g 7 Apr 2014" is the
   first release accepted). Always 0 on other platforms or for a NULL
   version. */
static int ssl_vulnerable(const char *version) {
#ifdef _WIN32
  static const char *const match = "OpenSSL 1.0.1";
  const size_t match_len = strlen(match);

  if (version == NULL || strncmp(version, match, match_len) != 0) {
    return 0;
  }
  /* character following "1.0.1": patch letter, or ' ' for plain 1.0.1 */
  switch (version[match_len]) {
  case ' ':
  case 'a':
  case 'b':
  case 'c':
  case 'd':
  case 'e':
  case 'f':
    return 1;
  default:
    return 0;
  }
#else
  (void) version;
  return 0;
#endif
}
|
|
|
|
/* user abort callback */
|
|
htsErrorCallback htsCallbackErr = NULL;
|
|
|
|
HTSEXT_API void hts_set_error_callback(htsErrorCallback handler) {
|
|
htsCallbackErr = handler;
|
|
}
|
|
|
|
HTSEXT_API htsErrorCallback hts_get_error_callback(void) {
|
|
return htsCallbackErr;
|
|
}
|
|
|
|
/* coucal assertion handler: forwards the failed expression and its location
   to abortf_(). The opaque argument is not used. */
static void default_coucal_asserthandler(void *arg, const char* exp, const char* file, int line) {
  (void) arg;
  abortf_(exp, file, line);
}
|
|
|
|
/* Map a coucal log level to the matching LOG_* level; unknown levels map to
   LOG_ERROR. */
static int get_loglevel_from_coucal(coucal_loglevel level) {
  int mapped = LOG_ERROR;

  switch(level) {
  case coucal_log_critical:
    mapped = LOG_PANIC;
    break;
  case coucal_log_warning:
    mapped = LOG_WARNING;
    break;
  case coucal_log_info:
    mapped = LOG_INFO;
    break;
  case coucal_log_debug:
    mapped = LOG_DEBUG;
    break;
  case coucal_log_trace:
    mapped = LOG_TRACE;
    break;
  default:
    break;
  }
  return mapped;
}
|
|
|
|
/* log to default console */
|
|
static void default_coucal_loghandler(void *arg, coucal_loglevel level,
|
|
const char* format, va_list args) {
|
|
|
|
/* informational chatter (hashtable stats on delete, etc.) only when
|
|
debugging; keep warnings and critical errors always visible. */
|
|
if (level > coucal_log_warning && hts_dgb_init <= 0) {
|
|
return;
|
|
}
|
|
if (level <= coucal_log_warning) {
|
|
fprintf(stderr, "** warning: ");
|
|
}
|
|
vfprintf(stderr, format, args);
|
|
fprintf(stderr, "\n");
|
|
}
|
|
|
|
/* log to project log */
|
|
static void htsopt_coucal_loghandler(void *arg, coucal_loglevel level,
|
|
const char* format, va_list args) {
|
|
httrackp *const opt = (httrackp*) arg;
|
|
if (opt != NULL && opt->log != NULL) {
|
|
hts_log_vprint(opt, get_loglevel_from_coucal(level),
|
|
format, args);
|
|
} else {
|
|
default_coucal_loghandler(NULL, level, format, args);
|
|
}
|
|
}
|
|
|
|
/* attach hashtable logger to project log */
|
|
void hts_set_hash_handler(coucal hashtable, httrackp *opt) {
|
|
/* Init hashtable default assertion handler. */
|
|
coucal_set_assert_handler(hashtable,
|
|
htsopt_coucal_loghandler,
|
|
default_coucal_asserthandler,
|
|
opt);
|
|
}
|
|
|
|
static int hts_init_ok = 0;
|
|
HTSEXT_API int hts_init(void) {
|
|
const char *dbg_env;
|
|
|
|
/* */
|
|
if (hts_init_ok)
|
|
return 1;
|
|
hts_init_ok = 1;
|
|
|
|
/* enable debugging ? */
|
|
dbg_env = getenv("HTS_LOG");
|
|
if (dbg_env != NULL && *dbg_env != 0) {
|
|
int level = 0;
|
|
|
|
if (sscanf(dbg_env, "%d", &level) == 1) {
|
|
hts_debug(level);
|
|
}
|
|
}
|
|
|
|
hts_debug_log_print("entering hts_init()"); /* debug */
|
|
|
|
/* Init hashtable default assertion handler. */
|
|
coucal_set_global_assert_handler(default_coucal_loghandler,
|
|
default_coucal_asserthandler);
|
|
|
|
/* Init threads (lazy init) */
|
|
htsthread_init();
|
|
|
|
/* Ensure external modules are loaded */
|
|
hts_debug_log_print("calling htspe_init()"); /* debug */
|
|
htspe_init(); /* module load (lazy) */
|
|
|
|
/* MD5 Auto-test */
|
|
{
|
|
char digest[32 + 2];
|
|
const char *atest = "MD5 Checksum Autotest";
|
|
|
|
digest[0] = '\0';
|
|
domd5mem(atest, strlen(atest), digest, 1); /* a42ec44369da07ace5ec1d660ba4a69a */
|
|
if (strcmp(digest, "a42ec44369da07ace5ec1d660ba4a69a") != 0) {
|
|
int fatal_broken_md5 = 0;
|
|
|
|
assertf(fatal_broken_md5);
|
|
}
|
|
}
|
|
|
|
hts_debug_log_print("initializing SSL"); /* debug */
|
|
#if HTS_USEOPENSSL
|
|
/*
|
|
Initialize the OpensSSL library
|
|
*/
|
|
if (!openssl_ctx) {
|
|
const char *version;
|
|
const SSL_METHOD *method;
|
|
|
|
/* OpenSSL >= 1.1.0 / LibreSSL >= 2.7.0 auto-init and provide the generic
|
|
methods. The legacy init and SSLv23/SSLeay calls (deprecated since 1.1.0,
|
|
likely gone in 4.0) are kept only for older OpenSSL. */
|
|
#if OPENSSL_VERSION_NUMBER < 0x10100000L \
|
|
|| (defined(LIBRESSL_VERSION_NUMBER) && LIBRESSL_VERSION_NUMBER < 0x2070000fL)
|
|
SSL_load_error_strings();
|
|
SSL_library_init();
|
|
version = SSLeay_version(SSLEAY_VERSION);
|
|
method = SSLv23_client_method();
|
|
#else
|
|
version = OpenSSL_version(OPENSSL_VERSION);
|
|
method = TLS_client_method();
|
|
#endif
|
|
|
|
// Check CVE-2014-0160.
|
|
if (ssl_vulnerable(version)) {
|
|
fprintf(stderr, "OpenSSL version == '%s'\n", version);
|
|
abortLog("unable to initialize TLS: OpenSSL version seems vulnerable to heartbleed bug (CVE-2014-0160)");
|
|
assertf("OpenSSL version seems vulnerable to heartbleed bug (CVE-2014-0160)" == NULL);
|
|
}
|
|
|
|
// OpenSSL_add_all_algorithms();
|
|
openssl_ctx = SSL_CTX_new(method);
|
|
if (!openssl_ctx) {
|
|
fprintf(stderr, "fatal: unable to initialize TLS: SSL_CTX_new()\n");
|
|
abortLog("unable to initialize TLS: SSL_CTX_new()");
|
|
assertf("unable to initialize TLS" == NULL);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
hts_debug_log_print("ending hts_init()"); /* debug */
|
|
return 1;
|
|
}
|
|
|
|
/* will not free thread env. */
|
|
HTSEXT_API int hts_uninit(void) {
|
|
/* hts_init() is a lazy initializer, with limited a allocation (one or two mutexes) ;
|
|
we won't free anything here as the .h semantic was never being very clear */
|
|
return 1;
|
|
}
|
|
|
|
HTSEXT_API int hts_uninit_module(void) {
|
|
if (!hts_init_ok)
|
|
return 1;
|
|
htsthread_uninit();
|
|
htspe_uninit();
|
|
hts_init_ok = 0;
|
|
return 1;
|
|
}
|
|
|
|
// legacy. do not use
|
|
HTSEXT_API hts_boolean hts_log(httrackp *opt, const char *prefix,
|
|
const char *msg) {
|
|
if (opt->log != NULL) {
|
|
fspc(opt, opt->log, prefix);
|
|
fprintf(opt->log, "%s" LF, msg);
|
|
return 0;
|
|
}
|
|
return 1; /* Error */
|
|
}
|
|
|
|
static void (*hts_log_print_callback)(httrackp * opt, int type, const char *format, va_list args) = NULL;
|
|
|
|
HTSEXT_API void hts_set_log_vprint_callback(void (*callback)(httrackp * opt,
|
|
int type, const char *format, va_list args)) {
|
|
hts_log_print_callback = callback;
|
|
}
|
|
|
|
/* Core logging routine.
   - The log hook, when installed, receives every message. It is given a
     private copy of the argument list, so that `args` is still intact for
     the log file output below (a va_list must not be reused after it has
     been consumed).
   - When opt has an open log and opt->debug >= the level (low 8 bits of
     type), writes "<level name> <message>", followed by ": <strerror>" of
     the errno seen on entry when LOG_ERRNO is set in type, then LF; the log
     is flushed when opt->flush is set. errno is preserved. */
HTSEXT_API void hts_log_vprint(httrackp * opt, int type, const char *format, va_list args) {
  assertf(format != NULL);
  if (hts_log_print_callback != NULL) {
    va_list args_copy;

    va_copy(args_copy, args);
    hts_log_print_callback(opt, type, format, args_copy);
    va_end(args_copy);
  }
  if (opt != NULL && opt->log != NULL) {
    const int save_errno = errno;
    const char *s_type = "unknown";
    const int level = type & 0xff;

    // Check log level
    if (opt->debug < level) {
      return;
    }

    switch (level) {
    case LOG_TRACE:
      s_type = "trace";
      break;
    case LOG_DEBUG:
      s_type = "debug";
      break;
    case LOG_INFO:
      s_type = "info";
      break;
    case LOG_NOTICE:
    case LOG_WARNING:
      s_type = "warning";
      break;
    case LOG_ERROR:
      s_type = "error";
      break;
    case LOG_PANIC:
      s_type = "panic";
      break;
    }
    fspc(opt, opt->log, s_type);
    (void) vfprintf(opt->log, format, args);
    if ((type & LOG_ERRNO) != 0) {
      fprintf(opt->log, ": %s", strerror(save_errno));
    }
    fputs(LF, opt->log);
    if (opt->flush) {
      fflush(opt->log);
    }
    errno = save_errno;
  }
}
|
|
|
|
/* printf-like front end of hts_log_vprint(). */
HTSEXT_API void hts_log_print(httrackp * opt, int type, const char *format, ...) {
  va_list ap;

  assertf(format != NULL);
  va_start(ap, format);
  hts_log_vprint(opt, type, format, ap);
  va_end(ap);
}
|
|
|
|
/* LEGACY: kept for compatibility, does nothing. */
HTSEXT_API void set_wrappers(httrackp * opt) {
  (void) opt;
}
|
|
|
|
/* Load the external module moduleName and run its 'hts_plug' entry point
   with argv.
   Returns 1 when the module plugged successfully and was recorded in
   opt->libHandles (unplug_wrappers() will release it later);
   0 when the library loaded but has no 'hts_plug', 'hts_plug' did not return
   1, or the handle table could not be grown (the library is closed again,
   after calling 'hts_unplug' if present in the last two cases);
   -1 when the library could not be loaded. */
HTSEXT_API int plug_wrapper(httrackp * opt, const char *moduleName,
                            const char *argv) {
  void *handle = openFunctionLib(moduleName);

  if (handle != NULL) {
    t_hts_plug plug = (t_hts_plug) getFunctionPtr(handle, "hts_plug");
    t_hts_unplug unplug = (t_hts_unplug) getFunctionPtr(handle, "hts_unplug");

    if (plug != NULL) {
      int ret = plug(opt, argv);

      if (hts_dgb_init > 0 && opt->log != NULL) {
        hts_debug_log_print("plugged module '%s' (return code=%d)", moduleName,
                            ret);
      }
      if (ret == 1) {           /* Success! */
        /* grow through a temporary: the current table stays valid (and
           owned by opt) if the reallocation fails */
        htslibhandle *const handles =
          (htslibhandle *) realloct(opt->libHandles.handles,
                                    (opt->libHandles.count +
                                     1) * sizeof(htslibhandle));

        if (handles != NULL) {
          opt->libHandles.handles = handles;
          handles[opt->libHandles.count].handle = handle;
          handles[opt->libHandles.count].moduleName = strdupt(moduleName);
          opt->libHandles.count++;
          return 1;
        }
        hts_debug_log_print("* note: out of memory while registering %s",
                            moduleName);
        if (unplug)
          unplug(opt);
      } else {
        hts_debug_log_print
          ("* note: error while running entry point 'hts_plug' in %s",
           moduleName);
        if (unplug)
          unplug(opt);
      }
    } else {
      int last_errno = errno;

      hts_debug_log_print("* note: can't find entry point 'hts_plug' in %s: %s",
                          moduleName, strerror(last_errno));
    }
    closeFunctionLib(handle);
    return 0;
  } else {
    int last_errno = errno;

    hts_debug_log_print("* note: can't load %s: %s", moduleName,
                        strerror(last_errno));
  }
  return -1;
}
|
|
|
|
/* Unplug and close every module recorded in opt->libHandles, then release
   the handle table itself. Does nothing when the table is NULL. */
static void unplug_wrappers(httrackp * opt) {
  int i = 0;

  if (opt->libHandles.handles == NULL) {
    return;
  }

  while(i < opt->libHandles.count) {
    if (opt->libHandles.handles[i].handle != NULL) {
      /* hts_unplug(), the dll exit point (finalizer) */
      t_hts_unplug finalizer =
        (t_hts_unplug) getFunctionPtr(opt->libHandles.handles[i].handle,
                                      "hts_unplug");

      if (finalizer != NULL) {
        finalizer(opt);
      }
      closeFunctionLib(opt->libHandles.handles[i].handle);
      opt->libHandles.handles[i].handle = NULL;
    }
    if (opt->libHandles.handles[i].moduleName != NULL) {
      freet(opt->libHandles.handles[i].moduleName);
      opt->libHandles.handles[i].moduleName = NULL;
    }
    i++;
  }

  freet(opt->libHandles.handles);
  opt->libHandles.handles = NULL;
  opt->libHandles.count = 0;
}
|
|
|
|
/* Tell whether any pattern of a newline-separated pattern list occurs as a
   substring of 's'.

   s      string to search in; NULL or empty never matches
   match  list of patterns separated by '\n'; empty entries are ignored and
          the last entry does not need a trailing '\n'; NULL never matches

   Returns 1 as soon as one non-empty pattern is found inside 's', else 0. */
int multipleStringMatch(const char *s, const char *match) {
  size_t s_len;

  if (match == NULL || s == NULL || *s == 0)
    return 0;
  s_len = strlen(s);
  while(*match != 0) {
    /* The current pattern is [match, eol), or runs to the end of the list. */
    const char *const eol = strchr(match, '\n');
    const size_t len = eol != NULL ? (size_t) (eol - match) : strlen(match);

    if (len > 0 && len <= s_len) {
      size_t i;

      for(i = 0; i + len <= s_len; i++) {
        if (memcmp(s + i, match, len) == 0)
          return 1;
      }
    }
    /* Only step over a separator that is really there: stepping over the
       terminating NUL would read past the end of the list. */
    if (eol == NULL)
      break;
    match = eol + 1;
  }
  return 0;
}
|
|
|
|
/* Allocate a new option structure, fill it with the default settings and
   try to plug the default modules.
   Returns the new structure, or NULL when memory could not be allocated. */
HTSEXT_API httrackp *hts_create_opt(void) {
#if ( defined(_WIN32) || defined(__ANDROID__) )
  static const char *defaultModules[] = {
    "htsswf", "htsjava", "httrack-plugin", NULL
  };
#else
  static const char *defaultModules[] = {
    "libhtsswf.so.1", "libhtsjava.so.2", "httrack-plugin", NULL
  };
#endif
  /* zero-filled structure; every field not set below stays 0/NULL */
  httrackp *opt = calloc(1, sizeof(httrackp));
  int i;

  if (opt == NULL) {
    return NULL;
  }

  /* Allocated buffers (done first so that a failure has nothing else to
     undo than the structure itself) */
  opt->callbacks_fun =
    (t_hts_htmlcheck_callbacks *) malloct(sizeof(t_hts_htmlcheck_callbacks));
  if (opt->callbacks_fun == NULL) {
    free(opt);
    return NULL;
  }
  memset(opt->callbacks_fun, 0, sizeof(t_hts_htmlcheck_callbacks));

  /* default options */
  opt->size_httrackp = sizeof(httrackp);

  /* mutexes */
  hts_mutexinit(&opt->state.lock);

  /* custom wrappers */
  opt->libHandles.count = 0;

  /* default settings */

  opt->wizard = HTS_WIZARD_AUTO;        // automatic wizard
  opt->quiet = HTS_FALSE;
  //
  opt->travel = HTS_TRAVEL_SAME_ADDRESS;        // same address
  opt->depth = 9999;            // full mirror by default
  opt->extdepth = 0;            // but not outside
  opt->seeker = HTS_SEEKER_DOWN;        // down
  opt->urlmode = HTS_URLMODE_RELATIVE;  // relative by default
  opt->no_type_change = HTS_FALSE;
  opt->debug = LOG_NOTICE;      // small log
  opt->getmode = HTS_GETMODE_HTML | HTS_GETMODE_NONHTML;
  opt->maxsite = -1;            // max site size (none)
  opt->maxfile_nonhtml = -1;    // max size of a non-html file
  opt->maxfile_html = -1;       // same for html
  opt->maxsoc = 4;              // max number of sockets
  opt->fragment = -1;           // no fragmentation
  opt->nearlink = HTS_FALSE;
  opt->makeindex = HTS_TRUE;
  opt->kindex = HTS_FALSE;
  opt->delete_old = HTS_TRUE;
  opt->background_on_suspend = HTS_TRUE;
  opt->makestat = HTS_FALSE;
  opt->maketrack = HTS_FALSE;
  opt->timeout = 120;           // default timeout (2 minutes)
  opt->cache = HTS_CACHE_PRIORITY;      // cache has priority
  opt->shell = HTS_FALSE;
  opt->proxy.active = 0;        // no proxy
  opt->user_agent_send = HTS_TRUE;
  StringCopy(opt->user_agent,
             "Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)");
  StringCopy(opt->referer, "");
  StringCopy(opt->from, "");
  opt->savename_83 = HTS_SAVENAME_83_LONG;      // long names by default
  opt->savename_type = 0;       // with the original structure
  opt->savename_delayed =
    HTS_SAVENAME_DELAYED_HARD;  // always delay the type check (default)
  opt->delayed_cached = HTS_TRUE;
  opt->mimehtml = HTS_FALSE;
  opt->parsejava = HTSPARSE_DEFAULT;    // parse classes
  opt->hostcontrol = 0;         // NO host control for timeout and traffic jammer
  opt->retry = 2;               // 2 retries by default
  opt->errpage = HTS_TRUE;      // error pages (404 etc.)
  opt->check_type = HTS_TRUE;
  opt->all_in_cache = HTS_FALSE;
  opt->robots = HTS_ROBOTS_ALWAYS;      // handle robots.txt
  opt->external = HTS_FALSE;
  opt->passprivacy = HTS_FALSE;
  opt->includequery = HTS_TRUE;
  opt->mirror_first_page = HTS_FALSE;
  opt->accept_cookie = HTS_TRUE;
  opt->cookie = NULL;
  opt->http10 = HTS_FALSE;
  opt->nokeepalive = HTS_FALSE;
  opt->nocompression = HTS_FALSE;
  opt->tolerant = HTS_FALSE;
  opt->parseall = HTS_TRUE;
  opt->parsedebug = HTS_FALSE;
  opt->norecatch = HTS_FALSE;
  opt->verbosedisplay = HTS_VERBOSE_NONE;       // no text animation
  opt->sizehack = HTS_FALSE;
  opt->urlhack = HTS_TRUE;
  StringCopy(opt->footer, HTS_DEFAULT_FOOTER);
  opt->ftp_proxy = HTS_TRUE;
  opt->convert_utf8 = HTS_TRUE;
  StringCopy(opt->filelist, "");
  StringCopy(opt->lang_iso, "en, *");
  StringCopy(opt->accept,
             "text/html,image/png,image/jpeg,image/pjpeg,image/x-xbitmap,image/svg+xml,image/gif;q=0.9,*/*;q=0.1");
  StringCopy(opt->headers, "");
  StringCopy(opt->mimedefs, "\n");      // no mime filter (the \n is IMPORTANT)
  StringClear(opt->mod_blacklist);
  //
  opt->log = stdout;
  opt->errlog = stderr;
  opt->flush = HTS_TRUE;
  opt->keyboard = HTS_FALSE;
  //
  StringCopy(opt->path_html, "");
  StringCopy(opt->path_html_utf8, "");
  StringCopy(opt->path_log, "");
  StringCopy(opt->path_bin, "");
  //
  opt->maxlink = 100000;        // 100,000 links max by default
  opt->maxfilter = 200;         // 200 filters max by default
  opt->maxcache = 1048576 * 32; // about 32MB of cache max -- NOT CONFIGURABLE FOR NOW --
  opt->maxtime = -1;            // max time in seconds
  opt->maxrate = 100000;        // max rate
  opt->maxconn = 5.0;           // connections per second
  opt->waittime = -1;           // wait until.. hh*3600+mm*60+ss
  //
  opt->exec = "";
  opt->is_update = HTS_FALSE;
  opt->dir_topindex = HTS_FALSE;
  //
  opt->bypass_limits = HTS_FALSE;
  opt->state.stop = 0;          // stop
  opt->state.exit_xh = 0;       // abort
  //
  opt->state.is_ended = 0;

  /* Preload callbacks : java and flash parser, and the automatic user-defined callback */
  for(i = 0; defaultModules[i] != NULL; i++) {
    /* A module that is missing or that aborted its initialization is
       deliberately ignored. */
    (void) plug_wrapper(opt, defaultModules[i], defaultModules[i]);
  }

  return opt;
}
|
|
|
|
/* Size, in bytes, of the httrackp structure as compiled into this file. */
HTSEXT_API size_t hts_sizeof_opt(void) {
  const size_t size = sizeof(httrackp);

  return size;
}
|
|
|
|
/* Release an option structure and everything it owns: callback argument
   chains, plugged modules, DNS cache, cancel chain, strings, the state
   mutex and finally the structure itself. NULL is accepted and ignored. */
HTSEXT_API void hts_free_opt(httrackp * opt) {
  if (opt == NULL) {
    return;
  }

  /* Allocated callbacks */
  if (opt->callbacks_fun != NULL) {
    int i;
    /* the callbacks structure is walked as a flat array of items */
    t_hts_htmlcheck_callbacks_item *items =
      (t_hts_htmlcheck_callbacks_item *) opt->callbacks_fun;
    const int size =
      (int) sizeof(t_hts_htmlcheck_callbacks) /
      sizeof(t_hts_htmlcheck_callbacks_item);
    assertf(sizeof(t_hts_htmlcheck_callbacks_item) * size ==
            sizeof(t_hts_htmlcheck_callbacks));

    /* Free all linked lists */
    for(i = 0; i < size; i++) {
      t_hts_callbackarg *carg = items[i].carg;

      while(carg != NULL) {
        /* read the link before the node is released */
        t_hts_callbackarg *const next_carg = carg->prev.carg;

        hts_free(carg);
        carg = next_carg;
      }
    }

    freet(opt->callbacks_fun);
    opt->callbacks_fun = NULL;
  }

  /* Close library handles */
  unplug_wrappers(opt);

  /* Cache */
  if (opt->state.dns_cache != NULL) {
    coucal root;

    /* detach the table under the lock, delete it outside */
    hts_mutexlock(&opt->state.lock);
    root = opt->state.dns_cache;
    opt->state.dns_cache = NULL;
    hts_mutexrelease(&opt->state.lock);

    coucal_delete(&root);       // frees records via hts_cache_value_free
  }

  /* Cancel chain */
  if (opt->state.cancel != NULL) {
    htsoptstatecancel *cancel = opt->state.cancel;

    while(cancel != NULL) {
      htsoptstatecancel *const next = cancel->next;

      if (cancel->url != NULL) {
        freet(cancel->url);
      }
      freet(cancel);
      cancel = next;
    }
    opt->state.cancel = NULL;
  }

  /* Free strings */

  StringFree(opt->proxy.name);
  StringFree(opt->proxy.bindhost);

  StringFree(opt->savename_userdef);
  StringFree(opt->user_agent);
  StringFree(opt->referer);
  StringFree(opt->from);
  StringFree(opt->lang_iso);
  StringFree(opt->sys_com);
  StringFree(opt->mimedefs);
  StringFree(opt->filelist);
  StringFree(opt->urllist);
  StringFree(opt->footer);
  StringFree(opt->mod_blacklist);

  StringFree(opt->path_html);
  StringFree(opt->path_html_utf8);
  StringFree(opt->path_log);
  StringFree(opt->path_bin);

  /* mutexes */
  hts_mutexfree(&opt->state.lock);

  /* Free structure */
  free(opt);
}
|
|
|
|
// TEMPORARY - PUT THIS STRUCTURE INSIDE httrackp !
|
|
const hts_stat_struct* hts_get_stats(httrackp * opt) {
|
|
if (opt == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
HTS_STAT.stat_nsocket = 0;
|
|
HTS_STAT.stat_errors = fspc(opt, NULL, "error");
|
|
HTS_STAT.stat_warnings = fspc(opt, NULL, "warning");
|
|
HTS_STAT.stat_infos = fspc(opt, NULL, "info");
|
|
HTS_STAT.nbk = 0;
|
|
HTS_STAT.nb = 0;
|
|
|
|
return &HTS_STAT;
|
|
}
|
|
|
|
// default wrappers
|
|
/* Default "init" callback: no-op. */
static void __cdecl htsdefault_init(t_hts_callbackarg * carg) {
  (void) carg;
}
|
|
/* Default "free" callback: no-op (the former hts_freevar() call is
   disabled). */
static void __cdecl htsdefault_uninit(t_hts_callbackarg * carg) {
  (void) carg;
}
|
|
/* Default "start" callback: does nothing and returns 1. */
static int __cdecl htsdefault_start(t_hts_callbackarg * carg, httrackp * opt) {
  (void) carg;
  (void) opt;
  return 1;
}
|
|
/* Default "change-options" callback: does nothing and returns 1. */
static int __cdecl htsdefault_chopt(t_hts_callbackarg * carg, httrackp * opt) {
  (void) carg;
  (void) opt;
  return 1;
}
|
|
/* Default "end" callback: does nothing and returns 1. */
static int __cdecl htsdefault_end(t_hts_callbackarg * carg, httrackp * opt) {
  (void) carg;
  (void) opt;
  return 1;
}
|
|
/* Default "preprocess-html" callback: leaves '*html' and '*len' untouched
   and returns 1. */
static int __cdecl htsdefault_preprocesshtml(t_hts_callbackarg * carg,
                                             httrackp * opt, char **html,
                                             int *len, const char *url_adresse,
                                             const char *url_fichier) {
  (void) carg;
  (void) opt;
  (void) html;
  (void) len;
  (void) url_adresse;
  (void) url_fichier;
  return 1;
}
|
|
/* Default "postprocess-html" callback: leaves '*html' and '*len' untouched
   and returns 1. */
static int __cdecl htsdefault_postprocesshtml(t_hts_callbackarg * carg,
                                              httrackp * opt, char **html,
                                              int *len, const char *url_adresse,
                                              const char *url_fichier) {
  (void) carg;
  (void) opt;
  (void) html;
  (void) len;
  (void) url_adresse;
  (void) url_fichier;
  return 1;
}
|
|
/* Default "check-html" callback: ignores the page and returns 1. */
static int __cdecl htsdefault_checkhtml(t_hts_callbackarg * carg,
                                        httrackp * opt, char *html, int len,
                                        const char *url_adresse,
                                        const char *url_fichier) {
  (void) carg;
  (void) opt;
  (void) html;
  (void) len;
  (void) url_adresse;
  (void) url_fichier;
  return 1;
}
|
|
/* Default "loop" callback (called at each HTTrack loop): does nothing and
   returns 1. */
static int __cdecl htsdefault_loop(t_hts_callbackarg * carg, httrackp * opt, lien_back * back, int back_max, int back_index, int lien_n, int lien_tot, int stat_time, hts_stat_struct * stats) {
  (void) carg;
  (void) opt;
  (void) back;
  (void) back_max;
  (void) back_index;
  (void) lien_n;
  (void) lien_tot;
  (void) stat_time;
  (void) stats;
  return 1;
}
|
|
/* Default "query" callback: answers every question with an empty string. */
static const char *__cdecl htsdefault_query(t_hts_callbackarg * carg,
                                            httrackp * opt,
                                            const char *question) {
  (void) carg;
  (void) opt;
  (void) question;
  return "";
}
|
|
/* Default "query2" callback: answers every question with an empty string. */
static const char *__cdecl htsdefault_query2(t_hts_callbackarg * carg,
                                             httrackp * opt,
                                             const char *question) {
  (void) carg;
  (void) opt;
  (void) question;
  return "";
}
|
|
/* Default "query3" callback: answers every question with an empty string. */
static const char *__cdecl htsdefault_query3(t_hts_callbackarg * carg,
                                             httrackp * opt,
                                             const char *question) {
  (void) carg;
  (void) opt;
  (void) question;
  return "";
}
|
|
/* Default "check-link" callback: ignores its arguments and returns -1. */
static int __cdecl htsdefault_check(t_hts_callbackarg * carg, httrackp * opt,
                                    const char *adr, const char *fil,
                                    int status) {
  (void) carg;
  (void) opt;
  (void) adr;
  (void) fil;
  (void) status;
  return -1;
}
|
|
/* Default "check-mime" callback: ignores its arguments and returns -1. */
static int __cdecl htsdefault_check_mime(t_hts_callbackarg * carg,
                                         httrackp * opt, const char *adr,
                                         const char *fil, const char *mime,
                                         int status) {
  (void) carg;
  (void) opt;
  (void) adr;
  (void) fil;
  (void) mime;
  (void) status;
  return -1;
}
|
|
/* Default "pause" callback: blocks, polling with Sleep(1000) between
   checks, for as long as fexist(lockfile) reports the lock file. */
static void __cdecl htsdefault_pause(t_hts_callbackarg * carg, httrackp * opt,
                                     const char *lockfile) {
  for(;;) {
    if (!fexist(lockfile)) {
      break;
    }
    Sleep(1000);
  }
}
|
|
/* Default "save-file" callback: no-op. */
static void __cdecl htsdefault_filesave(t_hts_callbackarg * carg,
                                        httrackp * opt, const char *file) {
  (void) carg;
  (void) opt;
  (void) file;
}
|
|
/* Default "save-file2" callback: no-op. */
static void __cdecl htsdefault_filesave2(t_hts_callbackarg * carg,
                                         httrackp * opt, const char *adr,
                                         const char *file, const char *sav,
                                         int is_new, int is_modified,
                                         int not_updated) {
  (void) carg;
  (void) opt;
  (void) adr;
  (void) file;
  (void) sav;
  (void) is_new;
  (void) is_modified;
  (void) not_updated;
}
|
|
/* Default "link-detected" callback: leaves 'link' untouched and returns 1. */
static int __cdecl htsdefault_linkdetected(t_hts_callbackarg * carg,
                                           httrackp * opt, char *link) {
  (void) carg;
  (void) opt;
  (void) link;
  return 1;
}
|
|
/* Default "link-detected2" callback: leaves 'link' untouched and returns 1. */
static int __cdecl htsdefault_linkdetected2(t_hts_callbackarg * carg,
                                            httrackp * opt, char *link,
                                            const char *start_tag) {
  (void) carg;
  (void) opt;
  (void) link;
  (void) start_tag;
  return 1;
}
|
|
/* Default "transfer-status" callback: does nothing and returns 1. */
static int __cdecl htsdefault_xfrstatus(t_hts_callbackarg * carg,
                                        httrackp * opt, lien_back * back) {
  (void) carg;
  (void) opt;
  (void) back;
  return 1;
}
|
|
/* Default "save-name" callback: leaves 'save' untouched and returns 1. */
static int __cdecl htsdefault_savename(t_hts_callbackarg * carg, httrackp * opt,
                                       const char *adr_complete,
                                       const char *fil_complete,
                                       const char *referer_adr,
                                       const char *referer_fil, char *save) {
  (void) carg;
  (void) opt;
  (void) adr_complete;
  (void) fil_complete;
  (void) referer_adr;
  (void) referer_fil;
  (void) save;
  return 1;
}
|
|
/* Default "send-header" callback: leaves 'buff' untouched and returns 1. */
static int __cdecl htsdefault_sendhead(t_hts_callbackarg * carg, httrackp * opt,
                                       char *buff, const char *adr,
                                       const char *fil, const char *referer_adr,
                                       const char *referer_fil,
                                       htsblk * outgoing) {
  (void) carg;
  (void) opt;
  (void) buff;
  (void) adr;
  (void) fil;
  (void) referer_adr;
  (void) referer_fil;
  (void) outgoing;
  return 1;
}
|
|
/* Default "receive-header" callback: leaves 'buff' untouched and returns 1. */
static int __cdecl htsdefault_receivehead(t_hts_callbackarg * carg,
                                          httrackp * opt, char *buff,
                                          const char *adr, const char *fil,
                                          const char *referer_adr,
                                          const char *referer_fil,
                                          htsblk * incoming) {
  (void) carg;
  (void) opt;
  (void) buff;
  (void) adr;
  (void) fil;
  (void) referer_adr;
  (void) referer_fil;
  (void) incoming;
  return 1;
}
|
|
/* Default module "detect" callback: ignores 'str' and returns 0. */
static int __cdecl htsdefault_detect(t_hts_callbackarg * carg, httrackp * opt,
                                     htsmoduleStruct * str) {
  (void) carg;
  (void) opt;
  (void) str;
  return 0;
}
|
|
/* Default module "parse" callback: ignores 'str' and returns 0. */
static int __cdecl htsdefault_parse(t_hts_callbackarg * carg, httrackp * opt,
                                    htsmoduleStruct * str) {
  (void) carg;
  (void) opt;
  (void) str;
  return 0;
}
|
|
|
|
/* Default internal dummy callbacks */
|
|
const t_hts_htmlcheck_callbacks default_callbacks = {
|
|
{htsdefault_init, NULL},
|
|
{htsdefault_uninit, NULL},
|
|
{htsdefault_start, NULL},
|
|
{htsdefault_end, NULL},
|
|
{htsdefault_chopt, NULL},
|
|
{htsdefault_preprocesshtml, NULL},
|
|
{htsdefault_postprocesshtml, NULL},
|
|
{htsdefault_checkhtml, NULL},
|
|
{htsdefault_query, NULL},
|
|
{htsdefault_query2, NULL},
|
|
{htsdefault_query3, NULL},
|
|
{htsdefault_loop, NULL},
|
|
{htsdefault_check, NULL},
|
|
{htsdefault_check_mime, NULL},
|
|
{htsdefault_pause, NULL},
|
|
{htsdefault_filesave, NULL},
|
|
{htsdefault_filesave2, NULL},
|
|
{htsdefault_linkdetected, NULL},
|
|
{htsdefault_linkdetected2, NULL},
|
|
{htsdefault_xfrstatus, NULL},
|
|
{htsdefault_savename, NULL},
|
|
{htsdefault_sendhead, NULL},
|
|
{htsdefault_receivehead, NULL},
|
|
{htsdefault_detect, NULL},
|
|
{htsdefault_parse, NULL}
|
|
};
|
|
|
|
/* When NAME equals the literal S, apply OPERATION(type, slot) to the FUN
   member of the callbacks structure CB: 'type' is t_hts_htmlcheck_<FUN> and
   'slot' is (CB)->FUN.fun. Otherwise expands to nothing observable. */
#define CALLBACK_OP(CB, NAME, OPERATION, S, FUN)                        \
  do {                                                                  \
    if (strcmp((NAME), (S)) == 0) {                                     \
      OPERATION(t_hts_htmlcheck_ ##FUN, (CB)->FUN.fun);                 \
    }                                                                   \
  } while(0)
|
|
|
|
/* Map the textual callback NAME onto the matching member of the callbacks
   structure CB and apply OPERATION to it (see CALLBACK_OP). Every known
   name is tested in turn; an unknown NAME applies nothing. */
#define DISPATCH_CALLBACK(CB, NAME, OPERATION)                          \
  do {                                                                  \
    CALLBACK_OP(CB, NAME, OPERATION, "init", init);                     \
    CALLBACK_OP(CB, NAME, OPERATION, "free", uninit);                   \
    CALLBACK_OP(CB, NAME, OPERATION, "start", start);                   \
    CALLBACK_OP(CB, NAME, OPERATION, "end", end);                       \
    CALLBACK_OP(CB, NAME, OPERATION, "change-options", chopt);          \
    CALLBACK_OP(CB, NAME, OPERATION, "preprocess-html", preprocess);    \
    CALLBACK_OP(CB, NAME, OPERATION, "postprocess-html", postprocess);  \
    CALLBACK_OP(CB, NAME, OPERATION, "check-html", check_html);         \
    CALLBACK_OP(CB, NAME, OPERATION, "query", query);                   \
    CALLBACK_OP(CB, NAME, OPERATION, "query2", query2);                 \
    CALLBACK_OP(CB, NAME, OPERATION, "query3", query3);                 \
    CALLBACK_OP(CB, NAME, OPERATION, "loop", loop);                     \
    CALLBACK_OP(CB, NAME, OPERATION, "check-link", check_link);         \
    CALLBACK_OP(CB, NAME, OPERATION, "check-mime", check_mime);         \
    CALLBACK_OP(CB, NAME, OPERATION, "pause", pause);                   \
    CALLBACK_OP(CB, NAME, OPERATION, "save-file", filesave);            \
    CALLBACK_OP(CB, NAME, OPERATION, "save-file2", filesave2);          \
    CALLBACK_OP(CB, NAME, OPERATION, "link-detected", linkdetected);    \
    CALLBACK_OP(CB, NAME, OPERATION, "link-detected2", linkdetected2);  \
    CALLBACK_OP(CB, NAME, OPERATION, "transfer-status", xfrstatus);     \
    CALLBACK_OP(CB, NAME, OPERATION, "save-name", savename);            \
    CALLBACK_OP(CB, NAME, OPERATION, "send-header", sendhead);          \
    CALLBACK_OP(CB, NAME, OPERATION, "receive-header", receivehead);    \
  } while(0)
|
|
|
|
/* Store 'function' in the slot of 'callbacks' designated by 'name' (one of
   the names listed in DISPATCH_CALLBACK).
   Returns 0 when a slot was assigned, 1 when 'name' is unknown. */
int hts_set_callback(t_hts_htmlcheck_callbacks * callbacks, const char *name,
                     void *function) {
  int error = 1;                /* cleared as soon as a slot matches */

#define CALLBACK_OPERATION(SLOT_TYPE, SLOT) do {                        \
    SLOT = (SLOT_TYPE) function;                                        \
    error = 0;                                                          \
  } while(0)
  DISPATCH_CALLBACK(callbacks, name, CALLBACK_OPERATION);
#undef CALLBACK_OPERATION

  return error;
}
|
|
|
|
/* Return, as a void pointer, the function stored in the slot of 'callbacks'
   designated by 'name' (one of the names listed in DISPATCH_CALLBACK), or
   NULL when 'name' is unknown. */
void *hts_get_callback(t_hts_htmlcheck_callbacks * callbacks, const char *name) {
  /* the first matching slot returns straight out of the function */
#define CALLBACK_OPERATION(SLOT_TYPE, SLOT) do {                        \
    return (void*) SLOT;                                                \
  } while(0)
  DISPATCH_CALLBACK(callbacks, name, CALLBACK_OPERATION);
#undef CALLBACK_OPERATION

  return NULL;
}
|
|
|
|
// end default wrappers
|
|
|
|
/* libc stubs */
|
|
|
|
/* Exported wrapper around strdup(): returns what strdup(str) returns. */
HTSEXT_API char *hts_strdup(const char *str) {
  char *const copy = strdup(str);

  return copy;
}
|
|
|
|
/* Exported wrapper around malloc(): returns what malloc(size) returns. */
HTSEXT_API void *hts_malloc(size_t size) {
  void *const block = malloc(size);

  return block;
}
|
|
|
|
/* Exported wrapper around realloc(): returns what realloc(data, size)
   returns. */
HTSEXT_API void *hts_realloc(void *const data, const size_t size) {
  void *const block = realloc(data, size);

  return block;
}
|
|
|
|
/* Exported wrapper around free(): hands 'data' to free(). */
HTSEXT_API void hts_free(void *data) {
  void *const block = data;

  free(block);
}
|
|
|
|
/* Dummy functions */
|
|
/* Dummy entry point: does nothing and always returns 0. */
HTSEXT_API int hts_resetvar(void) {
  const int result = 0;

  return result;
}
|
|
|
|
#ifdef _WIN32
|
|
|
|
/* Shorthand so that 'struct dirent' can also be written 'dirent'. */
typedef struct dirent dirent;
|
|
DIR *opendir(const char *name) {
|
|
WIN32_FILE_ATTRIBUTE_DATA st;
|
|
DIR *dir;
|
|
size_t len;
|
|
int i;
|
|
|
|
if (name == NULL || *name == '\0') {
|
|
errno = ENOENT;
|
|
return NULL;
|
|
}
|
|
if (!GetFileAttributesEx(name, GetFileExInfoStandard, &st)
|
|
|| (st.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0) {
|
|
errno = ENOENT;
|
|
return NULL;
|
|
}
|
|
dir = calloc(sizeof(DIR), 1);
|
|
if (dir == NULL) {
|
|
errno = ENOMEM;
|
|
return NULL;
|
|
}
|
|
len = strlen(name);
|
|
dir->h = INVALID_HANDLE_VALUE;
|
|
dir->name = malloc(len + 2 + 1);
|
|
strcpy(dir->name, name);
|
|
for(i = 0; dir->name[i] != '\0'; i++) {
|
|
if (dir->name[i] == '/') {
|
|
dir->name[i] = '\\';
|
|
}
|
|
}
|
|
strcat(dir->name, "\\*");
|
|
return dir;
|
|
}
|
|
|
|
/* Windows replacement for readdir(): return the next entry of 'dir'.
   The first call starts the search with the pattern prepared by opendir();
   later calls continue it.
   Returns a pointer to the entry embedded in 'dir' (overwritten by the next
   call), or NULL with errno set to ENOENT when there is no (more) entry, or
   to EBADF when 'dir' is NULL.
   NOTE: once the end has been reported the search handle is closed, so a
   further call would start the enumeration again from the beginning. */
struct dirent *readdir(DIR * dir) {
  WIN32_FIND_DATAA find;

  if (dir == NULL) {
    errno = EBADF;
    return NULL;
  }
  if (dir->h == INVALID_HANDLE_VALUE) {
    dir->h = FindFirstFileA(dir->name, &find);
  } else {
    /* explicit ANSI variant, matching WIN32_FIND_DATAA/FindFirstFileA */
    if (!FindNextFileA(dir->h, &find)) {
      FindClose(dir->h);
      dir->h = INVALID_HANDLE_VALUE;
    }
  }
  if (dir->h != INVALID_HANDLE_VALUE) {
    /* bounded copy: strncat always NUL-terminates */
    dir->entry.d_name[0] = 0;
    strncat(dir->entry.d_name, find.cFileName, HTS_DIRENT_SIZE - 1);
    return &dir->entry;
  }
  errno = ENOENT;
  return NULL;
}
|
|
|
|
/* Windows replacement for closedir(): end the search still attached to
   'dir', if any, and release the object.
   Returns 0 on success, or -1 with errno set to EBADF when 'dir' is NULL. */
int closedir(DIR * dir) {
  if (dir == NULL) {
    errno = EBADF;
    return -1;
  }
  if (dir->h != INVALID_HANDLE_VALUE) {
    /* search handles returned by FindFirstFile must be released with
       FindClose(), not CloseHandle() */
    FindClose(dir->h);
    dir->h = INVALID_HANDLE_VALUE;
  }
  free(dir->name);
  free(dir);
  return 0;
}
|
|
|
|
// UTF-8 aware FILE API
|
|
|
|
/* Widen the NUL-terminated byte string 'src' into 'dest', one wide
   character per byte, terminator included. 'dest' must have room for
   strlen(src) + 1 wide characters. */
static void copyWchar(LPWSTR dest, const char *src) {
  size_t i;

  for(i = 0; src[i] != '\0'; i++) {
    /* go through unsigned char so that bytes >= 0x80 are not sign-extended
       where plain char is signed */
    dest[i] = (unsigned char) src[i];
  }
  dest[i] = '\0';
}
|
|
|
|
FILE *hts_fopen_utf8(const char *path, const char *mode) {
|
|
WCHAR wmode[32];
|
|
LPWSTR wpath = hts_convertUTF8StringToUCS2(path, (int) strlen(path), NULL);
|
|
|
|
assertf(strlen(mode) < sizeof(wmode) / sizeof(WCHAR));
|
|
copyWchar(wmode, mode);
|
|
if (wpath != NULL) {
|
|
FILE *const fp = _wfopen(wpath, wmode);
|
|
|
|
free(wpath);
|
|
return fp;
|
|
} else {
|
|
// Fallback on conversion error.
|
|
return fopen(path, mode);
|
|
}
|
|
}
|
|
|
|
/* stat() taking an UTF-8 'path': uses _wstat() on the converted path, or
   _stat(path, buf) when the conversion fails. Returns their result. */
int hts_stat_utf8(const char *path, STRUCT_STAT * buf) {
  LPWSTR wpath = hts_convertUTF8StringToUCS2(path, (int) strlen(path), NULL);
  int result;

  if (wpath == NULL) {
    // Fallback on conversion error.
    return _stat(path, buf);
  }
  result = _wstat(wpath, buf);
  free(wpath);
  return result;
}
|
|
|
|
int hts_unlink_utf8(const char *path) {
|
|
LPWSTR wpath = hts_convertUTF8StringToUCS2(path, (int) strlen(path), NULL);
|
|
|
|
if (wpath != NULL) {
|
|
const int result = _wunlink(wpath);
|
|
|
|
free(wpath);
|
|
return result;
|
|
} else {
|
|
// Fallback on conversion error.
|
|
return _unlink(path);
|
|
}
|
|
}
|
|
|
|
int hts_rename_utf8(const char *oldpath, const char *newpath) {
|
|
LPWSTR woldpath =
|
|
hts_convertUTF8StringToUCS2(oldpath, (int) strlen(oldpath), NULL);
|
|
LPWSTR wnewpath =
|
|
hts_convertUTF8StringToUCS2(newpath, (int) strlen(newpath), NULL);
|
|
if (woldpath != NULL && wnewpath != NULL) {
|
|
const int result = _wrename(woldpath, wnewpath);
|
|
|
|
free(woldpath);
|
|
free(wnewpath);
|
|
return result;
|
|
} else {
|
|
if (woldpath != NULL)
|
|
free(woldpath);
|
|
if (wnewpath != NULL)
|
|
free(wnewpath);
|
|
// Fallback on conversion error.
|
|
return rename(oldpath, newpath);
|
|
}
|
|
}
|
|
|
|
int hts_mkdir_utf8(const char *path) {
|
|
LPWSTR wpath = hts_convertUTF8StringToUCS2(path, (int) strlen(path), NULL);
|
|
|
|
if (wpath != NULL) {
|
|
const int result = _wmkdir(wpath);
|
|
|
|
free(wpath);
|
|
return result;
|
|
} else {
|
|
// Fallback on conversion error.
|
|
return _mkdir(path);
|
|
}
|
|
}
|
|
|
|
HTSEXT_API int hts_utime_utf8(const char *path, const STRUCT_UTIMBUF * times) {
|
|
STRUCT_UTIMBUF mtimes = *times;
|
|
LPWSTR wpath = hts_convertUTF8StringToUCS2(path, (int) strlen(path), NULL);
|
|
|
|
if (wpath != NULL) {
|
|
const int result = _wutime(wpath, &mtimes);
|
|
|
|
free(wpath);
|
|
return result;
|
|
} else {
|
|
// Fallback on conversion error.
|
|
return _utime(path, &mtimes);
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
// End
|