Compare commits

...

2 Commits

Author SHA1 Message Date
Xavier Roche
36a9f5a827 Merge pull request #365 from xroche/cleanup/htslib-bounds
Bound htslib.c pointer-destination buffer writes (batch 9)
2026-06-16 03:54:38 +02:00
Xavier Roche
20880c1a4d Bound htslib.c pointer-destination buffer writes (batch 9)
Continues the htssafe.h pointer-destination migration (X1), where the
strcpybuff/strcatbuff macros silently fall back to a raw strcpy/strcat
when the destination is a bare char* rather than a sized array.

In htslib.c:
* fil_normalized() rebuilds the sorted query through an htsbuff bounded
  builder over the malloc'd copyBuff, then copies it back with strlcpybuff
  (capacity is the known qLen + 1).
* treathead() bounds the Location: copy with strlcpybuff against the
  location_buffer[HTS_URLMAXSIZE*2] contract.
* give_mimext(), convtolower() and cut_path() are internal (hidden, not
  HTSEXT_API), so they take an explicit destination size and the callers
  pass it: give_mimext in htsname.c/htscoremain.c/htslib.c, convtolower in
  htshash.c. cut_path has no callers.

Add strlncatbuff(dst, src, size, n) to htssafe.h: a bounded n-limited
append with explicit capacity, the missing parallel to strlcatbuff.

Cover fil_normalized query-sort, give_mimext, convtolower and cut_path with
the -#7 basic_selftests.

get_httptype() and adr_normalized() are left for a follow-up: both are
exported (HTSEXT_API), and get_httptype() exposes a real latent overflow
(a .docx/.pptx/.xlsx URL writes a 65-73 char mime type into 64-byte
contenttype callers) whose fix is a public-ABI decision.

htslib.c pointer-destination warnings: 14 -> 4.

Signed-off-by: Xavier Roche <roche@httrack.com>
2026-06-16 03:48:52 +02:00
6 changed files with 88 additions and 33 deletions

View File

@@ -285,6 +285,46 @@ static void basic_selftests(void) {
assertf(end == NULL && strcmp(tok, "a\\") == 0);
}
}
// fil_normalized(): canonicalizes a URL path. Query arguments are sorted
// alphabetically (by the text after each '?'/'&') and the query is rebuilt
// through a bounded builder; outside the query, "//" collapses to "/".
// Regression for that builder.
{
char norm[256];
assertf(strcmp(fil_normalized("/p?b=2&a=1&c=3", norm), "/p?a=1&b=2&c=3") ==
0);
assertf(strcmp(fil_normalized("/a//b", norm), "/a/b") == 0);
}
// give_mimext(): mime type -> file extension, bounded into the caller buffer.
{
char ext[16];
give_mimext(ext, sizeof(ext), "image/gif");
assertf(strcmp(ext, "gif") == 0);
give_mimext(ext, sizeof(ext), "text/html");
assertf(strcmp(ext, "html") == 0);
give_mimext(ext, sizeof(ext), "no/such-mime-type");
assertf(ext[0] == '\0');
}
// convtolower(): lower-cases into the caller buffer (bounded by its size).
{
char low[64];
assertf(strcmp(convtolower(low, sizeof(low), "ABC/Def.HTML"),
"abc/def.html") == 0);
}
// cut_path(): splits a path into directory (with trailing '/') and basename,
// each bounded by its buffer size.
{
char full[] = "/dir/sub/file.html";
char path[256];
char pname[256];
cut_path(full, path, sizeof(path), pname, sizeof(pname));
assertf(strcmp(path, "/dir/sub/") == 0);
assertf(strcmp(pname, "file.html") == 0);
}
}
/* Self-tests for the htssafe.h bounded string ops (driven by httrack -#8).
@@ -2605,7 +2645,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
printf("%s is '%s'\n", argv[na + 1], mime);
ext[0] = '\0';
give_mimext(ext, mime);
give_mimext(ext, sizeof(ext), mime);
if (ext[0]) {
printf("and its local type is '.%s'\n", ext);
}

View File

@@ -76,7 +76,7 @@ static coucal_key key_duphandler(void *arg, coucal_key_const name) {
/* Key sav hashes are using case-insensitive version */
static coucal_hashkeys key_sav_hashes(void *arg, coucal_key_const key) {
hash_struct *const hash = (hash_struct*) arg;
convtolower(hash->catbuff, (const char*) key);
convtolower(hash->catbuff, sizeof(hash->catbuff), (const char *) key);
return coucal_hash_string(hash->catbuff);
}

View File

@@ -1530,8 +1530,9 @@ void treathead(t_cookie * cookie, const char *adr, const char *fil, htsblk * ret
if (retour->location) {
while(is_realspace(*(rcvd + p)))
p++; // sauter espaces
if ((int) strlen(rcvd + p) < HTS_URLMAXSIZE) // pas trop long?
strcpybuff(retour->location, rcvd + p);
if ((int) strlen(rcvd + p) < HTS_URLMAXSIZE) // not too long?
/* location aliases location_buffer[HTS_URLMAXSIZE * 2] */
strlcpybuff(retour->location, rcvd + p, HTS_URLMAXSIZE * 2);
else // erreur.. ignorer
retour->location[0] = '\0';
}
@@ -3444,16 +3445,17 @@ HTSEXT_API char *fil_normalized(const char *source, char *dest) {
/* Replace query by sorted query */
copyBuff = malloct(qLen + 1);
assertf(copyBuff != NULL);
copyBuff[0] = '\0';
for(i = 0; i < ampargs; i++) {
if (i == 0)
strcatbuff(copyBuff, "?");
else
strcatbuff(copyBuff, "&");
strcatbuff(copyBuff, amps[i] + 1);
{
htsbuff cb = htsbuff_ptr(copyBuff, qLen + 1);
for (i = 0; i < ampargs; i++) {
htsbuff_cat(&cb, i == 0 ? "?" : "&");
htsbuff_cat(&cb, amps[i] + 1);
}
assertf(cb.len == qLen);
}
assertf(strlen(copyBuff) == qLen);
strcpybuff(query, copyBuff);
/* query points into dest where the original qLen-byte query was */
strlcpybuff(query, copyBuff, qLen + 1);
/* Cleanup */
freet(amps);
@@ -3894,9 +3896,9 @@ HTSEXT_API size_t escape_for_html_print_full(const char *const s, char *const de
#undef ADD_CHAR
// conversion minuscules, avec buffer
char *convtolower(char *catbuff, const char *a) {
strcpybuff(catbuff, a);
// lower-case conversion into caller buffer (capacity catbuffsize)
char *convtolower(char *catbuff, size_t catbuffsize, const char *a) {
strlcpybuff(catbuff, a, catbuffsize);
hts_lowcase(catbuff); // lower case
return catbuff;
}
@@ -4073,15 +4075,15 @@ int get_userhttptype(httrackp * opt, char *s, const char *fil) {
// renvoyer extesion d'un type mime..
// ex: "image/gif" -> gif
void give_mimext(char *s, const char *st) {
void give_mimext(char *s, size_t ssize, const char *st) {
int ok = 0;
int j = 0;
s[0] = '\0';
while((!ok) && (strnotempty(hts_mime[j][1]))) {
if (strfield2(hts_mime[j][0], st)) {
if (hts_mime[j][1][0] != '*') { // Une correspondance existe
strcpybuff(s, hts_mime[j][1]);
if (hts_mime[j][1][0] != '*') { // a match exists
strlcpybuff(s, hts_mime[j][1], ssize);
ok = 1;
}
}
@@ -4102,7 +4104,7 @@ void give_mimext(char *s, const char *st) {
if (a) {
if ((int) strlen(a) >= 1) {
if ((int) strlen(a) <= 4) {
strcpybuff(s, a);
strlcpybuff(s, a, ssize);
ok = 1;
}
}
@@ -4206,7 +4208,7 @@ int may_bogus_multiple(httrackp * opt, const char *mime, const char *filename) {
char ext[64];
ext[0] = '\0';
give_mimext(ext, mime);
give_mimext(ext, sizeof(ext), mime);
if (ext[0] != 0) { /* we have an extension for that */
const size_t ext_size = strlen(ext);
const char *file = strrchr(filename, '/'); /* fetch terminal filename */
@@ -4930,7 +4932,8 @@ void hts_freeall(void) {
// cut path and project name
// patch also initial path
void cut_path(char *fullpath, char *path, char *pname) {
void cut_path(char *fullpath, char *path, size_t path_size, char *pname,
size_t pname_size) {
path[0] = pname[0] = '\0';
if (strnotempty(fullpath)) {
if ((fullpath[strlen(fullpath) - 1] == '/')
@@ -4946,8 +4949,8 @@ void cut_path(char *fullpath, char *path, char *pname) {
a--;
if (*a == '/')
a++;
strcpybuff(pname, a);
strncatbuff(path, fullpath, (int) (a - fullpath));
strlcpybuff(pname, a, pname_size);
strlncatbuff(path, fullpath, path_size, (size_t) (a - fullpath));
}
}
}

View File

@@ -252,7 +252,7 @@ int ishtml_ext(const char *a);
int ishttperror(int err);
int get_userhttptype(httrackp * opt, char *s, const char *fil);
void give_mimext(char *s, const char *st);
void give_mimext(char *s, size_t ssize, const char *st);
int may_bogus_multiple(httrackp * opt, const char *mime, const char *filename);
int may_unknown2(httrackp * opt, const char *mime, const char *filename);
@@ -264,7 +264,7 @@ void code64(unsigned char *a, int size_a, unsigned char *b, int crlf);
#define copychar(catbuff,a) concat(catbuff,(a),NULL)
char *convtolower(char *catbuff, const char *a);
char *convtolower(char *catbuff, size_t catbuffsize, const char *a);
void hts_lowcase(char *s);
void hts_replace(char *s, char from, char to);
int multipleStringMatch(const char *s, const char *match);
@@ -276,7 +276,8 @@ void fprintfio(FILE * fp, const char *buff, const char *prefix);
int sig_ignore_flag(int setflag); // flag ignore
#endif
void cut_path(char *fullpath, char *path, char *pname);
void cut_path(char *fullpath, char *path, size_t path_size, char *pname,
size_t pname_size);
int fexist(const char *s);
int fexist_utf8(const char *s);

View File

@@ -344,7 +344,7 @@ int url_savename(lien_adrfilsave *const afs,
mime[0] = ext[0] = '\0';
get_userhttptype(opt, mime, fil);
if (strnotempty(mime)) {
give_mimext(ext, mime);
give_mimext(ext, sizeof(ext), mime);
if (strnotempty(ext)) {
ext_chg = 1;
}
@@ -378,7 +378,7 @@ int url_savename(lien_adrfilsave *const afs,
ext_chg = 2; /* change filename */
strcpybuff(ext, r.cdispo);
} else if (!may_unknown2(opt, r.contenttype, fil)) { // on peut patcher à priori?
give_mimext(s, r.contenttype); // obtenir extension
give_mimext(s, sizeof(s), r.contenttype); // get extension
if (strnotempty(s) > 0) { // on a reconnu l'extension
ext_chg = 1;
strcpybuff(ext, s);
@@ -403,7 +403,7 @@ int url_savename(lien_adrfilsave *const afs,
mime[0] = ext[0] = '\0';
get_userhttptype(opt, mime, fil);
if (strnotempty(mime)) {
give_mimext(ext, mime);
give_mimext(ext, sizeof(ext), mime);
if (strnotempty(ext)) {
ext_chg = 1;
}
@@ -421,7 +421,8 @@ int url_savename(lien_adrfilsave *const afs,
} else if (!may_unknown2(opt, headers->r.contenttype, headers->url_fil)) { // on peut patcher à priori? (pas interdit ou pas de type)
char s[16];
s[0] = '\0';
give_mimext(s, headers->r.contenttype); // obtenir extension
give_mimext(s, sizeof(s),
headers->r.contenttype); // get extension
if (strnotempty(s) > 0) { // on a reconnu l'extension
ext_chg = 1;
strcpybuff(ext, s);
@@ -431,7 +432,7 @@ int url_savename(lien_adrfilsave *const afs,
else if (mime_type != NULL) {
ext[0] = '\0';
if (*mime_type) {
give_mimext(ext, mime_type);
give_mimext(ext, sizeof(ext), mime_type);
}
if (strnotempty(ext)) {
char mime_from_file[128];
@@ -646,7 +647,8 @@ int url_savename(lien_adrfilsave *const afs,
ext_chg = 2; /* change filename */
strcpybuff(ext, back[b].r.cdispo);
} else if (!may_unknown2(opt, back[b].r.contenttype, back[b].url_fil)) { // on peut patcher à priori? (pas interdit ou pas de type)
give_mimext(s, back[b].r.contenttype); // obtenir extension
give_mimext(s, sizeof(s),
back[b].r.contenttype); // get extension
if (strnotempty(s) > 0) { // on a reconnu l'extension
ext_chg = 1;
strcpybuff(ext, s);

View File

@@ -237,6 +237,15 @@ static char *strncatbuff_ptr_(char *dest, const char *src, size_t n) {
HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), (size_t) -1, \
"overflow while appending '" #B "' to '"#A"'", __FILE__, __LINE__)
/**
* Append at most "N" characters of "B" to "A", "A" having a maximum capacity
* of "S".
*/
#define strlncatbuff(A, B, S, N) \
strncat_safe_(A, S, B, HTS_IS_NOT_CHAR_BUFFER(B) ? (size_t) -1 : sizeof(B), \
N, "overflow while appending '" #B "' to '" #A "'", __FILE__, \
__LINE__)
/**
* Copy characters of "B" to "A", "A" having a maximum capacity of "S".
*/