Compare commits

..

5 Commits

Author SHA1 Message Date
Xavier Roche
c4ef18f5a5 Add htsbuff: a bounded string builder over a fixed buffer
Many pointer-destination buff() sites are cursors walking a buffer of known
capacity, with a manual "p += strlen(p)" after each write (the url_savename
renderer does this ~40 times). That hand-rolled pointer math is where several
of the off-by-one hazards live.

htsbuff captures the pattern: a non-owning builder (buf/cap/len) built from an
in-scope array (htsbuff_array, capacity via sizeof) or a pointer of known size
(htsbuff_ptr). htsbuff_cat/catn/cpy bound every write against the real capacity
and abort on overflow, same contract as the *_safe_ helpers, so the pointer
math goes away.

Extend the -#8 self-test and tests/01_engine-strsafe.test with builder
correctness (append, truncating append, reset, length) and an overflow-abort
case. No call sites are converted yet; that follows per file.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-14 10:38:22 +02:00
Xavier Roche
d76dad47f7 Merge pull request #330 from xroche/cleanup/htssafe-pointer-diagnostics
Flag unchecked pointer-destination uses of the buff() string macros
2026-06-14 08:49:26 +02:00
Xavier Roche
055e17b057 Merge pull request #328 from xroche/cli/header-ua-length-152
Raise the user-agent and custom-header length limits
2026-06-14 01:43:31 +02:00
Xavier Roche
d7bb97d697 Merge pull request #329 from xroche/parser/lock-background-image-237
Lock CSS background-image url() rewriting in the parser test
2026-06-14 01:37:51 +02:00
Xavier Roche
ca810ef7e3 Lock CSS background-image url() rewriting in the parser test
background-image is already captured and rewritten through the style/CSS
url() path, in both an external <style> block and an inline style attribute,
with the URL unquoted, double-quoted or single-quoted. Extend the offline
parser test to cover all of these so the behavior stays locked.

closes #237
2026-06-14 01:07:42 +02:00
4 changed files with 159 additions and 11 deletions

View File

@@ -193,6 +193,37 @@ static int string_safety_selftests(void) {
#pragma GCC diagnostic pop
#endif
/* htsbuff: bounded builder over a fixed array (append, truncating append,
reset, and length tracking) */
{
char dst[8];
htsbuff b = htsbuff_array(dst);
htsbuff_cat(&b, "ab");
htsbuff_cat(&b, "cd");
if (strcmp(htsbuff_str(&b), "abcd") != 0 || b.len != 4)
return 1;
htsbuff_catn(&b, "efghij", 2); /* append at most 2 */
if (strcmp(htsbuff_str(&b), "abcdef") != 0)
return 1;
htsbuff_cpy(&b, "xyz"); /* reset */
if (strcmp(htsbuff_str(&b), "xyz") != 0 || b.len != 3)
return 1;
}
/* boundary: filling to exactly cap-1 must succeed (one more aborts, which the
-#8 overflow-buff mode checks) */
{
char d2[4];
htsbuff c = htsbuff_array(d2);
htsbuff_cat(&c, "abc");
if (strcmp(htsbuff_str(&c), "abc") != 0 || c.len != 3)
return 1;
}
return 0;
}
@@ -2494,16 +2525,23 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
return 0;
break;
case '8': /* string-safety selftest: httrack -#8 [overflow <bigstr>] */
if (na + 1 < argc && strcmp(argv[na + 1], "overflow") == 0) {
/* Deliberately exceed a sized buffer: the bounded macro must
if (na + 1 < argc
&& strncmp(argv[na + 1], "overflow", 8) == 0) {
/* Deliberately exceed a sized buffer: the bounded op must
abort. The source comes from argv so its length is opaque
to the compiler (no static -Wstringop-overflow, genuine
runtime check). */
runtime check). "overflow-buff" exercises htsbuff. */
char small[4];
const char *const src =
(na + 2 < argc) ? argv[na + 2] : "overflowing";
strcpybuff(small, src);
if (strcmp(argv[na + 1], "overflow-buff") == 0) {
htsbuff b = htsbuff_array(small);
htsbuff_cat(&b, src);
} else {
strcpybuff(small, src);
}
printf("strsafe: NOT aborted\n"); /* must be unreachable */
htsmain_free();
return 1;

View File

@@ -287,6 +287,81 @@ static HTS_INLINE HTS_UNUSED char* strcpy_safe_(char *const dest, const size_t s
return strncat_safe_(dest, sizeof_dest, source, sizeof_source, (size_t) -1, exp, file, line);
}
/**
* htsbuff: a non-owning bounded string builder over a fixed buffer.
*
* Companion to the strcpybuff()/strcatbuff() macros for the common case of a
* cursor walking a buffer of known capacity (building a name into a fixed
* array, assembling a status line, etc.). It tracks the write position, bounds
* every write against the real capacity, and aborts on overflow (same contract
* as the *_safe_ helpers), so the error-prone manual "p += strlen(p)" dance
* goes away.
*
* Build one from an in-scope array with htsbuff_array() (capacity via sizeof,
* so pass an array, not a pointer), or from a pointer of known capacity with
* htsbuff_ptr(). The buffer is kept NUL-terminated; htsbuff_str() returns it.
*/
typedef struct {
char *buf; /* backing buffer (kept NUL-terminated) */
size_t cap; /* total capacity of buf, including the NUL */
size_t len; /* current length, excluding the NUL */
} htsbuff;
static HTS_INLINE HTS_UNUSED htsbuff htsbuff_ptr_(char *buf, size_t cap) {
htsbuff b;
b.buf = buf;
b.cap = cap;
b.len = 0;
assertf(cap != 0);
buf[0] = '\0';
return b;
}
/**
* Builder over the in-scope array ARR (capacity = sizeof(ARR)).
* On GCC/Clang this rejects a non-array (e.g. a char* pointer), whose sizeof
* would be the pointer size and silently wrong; use htsbuff_ptr() for pointers.
* On other compilers there is no such guard, so pass only true arrays there.
*/
#if (defined(__GNUC__) && !defined(__cplusplus))
/* 0 for an array, a -1 array-size compile error for a pointer. */
#define htsbuff_must_be_array_(A) \
(sizeof(char[1 - 2 * !!__builtin_types_compatible_p(typeof(A), typeof(&(A)[0]))]) - 1)
#define htsbuff_array(ARR) htsbuff_ptr_((ARR), sizeof(ARR) + htsbuff_must_be_array_(ARR))
#else
#define htsbuff_array(ARR) htsbuff_ptr_((ARR), sizeof(ARR))
#endif
/** Builder over pointer P of known capacity N (N includes the NUL). */
#define htsbuff_ptr(P, N) htsbuff_ptr_((P), (N))
/** Append at most n characters of s (stopping at its NUL). Aborts on overflow. */
static HTS_INLINE HTS_UNUSED void htsbuff_catn(htsbuff *b, const char *s, size_t n) {
const size_t add = strnlen(s, n);
/* Overflow-safe: keep the (potentially huge) 'add' alone on one side. The
maintained invariant len < cap makes 'cap - len' >= 1 (no underflow), so
'add < cap - len' cannot wrap the way 'len + add < cap' could. */
assertf__(add < b->cap - b->len, "htsbuff append overflow", __FILE__, __LINE__);
memcpy(b->buf + b->len, s, add);
b->len += add;
b->buf[b->len] = '\0';
}
/** Append s. Aborts on overflow. */
static HTS_INLINE HTS_UNUSED void htsbuff_cat(htsbuff *b, const char *s) {
htsbuff_catn(b, s, (size_t) -1);
}
/** Reset content to s. Aborts on overflow. */
static HTS_INLINE HTS_UNUSED void htsbuff_cpy(htsbuff *b, const char *s) {
b->len = 0;
htsbuff_catn(b, s, (size_t) -1);
}
/** Current NUL-terminated content. */
static HTS_INLINE HTS_UNUSED const char *htsbuff_str(const htsbuff *b) {
return b->buf;
}
#define malloct(A) malloc(A)
#define calloct(A,B) calloc((A), (B))
#define freet(A) do { if ((A) != NULL) { free(A); (A) = NULL; } } while(0)

View File

@@ -99,17 +99,25 @@ grep -Eq 'srcset="j\.gif 2x"' "$saved" ||
! grep -Eq 'srcset="[^"]*file://' "$saved" ||
! echo "FAIL: a file:// URL survived inside a rewritten srcset attribute" || exit 1
# xlink:href (#298) and inline background-image (#237): detected and rewritten
# to local; no-detect attributes (title, alt, ...) left untouched. Asserted by
# rewrite (deterministic), not download. data-* (#201/#203) is omitted: its
# detection is currently nondeterministic and can't be locked yet.
# xlink:href (#298) and CSS background-image (#237): detected and rewritten to
# local. background-image is covered in both an external <style> block and an
# inline style attribute, with the URL unquoted, double-quoted and single-quoted
# (the quote style is preserved on rewrite). No-detect attributes (title, alt,
# ...) are left untouched. Asserted by rewrite (deterministic), not download.
# data-* (#201/#203) is omitted: its detection is currently nondeterministic and
# can't be locked yet.
site2="$tmp/attrs"
mkdir -p "$site2"
for f in xl ibg tt; do gif "$site2/$f.gif"; done
for f in xl ibg ibgs cex cexd cexs tt; do gif "$site2/$f.gif"; done
cat >"$site2/index.html" <<EOF
<html><body>
<html><head><style>
.a { background-image: url(file://$site2/cex.gif); }
.b { background-image: url("file://$site2/cexd.gif"); }
.c { background-image: url('file://$site2/cexs.gif'); }
</style></head><body>
<a xlink:href="file://$site2/xl.gif">xlink:href (#298)</a>
<div style="background-image:url(file://$site2/ibg.gif)"></div>
<div style="background-image:url('file://$site2/ibgs.gif')"></div>
<span title="file://$site2/tt.gif">excluded attribute</span>
</body></html>
EOF
@@ -121,8 +129,24 @@ test -n "$saved2" || ! echo "FAIL: saved attrs page not found" || exit 1
# detected attributes: the absolute URL is rewritten to a local link
grep -Eq 'xlink:href="xl\.gif"' "$saved2" ||
! echo "FAIL #298: xlink:href not detected/rewritten" || exit 1
# #237 external <style> block, each quoting form, quote style preserved
grep -Eq 'url\(cex\.gif\)' "$saved2" ||
! echo "FAIL #237: unquoted background-image in <style> not rewritten" || exit 1
grep -Eq 'url\("cexd\.gif"\)' "$saved2" ||
! echo "FAIL #237: double-quoted background-image in <style> not rewritten" || exit 1
grep -Eq "url\('cexs\.gif'\)" "$saved2" ||
! echo "FAIL #237: single-quoted background-image in <style> not rewritten" || exit 1
# #237 inline style attribute, unquoted and single-quoted url()
grep -Eq 'style="background-image:url\(ibg\.gif\)"' "$saved2" ||
! echo "FAIL #237: inline background-image url() not detected/rewritten" || exit 1
! echo "FAIL #237: inline unquoted background-image not rewritten" || exit 1
grep -Eq "style=\"background-image:url\('ibgs\.gif'\)\"" "$saved2" ||
! echo "FAIL #237: inline single-quoted background-image not rewritten" || exit 1
# no file:// URL survived inside any rewritten background-image
! grep -Eq 'background-image:[^;"]*file://' "$saved2" ||
! echo "FAIL #237: a file:// URL survived inside a rewritten background-image" || exit 1
# excluded attribute: title is on the no-detect list, so its value is left as-is
grep -q 'title="file://' "$saved2" ||

View File

@@ -21,3 +21,14 @@ case "$err" in
*"overflow while copying"*) ;;
*) echo "expected htssafe overflow abort, got: $err" >&2; exit 1 ;;
esac
# Same guarantee for the htsbuff builder. The source is exactly the buffer
# capacity (4 bytes into a 4-byte buffer), so this also pins the boundary: a
# '<=' off-by-one in the capacity check would let it through (and print "NOT
# aborted"). Match the specific htsbuff abort message, not just any assert.
err=$(httrack -#8 overflow-buff "abcd" 2>&1)
case "$err" in
*"strsafe: NOT aborted"*) echo "htsbuff over-capacity write was NOT caught" >&2; exit 1 ;;
*"htsbuff append overflow"*) ;;
*) echo "expected htsbuff overflow abort, got: $err" >&2; exit 1 ;;
esac