mirror of
https://github.com/xroche/httrack.git
synced 2026-06-14 22:33:54 +03:00
Compare commits
5 Commits
cleanup/ht
...
cleanup/ht
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c4ef18f5a5 | ||
|
|
d76dad47f7 | ||
|
|
055e17b057 | ||
|
|
d7bb97d697 | ||
|
|
ca810ef7e3 |
@@ -193,6 +193,37 @@ static int string_safety_selftests(void) {
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
/* htsbuff: bounded builder over a fixed array (append, truncating append,
|
||||
reset, and length tracking) */
|
||||
{
|
||||
char dst[8];
|
||||
htsbuff b = htsbuff_array(dst);
|
||||
|
||||
htsbuff_cat(&b, "ab");
|
||||
htsbuff_cat(&b, "cd");
|
||||
if (strcmp(htsbuff_str(&b), "abcd") != 0 || b.len != 4)
|
||||
return 1;
|
||||
|
||||
htsbuff_catn(&b, "efghij", 2); /* append at most 2 */
|
||||
if (strcmp(htsbuff_str(&b), "abcdef") != 0)
|
||||
return 1;
|
||||
|
||||
htsbuff_cpy(&b, "xyz"); /* reset */
|
||||
if (strcmp(htsbuff_str(&b), "xyz") != 0 || b.len != 3)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* boundary: filling to exactly cap-1 must succeed (one more aborts, which the
|
||||
-#8 overflow-buff mode checks) */
|
||||
{
|
||||
char d2[4];
|
||||
htsbuff c = htsbuff_array(d2);
|
||||
|
||||
htsbuff_cat(&c, "abc");
|
||||
if (strcmp(htsbuff_str(&c), "abc") != 0 || c.len != 3)
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2494,16 +2525,23 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
|
||||
return 0;
|
||||
break;
|
||||
case '8': /* string-safety selftest: httrack -#8 [overflow <bigstr>] */
|
||||
if (na + 1 < argc && strcmp(argv[na + 1], "overflow") == 0) {
|
||||
/* Deliberately exceed a sized buffer: the bounded macro must
|
||||
if (na + 1 < argc
|
||||
&& strncmp(argv[na + 1], "overflow", 8) == 0) {
|
||||
/* Deliberately exceed a sized buffer: the bounded op must
|
||||
abort. The source comes from argv so its length is opaque
|
||||
to the compiler (no static -Wstringop-overflow, genuine
|
||||
runtime check). */
|
||||
runtime check). "overflow-buff" exercises htsbuff. */
|
||||
char small[4];
|
||||
const char *const src =
|
||||
(na + 2 < argc) ? argv[na + 2] : "overflowing";
|
||||
|
||||
strcpybuff(small, src);
|
||||
if (strcmp(argv[na + 1], "overflow-buff") == 0) {
|
||||
htsbuff b = htsbuff_array(small);
|
||||
|
||||
htsbuff_cat(&b, src);
|
||||
} else {
|
||||
strcpybuff(small, src);
|
||||
}
|
||||
printf("strsafe: NOT aborted\n"); /* must be unreachable */
|
||||
htsmain_free();
|
||||
return 1;
|
||||
|
||||
@@ -287,6 +287,81 @@ static HTS_INLINE HTS_UNUSED char* strcpy_safe_(char *const dest, const size_t s
|
||||
return strncat_safe_(dest, sizeof_dest, source, sizeof_source, (size_t) -1, exp, file, line);
|
||||
}
|
||||
|
||||
/**
|
||||
* htsbuff: a non-owning bounded string builder over a fixed buffer.
|
||||
*
|
||||
* Companion to the strcpybuff()/strcatbuff() macros for the common case of a
|
||||
* cursor walking a buffer of known capacity (building a name into a fixed
|
||||
* array, assembling a status line, etc.). It tracks the write position, bounds
|
||||
* every write against the real capacity, and aborts on overflow (same contract
|
||||
* as the *_safe_ helpers), so the error-prone manual "p += strlen(p)" dance
|
||||
* goes away.
|
||||
*
|
||||
* Build one from an in-scope array with htsbuff_array() (capacity via sizeof,
|
||||
* so pass an array, not a pointer), or from a pointer of known capacity with
|
||||
* htsbuff_ptr(). The buffer is kept NUL-terminated; htsbuff_str() returns it.
|
||||
*/
|
||||
/* Bookkeeping for a bounded string builder: the storage pointer, its total
   size, and the length of the string currently held in it. */
typedef struct {
|
||||
char *buf; /* backing buffer (kept NUL-terminated) */
|
||||
size_t cap; /* total capacity of buf in bytes, including the room for the NUL */
|
||||
size_t len; /* current string length, excluding the NUL (next write offset) */
|
||||
} htsbuff;
|
||||
|
||||
/**
 * Create a builder over buf, whose total capacity (including the terminating
 * NUL) is cap, and reset the buffer to the empty string.
 * A zero capacity fails the assertf() check: there must be room for the NUL.
 */
static HTS_INLINE HTS_UNUSED htsbuff htsbuff_ptr_(char *buf, size_t cap) {
  htsbuff builder;

  /* Validate before touching the buffer: the NUL below needs one byte. */
  assertf(cap != 0);
  buf[0] = '\0';

  builder.len = 0;
  builder.cap = cap;
  builder.buf = buf;
  return builder;
}
|
||||
|
||||
/**
|
||||
* Builder over the in-scope array ARR (capacity = sizeof(ARR)).
|
||||
* On GCC/Clang this rejects a non-array (e.g. a char* pointer), whose sizeof
|
||||
* would be the pointer size and silently wrong; use htsbuff_ptr() for pointers.
|
||||
* On other compilers there is no such guard, so pass only true arrays there.
|
||||
*/
|
||||
#if (defined(__GNUC__) && !defined(__cplusplus))
/* 0 for an array, a -1 array-size compile error for a pointer: a pointer's
   type is compatible with the type of &(A)[0], an array's is not.
   __typeof__ is used instead of typeof because the plain keyword is a GNU
   extension that is not recognized in strict ISO modes (-std=c99, -std=c11),
   whereas the alternate spelling is always available on GCC/Clang. */
#define htsbuff_must_be_array_(A) \
  (sizeof(char[1 - 2 * !!__builtin_types_compatible_p(__typeof__(A), __typeof__(&(A)[0]))]) - 1)
/* The guard contributes 0 to the capacity; it only exists to fail the build. */
#define htsbuff_array(ARR) htsbuff_ptr_((ARR), sizeof(ARR) + htsbuff_must_be_array_(ARR))
#else
/* No compile-time guard available here: pass only true arrays. */
#define htsbuff_array(ARR) htsbuff_ptr_((ARR), sizeof(ARR))
#endif
/** Builder over pointer P of known capacity N (N includes the NUL). */
#define htsbuff_ptr(P, N) htsbuff_ptr_((P), (N))
|
||||
|
||||
/** Append at most n characters of s (stopping at its NUL). Aborts on overflow. */
|
||||
static HTS_INLINE HTS_UNUSED void htsbuff_catn(htsbuff *b, const char *s, size_t n) {
|
||||
const size_t add = strnlen(s, n);
|
||||
/* Overflow-safe: keep the (potentially huge) 'add' alone on one side. The
|
||||
maintained invariant len < cap makes 'cap - len' >= 1 (no underflow), so
|
||||
'add < cap - len' cannot wrap the way 'len + add < cap' could. */
|
||||
assertf__(add < b->cap - b->len, "htsbuff append overflow", __FILE__, __LINE__);
|
||||
memcpy(b->buf + b->len, s, add);
|
||||
b->len += add;
|
||||
b->buf[b->len] = '\0';
|
||||
}
|
||||
|
||||
/** Append s. Aborts on overflow. */
|
||||
static HTS_INLINE HTS_UNUSED void htsbuff_cat(htsbuff *b, const char *s) {
|
||||
htsbuff_catn(b, s, (size_t) -1);
|
||||
}
|
||||
|
||||
/** Reset content to s. Aborts on overflow. */
|
||||
static HTS_INLINE HTS_UNUSED void htsbuff_cpy(htsbuff *b, const char *s) {
|
||||
b->len = 0;
|
||||
htsbuff_catn(b, s, (size_t) -1);
|
||||
}
|
||||
|
||||
/** Current NUL-terminated content. */
|
||||
static HTS_INLINE HTS_UNUSED const char *htsbuff_str(const htsbuff *b) {
|
||||
return b->buf;
|
||||
}
|
||||
|
||||
/* Allocation wrappers: malloct() and calloct() forward directly to malloc()
   and calloc(). */
#define malloct(A) malloc(A)
|
||||
#define calloct(A,B) calloc((A), (B))
|
||||
/* Frees A when it is not NULL and then sets it to NULL; A is evaluated more
   than once and is assigned to, so it must be a side-effect-free lvalue. */
#define freet(A) do { if ((A) != NULL) { free(A); (A) = NULL; } } while(0)
|
||||
|
||||
@@ -99,17 +99,25 @@ grep -Eq 'srcset="j\.gif 2x"' "$saved" ||
|
||||
! grep -Eq 'srcset="[^"]*file://' "$saved" ||
|
||||
! echo "FAIL: a file:// URL survived inside a rewritten srcset attribute" || exit 1
|
||||
|
||||
# xlink:href (#298) and inline background-image (#237): detected and rewritten
|
||||
# to local; no-detect attributes (title, alt, ...) left untouched. Asserted by
|
||||
# rewrite (deterministic), not download. data-* (#201/#203) is omitted: its
|
||||
# detection is currently nondeterministic and can't be locked yet.
|
||||
# xlink:href (#298) and CSS background-image (#237): detected and rewritten to
|
||||
# local. background-image is covered in both an external <style> block and an
|
||||
# inline style attribute, with the URL unquoted, double-quoted and single-quoted
|
||||
# (the quote style is preserved on rewrite). No-detect attributes (title, alt,
|
||||
# ...) are left untouched. Asserted by rewrite (deterministic), not download.
|
||||
# data-* (#201/#203) is omitted: its detection is currently nondeterministic and
|
||||
# can't be locked yet.
|
||||
site2="$tmp/attrs"
|
||||
mkdir -p "$site2"
|
||||
for f in xl ibg tt; do gif "$site2/$f.gif"; done
|
||||
for f in xl ibg ibgs cex cexd cexs tt; do gif "$site2/$f.gif"; done
|
||||
cat >"$site2/index.html" <<EOF
|
||||
<html><body>
|
||||
<html><head><style>
|
||||
.a { background-image: url(file://$site2/cex.gif); }
|
||||
.b { background-image: url("file://$site2/cexd.gif"); }
|
||||
.c { background-image: url('file://$site2/cexs.gif'); }
|
||||
</style></head><body>
|
||||
<a xlink:href="file://$site2/xl.gif">xlink:href (#298)</a>
|
||||
<div style="background-image:url(file://$site2/ibg.gif)"></div>
|
||||
<div style="background-image:url('file://$site2/ibgs.gif')"></div>
|
||||
<span title="file://$site2/tt.gif">excluded attribute</span>
|
||||
</body></html>
|
||||
EOF
|
||||
@@ -121,8 +129,24 @@ test -n "$saved2" || ! echo "FAIL: saved attrs page not found" || exit 1
|
||||
# detected attributes: the absolute URL is rewritten to a local link
|
||||
grep -Eq 'xlink:href="xl\.gif"' "$saved2" ||
|
||||
! echo "FAIL #298: xlink:href not detected/rewritten" || exit 1
|
||||
|
||||
# #237 external <style> block, each quoting form, quote style preserved
|
||||
grep -Eq 'url\(cex\.gif\)' "$saved2" ||
|
||||
! echo "FAIL #237: unquoted background-image in <style> not rewritten" || exit 1
|
||||
grep -Eq 'url\("cexd\.gif"\)' "$saved2" ||
|
||||
! echo "FAIL #237: double-quoted background-image in <style> not rewritten" || exit 1
|
||||
grep -Eq "url\('cexs\.gif'\)" "$saved2" ||
|
||||
! echo "FAIL #237: single-quoted background-image in <style> not rewritten" || exit 1
|
||||
|
||||
# #237 inline style attribute, unquoted and single-quoted url()
|
||||
grep -Eq 'style="background-image:url\(ibg\.gif\)"' "$saved2" ||
|
||||
! echo "FAIL #237: inline background-image url() not detected/rewritten" || exit 1
|
||||
! echo "FAIL #237: inline unquoted background-image not rewritten" || exit 1
|
||||
grep -Eq "style=\"background-image:url\('ibgs\.gif'\)\"" "$saved2" ||
|
||||
! echo "FAIL #237: inline single-quoted background-image not rewritten" || exit 1
|
||||
|
||||
# no file:// URL survived inside any rewritten background-image
|
||||
! grep -Eq 'background-image:[^;"]*file://' "$saved2" ||
|
||||
! echo "FAIL #237: a file:// URL survived inside a rewritten background-image" || exit 1
|
||||
|
||||
# excluded attribute: title is on the no-detect list, so its value is left as-is
|
||||
grep -q 'title="file://' "$saved2" ||
|
||||
|
||||
@@ -21,3 +21,14 @@ case "$err" in
|
||||
*"overflow while copying"*) ;;
|
||||
*) echo "expected htssafe overflow abort, got: $err" >&2; exit 1 ;;
|
||||
esac
|
||||
|
||||
# Same guarantee for the htsbuff builder. The source is exactly the buffer
|
||||
# capacity (4 bytes into a 4-byte buffer), so this also pins the boundary: a
|
||||
# '<=' off-by-one in the capacity check would let it through (and print "NOT
|
||||
# aborted"). Match the specific htsbuff abort message, not just any assert.
|
||||
err=$(httrack -#8 overflow-buff "abcd" 2>&1)
|
||||
case "$err" in
|
||||
*"strsafe: NOT aborted"*) echo "htsbuff over-capacity write was NOT caught" >&2; exit 1 ;;
|
||||
*"htsbuff append overflow"*) ;;
|
||||
*) echo "expected htsbuff overflow abort, got: $err" >&2; exit 1 ;;
|
||||
esac
|
||||
|
||||
Reference in New Issue
Block a user