sync with OpenBSD -current

This commit is contained in:
purplerain 2024-04-01 19:33:31 +00:00
parent 61b0c3d2c1
commit 297ba4a59a
Signed by: purplerain
GPG key ID: F42C07F07E2E35B7
20 changed files with 338 additions and 251 deletions

View file

@ -1,13 +1,83 @@
NOTE: We are looking for help with a few things:
https://github.com/libexpat/libexpat/labels/help%20wanted
If you can help, please get in touch. Thanks!
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
| __// \| |_) | (_| | |_
\___/_/\_\ .__/ \__,_|\__|
|_| XML parser
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!! <blink>Expat is UNDERSTAFFED and WITHOUT FUNDING.</blink> !!
!! ~~~~~~~~~~~~ !!
!! The following topics need *additional skilled C developers* to progress !!
!! in a timely manner or at all (loosely ordered by descending priority): !!
!! !!
!! - <blink>fixing a complex non-public security issue</blink>, !!
!! - teaming up on researching and fixing future security reports and !!
!! ClusterFuzz findings with few-days-max response times in communication !!
!! in order to (1) have a sound fix ready before the end of a 90 days !!
!! grace period and (2) in a sustainable manner, !!
!! - implementing and auto-testing XML 1.0r5 support !!
!! (needs discussion before pull requests), !!
!! - smart ideas on fixing the Autotools CMake files generation issue !!
!! without breaking CI (needs discussion before pull requests), !!
!! - the Windows binaries topic (needs requirements engineering first), !!
!! - pushing migration from `int` to `size_t` further !!
!! including edge-cases test coverage (needs discussion before anything). !!
!! !!
!! For details, please reach out via e-mail to sebastian@pipping.org so we !!
!! can schedule a voice call on the topic, in English or German. !!
!! !!
!! THANK YOU! Sebastian Pipping -- Berlin, 2024-03-09 !!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Release 2.6.2 Wed March 13 2024
Security fixes:
#839 #842 CVE-2024-28757 -- Prevent billion laughs attacks with
isolated use of external parsers. Please see the commit
message of commit 1d50b80cf31de87750103656f6eb693746854aa8
for details.
Bug fixes:
#839 #841 Reject direct parameter entity recursion
and avoid the related undefined behavior
Other changes:
#847 Autotools: Fix build for DOCBOOK_TO_MAN containing spaces
#837 Add missing #821 and #824 to 2.6.1 change log
#838 #843 Version info bumped from 10:1:9 (libexpat*.so.1.9.1)
to 10:2:9 (libexpat*.so.1.9.2); see https://verbump.de/
for what these numbers do
Special thanks to:
Philippe Antoine
Tomas Korbar
and
Clang UndefinedBehaviorSanitizer
OSS-Fuzz / ClusterFuzz
Release 2.6.1 Thu February 29 2024
Bug fixes:
#817 Make tests independent of CPU speed, and thus more robust
#828 #836 Expose billion laughs API with XML_DTD defined and
XML_GE undefined, regression from 2.6.0
Other changes:
#829 Hide test-only code behind new internal macro
#833 Autotools: Reject expat_config.h.in defining SIZEOF_VOID_P
#821 #824 Autotools: Fix "make clean" for case:
./configure --without-docbook && make clean all
#819 Address compiler warnings
#832 #834 Version info bumped from 10:0:9 (libexpat*.so.1.9.0)
to 10:1:9 (libexpat*.so.1.9.1); see https://verbump.de/
for what these numbers do
Infrastructure:
#818 CI: Adapt to breaking changes in clang-format
Special thanks to:
David Hall
Snild Dolkow
Release 2.6.0 Tue February 6 2024
Security fixes:
#789 #814 CVE-2023-52425 -- Fix quadratic runtime issues with big tokens

View file

@ -5,7 +5,7 @@
[![Downloads GitHub](https://img.shields.io/github/downloads/libexpat/libexpat/total?label=Downloads%20GitHub)](https://github.com/libexpat/libexpat/releases)
# Expat, Release 2.6.0
# Expat, Release 2.6.2
This is Expat, a C99 library for parsing
[XML 1.0 Fourth Edition](https://www.w3.org/TR/2006/REC-xml-20060816/), started by

View file

@ -52,7 +52,7 @@
<div>
<h1>
The Expat XML Parser
<small>Release 2.6.0</small>
<small>Release 2.6.2</small>
</h1>
</div>
<div class="content">
@ -356,10 +356,7 @@ library and header would get installed in
<h3>Configuring Expat Using the Pre-Processor</h3>
<p>Expat's feature set can be configured using a small number of
pre-processor definitions. The definition of this symbols does not
affect the set of entry points for Expat, only the behavior of the API
and the definition of character types in the case of
<code>XML_UNICODE_WCHAR_T</code>. The symbols are:</p>
pre-processor definitions. The symbols are:</p>
<dl class="cpp-symbols">
<dt><a name="XML_GE">XML_GE</a></dt>

View file

@ -18,6 +18,7 @@
Copyright (c) 2022 Thijs Schreijer <thijs@thijsschreijer.nl>
Copyright (c) 2023 Hanno Böck <hanno@gentoo.org>
Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@ -1042,7 +1043,7 @@ typedef struct {
XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
#if XML_GE == 1
#if defined(XML_DTD) || (defined(XML_GE) && XML_GE == 1)
/* Added in Expat 2.4.0 for XML_DTD defined and
* added in Expat 2.6.0 for XML_GE == 1. */
XMLPARSEAPI(XML_Bool)
@ -1065,7 +1066,7 @@ XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
*/
#define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 6
#define XML_MICRO_VERSION 0
#define XML_MICRO_VERSION 2
#ifdef __cplusplus
}

View file

@ -28,10 +28,11 @@
Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net>
Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2018 Yury Gribov <tetra2005@gmail.com>
Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@ -155,14 +156,20 @@ extern "C" {
void _INTERNAL_trim_to_complete_utf8_characters(const char *from,
const char **fromLimRef);
#if XML_GE == 1
#if defined(XML_GE) && XML_GE == 1
unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser);
unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser);
const char *unsignedCharToPrintable(unsigned char c);
#endif
extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
extern unsigned int g_parseAttempts; // used for testing only
extern
#if ! defined(XML_TESTING)
const
#endif
XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
#if defined(XML_TESTING)
extern unsigned int g_bytesScanned; // used for testing only
#endif
#ifdef __cplusplus
}

View file

@ -1,4 +1,4 @@
/* 628e24d4966bedbd4800f6ed128d06d29703765b4bce12d3b7f099f90f842fc9 (2.6.0+)
/* 2a14271ad4d35e82bde8ba210b4edb7998794bcbae54deab114046a300f9639a (2.6.2+)
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
@ -38,7 +38,7 @@
Copyright (c) 2022 Jann Horn <jannh@google.com>
Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
Copyright (c) 2023 Owain Davies <owaind@bath.edu>
Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@ -210,7 +210,7 @@ typedef char ICHAR;
#endif
/* Round up n to be a multiple of sz, where sz is a power of 2. */
#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1))
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
/* Do safe (NULL-aware) pointer arithmetic */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
@ -248,7 +248,7 @@ static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
it odd, since odd numbers are always relative prime to a power of 2.
*/
#define SECOND_HASH(hash, mask, power) \
((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2))
((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power) \
((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
@ -629,8 +629,14 @@ static unsigned long getDebugLevel(const char *variableName,
? 0 \
: ((*((pool)->ptr)++ = c), 1))
XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c
unsigned int g_parseAttempts = 0; // used for testing only
#if ! defined(XML_TESTING)
const
#endif
XML_Bool g_reparseDeferralEnabledDefault
= XML_TRUE; // write ONLY in runtests.c
#if defined(XML_TESTING)
unsigned int g_bytesScanned = 0; // used for testing only
#endif
struct XML_ParserStruct {
/* The first member must be m_userData so that the XML_GetUserData
@ -1017,7 +1023,9 @@ callProcessor(XML_Parser parser, const char *start, const char *end,
return XML_ERROR_NONE;
}
}
g_parseAttempts += 1;
#if defined(XML_TESTING)
g_bytesScanned += (unsigned)have_now;
#endif
const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
if (ret == XML_ERROR_NONE) {
// if we consumed nothing, remember what we had on this parse attempt.
@ -6232,7 +6240,7 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
dtd->keepProcessing = dtd->standalone;
goto endEntityValue;
}
if (entity->open) {
if (entity->open || (entity == parser->m_declEntity)) {
if (enc == parser->m_encoding)
parser->m_eventPtr = entityTextPtr;
result = XML_ERROR_RECURSIVE_ENTITY_REF;

View file

@ -1202,6 +1202,49 @@ START_TEST(test_wfc_no_recursive_entity_refs) {
}
END_TEST
/* Table-driven test for parameter entities that reference themselves.
 * Each case is a DTD fragment that is fed to an external entity parser
 * (created from a fresh parent parser); the resulting parse status is
 * compared against the expected one, and every failing parse must report
 * XML_ERROR_RECURSIVE_ENTITY_REF. */
START_TEST(test_recursive_external_parameter_entity_2) {
// One table row: the DTD text to parse and the status it must yield.
struct TestCase {
const char *doc;
enum XML_Status expectedStatus;
};
struct TestCase cases[] = {
// p1 refers to itself inside its own declaration: must be rejected.
{"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
// Same self-reference, followed by a second declaration of p1:
// still expected to fail.
{"<!ENTITY % p1 '%p1;'>"
"<!ENTITY % p1 'first declaration wins'>",
XML_STATUS_ERROR},
// p1 is declared with plain text first, then re-declared referencing p1.
// Expected to parse OK -- presumably because the earlier declaration is
// the one in effect when '%p1;' is expanded (see the literal's wording).
{"<!ENTITY % p1 'first declaration wins'>"
"<!ENTITY % p1 '%p1;'>",
XML_STATUS_OK},
// '&#37;' is the character reference for '%', so the literal contains no
// direct parameter entity reference; expected to parse OK.
{"<!ENTITY % p1 '&#37;p1;'>", XML_STATUS_OK},
};
for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
const char *const doc = cases[i].doc;
const enum XML_Status expectedStatus = cases[i].expectedStatus;
// Label the subtest with the document text itself.
set_subtest("%s", doc);
XML_Parser parser = XML_ParserCreate(NULL);
assert_true(parser != NULL);
// The document is parsed by an external entity parser derived from
// `parser`, not by `parser` itself.
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
assert_true(ext_parser != NULL);
// Parse the whole document as the final chunk (isFinal == XML_TRUE).
// NOTE(review): going by its name, the helper feeds the input one byte at
// a time -- confirm against its definition.
const enum XML_Status actualStatus
= _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
assert_true(actualStatus == expectedStatus);
// Any failure must be the recursive-entity error, not some other error.
if (actualStatus != XML_STATUS_OK) {
assert_true(XML_GetErrorCode(ext_parser)
== XML_ERROR_RECURSIVE_ENTITY_REF);
}
// Free the child parser before the parent it was created from.
XML_ParserFree(ext_parser);
XML_ParserFree(parser);
}
}
END_TEST
/* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse) {
const char *text = "<!DOCTYPE doc [\n"
@ -5201,14 +5244,9 @@ START_TEST(test_nested_entity_suspend) {
}
END_TEST
#if defined(XML_TESTING)
/* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_take_linear_time) {
const char *const too_slow_failure_message
= "Compared to the baseline runtime of the first test, this test has a "
"slowdown of more than <max_slowdown>. "
"Please keep increasing the value by 1 until it reliably passes the "
"test on your hardware and open a bug sharing that number with us. "
"Thanks in advance!";
START_TEST(test_big_tokens_scale_linearly) {
const struct {
const char *pre;
const char *post;
@ -5220,65 +5258,57 @@ START_TEST(test_big_tokens_take_linear_time) {
{"<e><", "/></e>"}, // big elem name, used to be O(N²)
};
const int num_cases = sizeof(text) / sizeof(text[0]);
// For the test we need a <max_slowdown> value that is:
// (1) big enough that the test passes reliably (avoiding flaky tests), and
// (2) small enough that the test actually catches regressions.
const int max_slowdown = 15;
char aaaaaa[4096];
const int fillsize = (int)sizeof(aaaaaa);
const int fillcount = 100;
const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
const unsigned max_factor = 4;
const unsigned max_scanned = max_factor * approx_bytes;
memset(aaaaaa, 'a', fillsize);
if (! g_reparseDeferralEnabledDefault) {
return; // heuristic is disabled; we would get O(n^2) and fail.
}
#if ! defined(__linux__)
if (CLOCKS_PER_SEC < 100000) {
// Skip this test if clock() doesn't have reasonably good resolution.
// This workaround is primarily targeting Windows and FreeBSD, since
// XSI requires the value to be 1.000.000 (10x the condition here), and
// we want to be very sure that at least one platform in CI can catch
// regressions (through a failing test).
return;
}
#endif
clock_t baseline = 0;
for (int i = 0; i < num_cases; ++i) {
XML_Parser parser = XML_ParserCreate(NULL);
assert_true(parser != NULL);
enum XML_Status status;
set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown,
text[i].pre, text[i].post);
const clock_t start = clock();
set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
// parse the start text
g_bytesScanned = 0;
status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
(int)strlen(text[i].pre), XML_FALSE);
if (status != XML_STATUS_OK) {
xml_failure(parser);
}
// parse lots of 'a', failing the test early if it takes too long
unsigned past_max_count = 0;
for (int f = 0; f < fillcount; ++f) {
status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
if (status != XML_STATUS_OK) {
xml_failure(parser);
}
// i == 0 means we're still calculating the baseline value
if (i > 0) {
const clock_t now = clock();
const clock_t clocks_so_far = now - start;
const int slowdown = clocks_so_far / baseline;
if (slowdown >= max_slowdown) {
fprintf(
stderr,
"fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n",
f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown);
fail(too_slow_failure_message);
}
if (g_bytesScanned > max_scanned) {
// We're not done, and have already passed the limit -- the test will
// definitely fail. This block allows us to save time by failing early.
const unsigned pushed
= (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
fprintf(
stderr,
"after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
f + 1, fillcount, pushed, g_bytesScanned,
g_bytesScanned / (double)pushed, max_scanned, max_factor);
past_max_count++;
// We are failing, but allow a few log prints first. If we don't reach
// a count of five, the test will fail after the loop instead.
assert_true(past_max_count < 5);
}
}
// parse the end text
status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
(int)strlen(text[i].post), XML_TRUE);
@ -5286,24 +5316,21 @@ START_TEST(test_big_tokens_take_linear_time) {
xml_failure(parser);
}
// how long did it take in total?
const clock_t end = clock();
const clock_t taken = end - start;
if (i == 0) {
assert_true(taken > 0); // just to make sure we don't div-by-0 later
baseline = taken;
}
const int slowdown = taken / baseline;
if (slowdown >= max_slowdown) {
fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n",
(int)taken, (int)baseline, slowdown, max_slowdown);
fail(too_slow_failure_message);
assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
if (g_bytesScanned > max_scanned) {
fprintf(
stderr,
"after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
max_factor);
fail("scanned too many bytes");
}
XML_ParserFree(parser);
}
}
END_TEST
#endif
START_TEST(test_set_reparse_deferral) {
const char *const pre = "<d>";
@ -5702,6 +5729,7 @@ START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
}
END_TEST
#if defined(XML_TESTING)
START_TEST(test_varying_buffer_fills) {
const int KiB = 1024;
const int MiB = 1024 * KiB;
@ -5774,19 +5802,17 @@ START_TEST(test_varying_buffer_fills) {
fillsize[2], fillsize[3]);
XML_Parser parser = XML_ParserCreate(NULL);
assert_true(parser != NULL);
g_parseAttempts = 0;
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetStartElementHandler(parser, start_element_event_handler);
g_bytesScanned = 0;
int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
int scanned_bytes = 0; // sum of (buffered bytes at each actual parse)
int offset = 0;
while (*fillsize >= 0) {
assert_true(offset + *fillsize <= document_length); // or test is invalid
const unsigned attempts_before = g_parseAttempts;
const enum XML_Status status
= XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
if (status != XML_STATUS_OK) {
@ -5796,34 +5822,27 @@ START_TEST(test_varying_buffer_fills) {
fillsize++;
assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
worstcase_bytes += offset; // we might've tried to parse all pending bytes
if (g_parseAttempts != attempts_before) {
assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse
assert_true(offset <= INT_MAX - scanned_bytes); // avoid overflow
scanned_bytes += offset; // we *did* try to parse all pending bytes
}
}
assert_true(storage.count == 1); // the big token should've been parsed
assert_true(scanned_bytes > 0); // test-the-test: does our counter work?
assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
if (g_reparseDeferralEnabledDefault) {
// heuristic is enabled; some XML_Parse calls may have deferred reparsing
const int max_bytes_scanned = -*fillsize;
if (scanned_bytes > max_bytes_scanned) {
const unsigned max_bytes_scanned = -*fillsize;
if (g_bytesScanned > max_bytes_scanned) {
fprintf(stderr,
"bytes scanned in parse attempts: actual=%d limit=%d \n",
scanned_bytes, max_bytes_scanned);
"bytes scanned in parse attempts: actual=%u limit=%u \n",
g_bytesScanned, max_bytes_scanned);
fail("too many bytes scanned in parse attempts");
}
assert_true(scanned_bytes <= worstcase_bytes);
} else {
// heuristic is disabled; every XML_Parse() will have reparsed
assert_true(scanned_bytes == worstcase_bytes);
}
assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
XML_ParserFree(parser);
}
free(document);
}
END_TEST
#endif
void
make_basic_test_case(Suite *s) {
@ -5972,6 +5991,8 @@ make_basic_test_case(Suite *s) {
tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
tcase_add_test__ifdef_xml_dtd(tc_basic,
test_recursive_external_parameter_entity);
tcase_add_test__ifdef_xml_dtd(tc_basic,
test_recursive_external_parameter_entity_2);
tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
tcase_add_test(tc_basic, test_suspend_xdecl);
tcase_add_test(tc_basic, test_abort_epilog);
@ -6065,12 +6086,16 @@ make_basic_test_case(Suite *s) {
tcase_add_test__ifdef_xml_dtd(tc_basic,
test_pool_integrity_with_unfinished_attr);
tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
tcase_add_test(tc_basic, test_big_tokens_take_linear_time);
#if defined(XML_TESTING)
tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
#endif
tcase_add_test(tc_basic, test_set_reparse_deferral);
tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
tcase_add_test(tc_basic, test_set_bad_reparse_option);
tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
#if defined(XML_TESTING)
tcase_add_test(tc_basic, test_varying_buffer_fills);
#endif
}

View file

@ -208,7 +208,7 @@ START_TEST(test_misc_version) {
if (! versions_equal(&read_version, &parsed_version))
fail("Version mismatch");
if (xcstrcmp(version_text, XCS("expat_2.6.0"))) /* needs bump on releases */
if (xcstrcmp(version_text, XCS("expat_2.6.2"))) /* needs bump on releases */
fail("XML_*_VERSION in expat.h out of sync?\n");
}
END_TEST

View file

@ -101,7 +101,9 @@ main(int argc, char *argv[]) {
for (g_chunkSize = 0; g_chunkSize <= 5; g_chunkSize++) {
for (int enabled = 0; enabled <= 1; ++enabled) {
char context[100];
#if defined(XML_TESTING)
g_reparseDeferralEnabledDefault = enabled;
#endif
snprintf(context, sizeof(context), "chunksize=%d deferral=%d",
g_chunkSize, enabled);
context[sizeof(context) - 1] = '\0';