| | #include "unity/unity.h" |
| | #include <libxml/HTMLparser.h> |
| |
|
| | #include <libxml/parserInternals.h> |
| | #include <libxml/parser.h> |
| | #include <stdlib.h> |
| | #include <string.h> |
| |
|
| | |
| | int test_htmlValidateUtf8(xmlParserCtxtPtr ctxt, const xmlChar *str, size_t len, int partial); |
| |
|
| | |
| | static xmlParserCtxtPtr make_ctxt(void) { |
| | htmlParserCtxtPtr hctxt = htmlNewParserCtxt(); |
| | TEST_ASSERT_NOT_NULL(hctxt); |
| | xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)hctxt; |
| |
|
| | xmlParserInputPtr input = xmlNewInputStream(ctxt); |
| | TEST_ASSERT_NOT_NULL(input); |
| | xmlPushInput(ctxt, input); |
| |
|
| | |
| | if (ctxt->input) |
| | ctxt->input->flags = 0; |
| |
|
| | TEST_ASSERT_NOT_NULL(ctxt->input); |
| | TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
| |
|
| | return ctxt; |
| | } |
| |
|
| | static void free_ctxt(xmlParserCtxtPtr ctxt) { |
| | if (ctxt != NULL) { |
| | htmlFreeParserCtxt((htmlParserCtxtPtr)ctxt); |
| | } |
| | } |
| |
|
/* Unity per-test setup hook; intentionally empty because each test builds its own context. */
void setUp(void)
{
}
| |
|
/* Unity per-test teardown hook; intentionally empty because each test frees its own context. */
void tearDown(void)
{
}
| |
|
| | |
| |
|
| | void test_htmlValidateUtf8_valid_2byte_returns_size_and_no_error_flag(void) { |
| | xmlParserCtxtPtr ctxt = make_ctxt(); |
| |
|
| | const unsigned char seq[] = { 0xC2, 0xA2 }; |
| | int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0); |
| |
|
| | TEST_ASSERT_EQUAL_INT(2, ret); |
| | TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
| |
|
| | free_ctxt(ctxt); |
| | } |
| |
|
| | void test_htmlValidateUtf8_invalid_start_byte_below_C2_sets_flag_and_returns_minus1(void) { |
| | xmlParserCtxtPtr ctxt = make_ctxt(); |
| |
|
| | const unsigned char seq[] = { 0xC1, 0x80 }; |
| | unsigned int before = ctxt->input->flags; |
| | int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0); |
| |
|
| | TEST_ASSERT_EQUAL_INT(-1, ret); |
| | TEST_ASSERT_TRUE(ctxt->input->flags != before); |
| |
|
| | free_ctxt(ctxt); |
| | } |
| |
|
| | void test_htmlValidateUtf8_invalid_2byte_continuation_sets_flag(void) { |
| | xmlParserCtxtPtr ctxt = make_ctxt(); |
| |
|
| | const unsigned char seq[] = { 0xC2, 0x20 }; |
| | int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0); |
| |
|
| | TEST_ASSERT_EQUAL_INT(-1, ret); |
| | TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
| |
|
| | free_ctxt(ctxt); |
| | } |
| |
|
| | void test_htmlValidateUtf8_incomplete_2byte_partial_returns_zero_no_flag(void) { |
| | xmlParserCtxtPtr ctxt = make_ctxt(); |
| |
|
| | const unsigned char seq[] = { 0xC2 }; |
| | int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 1); |
| |
|
| | TEST_ASSERT_EQUAL_INT(0, ret); |
| | TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
| |
|
| | free_ctxt(ctxt); |
| | } |
| |
|
| | void test_htmlValidateUtf8_incomplete_2byte_nonpartial_sets_flag_and_minus1(void) { |
| | xmlParserCtxtPtr ctxt = make_ctxt(); |
| |
|
| | const unsigned char seq[] = { 0xC2 }; |
| | int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0); |
| |
|
| | TEST_ASSERT_EQUAL_INT(-1, ret); |
| | TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
| |
|
| | free_ctxt(ctxt); |
| | } |
| |
|
| | void test_htmlValidateUtf8_valid_3byte_general_and_minimum(void) { |
| | xmlParserCtxtPtr ctxt = make_ctxt(); |
| |
|
| | const unsigned char euro[] = { 0xE2, 0x82, 0xAC }; |
| | int r1 = test_htmlValidateUtf8(ctxt, (const xmlChar *)euro, sizeof(euro), 0); |
| | TEST_ASSERT_EQUAL_INT(3, r1); |
| | TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
| |
|
| | const unsigned char min3[] = { 0xE0, 0xA0, 0x80 }; |
| | int r2 = test_htmlValidateUtf8(ctxt, (const xmlChar *)min3, sizeof(min3), 0); |
| | TEST_ASSERT_EQUAL_INT(3, r2); |
| | TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
| |
|
| | |
| | const unsigned char ed_valid[] = { 0xED, 0x9F, 0xBF }; |
| | int r3 = test_htmlValidateUtf8(ctxt, (const xmlChar *)ed_valid, sizeof(ed_valid), 0); |
| | TEST_ASSERT_EQUAL_INT(3, r3); |
| | TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
| |
|
| | free_ctxt(ctxt); |
| | } |
| |
|
| | void test_htmlValidateUtf8_invalid_3byte_overlong_and_surrogate(void) { |
| | |
| | { |
| | xmlParserCtxtPtr ctxt = make_ctxt(); |
| | const unsigned char overlong[] = { 0xE0, 0x80, 0x80 }; |
| | int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)overlong, sizeof(overlong), 0); |
| | TEST_ASSERT_EQUAL_INT(-1, r); |
| | TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
| | free_ctxt(ctxt); |
| | } |
| | |
| | { |
| | xmlParserCtxtPtr ctxt = make_ctxt(); |
| | const unsigned char surrogate[] = { 0xED, 0xA0, 0x80 }; |
| | int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)surrogate, sizeof(surrogate), 0); |
| | TEST_ASSERT_EQUAL_INT(-1, r); |
| | TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
| | free_ctxt(ctxt); |
| | } |
| | } |
| |
|
| | void test_htmlValidateUtf8_valid_4byte_boundaries(void) { |
| | xmlParserCtxtPtr ctxt = make_ctxt(); |
| |
|
| | const unsigned char min4[] = { 0xF0, 0x90, 0x80, 0x80 }; |
| | int r1 = test_htmlValidateUtf8(ctxt, (const xmlChar *)min4, sizeof(min4), 0); |
| | TEST_ASSERT_EQUAL_INT(4, r1); |
| | TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
| |
|
| | const unsigned char max4[] = { 0xF4, 0x8F, 0xBF, 0xBF }; |
| | int r2 = test_htmlValidateUtf8(ctxt, (const xmlChar *)max4, sizeof(max4), 0); |
| | TEST_ASSERT_EQUAL_INT(4, r2); |
| | TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
| |
|
| | free_ctxt(ctxt); |
| | } |
| |
|
| | void test_htmlValidateUtf8_invalid_4byte_overlong_and_above_max(void) { |
| | |
| | { |
| | xmlParserCtxtPtr ctxt = make_ctxt(); |
| | const unsigned char overlong4[] = { 0xF0, 0x80, 0x80, 0x80 }; |
| | int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)overlong4, sizeof(overlong4), 0); |
| | TEST_ASSERT_EQUAL_INT(-1, r); |
| | TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
| | free_ctxt(ctxt); |
| | } |
| | |
| | { |
| | xmlParserCtxtPtr ctxt = make_ctxt(); |
| | const unsigned char above_max[] = { 0xF4, 0x90, 0x80, 0x80 }; |
| | int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)above_max, sizeof(above_max), 0); |
| | TEST_ASSERT_EQUAL_INT(-1, r); |
| | TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
| | free_ctxt(ctxt); |
| | } |
| | } |
| |
|
| | void test_htmlValidateUtf8_incomplete_3_and_4_byte_partial_behavior(void) { |
| | |
| | { |
| | xmlParserCtxtPtr ctxt = make_ctxt(); |
| | const unsigned char seq3[] = { 0xE2, 0x82 }; |
| | int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq3, sizeof(seq3), 1); |
| | TEST_ASSERT_EQUAL_INT(0, r); |
| | TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
| | free_ctxt(ctxt); |
| | } |
| | |
| | { |
| | xmlParserCtxtPtr ctxt = make_ctxt(); |
| | const unsigned char seq4[] = { 0xF0, 0x90, 0x80 }; |
| | int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq4, sizeof(seq4), 1); |
| | TEST_ASSERT_EQUAL_INT(0, r); |
| | TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
| | free_ctxt(ctxt); |
| | } |
| | |
| | { |
| | xmlParserCtxtPtr ctxt = make_ctxt(); |
| | const unsigned char seq4[] = { 0xF0, 0x90, 0x80 }; |
| | int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq4, sizeof(seq4), 0); |
| | TEST_ASSERT_EQUAL_INT(-1, r); |
| | TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
| | free_ctxt(ctxt); |
| | } |
| | } |
| |
|
| | void test_htmlValidateUtf8_error_flag_only_set_once_per_context(void) { |
| | xmlParserCtxtPtr ctxt = make_ctxt(); |
| |
|
| | const unsigned char bad1[] = { 0xC1, 0x80 }; |
| | const unsigned char bad2[] = { 0xE0, 0x80, 0x80 }; |
| |
|
| | int r1 = test_htmlValidateUtf8(ctxt, (const xmlChar *)bad1, sizeof(bad1), 0); |
| | TEST_ASSERT_EQUAL_INT(-1, r1); |
| | unsigned int flags_after_first = ctxt->input->flags; |
| | TEST_ASSERT_NOT_EQUAL(0u, flags_after_first); |
| |
|
| | int r2 = test_htmlValidateUtf8(ctxt, (const xmlChar *)bad2, sizeof(bad2), 0); |
| | TEST_ASSERT_EQUAL_INT(-1, r2); |
| | unsigned int flags_after_second = ctxt->input->flags; |
| |
|
| | |
| | TEST_ASSERT_EQUAL_UINT(flags_after_first, flags_after_second); |
| |
|
| | free_ctxt(ctxt); |
| | } |
| |
|
| | int main(void) { |
| | xmlInitParser(); |
| |
|
| | UNITY_BEGIN(); |
| | RUN_TEST(test_htmlValidateUtf8_valid_2byte_returns_size_and_no_error_flag); |
| | RUN_TEST(test_htmlValidateUtf8_invalid_start_byte_below_C2_sets_flag_and_returns_minus1); |
| | RUN_TEST(test_htmlValidateUtf8_invalid_2byte_continuation_sets_flag); |
| | RUN_TEST(test_htmlValidateUtf8_incomplete_2byte_partial_returns_zero_no_flag); |
| | RUN_TEST(test_htmlValidateUtf8_incomplete_2byte_nonpartial_sets_flag_and_minus1); |
| | RUN_TEST(test_htmlValidateUtf8_valid_3byte_general_and_minimum); |
| | RUN_TEST(test_htmlValidateUtf8_invalid_3byte_overlong_and_surrogate); |
| | RUN_TEST(test_htmlValidateUtf8_valid_4byte_boundaries); |
| | RUN_TEST(test_htmlValidateUtf8_invalid_4byte_overlong_and_above_max); |
| | RUN_TEST(test_htmlValidateUtf8_incomplete_3_and_4_byte_partial_behavior); |
| | RUN_TEST(test_htmlValidateUtf8_error_flag_only_set_once_per_context); |
| | int rc = UNITY_END(); |
| |
|
| | xmlCleanupParser(); |
| | return rc; |
| | } |