| | #include "unity/unity.h" |
| | #include <libxml/HTMLparser.h> |
| | #include <libxml/parser.h> |
| | #include <libxml/parserInternals.h> |
| | #include <libxml/xmlmemory.h> |
| | #include <libxml/xmlerror.h> |
| |
|
| | #include <string.h> |
| | #include <stdio.h> |
| |
|
| | |
| | |
| | void test_htmlParseEndTag(htmlParserCtxtPtr ctxt); |
| |
|
| | |
/*
 * Record of what the SAX callbacks observed during a single test.
 *
 * Every callback increments its own counter and keeps a NUL-terminated,
 * possibly truncated, copy of the most recent payload it received.
 */
typedef struct {
    int end_count;            /* calls made to cb_endElement() */
    int char_count;           /* calls made to cb_characters() */
    int comment_count;        /* calls made to cb_comment() */
    char last_end_name[128];  /* name from the latest endElement call */
    char last_chars[256];     /* text from the latest characters call */
    char last_comment[256];   /* text from the latest comment call */
} TestSAXData;

/*
 * The one shared record: wiped by reset_sax_data() and installed as the
 * parser's userData by make_ctxt_with_input().
 */
static TestSAXData gSAXData;
| |
|
| | static void cb_endElement(void *ctx, const xmlChar *name) { |
| | TestSAXData *d = (TestSAXData *)ctx; |
| | d->end_count++; |
| | if (name) { |
| | size_t n = strlen((const char *)name); |
| | if (n >= sizeof(d->last_end_name)) n = sizeof(d->last_end_name) - 1; |
| | memcpy(d->last_end_name, name, n); |
| | d->last_end_name[n] = '\0'; |
| | } |
| | } |
| |
|
| | static void cb_characters(void *ctx, const xmlChar *ch, int len) { |
| | TestSAXData *d = (TestSAXData *)ctx; |
| | d->char_count++; |
| | if (ch && len > 0) { |
| | size_t n = (size_t)len; |
| | if (n >= sizeof(d->last_chars)) n = sizeof(d->last_chars) - 1; |
| | memcpy(d->last_chars, ch, n); |
| | d->last_chars[n] = '\0'; |
| | } else { |
| | d->last_chars[0] = '\0'; |
| | } |
| | } |
| |
|
| | static void cb_comment(void *ctx, const xmlChar *value) { |
| | TestSAXData *d = (TestSAXData *)ctx; |
| | d->comment_count++; |
| | if (value) { |
| | size_t n = strlen((const char *)value); |
| | if (n >= sizeof(d->last_comment)) n = sizeof(d->last_comment) - 1; |
| | memcpy(d->last_comment, value, n); |
| | d->last_comment[n] = '\0'; |
| | } else { |
| | d->last_comment[0] = '\0'; |
| | } |
| | } |
| |
|
| | static xmlSAXHandler gSAX; |
| |
|
| | |
| |
|
| | static void reset_sax_data(void) { |
| | memset(&gSAXData, 0, sizeof(gSAXData)); |
| | gSAXData.last_end_name[0] = '\0'; |
| | gSAXData.last_chars[0] = '\0'; |
| | gSAXData.last_comment[0] = '\0'; |
| | } |
| |
|
| | static htmlParserCtxtPtr make_ctxt_with_input(const char *text, int options) { |
| | htmlParserCtxtPtr ctxt = htmlNewParserCtxt(); |
| | TEST_ASSERT_NOT_NULL(ctxt); |
| |
|
| | ctxt->options = options; |
| | ctxt->sax = &gSAX; |
| | ctxt->userData = &gSAXData; |
| | ctxt->disableSAX = 0; |
| |
|
| | if (ctxt->dict == NULL) { |
| | ctxt->dict = xmlDictCreate(); |
| | TEST_ASSERT_NOT_NULL(ctxt->dict); |
| | } |
| |
|
| | xmlParserInputPtr in = xmlNewInputStream(ctxt); |
| | TEST_ASSERT_NOT_NULL(in); |
| |
|
| | size_t len = strlen(text); |
| | xmlChar *buf = xmlStrndup((const xmlChar *)text, (int)len); |
| | TEST_ASSERT_NOT_NULL(buf); |
| |
|
| | in->base = buf; |
| | in->cur = buf; |
| | in->end = buf + len; |
| | in->line = 1; |
| | in->col = 1; |
| | in->consumed = 0; |
| |
|
| | inputPush(ctxt, in); |
| | return ctxt; |
| | } |
| |
|
| | static void ensure_name_stack(htmlParserCtxtPtr ctxt, const char **names, int count) { |
| | if ((ctxt->nameTab == NULL) || (ctxt->nameMax < count)) { |
| | if (ctxt->nameTab != NULL) { |
| | xmlFree((void *)ctxt->nameTab); |
| | } |
| | ctxt->nameTab = (const xmlChar **)xmlMalloc(sizeof(xmlChar *) * (count > 0 ? count : 1)); |
| | TEST_ASSERT_NOT_NULL(ctxt->nameTab); |
| | ctxt->nameMax = (count > 0 ? count : 1); |
| | } |
| | for (int i = 0; i < count; i++) { |
| | ctxt->nameTab[i] = (const xmlChar *)names[i]; |
| | } |
| | ctxt->nameNr = count; |
| | ctxt->name = (count > 0) ? ctxt->nameTab[count - 1] : NULL; |
| | } |
| |
|
| | |
| |
|
/*
 * Unity per-test setup hook: clears the shared callback record so no test
 * sees counters or strings left behind by an earlier one.
 */
void setUp(void)
{
    reset_sax_data();
}
| |
|
/*
 * Unity per-test teardown hook. Intentionally empty: each test frees the
 * parser context it created.
 */
void tearDown(void)
{
    /* nothing to release here */
}
| |
|
| | static void init_sax(void) { |
| | memset(&gSAX, 0, sizeof(gSAX)); |
| | gSAX.endElement = cb_endElement; |
| | gSAX.characters = cb_characters; |
| | gSAX.comment = cb_comment; |
| | } |
| |
|
| | |
| |
|
| | void test_htmlParseEndTag_eof_after_slash_emits_literal_chars(void) { |
| | init_sax(); |
| | htmlParserCtxtPtr ctxt = make_ctxt_with_input("</", 0); |
| |
|
| | test_htmlParseEndTag(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_INT(1, gSAXData.char_count); |
| | TEST_ASSERT_EQUAL_STRING("</", gSAXData.last_chars); |
| | TEST_ASSERT_EQUAL_INT(0, gSAXData.end_count); |
| | TEST_ASSERT_EQUAL_INT(0, gSAXData.comment_count); |
| |
|
| | htmlFreeParserCtxt(ctxt); |
| | } |
| |
|
| | void test_htmlParseEndTag_immediate_gt_noop(void) { |
| | init_sax(); |
| | htmlParserCtxtPtr ctxt = make_ctxt_with_input("</>", 0); |
| |
|
| | test_htmlParseEndTag(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_INT(0, gSAXData.char_count); |
| | TEST_ASSERT_EQUAL_INT(0, gSAXData.end_count); |
| | TEST_ASSERT_EQUAL_INT(0, gSAXData.comment_count); |
| |
|
| | htmlFreeParserCtxt(ctxt); |
| | } |
| |
|
| | void test_htmlParseEndTag_bogus_comment_non_letter(void) { |
| | init_sax(); |
| | htmlParserCtxtPtr ctxt = make_ctxt_with_input("</123abc>", 0); |
| |
|
| | test_htmlParseEndTag(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_INT(1, gSAXData.comment_count); |
| | TEST_ASSERT_EQUAL_STRING("123abc", gSAXData.last_comment); |
| | TEST_ASSERT_EQUAL_INT(0, gSAXData.end_count); |
| |
|
| | htmlFreeParserCtxt(ctxt); |
| | } |
| |
|
| | void test_htmlParseEndTag_html5_simple_lowercase_name(void) { |
| | init_sax(); |
| | htmlParserCtxtPtr ctxt = make_ctxt_with_input("</DiV>", HTML_PARSE_HTML5); |
| |
|
| | test_htmlParseEndTag(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_INT(1, gSAXData.end_count); |
| | TEST_ASSERT_EQUAL_STRING("div", gSAXData.last_end_name); |
| | TEST_ASSERT_EQUAL_INT(0, gSAXData.comment_count); |
| |
|
| | htmlFreeParserCtxt(ctxt); |
| | } |
| |
|
| | void test_htmlParseEndTag_html5_with_attrs_and_solidus(void) { |
| | init_sax(); |
| | htmlParserCtxtPtr ctxt = make_ctxt_with_input("</SPAN class=foo />", HTML_PARSE_HTML5); |
| |
|
| | test_htmlParseEndTag(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_INT(1, gSAXData.end_count); |
| | TEST_ASSERT_EQUAL_STRING("span", gSAXData.last_end_name); |
| |
|
| | htmlFreeParserCtxt(ctxt); |
| | } |
| |
|
| | void test_htmlParseEndTag_nonhtml5_special_depth_decrement(void) { |
| | init_sax(); |
| | htmlParserCtxtPtr ctxt = make_ctxt_with_input("</body>", 0); |
| | ctxt->depth = 3; |
| |
|
| | test_htmlParseEndTag(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_INT(2, ctxt->depth); |
| | TEST_ASSERT_EQUAL_INT(0, gSAXData.end_count); |
| | TEST_ASSERT_EQUAL_INT(0, gSAXData.comment_count); |
| |
|
| | htmlFreeParserCtxt(ctxt); |
| | } |
| |
|
| | void test_htmlParseEndTag_nonhtml5_name_not_in_stack_reports_error_and_returns(void) { |
| | init_sax(); |
| | htmlParserCtxtPtr ctxt = make_ctxt_with_input("</table>", 0); |
| |
|
| | |
| | const char *stack[] = { "p", "div" }; |
| | ensure_name_stack(ctxt, stack, 2); |
| |
|
| | test_htmlParseEndTag(ctxt); |
| |
|
| | |
| | TEST_ASSERT_EQUAL_INT(0, gSAXData.end_count); |
| |
|
| | htmlFreeParserCtxt(ctxt); |
| | } |
| |
|
| | void test_htmlParseEndTag_nonhtml5_matching_close_emits_end_and_pops(void) { |
| | init_sax(); |
| | htmlParserCtxtPtr ctxt = make_ctxt_with_input("</EM>", 0); |
| |
|
| | const char *stack[] = { "em" }; |
| | ensure_name_stack(ctxt, stack, 1); |
| | int before = ctxt->nameNr; |
| |
|
| | test_htmlParseEndTag(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_INT(1, gSAXData.end_count); |
| | TEST_ASSERT_EQUAL_STRING("em", gSAXData.last_end_name); |
| | |
| | TEST_ASSERT_TRUE(ctxt->nameNr == before - 1); |
| |
|
| | htmlFreeParserCtxt(ctxt); |
| | } |
| |
|
| | |
| |
|
| | int main(void) { |
| | xmlInitParser(); |
| | UNITY_BEGIN(); |
| |
|
| | RUN_TEST(test_htmlParseEndTag_eof_after_slash_emits_literal_chars); |
| | RUN_TEST(test_htmlParseEndTag_immediate_gt_noop); |
| | RUN_TEST(test_htmlParseEndTag_bogus_comment_non_letter); |
| | RUN_TEST(test_htmlParseEndTag_html5_simple_lowercase_name); |
| | RUN_TEST(test_htmlParseEndTag_html5_with_attrs_and_solidus); |
| | RUN_TEST(test_htmlParseEndTag_nonhtml5_special_depth_decrement); |
| | RUN_TEST(test_htmlParseEndTag_nonhtml5_name_not_in_stack_reports_error_and_returns); |
| | RUN_TEST(test_htmlParseEndTag_nonhtml5_matching_close_emits_end_and_pops); |
| |
|
| | int rc = UNITY_END(); |
| | xmlCleanupParser(); |
| | return rc; |
| | } |