| | #include "unity/unity.h" |
| | #include <libxml/HTMLparser.h> |
| | #include <stdlib.h> |
| | #include <string.h> |
| |
|
| | |
| | extern void test_htmlParseContent(htmlParserCtxtPtr ctxt); |
| |
|
| | |
/*
 * Accumulates everything the SAX callbacks in this file report, so that a
 * test can inspect it after parsing. All pointers are heap-owned by the
 * capture and released by cap_free().
 */
typedef struct SaxCapture {
    /* Character data: every chunk appended in arrival order, NUL-terminated.
     * Stays NULL until the first non-empty chunk arrives. */
    char *text;
    size_t text_len;

    /* Comments: copy of the most recent comment plus a running total. */
    char *last_comment;
    int comment_count;

    /* DOCTYPE: copy of the most recent internal-subset name plus a total. */
    char *internal_subset_name;
    int internal_subset_count;

    /* End tags: copies of element names in the order they were closed.
     * Only the first 32 are kept; later ones are dropped by the callback. */
    char *end_names[32];
    int end_count;
} SaxCapture;
| |
|
| | static void cap_init(SaxCapture *cap) { |
| | memset(cap, 0, sizeof(*cap)); |
| | } |
| |
|
| | static void cap_free(SaxCapture *cap) { |
| | free(cap->text); |
| | free(cap->last_comment); |
| | free(cap->internal_subset_name); |
| | for (int i = 0; i < cap->end_count; i++) |
| | free(cap->end_names[i]); |
| | } |
| |
|
| | static void cap_append_text(SaxCapture *cap, const char *data, size_t len) { |
| | if (len == 0) return; |
| | char *newbuf = (char *)realloc(cap->text, cap->text_len + len + 1); |
| | if (!newbuf) return; |
| | cap->text = newbuf; |
| | memcpy(cap->text + cap->text_len, data, len); |
| | cap->text_len += len; |
| | cap->text[cap->text_len] = '\0'; |
| | } |
| |
|
| | |
| | static void onCharacters(void *userData, const xmlChar *ch, int len) { |
| | SaxCapture *cap = (SaxCapture *)userData; |
| | cap_append_text(cap, (const char *)ch, (size_t)len); |
| | } |
| |
|
| | static void onEndElement(void *userData, const xmlChar *name) { |
| | SaxCapture *cap = (SaxCapture *)userData; |
| | if (cap->end_count < (int)(sizeof(cap->end_names)/sizeof(cap->end_names[0]))) { |
| | const char *src = (const char *)name; |
| | size_t l = strlen(src); |
| | cap->end_names[cap->end_count] = (char *)malloc(l + 1); |
| | if (cap->end_names[cap->end_count]) { |
| | memcpy(cap->end_names[cap->end_count], src, l + 1); |
| | cap->end_count++; |
| | } |
| | } |
| | } |
| |
|
| | static void onComment(void *userData, const xmlChar *value) { |
| | SaxCapture *cap = (SaxCapture *)userData; |
| | free(cap->last_comment); |
| | cap->last_comment = strdup((const char *)value); |
| | cap->comment_count++; |
| | } |
| |
|
| | static void onInternalSubset(void *userData, const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID) { |
| | (void)ExternalID; (void)SystemID; |
| | SaxCapture *cap = (SaxCapture *)userData; |
| | free(cap->internal_subset_name); |
| | cap->internal_subset_name = strdup((const char *)name); |
| | cap->internal_subset_count++; |
| | } |
| |
|
| | |
| | static htmlSAXHandler make_sax(void) { |
| | htmlSAXHandler sax; |
| | memset(&sax, 0, sizeof(sax)); |
| | sax.characters = onCharacters; |
| | sax.endElement = onEndElement; |
| | sax.comment = onComment; |
| | sax.internalSubset = onInternalSubset; |
| | return sax; |
| | } |
| |
|
| | |
| | static htmlParserCtxtPtr make_ctxt_with_sax(const char *data, SaxCapture *cap) { |
| | htmlParserCtxtPtr ctxt = htmlCreateMemoryParserCtxt(data, (int)strlen(data)); |
| | if (ctxt) { |
| | static htmlSAXHandler sax; |
| | sax = make_sax(); |
| | ctxt->sax = &sax; |
| | ctxt->userData = cap; |
| | ctxt->disableSAX = 0; |
| | } |
| | return ctxt; |
| | } |
| |
|
/* Unity per-test hook; these tests need no shared setup. */
void setUp(void)
{
}
| |
|
/* Unity per-test hook; each test releases its own resources. */
void tearDown(void)
{
}
| |
|
| | |
| | void test_htmlParseContent_plain_text(void) { |
| | const char *html = "Hello"; |
| | SaxCapture cap; cap_init(&cap); |
| |
|
| | htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap); |
| | TEST_ASSERT_NOT_NULL(ctxt); |
| |
|
| | test_htmlParseContent(ctxt); |
| |
|
| | TEST_ASSERT_NOT_NULL(cap.text); |
| | TEST_ASSERT_EQUAL_STRING("Hello", cap.text); |
| | TEST_ASSERT_EQUAL_INT(0, cap.comment_count); |
| |
|
| | htmlFreeParserCtxt(ctxt); |
| | cap_free(&cap); |
| | } |
| |
|
| | void test_htmlParseContent_element_with_end_tag(void) { |
| | const char *html = "<p>Hi</p>"; |
| | SaxCapture cap; cap_init(&cap); |
| |
|
| | htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap); |
| | TEST_ASSERT_NOT_NULL(ctxt); |
| |
|
| | test_htmlParseContent(ctxt); |
| |
|
| | TEST_ASSERT_NOT_NULL(cap.text); |
| | TEST_ASSERT_EQUAL_STRING("Hi", cap.text); |
| | TEST_ASSERT_TRUE(cap.end_count >= 1); |
| | |
| | TEST_ASSERT_EQUAL_STRING("p", cap.end_names[0]); |
| |
|
| | htmlFreeParserCtxt(ctxt); |
| | cap_free(&cap); |
| | } |
| |
|
| | void test_htmlParseContent_comment_and_bogus_comment(void) { |
| | const char *html1 = "X<!--abc-->Y"; |
| | SaxCapture cap1; cap_init(&cap1); |
| | htmlParserCtxtPtr ctxt1 = make_ctxt_with_sax(html1, &cap1); |
| | TEST_ASSERT_NOT_NULL(ctxt1); |
| | test_htmlParseContent(ctxt1); |
| | TEST_ASSERT_EQUAL_INT(1, cap1.comment_count); |
| | TEST_ASSERT_EQUAL_STRING("abc", cap1.last_comment); |
| | TEST_ASSERT_EQUAL_STRING("XY", cap1.text); |
| | htmlFreeParserCtxt(ctxt1); |
| | cap_free(&cap1); |
| |
|
| | const char *html2 = "A<!zzz>B"; |
| | SaxCapture cap2; cap_init(&cap2); |
| | htmlParserCtxtPtr ctxt2 = make_ctxt_with_sax(html2, &cap2); |
| | TEST_ASSERT_NOT_NULL(ctxt2); |
| | test_htmlParseContent(ctxt2); |
| | TEST_ASSERT_EQUAL_INT(1, cap2.comment_count); |
| | TEST_ASSERT_EQUAL_STRING("zzz", cap2.last_comment); |
| | TEST_ASSERT_EQUAL_STRING("AB", cap2.text); |
| | htmlFreeParserCtxt(ctxt2); |
| | cap_free(&cap2); |
| | } |
| |
|
| | void test_htmlParseContent_literal_lt_when_not_a_tag(void) { |
| | const char *html = "<1"; |
| | SaxCapture cap; cap_init(&cap); |
| | htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap); |
| | TEST_ASSERT_NOT_NULL(ctxt); |
| |
|
| | test_htmlParseContent(ctxt); |
| |
|
| | TEST_ASSERT_NOT_NULL(cap.text); |
| | TEST_ASSERT_EQUAL_STRING("<1", cap.text); |
| |
|
| | htmlFreeParserCtxt(ctxt); |
| | cap_free(&cap); |
| | } |
| |
|
| | void test_htmlParseContent_doctype_in_content(void) { |
| | const char *html = "<!DOCTYPE html>Text"; |
| | SaxCapture cap; cap_init(&cap); |
| | htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap); |
| | TEST_ASSERT_NOT_NULL(ctxt); |
| |
|
| | test_htmlParseContent(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_INT(1, cap.internal_subset_count); |
| | TEST_ASSERT_NOT_NULL(cap.internal_subset_name); |
| | TEST_ASSERT_EQUAL_STRING("html", cap.internal_subset_name); |
| | TEST_ASSERT_EQUAL_STRING("Text", cap.text); |
| |
|
| | htmlFreeParserCtxt(ctxt); |
| | cap_free(&cap); |
| | } |
| |
|
| | void test_htmlParseContent_autoclose_on_end(void) { |
| | const char *html = "<p>ab"; |
| | SaxCapture cap; cap_init(&cap); |
| | htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap); |
| | TEST_ASSERT_NOT_NULL(ctxt); |
| |
|
| | test_htmlParseContent(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_STRING("ab", cap.text); |
| | |
| | TEST_ASSERT_TRUE(cap.end_count >= 1); |
| | TEST_ASSERT_EQUAL_STRING("p", cap.end_names[0]); |
| |
|
| | htmlFreeParserCtxt(ctxt); |
| | cap_free(&cap); |
| | } |
| |
|
| | void test_htmlParseContent_script_mode_preserves_lt_inside_script(void) { |
| | const char *html = "<script>1<2</script>"; |
| | SaxCapture cap; cap_init(&cap); |
| | htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap); |
| | TEST_ASSERT_NOT_NULL(ctxt); |
| |
|
| | test_htmlParseContent(ctxt); |
| |
|
| | TEST_ASSERT_NOT_NULL(cap.text); |
| | TEST_ASSERT_EQUAL_STRING("1<2", cap.text); |
| | |
| | TEST_ASSERT_TRUE(cap.end_count >= 1); |
| | |
| | int saw_script = 0; |
| | for (int i = 0; i < cap.end_count; i++) { |
| | if (strcmp(cap.end_names[i], "script") == 0) { |
| | saw_script = 1; break; |
| | } |
| | } |
| | TEST_ASSERT_TRUE(saw_script); |
| |
|
| | htmlFreeParserCtxt(ctxt); |
| | cap_free(&cap); |
| | } |
| |
|
| | int main(void) { |
| | UNITY_BEGIN(); |
| | RUN_TEST(test_htmlParseContent_plain_text); |
| | RUN_TEST(test_htmlParseContent_element_with_end_tag); |
| | RUN_TEST(test_htmlParseContent_comment_and_bogus_comment); |
| | RUN_TEST(test_htmlParseContent_literal_lt_when_not_a_tag); |
| | RUN_TEST(test_htmlParseContent_doctype_in_content); |
| | RUN_TEST(test_htmlParseContent_autoclose_on_end); |
| | RUN_TEST(test_htmlParseContent_script_mode_preserves_lt_inside_script); |
| | return UNITY_END(); |
| | } |