| | #include "unity/unity.h" |
| | #include <libxml/HTMLparser.h> |
| |
|
| | #include <stdlib.h> |
| | #include <string.h> |
| | #include <stdio.h> |
| |
|
| | |
| | extern void test_htmlParseStartTag(htmlParserCtxtPtr ctxt); |
| |
|
| | |
| | typedef struct { |
| | xmlChar *name; |
| | int att_count; |
| | xmlChar **atts; |
| | } StartEvent; |
| |
|
| | typedef struct { |
| | int nevents; |
| | StartEvent events[64]; |
| | } SAXCapture; |
| |
|
| | static void capture_init(SAXCapture *cap) { |
| | memset(cap, 0, sizeof(*cap)); |
| | } |
| |
|
| | static void capture_free(SAXCapture *cap) { |
| | for (int i = 0; i < cap->nevents; i++) { |
| | if (cap->events[i].name) { |
| | xmlFree(cap->events[i].name); |
| | } |
| | if (cap->events[i].atts) { |
| | |
| | int j = 0; |
| | while (cap->events[i].atts[j] != NULL) { |
| | xmlFree(cap->events[i].atts[j]); |
| | j++; |
| | } |
| | free(cap->events[i].atts); |
| | } |
| | } |
| | memset(cap, 0, sizeof(*cap)); |
| | } |
| |
|
| | static void test_sax_startElement(void *ctx, const xmlChar *name, const xmlChar **atts) { |
| | SAXCapture *cap = (SAXCapture *)ctx; |
| | if (cap->nevents >= (int)(sizeof(cap->events)/sizeof(cap->events[0]))) |
| | return; |
| | StartEvent *ev = &cap->events[cap->nevents++]; |
| | ev->name = xmlStrdup(name); |
| |
|
| | int count = 0; |
| | if (atts != NULL) { |
| | const xmlChar **p = atts; |
| | while (*p != NULL) { |
| | |
| | p++; |
| | |
| | if (*p == NULL) break; |
| | p++; |
| | count++; |
| | } |
| | } |
| | ev->att_count = count; |
| |
|
| | if (atts != NULL && count > 0) { |
| | |
| | ev->atts = (xmlChar **)calloc((size_t)(2 * count + 1), sizeof(xmlChar *)); |
| | int idx = 0; |
| | for (int i = 0; i < count; i++) { |
| | const xmlChar *aname = atts[2*i]; |
| | const xmlChar *aval = atts[2*i + 1]; |
| | ev->atts[idx++] = xmlStrdup(aname); |
| | ev->atts[idx++] = (aval != NULL) ? xmlStrdup(aval) : NULL; |
| | } |
| | ev->atts[idx] = NULL; |
| | } else { |
| | ev->atts = (xmlChar **)calloc(1, sizeof(xmlChar *)); |
| | ev->atts[0] = NULL; |
| | } |
| | } |
| |
|
| | static const xmlChar* find_attr_value(const StartEvent *ev, const char *name) { |
| | if (ev->atts == NULL) return NULL; |
| | for (int i = 0; ev->atts[i] != NULL && ev->atts[i+1] != NULL; i += 2) { |
| | if (xmlStrcasecmp(ev->atts[i], (const xmlChar *)name) == 0) { |
| | return ev->atts[i+1]; |
| | } |
| | } |
| | return NULL; |
| | } |
| |
|
| | |
| | static htmlParserCtxtPtr make_ctxt(const char *buf, int flags, SAXCapture *cap) { |
| | htmlParserCtxtPtr ctxt = htmlCreateMemoryParserCtxt(buf, (int)strlen(buf)); |
| | TEST_ASSERT_NOT_NULL_MESSAGE(ctxt, "Failed to create HTML parser context"); |
| | static xmlSAXHandler sax; |
| | memset(&sax, 0, sizeof(sax)); |
| | sax.startElement = test_sax_startElement; |
| | ctxt->sax = &sax; |
| | ctxt->userData = cap; |
| | ctxt->options |= flags; |
| | return ctxt; |
| | } |
| |
|
/* Unity per-test setup hook: initialise the libxml2 parser library. */
void setUp(void)
{
    xmlInitParser();
}
| |
|
/* Unity per-test teardown hook: intentionally empty, each test cleans up after itself. */
void tearDown(void)
{
}
| |
|
| | |
| | void test_htmlParseStartTag_simple_div_noimplied(void) { |
| | const char *src = "<div>"; |
| | SAXCapture cap; capture_init(&cap); |
| | htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
| |
|
| | test_htmlParseStartTag(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
| | TEST_ASSERT_NOT_NULL(cap.events[0].name); |
| | TEST_ASSERT_EQUAL_STRING("div", (const char *)cap.events[0].name); |
| | TEST_ASSERT_EQUAL_INT(0, cap.events[0].att_count); |
| |
|
| | capture_free(&cap); |
| | htmlFreeParserCtxt(ctxt); |
| | } |
| |
|
| | |
| | void test_htmlParseStartTag_uppercase_and_attr_lowercased(void) { |
| | const char *src = "<DIV CLASS=AbC ID=42>"; |
| | SAXCapture cap; capture_init(&cap); |
| | htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
| |
|
| | test_htmlParseStartTag(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
| | TEST_ASSERT_EQUAL_STRING("div", (const char *)cap.events[0].name); |
| | TEST_ASSERT_TRUE(cap.events[0].att_count >= 2); |
| |
|
| | |
| | const xmlChar *vclass = find_attr_value(&cap.events[0], "class"); |
| | const xmlChar *vid = find_attr_value(&cap.events[0], "id"); |
| | TEST_ASSERT_NOT_NULL(vclass); |
| | TEST_ASSERT_NOT_NULL(vid); |
| | TEST_ASSERT_EQUAL_STRING("AbC", (const char *)vclass); |
| | TEST_ASSERT_EQUAL_STRING("42", (const char *)vid); |
| |
|
| | capture_free(&cap); |
| | htmlFreeParserCtxt(ctxt); |
| | } |
| |
|
| | |
| | void test_htmlParseStartTag_duplicate_attributes_dedup(void) { |
| | const char *src = "<div class='a' CLASS=\"b\" class=c>"; |
| | SAXCapture cap; capture_init(&cap); |
| | htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
| |
|
| | test_htmlParseStartTag(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
| | |
| | const StartEvent *ev = &cap.events[0]; |
| | |
| | int class_count = 0; |
| | for (int i = 0; ev->atts[i] != NULL && ev->atts[i+1] != NULL; i += 2) { |
| | if (xmlStrcasecmp(ev->atts[i], BAD_CAST "class") == 0) |
| | class_count++; |
| | } |
| | TEST_ASSERT_EQUAL_INT(1, class_count); |
| | const xmlChar *v = find_attr_value(ev, "class"); |
| | TEST_ASSERT_NOT_NULL(v); |
| | TEST_ASSERT_EQUAL_STRING("a", (const char *)v); |
| |
|
| | capture_free(&cap); |
| | htmlFreeParserCtxt(ctxt); |
| | } |
| |
|
| | |
| | void test_htmlParseStartTag_unexpected_solidus_ignored(void) { |
| | const char *src = "<div / id='x'>"; |
| | SAXCapture cap; capture_init(&cap); |
| | htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
| |
|
| | test_htmlParseStartTag(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
| | TEST_ASSERT_EQUAL_STRING("div", (const char *)cap.events[0].name); |
| | const xmlChar *vx = find_attr_value(&cap.events[0], "id"); |
| | TEST_ASSERT_NOT_NULL(vx); |
| | TEST_ASSERT_EQUAL_STRING("x", (const char *)vx); |
| |
|
| | capture_free(&cap); |
| | htmlFreeParserCtxt(ctxt); |
| | } |
| |
|
| | |
| | void test_htmlParseStartTag_self_closing(void) { |
| | const char *src = "<br/>"; |
| | SAXCapture cap; capture_init(&cap); |
| | htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
| |
|
| | test_htmlParseStartTag(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
| | TEST_ASSERT_EQUAL_STRING("br", (const char *)cap.events[0].name); |
| | TEST_ASSERT_EQUAL_INT(0, cap.events[0].att_count); |
| |
|
| | capture_free(&cap); |
| | htmlFreeParserCtxt(ctxt); |
| | } |
| |
|
| | |
| | void test_htmlParseStartTag_incomplete_tag_discarded(void) { |
| | const char *src = "<div id='x'"; |
| | SAXCapture cap; capture_init(&cap); |
| | htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
| |
|
| | test_htmlParseStartTag(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_INT(0, cap.nevents); |
| |
|
| | capture_free(&cap); |
| | htmlFreeParserCtxt(ctxt); |
| | } |
| |
|
| | |
| | void test_htmlParseStartTag_many_attributes(void) { |
| | |
| | char buf[4096]; |
| | strcpy(buf, "<span"); |
| | const int N = 20; |
| | char tmp[64]; |
| | for (int i = 0; i < N; i++) { |
| | snprintf(tmp, sizeof(tmp), " a%d='v%d'", i, i); |
| | strcat(buf, tmp); |
| | } |
| | strcat(buf, ">"); |
| |
|
| | SAXCapture cap; capture_init(&cap); |
| | htmlParserCtxtPtr ctxt = make_ctxt(buf, HTML_PARSE_NOIMPLIED, &cap); |
| |
|
| | test_htmlParseStartTag(ctxt); |
| |
|
| | TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
| | TEST_ASSERT_EQUAL_STRING("span", (const char *)cap.events[0].name); |
| | TEST_ASSERT_EQUAL_INT(N, cap.events[0].att_count); |
| |
|
| | |
| | const StartEvent *ev = &cap.events[0]; |
| | const xmlChar *v0 = find_attr_value(ev, "a0"); |
| | const xmlChar *v7 = find_attr_value(ev, "a7"); |
| | const xmlChar *v19 = find_attr_value(ev, "a19"); |
| | TEST_ASSERT_NOT_NULL(v0); |
| | TEST_ASSERT_NOT_NULL(v7); |
| | TEST_ASSERT_NOT_NULL(v19); |
| | TEST_ASSERT_EQUAL_STRING("v0", (const char *)v0); |
| | TEST_ASSERT_EQUAL_STRING("v7", (const char *)v7); |
| | TEST_ASSERT_EQUAL_STRING("v19", (const char *)v19); |
| |
|
| | capture_free(&cap); |
| | htmlFreeParserCtxt(ctxt); |
| | } |
| |
|
| | int main(void) { |
| | UNITY_BEGIN(); |
| |
|
| | RUN_TEST(test_htmlParseStartTag_simple_div_noimplied); |
| | RUN_TEST(test_htmlParseStartTag_uppercase_and_attr_lowercased); |
| | RUN_TEST(test_htmlParseStartTag_duplicate_attributes_dedup); |
| | RUN_TEST(test_htmlParseStartTag_unexpected_solidus_ignored); |
| | RUN_TEST(test_htmlParseStartTag_self_closing); |
| | RUN_TEST(test_htmlParseStartTag_incomplete_tag_discarded); |
| | RUN_TEST(test_htmlParseStartTag_many_attributes); |
| |
|
| | return UNITY_END(); |
| | } |