/*
  This file is part of libXMLRPC - a C library for xml-encoded function calls.

  Author: Dan Libby (dan@libby.com)
  Epinions.com may be contacted at feedback@epinions-inc.com
*/

/*
  Copyright 2000 Epinions, Inc.

  Subject to the following 3 conditions, Epinions, Inc. permits you, free
  of charge, to (a) use, copy, distribute, modify, perform and display this
  software and associated documentation files (the "Software"), and (b)
  permit others to whom the Software is furnished to do so as well.

  1) The above copyright notice and this permission notice shall be included
  without modification in all copies or substantial portions of the
  Software.

  2) THE SOFTWARE IS PROVIDED "AS IS", WITHOUT ANY WARRANTY OR CONDITION OF
  ANY KIND, EXPRESS, IMPLIED OR STATUTORY, INCLUDING WITHOUT LIMITATION ANY
  IMPLIED WARRANTIES OF ACCURACY, MERCHANTABILITY, FITNESS FOR A PARTICULAR
  PURPOSE OR NONINFRINGEMENT.

  3) IN NO EVENT SHALL EPINIONS, INC. BE LIABLE FOR ANY DIRECT, INDIRECT,
  SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES OR LOST PROFITS ARISING OUT
  OF OR IN CONNECTION WITH THE SOFTWARE (HOWEVER ARISING, INCLUDING
  NEGLIGENCE), EVEN IF EPINIONS, INC. IS AWARE OF THE POSSIBILITY OF SUCH
  DAMAGES.
*/
/* Revision-control identification string embedded in the object file. */
static const char rcsid[] = "#(@) $Id: xml_element.c,v 1.6 2004/06/01 20:16:06 iliaa Exp $";
/****h* ABOUT/xml_element
 * NAME
 *   xml_element
 * AUTHOR
 *   Dan Libby, aka danda  (dan@libby.com)
 * HISTORY
 *   $Log: xml_element.c,v $
 *   Revision 1.6  2004/06/01 20:16:06  iliaa
 *   Fixed bug #28597 (xmlrpc_encode_request() incorrectly encodes chars in
 *   200-210 range).
 *   Patch by: fernando dot nemec at folha dot com dot br
 *
 *   Revision 1.5  2003/12/16 21:00:21  sniper
 *   Fix some compile warnings (patch by Joe Orton)
 *
 *   Revision 1.4  2002/11/26 23:01:16  fmk
 *   removing unused variables
 *
 *   Revision 1.3  2002/07/05 04:43:53  danda
 *   merged in updates from SF project.  bring php repository up to date with xmlrpc-epi version 0.51
 *
 *   Revision 1.9  2002/07/03 20:54:30  danda
 *   root element should not have a parent. patch from anon SF user
 *
 *   Revision 1.8  2002/05/23 17:46:51  danda
 *   patch from mukund - fix non utf-8 encoding conversions
 *
 *   Revision 1.7  2002/02/13 20:58:50  danda
 *   patch to make source more windows friendly, contributed by Jeff Lawson
 *
 *   Revision 1.6  2002/01/08 01:06:55  danda
 *   enable <?xml version="1.0"?> format for parsers that are very picky.
 *
 *   Revision 1.5  2001/09/29 21:58:05  danda
 *   adding cvs log to history section
 *
 *   10/15/2000 -- danda -- adding robodoc documentation
 * TODO
 *   Nicer external API. Get rid of macros.  Make opaque types, etc.
 * PORTABILITY
 *   Coded on RedHat Linux 6.2.  Builds on Solaris x86.  Should build on just
 *   about anything with minor mods.
 * NOTES
 *   This code incorporates ideas from expat-ensor from http://xml.ensor.org.
 *
 *   It was coded primarily to act as a go-between for expat and xmlrpc. To this
 *   end, it stores xml elements, their sub-elements, and their attributes in an
 *   in-memory tree.  When expat is done parsing, the tree can be walked, thus
 *   retrieving the values.  The code can also be used to build a tree via API then
 *   write out the tree to a buffer, thus "serializing" the xml.
 *
 *   It turns out this is useful for other purposes, such as parsing config files.
 *
 *   Some features:
 *     - output option for xml escaping data.  Choices include no escaping, entity escaping,
 *       or CDATA.
 *     - output option for character encoding.  Defaults to (none) utf-8.
 *     - output option for verbosity/readability.  ultra-compact, newlines, pretty/level indented.
 * BUGS
 *   there must be some.
 ******/
#ifdef _WIN32
#include "xmlrpc_win32.h"
#endif
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "xml_element.h"
#include "encodings.h"
/*
 * Free `thing` if it is non-NULL and reset it to NULL.
 * Wrapped in do { } while(0) so it behaves as a single statement, including
 * inside an unbraced if/else.
 */
#define my_free(thing) do { if(thing) { free(thing); (thing) = NULL; } } while(0)

/*
 * Literal tokens emitted by the serializer, each paired with a *_LEN macro
 * giving its length without the terminating NUL.  Every *_LEN expansion is
 * fully parenthesized so it can be used inside larger expressions.
 */
#define XML_DECL_START                 "<?xml"
#define XML_DECL_START_LEN             (sizeof(XML_DECL_START) - 1)
#define XML_DECL_VERSION               "version=\"1.0\""
#define XML_DECL_VERSION_LEN           (sizeof(XML_DECL_VERSION) - 1)
#define XML_DECL_ENCODING_ATTR         "encoding"
#define XML_DECL_ENCODING_ATTR_LEN     (sizeof(XML_DECL_ENCODING_ATTR) - 1)
#define XML_DECL_ENCODING_DEFAULT      "utf-8"
#define XML_DECL_ENCODING_DEFAULT_LEN  (sizeof(XML_DECL_ENCODING_DEFAULT) - 1)
#define XML_DECL_END                   "?>"
#define XML_DECL_END_LEN               (sizeof(XML_DECL_END) - 1)
#define START_TOKEN_BEGIN              "<"
#define START_TOKEN_BEGIN_LEN          (sizeof(START_TOKEN_BEGIN) - 1)
#define START_TOKEN_END                ">"
#define START_TOKEN_END_LEN            (sizeof(START_TOKEN_END) - 1)
#define EMPTY_START_TOKEN_END          "/>"
#define EMPTY_START_TOKEN_END_LEN      (sizeof(EMPTY_START_TOKEN_END) - 1)
#define END_TOKEN_BEGIN                "</"
#define END_TOKEN_BEGIN_LEN            (sizeof(END_TOKEN_BEGIN) - 1)
#define END_TOKEN_END                  ">"
#define END_TOKEN_END_LEN              (sizeof(END_TOKEN_END) - 1)
#define ATTR_DELIMITER                 "\""
#define ATTR_DELIMITER_LEN             (sizeof(ATTR_DELIMITER) - 1)
#define CDATA_BEGIN                    "<![CDATA["
#define CDATA_BEGIN_LEN                (sizeof(CDATA_BEGIN) - 1)
#define CDATA_END                      "]]>"
#define CDATA_END_LEN                  (sizeof(CDATA_END) - 1)
/* separator between an attribute name and its quoted value */
#define EQUALS                         "="
#define EQUALS_LEN                     (sizeof(EQUALS) - 1)
#define WHITESPACE                     " "
#define WHITESPACE_LEN                 (sizeof(WHITESPACE) - 1)
/* line terminator used by the non-compact output modes */
#define NEWLINE                        "\n"
#define NEWLINE_LEN                    (sizeof(NEWLINE) - 1)
#define MAX_VAL_BUF                    144
#define SCALAR_STR                     "SCALAR"
#define SCALAR_STR_LEN                 (sizeof(SCALAR_STR) - 1)
#define VECTOR_STR                     "VECTOR"
#define VECTOR_STR_LEN                 (sizeof(VECTOR_STR) - 1)
#define RESPONSE_STR                   "RESPONSE"
#define RESPONSE_STR_LEN               (sizeof(RESPONSE_STR) - 1)
159 /*-----------------------------
160 - Begin xml_element Functions -
161 -----------------------------*/
163 /****f* xml_element/xml_elem_free_non_recurse
165 * xml_elem_free_non_recurse
167 * void xml_elem_free_non_recurse(xml_element* root)
169 * free a single xml element. child elements will not be freed.
171 * root - the element to free
180 void xml_elem_free_non_recurse(xml_element* root) {
182 xml_element_attr* attrs = Q_Head(&root->attrs);
187 attrs = Q_Next(&root->attrs);
190 Q_Destroy(&root->children);
191 Q_Destroy(&root->attrs);
193 free((char *)root->name);
196 simplestring_free(&root->text);
202 /****f* xml_element/xml_elem_free
206 * void xml_elem_free(xml_element* root)
208 * free an xml element and all of its child elements
210 * root - the root of an xml tree you would like to free
215 * xml_elem_free_non_recurse ()
219 void xml_elem_free(xml_element* root) {
221 xml_element* kids = Q_Head(&root->children);
224 kids = Q_Next(&root->children);
226 xml_elem_free_non_recurse(root);
231 /****f* xml_element/xml_elem_new
235 * xml_element* xml_elem_new()
237 * allocates and initializes a new xml_element
241 * xml_element* or NULL. NULL indicates an out-of-memory condition.
245 * xml_elem_free_non_recurse ()
248 xml_element* xml_elem_new() {
249 xml_element* elem = calloc(1, sizeof(xml_element));
251 Q_Init(&elem->children);
252 Q_Init(&elem->attrs);
253 simplestring_init(&elem->text);
255 /* init empty string in case we don't find any char data */
256 simplestring_addn(&elem->text, "", 0);
/*
 * Forward `text` to the output callback `fptr`.
 *
 *   fptr - output callback; receives (data, text, size)
 *   text - bytes to write
 *   data - opaque cookie handed back to the callback
 *   len  - number of bytes to write; a value <= 0 means "text is a
 *          NUL-terminated string, measure it with strlen()"
 *
 * Returns the callback's result, or 0 when either fptr or text is NULL
 * (nothing is written in that case).
 */
static int xml_elem_writefunc(int (*fptr)(void *data, const char *text, int size), const char *text, void *data, int len)
{
   if(!fptr || !text) {
      return 0;
   }
   return fptr(data, text, len > 0 ? len : (int)strlen(text));
}
/*
 * Write the decimal character reference for byte `c` ("&#NN;" or "&#NNN;")
 * into pString and return the number of bytes written.
 *
 * Values below 100 are always written with two digits, so e.g. 9 becomes
 * "&#09;".  The output is NOT NUL-terminated; the caller must provide room
 * for up to 6 bytes.
 */
static int create_xml_escape(char *pString, unsigned char c)
{
   int counter = 0;

   pString[counter++] = '&';
   pString[counter++] = '#';
   if(c >= 100) {
      pString[counter++] = c / 100 + '0';
      c = c % 100;
   }
   pString[counter++] = c / 10 + '0';
   c = c % 10;

   pString[counter++] = c + '0';
   pString[counter++] = ';';
   return counter;
}
/* Character classification helpers for the entity escaper.  Arguments and
 * expansions are fully parenthesized so they are safe in any expression. */
#define non_ascii(c) ((c) > 127)
/* isprint() requires a value representable as unsigned char */
#define non_print(c) (!isprint((unsigned char)(c)))
#define markup(c) ((c) == '&' || (c) == '\"' || (c) == '>' || (c) == '<')
/* number of decimal digits in c (1..3) plus 3 for "&#" and ";" */
#define entity_length(c) ((((c) >= 100) ? 3 : (((c) >= 10) ? 2 : 1)) + 3)
293 * xml_elem_entity_escape
296 * escape reserved xml chars and non utf-8 chars as xml entities
298 * The return value may be a new string, or null if no
299 * conversion was performed. In the latter case, *newlen will
302 * xml_elem_no_escaping = 0x000,
303 * xml_elem_entity_escaping = 0x002, // escape xml special chars as entities
304 * xml_elem_non_ascii_escaping = 0x008, // escape chars above 127
305 * xml_elem_cdata_escaping = 0x010, // wrap in cdata
307 static char* xml_elem_entity_escape(const char* buf, int old_len, int *newlen, XML_ELEM_ESCAPING flags) {
311 #define should_escape(c, flag) ( ((flag & xml_elem_markup_escaping) && markup(c)) || \
312 ((flag & xml_elem_non_ascii_escaping) && non_ascii(c)) || \
313 ((flag & xml_elem_non_print_escaping) && non_print(c)) )
316 const unsigned char *bufcopy;
318 int ToBeXmlEscaped=0;
321 iLength= old_len ? old_len : strlen(buf);
323 if( should_escape(*bufcopy, flags) ) {
324 /* the length will increase by length of xml escape - the character length */
325 iLength += entity_length(*bufcopy);
333 NewBuffer= malloc(iLength+1);
337 if(should_escape(*bufcopy, flags)) {
338 iNewBufLen += create_xml_escape(NewBuffer+iNewBufLen,*bufcopy);
341 NewBuffer[iNewBufLen++]=*bufcopy;
345 NewBuffer[iNewBufLen] = 0;
352 *newlen = iNewBufLen;
359 static void xml_element_serialize(xml_element *el, int (*fptr)(void *data, const char *text, int size), void *data, XML_ELEM_OUTPUT_OPTIONS options, int depth)
362 static STRUCT_XML_ELEM_OUTPUT_OPTIONS default_opts = {xml_elem_pretty, xml_elem_markup_escaping | xml_elem_non_print_escaping, XML_DECL_ENCODING_DEFAULT};
363 static char whitespace[] = " "
369 fprintf(stderr, "Nothing to write\n");
373 options = &default_opts;
376 /* print xml declaration if at root level */
378 xml_elem_writefunc(fptr, XML_DECL_START, data, XML_DECL_START_LEN);
379 xml_elem_writefunc(fptr, WHITESPACE, data, WHITESPACE_LEN);
380 xml_elem_writefunc(fptr, XML_DECL_VERSION, data, XML_DECL_VERSION_LEN);
381 if(options->encoding && *options->encoding) {
382 xml_elem_writefunc(fptr, WHITESPACE, data, WHITESPACE_LEN);
383 xml_elem_writefunc(fptr, XML_DECL_ENCODING_ATTR, data, XML_DECL_ENCODING_ATTR_LEN);
384 xml_elem_writefunc(fptr, EQUALS, data, EQUALS_LEN);
385 xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);
386 xml_elem_writefunc(fptr, options->encoding, data, 0);
387 xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);
389 xml_elem_writefunc(fptr, XML_DECL_END, data, XML_DECL_END_LEN);
390 if(options->verbosity != xml_elem_no_white_space) {
391 xml_elem_writefunc(fptr, NEWLINE, data, NEWLINE_LEN);
395 if(options->verbosity == xml_elem_pretty && depth > 2) {
396 xml_elem_writefunc(fptr, whitespace, data, depth - 2);
399 xml_elem_writefunc(fptr,START_TOKEN_BEGIN, data, START_TOKEN_BEGIN_LEN);
401 xml_elem_writefunc(fptr, el->name, data, 0);
403 /* write attrs, if any */
404 if(Q_Size(&el->attrs)) {
405 xml_element_attr* iter = Q_Head(&el->attrs);
407 xml_elem_writefunc(fptr, WHITESPACE, data, WHITESPACE_LEN);
408 xml_elem_writefunc(fptr, iter->key, data, 0);
409 xml_elem_writefunc(fptr, EQUALS, data, EQUALS_LEN);
410 xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);
411 xml_elem_writefunc(fptr, iter->val, data, 0);
412 xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);
414 iter = Q_Next(&el->attrs);
419 xml_elem_writefunc(fptr, "None", data, 0);
421 /* if no text and no children, use abbreviated form, eg: <foo/> */
422 if(!el->text.len && !Q_Size(&el->children)) {
423 xml_elem_writefunc(fptr, EMPTY_START_TOKEN_END, data, EMPTY_START_TOKEN_END_LEN);
425 /* otherwise, print element contents */
427 xml_elem_writefunc(fptr, START_TOKEN_END, data, START_TOKEN_END_LEN);
429 /* print text, if any */
431 char* escaped_str = el->text.str;
432 int buflen = el->text.len;
434 if(options->escaping && options->escaping != xml_elem_cdata_escaping) {
435 escaped_str = xml_elem_entity_escape(el->text.str, buflen, &buflen, options->escaping );
437 escaped_str = el->text.str;
441 if(options->escaping & xml_elem_cdata_escaping) {
442 xml_elem_writefunc(fptr, CDATA_BEGIN, data, CDATA_BEGIN_LEN);
445 xml_elem_writefunc(fptr, escaped_str, data, buflen);
447 if(escaped_str != el->text.str) {
448 my_free(escaped_str);
451 if(options->escaping & xml_elem_cdata_escaping) {
452 xml_elem_writefunc(fptr, CDATA_END, data, CDATA_END_LEN);
455 /* no text, so print child elems */
457 xml_element *kids = Q_Head(&el->children);
461 if(options->verbosity != xml_elem_no_white_space) {
462 xml_elem_writefunc(fptr, NEWLINE, data, NEWLINE_LEN);
465 xml_element_serialize(kids, fptr, data, options, depth);
466 kids = Q_Next(&el->children);
469 if(options->verbosity == xml_elem_pretty && depth > 2) {
470 xml_elem_writefunc(fptr, whitespace, data, depth - 2);
475 xml_elem_writefunc(fptr, END_TOKEN_BEGIN, data, END_TOKEN_BEGIN_LEN);
476 xml_elem_writefunc(fptr,el->name ? el->name : "None", data, 0);
477 xml_elem_writefunc(fptr, END_TOKEN_END, data, END_TOKEN_END_LEN);
479 if(options->verbosity != xml_elem_no_white_space) {
480 xml_elem_writefunc(fptr, NEWLINE, data, NEWLINE_LEN);
/*
 * print buf to file
 *
 * Output callback that writes exactly `size` bytes of `text` to the stdio
 * stream passed as `f`.  The byte count is honored, so `text` does not have
 * to be NUL-terminated at `size`.  Always returns 0.
 */
static int file_out_fptr(void *f, const char *text, int size)
{
   if(f && text && size > 0) {
      fwrite(text, 1, (size_t)size, (FILE *)f);
   }
   return 0;
}
491 /* print buf to simplestring */
492 static int simplestring_out_fptr(void *f, const char *text, int size)
494 simplestring* buf = (simplestring*)f;
496 simplestring_addn(buf, text, size);
501 /****f* xml_element/xml_elem_serialize_to_string
503 * xml_elem_serialize_to_string
505 * void xml_element_serialize_to_string(xml_element *el, XML_ELEM_OUTPUT_OPTIONS options, int *buf_len)
507 * writes element tree as XML into a newly allocated buffer
509 * el - root element of tree
510 * options - options determining how output is written. see XML_ELEM_OUTPUT_OPTIONS
511 * buf_len - length of returned buffer, if not null.
513 * char* or NULL. Must be free'd by caller.
516 * xml_elem_serialize_to_stream ()
517 * xml_elem_parse_buf ()
520 char* xml_elem_serialize_to_string(xml_element *el, XML_ELEM_OUTPUT_OPTIONS options, int *buf_len)
523 simplestring_init(&buf);
525 xml_element_serialize(el, simplestring_out_fptr, (void *)&buf, options, 0);
535 /****f* xml_element/xml_elem_serialize_to_stream
537 * xml_elem_serialize_to_stream
539 * void xml_elem_serialize_to_stream(xml_element *el, FILE *output, XML_ELEM_OUTPUT_OPTIONS options)
541 * writes element tree as XML into a stream (typically an opened file)
543 * el - root element of tree
544 * output - stream handle
545 * options - options determining how output is written. see XML_ELEM_OUTPUT_OPTIONS
550 * xml_elem_serialize_to_string ()
551 * xml_elem_parse_buf ()
554 void xml_elem_serialize_to_stream(xml_element *el, FILE *output, XML_ELEM_OUTPUT_OPTIONS options)
556 xml_element_serialize(el, file_out_fptr, (void *)output, options, 0);
560 /*--------------------------*
561 * End xml_element Functions *
562 *--------------------------*/
565 /*----------------------
566 * Begin Expat Handlers *
567 *---------------------*/
569 typedef struct _xml_elem_data {
571 xml_element* current;
572 XML_ELEM_INPUT_OPTIONS input_options;
573 int needs_enc_conversion;
577 /* expat start of element handler */
578 static void startElement(void *userData, const char *name, const char **attrs)
581 xml_elem_data* mydata = (xml_elem_data*)userData;
582 const char** p = attrs;
587 mydata->current = xml_elem_new();
588 mydata->current->name = (char*)strdup(name);
589 mydata->current->parent = c;
593 xml_element_attr* attr = malloc(sizeof(xml_element_attr));
595 attr->key = strdup(*p);
596 attr->val = strdup(*(p+1));
597 Q_PushTail(&mydata->current->attrs, attr);
605 /* expat end of element handler */
606 static void endElement(void *userData, const char *name)
608 xml_elem_data* mydata = (xml_elem_data*)userData;
610 if(mydata && mydata->current && mydata->current->parent) {
611 Q_PushTail(&mydata->current->parent->children, mydata->current);
613 mydata->current = mydata->current->parent;
617 /* expat char data handler */
618 static void charHandler(void *userData,
622 xml_elem_data* mydata = (xml_elem_data*)userData;
623 if(mydata && mydata->current) {
625 /* Check if we need to decode utf-8 parser output to another encoding */
626 if(mydata->needs_enc_conversion && mydata->input_options->encoding) {
628 char* add_text = utf8_decode(s, len, &new_len, mydata->input_options->encoding);
631 simplestring_addn(&mydata->current->text, add_text, len);
636 simplestring_addn(&mydata->current->text, s, len);
641 /*-------------------*
642 * End Expat Handlers *
643 *-------------------*/
/*-------------------*
* xml_elem_parse_buf *
*-------------------*/

/****f* xml_element/xml_elem_parse_buf
 * NAME
 *   xml_elem_parse_buf
 * SYNOPSIS
 *   xml_element* xml_elem_parse_buf(const char* in_buf, int len, XML_ELEM_INPUT_OPTIONS options, XML_ELEM_ERROR error)
 * FUNCTION
 *   parse a buffer containing XML into an xml_element in-memory tree
 * INPUTS
 *   in_buf   - buffer containing XML document
 *   len      - length of buffer
 *   options  - input options. optional
 *   error    - error result data. optional. check if result is null.
 * NOTES
 *   The returned data must be free'd by caller
 * SEE ALSO
 *   xml_elem_serialize_to_string ()
 */
/*
 * Parses the XML document in in_buf with expat and builds an xml_element
 * tree through the startElement / endElement / charHandler callbacks.
 *
 * NOTE(review): this listing is incomplete. Each line still carries a stray
 * listing number, and several lines (braces, guarding conditions, some
 * declarations, the end of the function) are not visible here. The comments
 * below describe only what the visible statements do; anything about the
 * missing lines is marked as an assumption.
 */
670 xml_element* xml_elem_parse_buf(const char* in_buf, int len, XML_ELEM_INPUT_OPTIONS options, XML_ELEM_ERROR error)
672 xml_element* xReturn = NULL;
674 static STRUCT_XML_ELEM_INPUT_OPTIONS default_opts = {encoding_utf_8};
/* Fall back to the built-in utf-8 options -- presumably only when the caller
   passed no options; the guarding condition is not visible. TODO confirm. */
677 options = &default_opts;
682 xml_elem_data mydata = {0};
684 parser = XML_ParserCreate(NULL);
/* A placeholder root element collects the parsed document root as its child. */
686 mydata.root = xml_elem_new();
687 mydata.current = mydata.root;
688 mydata.input_options = options;
/* Conversion is needed when an encoding is set and it differs from encoding_utf_8. */
689 mydata.needs_enc_conversion = options->encoding && strcmp(options->encoding, encoding_utf_8);
691 XML_SetElementHandler(parser, startElement, endElement);
692 XML_SetCharacterDataHandler(parser, charHandler);
694 /* pass the xml_elem_data struct along */
695 XML_SetUserData(parser, (void*)&mydata);
/* Measure the buffer with strlen -- presumably only when len was passed as 0;
   the guarding condition is not visible. TODO confirm. */
698 len = strlen(in_buf);
/* The whole buffer is handed to expat in a single call; a result of 0 means
   the parse failed and the branch below gathers the error details. */
702 if(XML_Parse(parser, in_buf, len, 1) == 0) {
703 enum XML_Error err_code = XML_GetErrorCode(parser);
704 int line_num = XML_GetCurrentLineNumber(parser);
705 int col_num = XML_GetCurrentColumnNumber(parser);
706 long byte_idx = XML_GetCurrentByteIndex(parser);
707 int byte_total = XML_GetCurrentByteCount(parser);
708 const char * error_str = XML_ErrorString(err_code);
/* Arguments of a formatting call whose first line is not visible: they
   describe the input starting up to 10 bytes before the failing byte index.
   NOTE(review): presumably this fills `buf`, which is printed below. */
712 "\n\tdata beginning %ld before byte index: %s\n",
713 byte_idx > 10 ? 10 : byte_idx,
714 in_buf + (byte_idx > 10 ? byte_idx - 10 : byte_idx));
/* Report the failure on stderr. */
717 fprintf(stderr, "expat reports error code %i\n"
718 "\tdescription: %s\n"
721 "\tbyte index: %ld\n"
722 "\ttotal bytes: %i\n%s ",
723 err_code, error_str, line_num,
724 col_num, byte_idx, byte_total, buf);
727 /* error condition */
/* Copy the details into the caller's error record -- presumably only when
   `error` is non-NULL; the guard is not visible. TODO confirm. */
729 error->parser_code = (long)err_code;
730 error->line = line_num;
731 error->column = col_num;
732 error->byte_index = byte_idx;
733 error->parser_error = error_str;
/* Result: the first child of the placeholder root, detached from it by
   clearing its parent. NOTE(review): presumably the success branch. */
737 xReturn = (xml_element*)Q_Head(&mydata.root->children);
738 xReturn->parent = NULL;
741 XML_ParserFree(parser);
/* Only the placeholder root is released; its children are not freed by this call. */
744 xml_elem_free_non_recurse(mydata.root);