2 This file is part of libXMLRPC - a C library for xml-encoded function calls.
4 Author: Dan Libby (dan@libby.com)
5 Epinions.com may be contacted at feedback@epinions-inc.com
9 Copyright 2000 Epinions, Inc.
11 Subject to the following 3 conditions, Epinions, Inc. permits you, free
12 of charge, to (a) use, copy, distribute, modify, perform and display this
13 software and associated documentation files (the "Software"), and (b)
14 permit others to whom the Software is furnished to do so as well.
16 1) The above copyright notice and this permission notice shall be included
17 without modification in all copies or substantial portions of the
18 Software.
20 2) THE SOFTWARE IS PROVIDED "AS IS", WITHOUT ANY WARRANTY OR CONDITION OF
21 ANY KIND, EXPRESS, IMPLIED OR STATUTORY, INCLUDING WITHOUT LIMITATION ANY
22 IMPLIED WARRANTIES OF ACCURACY, MERCHANTABILITY, FITNESS FOR A PARTICULAR
23 PURPOSE OR NONINFRINGEMENT.
25 3) IN NO EVENT SHALL EPINIONS, INC. BE LIABLE FOR ANY DIRECT, INDIRECT,
26 SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES OR LOST PROFITS ARISING OUT
27 OF OR IN CONNECTION WITH THE SOFTWARE (HOWEVER ARISING, INCLUDING
28 NEGLIGENCE), EVEN IF EPINIONS, INC. IS AWARE OF THE POSSIBILITY OF SUCH
29 DAMAGES.
34 static const char rcsid[] = "#(@) $Id$";
38 /****h* ABOUT/xml_element
42 * Dan Libby, aka danda (dan@libby.com)
46 * $Log: xml_element.c,v $
47 * Revision 1.6 2004/06/01 20:16:06 iliaa
48 * Fixed bug #28597 (xmlrpc_encode_request() incorrectly encodes chars in
50 * Patch by: fernando dot nemec at folha dot com dot br
52 * Revision 1.5 2003/12/16 21:00:21 sniper
53 * Fix some compile warnings (patch by Joe Orton)
55 * Revision 1.4 2002/11/26 23:01:16 fmk
56 * removing unused variables
58 * Revision 1.3 2002/07/05 04:43:53 danda
59 * merged in updates from SF project. bring php repository up to date with xmlrpc-epi version 0.51
61 * Revision 1.9 2002/07/03 20:54:30 danda
62 * root element should not have a parent. patch from anon SF user
64 * Revision 1.8 2002/05/23 17:46:51 danda
65 * patch from mukund - fix non utf-8 encoding conversions
67 * Revision 1.7 2002/02/13 20:58:50 danda
68 * patch to make source more windows friendly, contributed by Jeff Lawson
70 * Revision 1.6 2002/01/08 01:06:55 danda
71 * enable <?xml version="1.0"?> format for parsers that are very picky.
73 * Revision 1.5 2001/09/29 21:58:05 danda
74 * adding cvs log to history section
76 * 10/15/2000 -- danda -- adding robodoc documentation
78 * Nicer external API. Get rid of macros. Make opaque types, etc.
80 * Coded on RedHat Linux 6.2. Builds on Solaris x86. Should build on just
81 * about anything with minor mods.
83 * This code incorporates ideas from expat-ensor from http://xml.ensor.org.
85 * It was coded primarily to act as a go-between for expat and xmlrpc. To this
86 * end, it stores xml elements, their sub-elements, and their attributes in an
87 * in-memory tree. When expat is done parsing, the tree can be walked, thus
88 * retrieving the values. The code can also be used to build a tree via API then
89 * write out the tree to a buffer, thus "serializing" the xml.
91 * It turns out this is useful for other purposes, such as parsing config files.
95 * - output option for xml escaping data. Choices include no escaping, entity escaping,
96 * or CDATA sections.
97 * - output option for character encoding. Defaults to (none) utf-8.
98 * - output option for verbosity/readability. ultra-compact, newlines, pretty/level indented.
101 * there must be some.
105 #include "xmlrpc_win32.h"
111 #include "xml_element.h"
114 #include "encodings.h"
/*
 * Release a heap pointer and reset it to 0 so it cannot be freed twice.
 * Wrapped in do/while(0) so the macro acts as a single statement, including
 * when it is the unbraced body of an if/else. `thing` is evaluated more than
 * once and is assigned to, so it must be a side-effect-free lvalue.
 */
#define my_free(thing) do { if(thing) {free(thing); thing = 0;} } while(0)

/*
 * Literal tokens emitted by the serializer. Every *_LEN macro is the token
 * length without the terminating NUL; the expansions are parenthesized so
 * they stay correct inside larger expressions (e.g. 2 * FOO_LEN).
 */
#define XML_DECL_START                 "<?xml"
#define XML_DECL_START_LEN             (sizeof(XML_DECL_START) - 1)
#define XML_DECL_VERSION               "version=\"1.0\""
#define XML_DECL_VERSION_LEN           (sizeof(XML_DECL_VERSION) - 1)
#define XML_DECL_ENCODING_ATTR         "encoding"
#define XML_DECL_ENCODING_ATTR_LEN     (sizeof(XML_DECL_ENCODING_ATTR) - 1)
#define XML_DECL_ENCODING_DEFAULT      "utf-8"
#define XML_DECL_ENCODING_DEFAULT_LEN  (sizeof(XML_DECL_ENCODING_DEFAULT) - 1)
#define XML_DECL_END                   "?>"
#define XML_DECL_END_LEN               (sizeof(XML_DECL_END) - 1)
#define START_TOKEN_BEGIN              "<"
#define START_TOKEN_BEGIN_LEN          (sizeof(START_TOKEN_BEGIN) - 1)
#define START_TOKEN_END                ">"
#define START_TOKEN_END_LEN            (sizeof(START_TOKEN_END) - 1)
#define EMPTY_START_TOKEN_END          "/>"
#define EMPTY_START_TOKEN_END_LEN      (sizeof(EMPTY_START_TOKEN_END) - 1)
#define END_TOKEN_BEGIN                "</"
#define END_TOKEN_BEGIN_LEN            (sizeof(END_TOKEN_BEGIN) - 1)
#define END_TOKEN_END                  ">"
#define END_TOKEN_END_LEN              (sizeof(END_TOKEN_END) - 1)
#define ATTR_DELIMITER                 "\""
#define ATTR_DELIMITER_LEN             (sizeof(ATTR_DELIMITER) - 1)
#define CDATA_BEGIN                    "<![CDATA["
#define CDATA_BEGIN_LEN                (sizeof(CDATA_BEGIN) - 1)
#define CDATA_END                      "]]>"
#define CDATA_END_LEN                  (sizeof(CDATA_END) - 1)
/* NOTE(review): the definition of EQUALS is not among the lines visible here. */
#define EQUALS_LEN                     (sizeof(EQUALS) - 1)
#define WHITESPACE                     " "
#define WHITESPACE_LEN                 (sizeof(WHITESPACE) - 1)
/* NOTE(review): the definition of NEWLINE is not among the lines visible here. */
#define NEWLINE_LEN                    (sizeof(NEWLINE) - 1)
#define MAX_VAL_BUF                    144
#define SCALAR_STR                     "SCALAR"
#define SCALAR_STR_LEN                 (sizeof(SCALAR_STR) - 1)
#define VECTOR_STR                     "VECTOR"
#define VECTOR_STR_LEN                 (sizeof(VECTOR_STR) - 1)
#define RESPONSE_STR                   "RESPONSE"
#define RESPONSE_STR_LEN               (sizeof(RESPONSE_STR) - 1)
159 /*-----------------------------
160 - Begin xml_element Functions -
161 -----------------------------*/
163 /****f* xml_element/xml_elem_free_non_recurse
165 * xml_elem_free_non_recurse
167 * void xml_elem_free_non_recurse(xml_element* root)
169 * free a single xml element. child elements will not be freed.
171 * root - the element to free
/*
 * Free a single element without descending into its children: iterates the
 * attribute queue, destroys the child and attribute queues, then releases
 * the element name and its text buffer.
 * NOTE(review): the loop header/body and the closing lines are not visible
 * in this view; presumably each attribute (key, val, node) and finally
 * `root` itself are freed there -- confirm.
 */
180 void xml_elem_free_non_recurse(xml_element* root) {
/* start at the first attribute of this element */
182 xml_element_attr* attrs = Q_Head(&root->attrs);
/* advance the queue cursor to the next attribute */
187 attrs = Q_Next(&root->attrs);
/* tear down the two queue containers owned by the element */
190 Q_Destroy(&root->children);
191 Q_Destroy(&root->attrs);
/* my_free() also assigns 0 to its argument; the argument here is a cast expression */
192 my_free((char*)root->name);
193 simplestring_free(&root->text);
199 /****f* xml_element/xml_elem_free
203 * void xml_elem_free(xml_element* root)
205 * free an xml element and all of its child elements
207 * root - the root of an xml tree you would like to free
212 * xml_elem_free_non_recurse ()
/*
 * Free an element together with its subtree: walks the child queue, then
 * releases `root` itself via xml_elem_free_non_recurse().
 * NOTE(review): the loop header and the per-child call are not visible in
 * this view; presumably each child is freed recursively -- confirm.
 */
216 void xml_elem_free(xml_element* root) {
218 xml_element* kids = Q_Head(&root->children);
/* move on to the next child in the queue */
221 kids = Q_Next(&root->children);
/* children handled; now release this node's own storage */
223 xml_elem_free_non_recurse(root);
228 /****f* xml_element/xml_elem_new
232 * xml_element* xml_elem_new()
234 * allocates and initializes a new xml_element
238 * xml_element* or NULL. NULL indicates an out-of-memory condition.
242 * xml_elem_free_non_recurse ()
/*
 * Allocate a zero-filled xml_element and initialise its child queue,
 * attribute queue and text buffer.
 * NOTE(review): the allocation check and the return statement are not
 * visible in this view.
 */
245 xml_element* xml_elem_new() {
/* calloc zero-fills, so all members start out as 0/NULL */
246 xml_element* elem = calloc(1, sizeof(xml_element));
248 Q_Init(&elem->children);
249 Q_Init(&elem->attrs);
250 simplestring_init(&elem->text);
252 /* init empty string in case we don't find any char data */
253 simplestring_addn(&elem->text, "", 0);
/*
 * Forward a chunk of text to an output callback.
 *
 * fptr - callback receiving (data, text, size); may be NULL
 * text - bytes to write; may be NULL
 * data - opaque pointer handed to fptr unchanged
 * len  - number of bytes to write; 0 means "use strlen(text)"
 *
 * Returns whatever fptr returns, or 0 when fptr or text is NULL (nothing is
 * written in that case).
 */
static int xml_elem_writefunc(int (*fptr)(void *data, const char *text, int size), const char *text, void *data, int len)
{
   if(!fptr || !text) {
      return 0;
   }

   /* strlen() yields size_t; convert explicitly rather than mixing int and
    * size_t operands in one conditional expression. */
   return fptr(data, text, len ? len : (int)strlen(text));
}
/*
 * Write the decimal XML character reference for byte `c` ("&#NNN;") into
 * pString.
 *
 * pString - destination; must have room for up to 6 bytes. No terminating
 *           NUL is written.
 * c       - the byte value to encode (0..255)
 *
 * The number is written with as many digits as it needs (1 to 3), so the
 * count returned here is exactly what the entity_length() macro computes
 * for the same byte.
 *
 * Returns the number of bytes written (4, 5 or 6).
 */
static int create_xml_escape(char *pString, unsigned char c)
{
   const unsigned int value = c;
   int counter = 0;

   pString[counter++] = '&';
   pString[counter++] = '#';
   if(value >= 100) {
      pString[counter++] = (char)('0' + value / 100);
   }
   if(value >= 10) {
      /* middle digit; may legitimately be '0' (e.g. 105) */
      pString[counter++] = (char)('0' + (value / 10) % 10);
   }
   pString[counter++] = (char)('0' + value % 10);
   pString[counter++] = ';';

   return counter;
}
/*
 * Character classification helpers used to decide which bytes get escaped.
 * Every argument and every expansion is parenthesized so the macros are safe
 * inside larger expressions; none of them ends in a semicolon.
 */
#define non_ascii(c) ((c) > 127)
/* isprint() requires a value representable as unsigned char (or EOF) */
#define non_print(c) (!isprint((unsigned char)(c)))
#define markup(c) ((c) == '&' || (c) == '\"' || (c) == '>' || (c) == '<')
/* number of bytes in the decimal character reference for c: "&#" + digits + ";" */
#define entity_length(c) ( ( ((c) >= 100) ? 3 : (((c) >= 10) ? 2 : 1) ) + 3 )
290 * xml_elem_entity_escape
293 * escape reserved xml chars and non utf-8 chars as xml entities
295 * The return value may be a new string, or null if no
296 * conversion was performed. In the latter case, *newlen will
297 * be 0.
299 * xml_elem_no_escaping = 0x000,
300 * xml_elem_entity_escaping = 0x002, // escape xml special chars as entities
301 * xml_elem_non_ascii_escaping = 0x008, // escape chars above 127
302 * xml_elem_cdata_escaping = 0x010, // wrap in cdata
/*
 * Build an escaped copy of `buf` in which every byte selected by `flags` is
 * replaced by a numeric character reference produced by create_xml_escape().
 * Works in two passes: the first sizes the output, the second fills a
 * malloc()ed buffer. The resulting length is stored through `newlen`.
 * NOTE(review): loop headers, several declarations, the allocation check and
 * the return statement are not visible in this view.
 */
304 static char* xml_elem_entity_escape(const char* buf, int old_len, int *newlen, XML_ELEM_ESCAPING flags) {
/* a byte is escaped when any enabled flag's character class matches it */
308 #define should_escape(c, flag) ( ((flag & xml_elem_markup_escaping) && markup(c)) || \
309 ((flag & xml_elem_non_ascii_escaping) && non_ascii(c)) || \
310 ((flag & xml_elem_non_print_escaping) && non_print(c)) )
/* unsigned so bytes above 127 compare as 128..255 in the macros above */
313 const unsigned char *bufcopy;
315 int ToBeXmlEscaped=0;
/* old_len == 0 means "measure buf with strlen()" */
318 iLength= old_len ? old_len : strlen(buf);
/* pass 1: grow the required length for every byte that will be escaped */
320 if( should_escape(*bufcopy, flags) ) {
321 /* the length will increase by length of xml escape - the character length */
322 iLength += entity_length(*bufcopy);
/* +1 leaves room for the terminating NUL written below */
330 NewBuffer= malloc(iLength+1);
/* pass 2: emit either the character reference or the byte itself */
334 if(should_escape(*bufcopy, flags)) {
335 iNewBufLen += create_xml_escape(NewBuffer+iNewBufLen,*bufcopy);
338 NewBuffer[iNewBufLen++]=*bufcopy;
342 NewBuffer[iNewBufLen] = 0;
/* report the number of bytes written (excluding the NUL) to the caller */
349 *newlen = iNewBufLen;
/*
 * Write element `el` as XML through the callback `fptr`, calling itself for
 * each child element.
 *
 * el      - element to write
 * fptr    - output callback, invoked via xml_elem_writefunc()
 * data    - opaque pointer handed to fptr
 * options - output options; default_opts is substituted on a path not fully
 *           visible here (presumably when options is NULL -- confirm)
 * depth   - nesting level, used for the XML declaration and indentation
 *
 * NOTE(review): many lines (braces, conditions, any depth adjustment) are
 * not visible in this view.
 */
356 static void xml_element_serialize(xml_element *el, int (*fptr)(void *data, const char *text, int size), void *data, XML_ELEM_OUTPUT_OPTIONS options, int depth)
/* fallback options: pretty output, markup + non-printable escaping, default encoding */
359 static STRUCT_XML_ELEM_OUTPUT_OPTIONS default_opts = {xml_elem_pretty, xml_elem_markup_escaping | xml_elem_non_print_escaping, XML_DECL_ENCODING_DEFAULT};
/* source of indentation characters; a length-limited prefix of it is written below */
360 static char whitespace[] = " "
366 fprintf(stderr, "Nothing to write\n");
370 options = &default_opts;
373 /* print xml declaration if at root level */
375 xml_elem_writefunc(fptr, XML_DECL_START, data, XML_DECL_START_LEN);
376 xml_elem_writefunc(fptr, WHITESPACE, data, WHITESPACE_LEN);
377 xml_elem_writefunc(fptr, XML_DECL_VERSION, data, XML_DECL_VERSION_LEN);
/* the encoding attribute is only written when an encoding name is set and non-empty */
378 if(options->encoding && *options->encoding) {
379 xml_elem_writefunc(fptr, WHITESPACE, data, WHITESPACE_LEN);
380 xml_elem_writefunc(fptr, XML_DECL_ENCODING_ATTR, data, XML_DECL_ENCODING_ATTR_LEN);
381 xml_elem_writefunc(fptr, EQUALS, data, EQUALS_LEN);
382 xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);
/* length 0 makes xml_elem_writefunc() measure the string itself */
383 xml_elem_writefunc(fptr, options->encoding, data, 0);
384 xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);
386 xml_elem_writefunc(fptr, XML_DECL_END, data, XML_DECL_END_LEN);
387 if(options->verbosity != xml_elem_no_white_space) {
388 xml_elem_writefunc(fptr, NEWLINE, data, NEWLINE_LEN);
/* indentation before the start tag: depth - 2 characters taken from `whitespace`.
 * NOTE(review): no visible check that depth - 2 stays within the size of
 * `whitespace` -- confirm for deeply nested trees. */
392 if(options->verbosity == xml_elem_pretty && depth > 2) {
393 xml_elem_writefunc(fptr, whitespace, data, depth - 2);
396 xml_elem_writefunc(fptr,START_TOKEN_BEGIN, data, START_TOKEN_BEGIN_LEN);
398 xml_elem_writefunc(fptr, el->name, data, 0);
400 /* write attrs, if any */
401 if(Q_Size(&el->attrs)) {
402 xml_element_attr* iter = Q_Head(&el->attrs);
/* each attribute is written as: space, key, EQUALS, delimiter, value, delimiter.
 * The value is passed through as-is; no escaping call is visible here. */
404 xml_elem_writefunc(fptr, WHITESPACE, data, WHITESPACE_LEN);
405 xml_elem_writefunc(fptr, iter->key, data, 0);
406 xml_elem_writefunc(fptr, EQUALS, data, EQUALS_LEN);
407 xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);
408 xml_elem_writefunc(fptr, iter->val, data, 0);
409 xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);
411 iter = Q_Next(&el->attrs);
/* literal tag name "None"; presumably the fallback for a missing el->name
 * (the closing tag below uses the same literal) -- condition not visible */
416 xml_elem_writefunc(fptr, "None", data, 0);
418 /* if no text and no children, use abbreviated form, eg: <foo/> */
419 if(!el->text.len && !Q_Size(&el->children)) {
420 xml_elem_writefunc(fptr, EMPTY_START_TOKEN_END, data, EMPTY_START_TOKEN_END_LEN);
422 /* otherwise, print element contents */
424 xml_elem_writefunc(fptr, START_TOKEN_END, data, START_TOKEN_END_LEN);
426 /* print text, if any */
428 char* escaped_str = el->text.str;
429 int buflen = el->text.len;
/* entity-escape unless escaping is off or is exactly the cdata flag;
 * buflen is overwritten with the escaped length */
431 if(options->escaping && options->escaping != xml_elem_cdata_escaping) {
432 escaped_str = xml_elem_entity_escape(el->text.str, buflen, &buflen, options->escaping );
/* fall back to the raw text (presumably when nothing was escaped -- condition not visible) */
434 escaped_str = el->text.str;
438 if(options->escaping & xml_elem_cdata_escaping) {
439 xml_elem_writefunc(fptr, CDATA_BEGIN, data, CDATA_BEGIN_LEN);
442 xml_elem_writefunc(fptr, escaped_str, data, buflen);
/* only free the buffer when it is a separate escaped copy */
444 if(escaped_str != el->text.str) {
445 my_free(escaped_str);
448 if(options->escaping & xml_elem_cdata_escaping) {
449 xml_elem_writefunc(fptr, CDATA_END, data, CDATA_END_LEN);
452 /* no text, so print child elems */
454 xml_element *kids = Q_Head(&el->children);
458 if(options->verbosity != xml_elem_no_white_space) {
459 xml_elem_writefunc(fptr, NEWLINE, data, NEWLINE_LEN);
/* recurse into each child with the same callback and options */
462 xml_element_serialize(kids, fptr, data, options, depth);
463 kids = Q_Next(&el->children);
/* indentation before the closing tag, mirroring the start tag */
466 if(options->verbosity == xml_elem_pretty && depth > 2) {
467 xml_elem_writefunc(fptr, whitespace, data, depth - 2);
472 xml_elem_writefunc(fptr, END_TOKEN_BEGIN, data, END_TOKEN_BEGIN_LEN);
473 xml_elem_writefunc(fptr,el->name ? el->name : "None", data, 0);
474 xml_elem_writefunc(fptr, END_TOKEN_END, data, END_TOKEN_END_LEN);
476 if(options->verbosity != xml_elem_no_white_space) {
477 xml_elem_writefunc(fptr, NEWLINE, data, NEWLINE_LEN);
481 /* print buf to file */
/*
 * Output callback that writes to a stdio stream.
 *
 * f    - destination stream, a FILE* passed as void*
 * text - bytes to write
 * size - number of bytes of `text` to write
 *
 * Exactly `size` bytes are written with fwrite(). fputs() would ignore
 * `size` and continue to the terminating NUL, emitting too much whenever the
 * caller passes a prefix of a longer buffer (the serializer does so for
 * indentation, writing only the first `depth - 2` bytes of its whitespace
 * buffer).
 *
 * Returns 0 on success; -1 if the stream or text is NULL, size is negative,
 * or fewer than `size` bytes could be written.
 */
static int file_out_fptr(void *f, const char *text, int size)
{
   FILE *out = (FILE *)f;

   if(!out || !text || size < 0) {
      return -1;
   }
   if(fwrite(text, 1, (size_t)size, out) != (size_t)size) {
      return -1;
   }
   return 0;
}
488 /* print buf to simplestring */
/*
 * Output callback that appends `size` bytes of `text` to the simplestring
 * passed (as void*) in `f`.
 * NOTE(review): the surrounding braces, any NULL check and the return
 * statement are not visible in this view.
 */
489 static int simplestring_out_fptr(void *f, const char *text, int size)
/* recover the typed buffer from the opaque callback argument */
491 simplestring* buf = (simplestring*)f;
493 simplestring_addn(buf, text, size);
498 /****f* xml_element/xml_elem_serialize_to_string
500 * xml_elem_serialize_to_string
502 * void xml_element_serialize_to_string(xml_element *el, XML_ELEM_OUTPUT_OPTIONS options, int *buf_len)
504 * writes element tree as XML into a newly allocated buffer
506 * el - root element of tree
507 * options - options determining how output is written. see XML_ELEM_OUTPUT_OPTIONS
508 * buf_len - length of returned buffer, if not null.
510 * char* or NULL. Must be free'd by caller.
513 * xml_elem_serialize_to_stream ()
514 * xml_elem_parse_buf ()
/*
 * Serialize the tree rooted at `el` into a simplestring by running
 * xml_element_serialize() with simplestring_out_fptr as the sink, starting
 * at depth 0.
 * NOTE(review): the declaration of `buf`, the handling of `buf_len` and the
 * return statement are not visible in this view.
 */
517 char* xml_elem_serialize_to_string(xml_element *el, XML_ELEM_OUTPUT_OPTIONS options, int *buf_len)
520 simplestring_init(&buf);
/* the accumulating buffer travels through the serializer as the opaque data pointer */
522 xml_element_serialize(el, simplestring_out_fptr, (void *)&buf, options, 0);
532 /****f* xml_element/xml_elem_serialize_to_stream
534 * xml_elem_serialize_to_stream
536 * void xml_elem_serialize_to_stream(xml_element *el, FILE *output, XML_ELEM_OUTPUT_OPTIONS options)
538 * writes element tree as XML into a stream (typically an opened file)
540 * el - root element of tree
541 * output - stream handle
542 * options - options determining how output is written. see XML_ELEM_OUTPUT_OPTIONS
547 * xml_elem_serialize_to_string ()
548 * xml_elem_parse_buf ()
/*
 * Serialize the tree rooted at `el` to the stdio stream `output` by running
 * xml_element_serialize() with file_out_fptr as the sink, starting at
 * depth 0.
 */
551 void xml_elem_serialize_to_stream(xml_element *el, FILE *output, XML_ELEM_OUTPUT_OPTIONS options)
/* the stream travels through the serializer as the opaque data pointer */
553 xml_element_serialize(el, file_out_fptr, (void *)output, options, 0);
557 /*--------------------------*
558 * End xml_element Functions *
559 *--------------------------*/
562 /*----------------------
563 * Begin Expat Handlers *
564 *---------------------*/
/*
 * Parser state shared with the expat callbacks through the user-data pointer.
 * NOTE(review): xml_elem_parse_buf() also uses a `root` member; its
 * declaration and the closing typedef line are not visible in this view.
 */
566 typedef struct _xml_elem_data {
/* element currently being filled; startElement replaces it, endElement moves it back to the parent */
568 xml_element* current;
/* options supplied to (or defaulted by) xml_elem_parse_buf() */
569 XML_ELEM_INPUT_OPTIONS input_options;
/* non-zero when an encoding is set and differs from encoding_utf_8 */
570 int needs_enc_conversion;
574 /* expat start of element handler */
/*
 * expat start-tag callback: creates a new element named `name`, makes it the
 * current element, and copies the attribute key/value pairs from `attrs`
 * into its attribute queue.
 * NOTE(review): the declaration of `c`, the attribute loop header and any
 * allocation checks are not visible in this view.
 */
575 static void startElement(void *userData, const char *name, const char **attrs)
578 xml_elem_data* mydata = (xml_elem_data*)userData;
/* cursor over attrs; read below as key at *p and value at *(p+1) */
579 const char** p = attrs;
584 mydata->current = xml_elem_new();
/* the element keeps its own copy of the tag name */
585 mydata->current->name = (char*)strdup(name);
/* presumably `c` holds the element that was current before this tag -- confirm */
586 mydata->current->parent = c;
590 xml_element_attr* attr = malloc(sizeof(xml_element_attr));
/* key and value are duplicated so the attribute owns its strings */
592 attr->key = strdup(*p);
593 attr->val = strdup(*(p+1));
594 Q_PushTail(&mydata->current->attrs, attr);
602 /* expat end of element handler */
/*
 * expat end-tag callback: appends the finished element to its parent's child
 * queue and makes the parent the current element again. Nothing is attached
 * when there is no current element or it has no parent.
 */
603 static void endElement(void *userData, const char *name)
605 xml_elem_data* mydata = (xml_elem_data*)userData;
607 if(mydata && mydata->current && mydata->current->parent) {
/* attach the completed element at the end of the parent's children */
608 Q_PushTail(&mydata->current->parent->children, mydata->current);
/* step back up one level in the tree */
610 mydata->current = mydata->current->parent;
614 /* expat char data handler */
/*
 * expat character-data callback: appends the received text to the current
 * element's text buffer, first converting it with utf8_decode() when an
 * encoding conversion is required.
 * NOTE(review): the remaining parameters (`s`, `len`), the declaration of
 * `new_len` and the cleanup of `add_text` are not visible in this view.
 */
615 static void charHandler(void *userData,
619 xml_elem_data* mydata = (xml_elem_data*)userData;
620 if(mydata && mydata->current) {
622 /* Check if we need to decode utf-8 parser output to another encoding */
623 if(mydata->needs_enc_conversion && mydata->input_options->encoding) {
625 char* add_text = utf8_decode(s, len, &new_len, mydata->input_options->encoding);
/* NOTE(review): `len` is used for the converted text; presumably it is
 * updated from new_len on a line not visible here -- confirm */
628 simplestring_addn(&mydata->current->text, add_text, len);
/* no conversion path: append the parser's bytes unchanged */
633 simplestring_addn(&mydata->current->text, s, len);
638 /*-------------------*
639 * End Expat Handlers *
640 *-------------------*/
642 /*-------------------*
643 * xml_elem_parse_buf *
644 *-------------------*/
646 /****f* xml_element/xml_elem_parse_buf
650 * xml_element* xml_elem_parse_buf(const char* in_buf, int len, XML_ELEM_INPUT_OPTIONS options, XML_ELEM_ERROR error)
652 * parse a buffer containing XML into an xml_element in-memory tree
654 * in_buf - buffer containing XML document
655 * len - length of buffer
656 * options - input options. optional
657 * error - error result data. optional. check if result is null.
661 * The returned data must be free'd by caller
663 * xml_elem_serialize_to_string ()
/*
 * Parse the XML document in `in_buf` with expat and build an xml_element
 * tree from it.
 *
 * in_buf  - buffer holding the document
 * len     - byte length of in_buf; strlen(in_buf) is used on a path whose
 *           condition is not visible (presumably when len is 0 -- confirm)
 * options - input options; default_opts (encoding_utf_8) is substituted on a
 *           path whose condition is not visible
 * error   - receives parser code, line, column, byte index and message when
 *           parsing fails
 *
 * Returns the first child of an internal placeholder root with its parent
 * cleared, or the initial NULL on paths where that assignment is not reached.
 * NOTE(review): several conditions, braces and the return statement are not
 * visible in this view.
 */
667 xml_element* xml_elem_parse_buf(const char* in_buf, int len, XML_ELEM_INPUT_OPTIONS options, XML_ELEM_ERROR error)
669 xml_element* xReturn = NULL;
671 static STRUCT_XML_ELEM_INPUT_OPTIONS default_opts = {encoding_utf_8};
674 options = &default_opts;
679 xml_elem_data mydata = {0};
681 parser = XML_ParserCreate(NULL);
/* placeholder root: parsed top-level elements become its children */
683 mydata.root = xml_elem_new();
684 mydata.current = mydata.root;
685 mydata.input_options = options;
/* conversion is needed only when a target encoding is set and is not utf-8 */
686 mydata.needs_enc_conversion = options->encoding && strcmp(options->encoding, encoding_utf_8);
688 XML_SetElementHandler(parser, startElement, endElement);
689 XML_SetCharacterDataHandler(parser, charHandler);
691 /* pass the xml_elem_data struct along */
692 XML_SetUserData(parser, (void*)&mydata);
695 len = strlen(in_buf);
/* the whole document is handed over in one call (final flag = 1); 0 signals a parse error */
699 if(XML_Parse(parser, in_buf, len, 1) == 0) {
700 enum XML_Error err_code = XML_GetErrorCode(parser);
701 int line_num = XML_GetCurrentLineNumber(parser);
702 int col_num = XML_GetCurrentColumnNumber(parser);
703 long byte_idx = XML_GetCurrentByteIndex(parser);
704 int byte_total = XML_GetCurrentByteCount(parser);
705 const char * error_str = XML_ErrorString(err_code);
/* NOTE(review): when byte_idx <= 10 the message reports byte_idx bytes of
 * context but the pointer below is not moved back -- confirm intent */
709 "\n\tdata beginning %ld before byte index: %s\n",
710 byte_idx > 10 ? 10 : byte_idx,
711 in_buf + (byte_idx > 10 ? byte_idx - 10 : byte_idx));
714 fprintf(stderr, "expat reports error code %i\n"
715 "\tdescription: %s\n"
718 "\tbyte index: %ld\n"
719 "\ttotal bytes: %i\n%s ",
720 err_code, error_str, line_num,
721 col_num, byte_idx, byte_total, buf);
724 /* error condition */
/* copy the diagnostics into the caller's error structure */
726 error->parser_code = (long)err_code;
727 error->line = line_num;
728 error->column = col_num;
729 error->byte_index = byte_idx;
730 error->parser_error = error_str;
/* hand back the first child of the placeholder root, detached from it.
 * NOTE(review): Q_Head()'s result is dereferenced without a visible NULL check */
734 xReturn = (xml_element*)Q_Head(&mydata.root->children);
735 xReturn->parent = NULL;
738 XML_ParserFree(parser);
/* release only the placeholder; its children are not freed by this call */
741 xml_elem_free_non_recurse(mydata.root);