*/
class XMLParser
{
- // used to store state during parsing
- // quick explanation of components:
+ const RETURN_XMLRPCVALS = 'xmlrpcvals';
+ const RETURN_PHP = 'phpvals';
+
+ const ACCEPT_REQUEST = 1;
+ const ACCEPT_RESPONSE = 2;
+ const ACCEPT_VALUE = 4;
+
+ // Used to store state during parsing.
+ // Quick explanation of components:
+ // private:
// ac - used to accumulate values
- // stack - array with genealogy of xml elements names:
- // used to validate nesting of xmlrpc elements
+ // stack - array with genealogy of xml elements names used to validate nesting of xmlrpc elements
// valuestack - array used for parsing arrays and structs
- // lv - used to indicate "looking for a value": implements
- // the logic to allow values with no types to be strings
- // isf - used to indicate a parsing fault (2) or xmlrpc response fault (1)
+ // lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings
+ // public:
+ // isf - used to indicate an xml parsing fault (3), invalid xmlrpc fault (2) or xmlrpc response fault (1)
// isf_reason - used for storing xmlrpc response fault string
// method - used to store method name
// params - used to store parameters in method calls
// pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values
- // rt - 'methodcall or 'methodresponse'
+ // rt - 'methodcall', 'methodresponse' or 'value'
public $_xh = array(
'ac' => '',
'stack' => array(),
'valuestack' => array(),
'isf' => 0,
'isf_reason' => '',
- 'method' => false, // so we can check later if we got a methodname or not
+ 'method' => false,
'params' => array(),
'pt' => array(),
'rt' => '',
'BOOLEAN' => array('VALUE'),
'I4' => array('VALUE'),
'I8' => array('VALUE'),
+ 'EX:I8' => array('VALUE'),
'INT' => array('VALUE'),
'STRING' => array('VALUE'),
'DOUBLE' => array('VALUE'),
'EX:NIL' => array('VALUE'), // only used when extension activated
);
+ /** @var array $parsing_options */
+ protected $parsing_options = array();
+ /** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */
+ protected $accept = 3;
+ /** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */
+ protected $maxChunkLength = 4194304;
+
+ /**
+ * Stores the options that will be applied to the xml parser created by parse().
+ *
+ * @param array $options passed to the xml parser: every key/value pair is handed to xml_parser_set_option()
+ *                       when parse() creates the parser. XML_OPTION_CASE_FOLDING is always forced on by parse(),
+ *                       regardless of what is passed here
+ */
+ public function __construct(array $options = array())
+ {
+ $this->parsing_options = $options;
+ }
+
+ /**
+ * Parses an xml document, recording the outcome in $this->_xh.
+ *
+ * The parsing state held in $this->_xh is reset at every call. When the document is empty or not well-formed,
+ * $this->_xh['isf'] is set to 3 and $this->_xh['isf_reason'] to a description of the error.
+ * The data is fed to the xml parser in chunks of at most $this->maxChunkLength bytes.
+ *
+ * @param string $data the xml to parse
+ * @param string $returnType when equal to self::RETURN_PHP, xmlrpc_ee_fast is used as handler for closing tags;
+ *                           otherwise xmlrpc_ee is used
+ * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE
+ * @return void nothing is returned: the outcome (including errors) is recorded in $this->_xh
+ */
+ public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3)
+ {
+ $this->_xh = array(
+ 'ac' => '',
+ 'stack' => array(),
+ 'valuestack' => array(),
+ 'isf' => 0,
+ 'isf_reason' => '',
+ 'method' => false, // so we can check later if we got a methodname or not
+ 'params' => array(),
+ 'pt' => array(),
+ 'rt' => '',
+ );
+
+ $len = strlen($data);
+
+ // we test for empty documents here to save on resource allocation and simplify the chunked-parsing loop below
+ if ($len == 0) {
+ $this->_xh['isf'] = 3;
+ $this->_xh['isf_reason'] = 'XML error 5: empty document';
+ return;
+ }
+
+ $parser = xml_parser_create();
+
+ foreach ($this->parsing_options as $key => $val) {
+ xml_parser_set_option($parser, $key, $val);
+ }
+ // always set this, in case someone tries to disable it via options...
+ xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 1);
+
+ xml_set_object($parser, $this);
+
+ if ($returnType == self::RETURN_PHP) {
+ xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_fast');
+ } else {
+ xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee');
+ }
+
+ xml_set_character_data_handler($parser, 'xmlrpc_cd');
+ xml_set_default_handler($parser, 'xmlrpc_dh');
+
+ $this->accept = $accept;
+
+ // @see ticket #70 - we have to parse big xml docs in chunks to avoid errors
+ for ($offset = 0; $offset < $len; $offset += $this->maxChunkLength) {
+ $chunk = substr($data, $offset, $this->maxChunkLength);
+ // error handling: xml not well formed
+ if (!xml_parse($parser, $chunk, $offset + $this->maxChunkLength >= $len)) {
+ $errCode = xml_get_error_code($parser);
+ $errStr = sprintf('XML error %s: %s at line %d, column %d', $errCode, xml_error_string($errCode),
+ xml_get_current_line_number($parser), xml_get_current_column_number($parser));
+
+ $this->_xh['isf'] = 3;
+ $this->_xh['isf_reason'] = $errStr;
+ break;
+ }
+ }
+
+ xml_parser_free($parser);
+ }
+
/**
* xml parser handler function for opening element tags.
+ * @param resource $parser
+ * @param string $name
+ * @param $attrs
+ * @param bool $acceptSingleVals DEPRECATED use the $accept parameter instead
*/
public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false)
{
// if invalid xmlrpc already detected, skip all processing
if ($this->_xh['isf'] < 2) {
+
// check for correct element nesting
- // top level element can only be of 2 types
- /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
- /// there is only a single top level element in xml anyway
if (count($this->_xh['stack']) == 0) {
- if ($name != 'METHODRESPONSE' && $name != 'METHODCALL' && (
- $name != 'VALUE' && !$acceptSingleVals)
- ) {
+ // top level element must be METHODCALL, METHODRESPONSE or VALUE, and only when allowed by the accept flags
+ /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
+ /// there is only a single top level element in xml anyway
+ // BC
+ if ($acceptSingleVals === false) {
+ $accept = $this->accept;
+ } else {
+ $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE | self::ACCEPT_VALUE;
+ }
+ if (($name == 'METHODCALL' && ($accept & self::ACCEPT_REQUEST)) ||
+ ($name == 'METHODRESPONSE' && ($accept & self::ACCEPT_RESPONSE)) ||
+ ($name == 'VALUE' && ($accept & self::ACCEPT_VALUE))) {
+ $this->_xh['rt'] = strtolower($name);
+ } else {
$this->_xh['isf'] = 2;
- $this->_xh['isf_reason'] = 'missing top level xmlrpc element';
+ $this->_xh['isf_reason'] = 'missing top level xmlrpc element. Found: ' . $name;
return;
- } else {
- $this->_xh['rt'] = strtolower($name);
}
} else {
// not top level element: see if parent is OK
$this->_xh['lv'] = 1;
$this->_xh['php_class'] = null;
break;
- case 'I4':
case 'I8':
+ case 'EX:I8':
+ if (PHP_INT_SIZE === 4) {
+ // INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
+ $this->_xh['isf'] = 2;
+ $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode";
+
+ return;
+ }
+ // fall through voluntarily
+ case 'I4':
case 'INT':
case 'STRING':
case 'BOOLEAN':
case 'DATETIME.ISO8601':
case 'BASE64':
if ($this->_xh['vt'] != 'value') {
- //two data elements inside a value: an error occurred!
+ // two data elements inside a value: an error occurred!
$this->_xh['isf'] = 2;
$this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";
case 'STRUCT':
case 'ARRAY':
if ($this->_xh['vt'] != 'value') {
- //two data elements inside a value: an error occurred!
+ // two data elements inside a value: an error occurred!
$this->_xh['isf'] = 2;
$this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";
break;
case 'DATA':
if ($this->_xh['vt'] != 'data') {
- //two data elements inside a value: an error occurred!
+ // two data elements inside a value: an error occurred!
$this->_xh['isf'] = 2;
$this->_xh['isf_reason'] = "found two data elements inside an array element";
$this->_xh['isf'] = 1;
break;
case 'MEMBER':
- $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = ''; // set member name to null, in case we do not find in the xml later on
+ // set member name to an empty string, in case we do not find it in the xml later on
+ $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = '';
//$this->_xh['ac']='';
// Drop trough intentionally
case 'PARAM':
case 'EX:NIL':
if (PhpXmlRpc::$xmlrpc_null_extension) {
if ($this->_xh['vt'] != 'value') {
- //two data elements inside a value: an error occurred!
+ // two data elements inside a value: an error occurred!
$this->_xh['isf'] = 2;
$this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";
}
/**
- * Used in decoding xml chunks that might represent single xmlrpc values.
+ * xml parser handler function for opening element tags.
+ * Used in decoding xml chunks that might represent single xmlrpc values as well as requests, responses.
+ * @deprecated
+ * @param resource $parser
+ * @param $name
+ * @param $attrs
*/
public function xmlrpc_se_any($parser, $name, $attrs)
{
/**
* xml parser handler function for close element tags.
+ * @param resource $parser
+ * @param string $name
+ * @param bool $rebuildXmlrpcvals
*/
public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = true)
{
case 'BOOLEAN':
case 'I4':
case 'I8':
+ case 'EX:I8':
case 'INT':
case 'STRING':
case 'DOUBLE':
$this->_xh['value'] = (double)$this->_xh['ac'];
}
} else {
- // we have an I4/INT
+ // we have an I4/I8/INT
// we must check that only 0123456789-<space> are characters here
if (!preg_match('/^[+-]?[0123456789 \t]+$/', $this->_xh['ac'])) {
/// @todo find a better way of throwing an error than this!
/**
* Used in decoding xmlrpc requests/responses without rebuilding xmlrpc Values.
+ * @param resource $parser
+ * @param string $name
*/
public function xmlrpc_ee_fast($parser, $name)
{
/**
* xml parser handler function for character data.
+ * @param resource $parser
+ * @param string $data
*/
public function xmlrpc_cd($parser, $data)
{
/**
* xml parser handler function for 'other stuff', ie. not char data or
* element start/end tag. In fact it only gets called on unknown entities...
+ * @param resource $parser
+ * @param string $data
*/
public function xmlrpc_dh($parser, $data)
{
}
}
- return true;
+ //return true;
}
/**