X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=src%2FHelper%2FXMLParser.php;h=2d3296d4ee6eae2dad2f8617cda1d743ba0dd749;hb=25b0d7c6c540615e854f941dc457f556bfca2408;hp=b4798ccf97c21b4b7435b2f2abd53b88d2cd915a;hpb=5fc0c8cc5cc3d0579e0e810512ca3b672e720db6;p=plcapi.git diff --git a/src/Helper/XMLParser.php b/src/Helper/XMLParser.php index b4798cc..2d3296d 100644 --- a/src/Helper/XMLParser.php +++ b/src/Helper/XMLParser.php @@ -7,30 +7,40 @@ use PhpXmlRpc\Value; /** * Deals with parsing the XML. + * @see http://xmlrpc.com/spec.md */ class XMLParser { - // used to store state during parsing - // quick explanation of components: + const RETURN_XMLRPCVALS = 'xmlrpcvals'; + const RETURN_EPIVALS = 'epivals'; + const RETURN_PHP = 'phpvals'; + + const ACCEPT_REQUEST = 1; + const ACCEPT_RESPONSE = 2; + const ACCEPT_VALUE = 4; + const ACCEPT_FAULT = 8; + + // Used to store state during parsing. + // Quick explanation of components: + // private: // ac - used to accumulate values - // stack - array with genealogy of xml elements names: - // used to validate nesting of xmlrpc elements + // stack - array with genealogy of xml elements names used to validate nesting of xmlrpc elements // valuestack - array used for parsing arrays and structs - // lv - used to indicate "looking for a value": implements - // the logic to allow values with no types to be strings - // isf - used to indicate a parsing fault (2) or xmlrpc response fault (1) + // lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings + // public: + // isf - used to indicate an xml parsing fault (3), invalid xmlrpc fault (2) or xmlrpc response fault (1) // isf_reason - used for storing xmlrpc response fault string // method - used to store method name // params - used to store parameters in method calls // pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values - // rt - 'methodcall or 'methodresponse' + // rt - 'methodcall', 'methodresponse', 'value' or 'fault' (the last one used only in EPI emulation mode) public $_xh = array( 'ac' => '', 'stack' => array(), 'valuestack' => array(), 'isf' => 0, 'isf_reason' => '', - 'method' => false, // so we can check later if we got a methodname or not + 'method' => false, 'params' => array(), 'pt' => array(), 'rt' => '', @@ -41,6 +51,7 @@ class XMLParser 'BOOLEAN' => array('VALUE'), 'I4' => array('VALUE'), 'I8' => array('VALUE'), + 'EX:I8' => array('VALUE'), 'INT' => array('VALUE'), 'STRING' => array('VALUE'), 'DOUBLE' => array('VALUE'), @@ -59,27 +70,128 @@ class XMLParser 'EX:NIL' => array('VALUE'), // only used when extension activated ); + /** @var array $parsing_options */ + protected $parsing_options = array(); + /** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */ + protected $accept = 3; + /** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */ + protected $maxChunkLength = 4194304; + + /** + * @param array $options passed to the xml parser + */ + public function __construct(array $options = array()) + { + $this->parsing_options = $options; + } + + /** + * @param string $data + * @param string $returnType + * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE + * @return string + */ + public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3) + { + $this->_xh = array( + 'ac' => '', + 'stack' => array(), + 'valuestack' => array(), + 'isf' => 0, + 'isf_reason' => '', + 'method' => false, // so we can check later if we got a methodname or not + 'params' => array(), + 'pt' => array(), + 'rt' => '', + ); + + $len = strlen($data); + + // we test for empty documents here to save on resource allocation and simply the chunked-parsing loop below + if ($len == 0) { + $this->_xh['isf'] = 3; + $this->_xh['isf_reason'] = 'XML error 5: empty document'; + return; + } + + $parser = xml_parser_create(); + + foreach ($this->parsing_options as $key => $val) { + xml_parser_set_option($parser, $key, $val); + } + // always set this, in case someone tries to disable it via options... + xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 1); + + xml_set_object($parser, $this); + + switch($returnType) { + case self::RETURN_PHP: + xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_fast'); + break; + case self::RETURN_EPIVALS: + xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_epi'); + break; + default: + xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee'); + } + + xml_set_character_data_handler($parser, 'xmlrpc_cd'); + xml_set_default_handler($parser, 'xmlrpc_dh'); + + $this->accept = $accept; + + // @see ticket #70 - we have to parse big xml docks in chunks to avoid errors + for ($offset = 0; $offset < $len; $offset += $this->maxChunkLength) { + $chunk = substr($data, $offset, $this->maxChunkLength); + // error handling: xml not well formed + if (!xml_parse($parser, $chunk, $offset + $this->maxChunkLength >= $len)) { + $errCode = xml_get_error_code($parser); + $errStr = sprintf('XML error %s: %s at line %d, column %d', $errCode, xml_error_string($errCode), + xml_get_current_line_number($parser), xml_get_current_column_number($parser)); + + $this->_xh['isf'] = 3; + $this->_xh['isf_reason'] = $errStr; + break; + } + } + + xml_parser_free($parser); + } + /** * xml parser handler function for opening element tags. + * @internal + * @param resource $parser + * @param string $name + * @param $attrs + * @param bool $acceptSingleVals DEPRECATED use the $accept parameter instead */ public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false) { // if invalid xmlrpc already detected, skip all processing if ($this->_xh['isf'] < 2) { + // check for correct element nesting - // top level element can only be of 2 types - /// @todo optimization creep: save this check into a bool variable, instead of using count() every time: - /// there is only a single top level element in xml anyway if (count($this->_xh['stack']) == 0) { - if ($name != 'METHODRESPONSE' && $name != 'METHODCALL' && ( - $name != 'VALUE' && !$acceptSingleVals) - ) { + // top level element can only be of 2 types + /// @todo optimization creep: save this check into a bool variable, instead of using count() every time: + /// there is only a single top level element in xml anyway + // BC + if ($acceptSingleVals === false) { + $accept = $this->accept; + } else { + $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE | self::ACCEPT_VALUE; + } + if (($name == 'METHODCALL' && ($accept & self::ACCEPT_REQUEST)) || + ($name == 'METHODRESPONSE' && ($accept & self::ACCEPT_RESPONSE)) || + ($name == 'VALUE' && ($accept & self::ACCEPT_VALUE)) || + ($name == 'FAULT' && ($accept & self::ACCEPT_FAULT))) { + $this->_xh['rt'] = strtolower($name); + } else { $this->_xh['isf'] = 2; - $this->_xh['isf_reason'] = 'missing top level xmlrpc element'; + $this->_xh['isf_reason'] = 'missing top level xmlrpc element. Found: ' . $name; return; - } else { - $this->_xh['rt'] = strtolower($name); } } else { // not top level element: see if parent is OK @@ -101,8 +213,17 @@ class XMLParser $this->_xh['lv'] = 1; $this->_xh['php_class'] = null; break; - case 'I4': case 'I8': + case 'EX:I8': + if (PHP_INT_SIZE === 4) { + // INVALID ELEMENT: RAISE ISF so that it is later recognized!!! + $this->_xh['isf'] = 2; + $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode"; + + return; + } + // fall through voluntarily + case 'I4': case 'INT': case 'STRING': case 'BOOLEAN': @@ -110,7 +231,7 @@ class XMLParser case 'DATETIME.ISO8601': case 'BASE64': if ($this->_xh['vt'] != 'value') { - //two data elements inside a value: an error occurred! + // two data elements inside a value: an error occurred! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value"; @@ -121,7 +242,7 @@ class XMLParser case 'STRUCT': case 'ARRAY': if ($this->_xh['vt'] != 'value') { - //two data elements inside a value: an error occurred! + // two data elements inside a value: an error occurred! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value"; @@ -141,7 +262,7 @@ class XMLParser break; case 'DATA': if ($this->_xh['vt'] != 'data') { - //two data elements inside a value: an error occurred! + // two data elements inside a value: an error occurred! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "found two data elements inside an array element"; @@ -161,7 +282,8 @@ class XMLParser $this->_xh['isf'] = 1; break; case 'MEMBER': - $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = ''; // set member name to null, in case we do not find in the xml later on + // set member name to null, in case we do not find in the xml later on + $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = ''; //$this->_xh['ac']=''; // Drop trough intentionally case 'PARAM': @@ -172,7 +294,7 @@ class XMLParser case 'EX:NIL': if (PhpXmlRpc::$xmlrpc_null_extension) { if ($this->_xh['vt'] != 'value') { - //two data elements inside a value: an error occurred! + // two data elements inside a value: an error occurred! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value"; @@ -184,7 +306,7 @@ class XMLParser // we do not support the extension, so // drop through intentionally default: - /// INVALID ELEMENT: RAISE ISF so that it is later recognized!!! + // INVALID ELEMENT: RAISE ISF so that it is later recognized!!! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "found not-xmlrpc xml element $name"; break; @@ -201,7 +323,12 @@ class XMLParser } /** - * Used in decoding xml chunks that might represent single xmlrpc values. + * xml parser handler function for opening element tags. + * Used in decoding xml chunks that might represent single xmlrpc values as well as requests, responses. + * @deprecated + * @param resource $parser + * @param $name + * @param $attrs */ public function xmlrpc_se_any($parser, $name, $attrs) { @@ -210,8 +337,12 @@ class XMLParser /** * xml parser handler function for close element tags. + * @internal + * @param resource $parser + * @param string $name + * @param int $rebuildXmlrpcvals >1 for rebuilding xmlrpcvals, 0 for rebuilding php values, -1 for xmlrpc-extension compatibility */ - public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = true) + public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = 1) { if ($this->_xh['isf'] < 2) { // push this element name from stack @@ -228,7 +359,7 @@ class XMLParser $this->_xh['vt'] = Value::$xmlrpcString; } - if ($rebuildXmlrpcvals) { + if ($rebuildXmlrpcvals > 0) { // build the xmlrpc val out of the data received, and substitute it $temp = new Value($this->_xh['value'], $this->_xh['vt']); // in case we got info about underlying php class, save it @@ -236,32 +367,39 @@ class XMLParser if (isset($this->_xh['php_class'])) { $temp->_php_class = $this->_xh['php_class']; } - // check if we are inside an array or struct: - // if value just built is inside an array, let's move it into array on the stack - $vscount = count($this->_xh['valuestack']); - if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') { - $this->_xh['valuestack'][$vscount - 1]['values'][] = $temp; - } else { - $this->_xh['value'] = $temp; + $this->_xh['value'] = $temp; + } elseif ($rebuildXmlrpcvals < 0) { + if ($this->_xh['vt'] == Value::$xmlrpcDateTime) { + $this->_xh['value'] = (object)array( + 'xmlrpc_type' => 'datetime', + 'scalar' => $this->_xh['value'], + 'timestamp' => \PhpXmlRpc\Helper\Date::iso8601Decode($this->_xh['value']) + ); + } elseif ($this->_xh['vt'] == Value::$xmlrpcBase64) { + $this->_xh['value'] = (object)array( + 'xmlrpc_type' => 'base64', + 'scalar' => $this->_xh['value'] + ); } } else { - /// @todo this needs to treat correctly php-serialized objects, + /// @todo this should handle php-serialized objects, /// since std deserializing is done by php_xmlrpc_decode, /// which we will not be calling... - if (isset($this->_xh['php_class'])) { - } + //if (isset($this->_xh['php_class'])) { + //} + } - // check if we are inside an array or struct: - // if value just built is inside an array, let's move it into array on the stack - $vscount = count($this->_xh['valuestack']); - if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') { - $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value']; - } + // check if we are inside an array or struct: + // if value just built is inside an array, let's move it into array on the stack + $vscount = count($this->_xh['valuestack']); + if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') { + $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value']; } break; case 'BOOLEAN': case 'I4': case 'I8': + case 'EX:I8': case 'INT': case 'STRING': case 'DOUBLE': @@ -274,7 +412,7 @@ class XMLParser $this->_xh['value'] = $this->_xh['ac']; } elseif ($name == 'DATETIME.ISO8601') { if (!preg_match('/^[0-9]{8}T[0-9]{2}:[0-9]{2}:[0-9]{2}$/', $this->_xh['ac'])) { - error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in DATETIME: ' . $this->_xh['ac']); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid value received in DATETIME: ' . $this->_xh['ac']); } $this->_xh['vt'] = Value::$xmlrpcDateTime; $this->_xh['value'] = $this->_xh['ac']; @@ -293,7 +431,7 @@ class XMLParser } else { // log if receiving something strange, even though we set the value to false anyway if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') != 0) { - error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in BOOLEAN: ' . $this->_xh['ac']); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid value received in BOOLEAN: ' . $this->_xh['ac']); } $this->_xh['value'] = false; } @@ -303,18 +441,18 @@ class XMLParser // NOTE: regexp could be much stricter than this... if (!preg_match('/^[+-eE0123456789 \t.]+$/', $this->_xh['ac'])) { /// @todo: find a better way of throwing an error than this! - error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in DOUBLE: ' . $this->_xh['ac']); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': non numeric value received in DOUBLE: ' . $this->_xh['ac']); $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND'; } else { // it's ok, add it on $this->_xh['value'] = (double)$this->_xh['ac']; } } else { - // we have an I4/INT + // we have an I4/I8/INT // we must check that only 0123456789- are characters here if (!preg_match('/^[+-]?[0123456789 \t]+$/', $this->_xh['ac'])) { /// @todo find a better way of throwing an error than this! - error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in INT: ' . $this->_xh['ac']); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': non numeric value received in INT: ' . $this->_xh['ac']); $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND'; } else { // it's ok, add it on @@ -333,7 +471,7 @@ class XMLParser $vscount = count($this->_xh['valuestack']); $this->_xh['valuestack'][$vscount - 1]['values'][$this->_xh['valuestack'][$vscount - 1]['name']] = $this->_xh['value']; } else { - error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside STRUCT in received xml'); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': missing VALUE inside STRUCT in received xml'); } break; case 'DATA': @@ -356,7 +494,7 @@ class XMLParser $this->_xh['params'][] = $this->_xh['value']; $this->_xh['pt'][] = $this->_xh['vt']; } else { - error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside PARAM in received xml'); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': missing VALUE inside PARAM in received xml'); } break; case 'METHODNAME': @@ -386,14 +524,31 @@ class XMLParser /** * Used in decoding xmlrpc requests/responses without rebuilding xmlrpc Values. + * @internal + * @param resource $parser + * @param string $name */ public function xmlrpc_ee_fast($parser, $name) { - $this->xmlrpc_ee($parser, $name, false); + $this->xmlrpc_ee($parser, $name, 0); + } + + /** + * Used in decoding xmlrpc requests/responses while building xmlrpc-extension Values (plain php for all but base64 and datetime). + * @internal + * @param resource $parser + * @param string $name + */ + public function xmlrpc_ee_epi($parser, $name) + { + $this->xmlrpc_ee($parser, $name, -1); } /** * xml parser handler function for character data. + * @internal + * @param resource $parser + * @param string $data */ public function xmlrpc_cd($parser, $data) { @@ -410,6 +565,9 @@ class XMLParser /** * xml parser handler function for 'other stuff', ie. not char data or * element start/end tag. In fact it only gets called on unknown entities... + * @internal + * @param $parser + * @param string data */ public function xmlrpc_dh($parser, $data) { @@ -420,7 +578,7 @@ class XMLParser } } - return true; + //return true; } /**