X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=php%2Fphpxmlrpc%2Fsrc%2FHelper%2FXMLParser.php;h=f6e79a9a0fa62fc02c88883e5384e2c39e688ed3;hb=c379c0fff5edc592cc5d5d647c7fadb91317db87;hp=b7d137f83f7b2b04c40e3095c610625802f31b94;hpb=ae8b10f8363f7a1df02e77cbd820904c4ded10b8;p=plcapi.git diff --git a/php/phpxmlrpc/src/Helper/XMLParser.php b/php/phpxmlrpc/src/Helper/XMLParser.php index b7d137f..f6e79a9 100644 --- a/php/phpxmlrpc/src/Helper/XMLParser.php +++ b/php/phpxmlrpc/src/Helper/XMLParser.php @@ -7,30 +7,48 @@ use PhpXmlRpc\Value; /** * Deals with parsing the XML. + * @see http://xmlrpc.com/spec.md + * + * @todo implement an interface to allow for alternative implementations + * - make access to $_xh protected, return more high-level data structures + * - add parseRequest, parseResponse, parseValue methods + * @todo if iconv() or mb_string() are available, we could allow to convert the received xml to a custom charset encoding + * while parsing, which is faster than doing it later by going over the rebuilt data structure */ class XMLParser { - // used to store state during parsing - // quick explanation of components: - // ac - used to accumulate values - // stack - array with genealogy of xml elements names: - // used to validate nesting of xmlrpc elements - // valuestack - array used for parsing arrays and structs - // lv - used to indicate "looking for a value": implements - // the logic to allow values with no types to be strings - // isf - used to indicate a parsing fault (2) or xmlrpc response fault (1) - // isf_reason - used for storing xmlrpc response fault string - // method - used to store method name - // params - used to store parameters in method calls - // pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values - // rt - 'methodcall or 'methodresponse' + const RETURN_XMLRPCVALS = 'xmlrpcvals'; + const RETURN_EPIVALS = 'epivals'; + const RETURN_PHP = 'phpvals'; + + const ACCEPT_REQUEST = 1; + const ACCEPT_RESPONSE = 2; + const ACCEPT_VALUE = 4; + const ACCEPT_FAULT = 8; + + // Used to store state during parsing and to pass parsing results to callers. + // Quick explanation of components: + // private: + // ac - used to accumulate values + // stack - array with genealogy of xml elements names used to validate nesting of xmlrpc elements + // valuestack - array used for parsing arrays and structs + // lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings + // public: + // isf - used to indicate an xml parsing fault (3), invalid xmlrpc fault (2) or xmlrpc response fault (1) + // isf_reason - used for storing xmlrpc response fault string + // value - used to store the value in responses + // method - used to store method name in requests + // params - used to store parameters in requests + // pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values + // rt - 'methodcall', 'methodresponse', 'value' or 'fault' (the last one used only in EPI emulation mode) public $_xh = array( 'ac' => '', 'stack' => array(), 'valuestack' => array(), 'isf' => 0, 'isf_reason' => '', - 'method' => false, // so we can check later if we got a methodname or not + 'value' => null, + 'method' => false, 'params' => array(), 'pt' => array(), 'rt' => '', @@ -60,27 +78,132 @@ class XMLParser 'EX:NIL' => array('VALUE'), // only used when extension activated ); + /** @var array $parsing_options */ + protected $parsing_options = array(); + /** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */ + protected $accept = 3; + /** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */ + protected $maxChunkLength = 4194304; + + /** + * @param array $options passed to the xml parser + */ + public function __construct(array $options = array()) + { + $this->parsing_options = $options; + } + + /** + * @param string $data + * @param string $returnType + * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE + * @param array $options + */ + public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3, $options = array()) + { + $this->_xh = array( + 'ac' => '', + 'stack' => array(), + 'valuestack' => array(), + 'isf' => 0, + 'isf_reason' => '', + 'value' => null, + 'method' => false, // so we can check later if we got a methodname or not + 'params' => array(), + 'pt' => array(), + 'rt' => '', + ); + + $len = strlen($data); + + // we test for empty documents here to save on resource allocation and simply the chunked-parsing loop below + if ($len == 0) { + $this->_xh['isf'] = 3; + $this->_xh['isf_reason'] = 'XML error 5: empty document'; + return; + } + + $parser = xml_parser_create(); + + foreach ($this->parsing_options as $key => $val) { + xml_parser_set_option($parser, $key, $val); + } + foreach ($options as $key => $val) { + xml_parser_set_option($parser, $key, $val); + } + // always set this, in case someone tries to disable it via options... + xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 1); + + xml_set_object($parser, $this); + + switch($returnType) { + case self::RETURN_PHP: + xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_fast'); + break; + case self::RETURN_EPIVALS: + xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_epi'); + break; + default: + xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee'); + } + + xml_set_character_data_handler($parser, 'xmlrpc_cd'); + xml_set_default_handler($parser, 'xmlrpc_dh'); + + $this->accept = $accept; + + // @see ticket #70 - we have to parse big xml docks in chunks to avoid errors + for ($offset = 0; $offset < $len; $offset += $this->maxChunkLength) { + $chunk = substr($data, $offset, $this->maxChunkLength); + // error handling: xml not well formed + if (!xml_parse($parser, $chunk, $offset + $this->maxChunkLength >= $len)) { + $errCode = xml_get_error_code($parser); + $errStr = sprintf('XML error %s: %s at line %d, column %d', $errCode, xml_error_string($errCode), + xml_get_current_line_number($parser), xml_get_current_column_number($parser)); + + $this->_xh['isf'] = 3; + $this->_xh['isf_reason'] = $errStr; + break; + } + } + + xml_parser_free($parser); + } + /** * xml parser handler function for opening element tags. + * @internal + * @param resource $parser + * @param string $name + * @param $attrs + * @param bool $acceptSingleVals DEPRECATED use the $accept parameter instead */ public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false) { // if invalid xmlrpc already detected, skip all processing if ($this->_xh['isf'] < 2) { + // check for correct element nesting - // top level element can only be of 2 types - /// @todo optimization creep: save this check into a bool variable, instead of using count() every time: - /// there is only a single top level element in xml anyway if (count($this->_xh['stack']) == 0) { - if ($name != 'METHODRESPONSE' && $name != 'METHODCALL' && ( - $name != 'VALUE' && !$acceptSingleVals) - ) { + // top level element can only be of 2 types + /// @todo optimization creep: save this check into a bool variable, instead of using count() every time: + /// there is only a single top level element in xml anyway + // BC + if ($acceptSingleVals === false) { + $accept = $this->accept; + } else { + $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE | self::ACCEPT_VALUE; + } + if (($name == 'METHODCALL' && ($accept & self::ACCEPT_REQUEST)) || + ($name == 'METHODRESPONSE' && ($accept & self::ACCEPT_RESPONSE)) || + ($name == 'VALUE' && ($accept & self::ACCEPT_VALUE)) || + ($name == 'FAULT' && ($accept & self::ACCEPT_FAULT))) { + $this->_xh['rt'] = strtolower($name); + } else { $this->_xh['isf'] = 2; - $this->_xh['isf_reason'] = 'missing top level xmlrpc element'; + $this->_xh['isf_reason'] = 'missing top level xmlrpc element. Found: ' . $name; return; - } else { - $this->_xh['rt'] = strtolower($name); } } else { // not top level element: see if parent is OK @@ -105,13 +228,13 @@ class XMLParser case 'I8': case 'EX:I8': if (PHP_INT_SIZE === 4) { - /// INVALID ELEMENT: RAISE ISF so that it is later recognized!!! + // INVALID ELEMENT: RAISE ISF so that it is later recognized!!! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode"; return; } - // fall through voluntarily + // fall through voluntarily case 'I4': case 'INT': case 'STRING': @@ -131,7 +254,7 @@ class XMLParser case 'STRUCT': case 'ARRAY': if ($this->_xh['vt'] != 'value') { - //two data elements inside a value: an error occurred! + // two data elements inside a value: an error occurred! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value"; @@ -151,7 +274,7 @@ class XMLParser break; case 'DATA': if ($this->_xh['vt'] != 'data') { - //two data elements inside a value: an error occurred! + // two data elements inside a value: an error occurred! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "found two data elements inside an array element"; @@ -171,7 +294,8 @@ class XMLParser $this->_xh['isf'] = 1; break; case 'MEMBER': - $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = ''; // set member name to null, in case we do not find in the xml later on + // set member name to null, in case we do not find in the xml later on + $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = ''; //$this->_xh['ac']=''; // Drop trough intentionally case 'PARAM': @@ -182,7 +306,7 @@ class XMLParser case 'EX:NIL': if (PhpXmlRpc::$xmlrpc_null_extension) { if ($this->_xh['vt'] != 'value') { - //two data elements inside a value: an error occurred! + // two data elements inside a value: an error occurred! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value"; @@ -194,7 +318,7 @@ class XMLParser // we do not support the extension, so // drop through intentionally default: - /// INVALID ELEMENT: RAISE ISF so that it is later recognized!!! + // INVALID ELEMENT: RAISE ISF so that it is later recognized!!! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "found not-xmlrpc xml element $name"; break; @@ -211,7 +335,12 @@ class XMLParser } /** - * Used in decoding xml chunks that might represent single xmlrpc values. + * xml parser handler function for opening element tags. + * Used in decoding xml chunks that might represent single xmlrpc values as well as requests, responses. + * @deprecated + * @param resource $parser + * @param $name + * @param $attrs */ public function xmlrpc_se_any($parser, $name, $attrs) { @@ -220,8 +349,12 @@ class XMLParser /** * xml parser handler function for close element tags. + * @internal + * @param resource $parser + * @param string $name + * @param int $rebuildXmlrpcvals >1 for rebuilding xmlrpcvals, 0 for rebuilding php values, -1 for xmlrpc-extension compatibility */ - public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = true) + public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = 1) { if ($this->_xh['isf'] < 2) { // push this element name from stack @@ -238,7 +371,7 @@ class XMLParser $this->_xh['vt'] = Value::$xmlrpcString; } - if ($rebuildXmlrpcvals) { + if ($rebuildXmlrpcvals > 0) { // build the xmlrpc val out of the data received, and substitute it $temp = new Value($this->_xh['value'], $this->_xh['vt']); // in case we got info about underlying php class, save it @@ -246,27 +379,33 @@ class XMLParser if (isset($this->_xh['php_class'])) { $temp->_php_class = $this->_xh['php_class']; } - // check if we are inside an array or struct: - // if value just built is inside an array, let's move it into array on the stack - $vscount = count($this->_xh['valuestack']); - if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') { - $this->_xh['valuestack'][$vscount - 1]['values'][] = $temp; - } else { - $this->_xh['value'] = $temp; + $this->_xh['value'] = $temp; + } elseif ($rebuildXmlrpcvals < 0) { + if ($this->_xh['vt'] == Value::$xmlrpcDateTime) { + $this->_xh['value'] = (object)array( + 'xmlrpc_type' => 'datetime', + 'scalar' => $this->_xh['value'], + 'timestamp' => \PhpXmlRpc\Helper\Date::iso8601Decode($this->_xh['value']) + ); + } elseif ($this->_xh['vt'] == Value::$xmlrpcBase64) { + $this->_xh['value'] = (object)array( + 'xmlrpc_type' => 'base64', + 'scalar' => $this->_xh['value'] + ); } } else { - /// @todo this needs to treat correctly php-serialized objects, + /// @todo this should handle php-serialized objects, /// since std deserializing is done by php_xmlrpc_decode, /// which we will not be calling... - if (isset($this->_xh['php_class'])) { - } + //if (isset($this->_xh['php_class'])) { + //} + } - // check if we are inside an array or struct: - // if value just built is inside an array, let's move it into array on the stack - $vscount = count($this->_xh['valuestack']); - if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') { - $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value']; - } + // check if we are inside an array or struct: + // if value just built is inside an array, let's move it into array on the stack + $vscount = count($this->_xh['valuestack']); + if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') { + $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value']; } break; case 'BOOLEAN': @@ -285,7 +424,7 @@ class XMLParser $this->_xh['value'] = $this->_xh['ac']; } elseif ($name == 'DATETIME.ISO8601') { if (!preg_match('/^[0-9]{8}T[0-9]{2}:[0-9]{2}:[0-9]{2}$/', $this->_xh['ac'])) { - error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in DATETIME: ' . $this->_xh['ac']); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid value received in DATETIME: ' . $this->_xh['ac']); } $this->_xh['vt'] = Value::$xmlrpcDateTime; $this->_xh['value'] = $this->_xh['ac']; @@ -304,7 +443,7 @@ class XMLParser } else { // log if receiving something strange, even though we set the value to false anyway if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') != 0) { - error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in BOOLEAN: ' . $this->_xh['ac']); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid value received in BOOLEAN: ' . $this->_xh['ac']); } $this->_xh['value'] = false; } @@ -314,7 +453,7 @@ class XMLParser // NOTE: regexp could be much stricter than this... if (!preg_match('/^[+-eE0123456789 \t.]+$/', $this->_xh['ac'])) { /// @todo: find a better way of throwing an error than this! - error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in DOUBLE: ' . $this->_xh['ac']); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': non numeric value received in DOUBLE: ' . $this->_xh['ac']); $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND'; } else { // it's ok, add it on @@ -325,7 +464,7 @@ class XMLParser // we must check that only 0123456789- are characters here if (!preg_match('/^[+-]?[0123456789 \t]+$/', $this->_xh['ac'])) { /// @todo find a better way of throwing an error than this! - error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in INT: ' . $this->_xh['ac']); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': non numeric value received in INT: ' . $this->_xh['ac']); $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND'; } else { // it's ok, add it on @@ -344,7 +483,7 @@ class XMLParser $vscount = count($this->_xh['valuestack']); $this->_xh['valuestack'][$vscount - 1]['values'][$this->_xh['valuestack'][$vscount - 1]['name']] = $this->_xh['value']; } else { - error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside STRUCT in received xml'); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': missing VALUE inside STRUCT in received xml'); } break; case 'DATA': @@ -367,7 +506,7 @@ class XMLParser $this->_xh['params'][] = $this->_xh['value']; $this->_xh['pt'][] = $this->_xh['vt']; } else { - error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside PARAM in received xml'); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': missing VALUE inside PARAM in received xml'); } break; case 'METHODNAME': @@ -397,14 +536,31 @@ class XMLParser /** * Used in decoding xmlrpc requests/responses without rebuilding xmlrpc Values. + * @internal + * @param resource $parser + * @param string $name */ public function xmlrpc_ee_fast($parser, $name) { - $this->xmlrpc_ee($parser, $name, false); + $this->xmlrpc_ee($parser, $name, 0); + } + + /** + * Used in decoding xmlrpc requests/responses while building xmlrpc-extension Values (plain php for all but base64 and datetime). + * @internal + * @param resource $parser + * @param string $name + */ + public function xmlrpc_ee_epi($parser, $name) + { + $this->xmlrpc_ee($parser, $name, -1); } /** * xml parser handler function for character data. + * @internal + * @param resource $parser + * @param string $data */ public function xmlrpc_cd($parser, $data) { @@ -421,6 +577,9 @@ class XMLParser /** * xml parser handler function for 'other stuff', ie. not char data or * element start/end tag. In fact it only gets called on unknown entities... + * @internal + * @param $parser + * @param string data */ public function xmlrpc_dh($parser, $data) { @@ -431,7 +590,7 @@ class XMLParser } } - return true; + //return true; } /**