X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=src%2FHelper%2FXMLParser.php;h=f6e79a9a0fa62fc02c88883e5384e2c39e688ed3;hb=b337d292eb5b5656d27a2fc9ab6796be300c59a3;hp=56c5177415d1010257eaec38504f02b0ed678100;hpb=1eeaee97ebed903e581849ba2e4cc38a22014d6e;p=plcapi.git diff --git a/src/Helper/XMLParser.php b/src/Helper/XMLParser.php index 56c5177..f6e79a9 100644 --- a/src/Helper/XMLParser.php +++ b/src/Helper/XMLParser.php @@ -7,36 +7,47 @@ use PhpXmlRpc\Value; /** * Deals with parsing the XML. + * @see http://xmlrpc.com/spec.md + * + * @todo implement an interface to allow for alternative implementations + * - make access to $_xh protected, return more high-level data structures + * - add parseRequest, parseResponse, parseValue methods + * @todo if iconv() or mb_string() are available, we could allow to convert the received xml to a custom charset encoding + * while parsing, which is faster than doing it later by going over the rebuilt data structure */ class XMLParser { const RETURN_XMLRPCVALS = 'xmlrpcvals'; + const RETURN_EPIVALS = 'epivals'; const RETURN_PHP = 'phpvals'; const ACCEPT_REQUEST = 1; const ACCEPT_RESPONSE = 2; const ACCEPT_VALUE = 4; + const ACCEPT_FAULT = 8; - // Used to store state during parsing. + // Used to store state during parsing and to pass parsing results to callers. // Quick explanation of components: // private: - // ac - used to accumulate values - // stack - array with genealogy of xml elements names used to validate nesting of xmlrpc elements - // valuestack - array used for parsing arrays and structs - // lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings + // ac - used to accumulate values + // stack - array with genealogy of xml elements names used to validate nesting of xmlrpc elements + // valuestack - array used for parsing arrays and structs + // lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings // public: - // isf - used to indicate an xml parsing fault (3), invalid xmlrpc fault (2) or xmlrpc response fault (1) - // isf_reason - used for storing xmlrpc response fault string - // method - used to store method name - // params - used to store parameters in method calls - // pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values - // rt - 'methodcall', 'methodresponse' or 'value' + // isf - used to indicate an xml parsing fault (3), invalid xmlrpc fault (2) or xmlrpc response fault (1) + // isf_reason - used for storing xmlrpc response fault string + // value - used to store the value in responses + // method - used to store method name in requests + // params - used to store parameters in requests + // pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values + // rt - 'methodcall', 'methodresponse', 'value' or 'fault' (the last one used only in EPI emulation mode) public $_xh = array( 'ac' => '', 'stack' => array(), 'valuestack' => array(), 'isf' => 0, 'isf_reason' => '', + 'value' => null, 'method' => false, 'params' => array(), 'pt' => array(), @@ -69,10 +80,10 @@ class XMLParser /** @var array $parsing_options */ protected $parsing_options = array(); - /** @var int $accept */ - protected $accept = 3; // self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE; - /** @var int $maxChunkLength */ - protected $maxChunkLength = 4194304; // 4 MB + /** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */ + protected $accept = 3; + /** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */ + protected $maxChunkLength = 4194304; /** * @param array $options passed to the xml parser @@ -82,34 +93,58 @@ class XMLParser $this->parsing_options = $options; } - /** - * @param array $options passed to the xml parser - */ - public function setParsingOptions(array $options) - { - $this->parsing_options = $options; - } - /** * @param string $data * @param string $returnType * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE - * @return string + * @param array $options */ - public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3) + public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3, $options = array()) { + $this->_xh = array( + 'ac' => '', + 'stack' => array(), + 'valuestack' => array(), + 'isf' => 0, + 'isf_reason' => '', + 'value' => null, + 'method' => false, // so we can check later if we got a methodname or not + 'params' => array(), + 'pt' => array(), + 'rt' => '', + ); + + $len = strlen($data); + + // we test for empty documents here to save on resource allocation and simply the chunked-parsing loop below + if ($len == 0) { + $this->_xh['isf'] = 3; + $this->_xh['isf_reason'] = 'XML error 5: empty document'; + return; + } + $parser = xml_parser_create(); foreach ($this->parsing_options as $key => $val) { xml_parser_set_option($parser, $key, $val); } + foreach ($options as $key => $val) { + xml_parser_set_option($parser, $key, $val); + } + // always set this, in case someone tries to disable it via options... + xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 1); xml_set_object($parser, $this); - if ($returnType == self::RETURN_PHP) { - xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_fast'); - } else { - xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee'); + switch($returnType) { + case self::RETURN_PHP: + xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_fast'); + break; + case self::RETURN_EPIVALS: + xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_epi'); + break; + default: + xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee'); } xml_set_character_data_handler($parser, 'xmlrpc_cd'); @@ -117,19 +152,7 @@ class XMLParser $this->accept = $accept; - $this->_xh = array( - 'ac' => '', - 'stack' => array(), - 'valuestack' => array(), - 'isf' => 0, - 'isf_reason' => '', - 'method' => false, // so we can check later if we got a methodname or not - 'params' => array(), - 'pt' => array(), - 'rt' => '', - ); - - $len = strlen($data); + // @see ticket #70 - we have to parse big xml docks in chunks to avoid errors for ($offset = 0; $offset < $len; $offset += $this->maxChunkLength) { $chunk = substr($data, $offset, $this->maxChunkLength); // error handling: xml not well formed @@ -149,6 +172,7 @@ class XMLParser /** * xml parser handler function for opening element tags. + * @internal * @param resource $parser * @param string $name * @param $attrs @@ -172,7 +196,8 @@ class XMLParser } if (($name == 'METHODCALL' && ($accept & self::ACCEPT_REQUEST)) || ($name == 'METHODRESPONSE' && ($accept & self::ACCEPT_RESPONSE)) || - ($name == 'VALUE' && ($accept & self::ACCEPT_VALUE))) { + ($name == 'VALUE' && ($accept & self::ACCEPT_VALUE)) || + ($name == 'FAULT' && ($accept & self::ACCEPT_FAULT))) { $this->_xh['rt'] = strtolower($name); } else { $this->_xh['isf'] = 2; @@ -209,7 +234,7 @@ class XMLParser return; } - // fall through voluntarily + // fall through voluntarily case 'I4': case 'INT': case 'STRING': @@ -293,7 +318,7 @@ class XMLParser // we do not support the extension, so // drop through intentionally default: - /// INVALID ELEMENT: RAISE ISF so that it is later recognized!!! + // INVALID ELEMENT: RAISE ISF so that it is later recognized!!! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "found not-xmlrpc xml element $name"; break; @@ -324,11 +349,12 @@ class XMLParser /** * xml parser handler function for close element tags. + * @internal * @param resource $parser * @param string $name - * @param bool $rebuildXmlrpcvals + * @param int $rebuildXmlrpcvals >1 for rebuilding xmlrpcvals, 0 for rebuilding php values, -1 for xmlrpc-extension compatibility */ - public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = true) + public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = 1) { if ($this->_xh['isf'] < 2) { // push this element name from stack @@ -345,7 +371,7 @@ class XMLParser $this->_xh['vt'] = Value::$xmlrpcString; } - if ($rebuildXmlrpcvals) { + if ($rebuildXmlrpcvals > 0) { // build the xmlrpc val out of the data received, and substitute it $temp = new Value($this->_xh['value'], $this->_xh['vt']); // in case we got info about underlying php class, save it @@ -353,27 +379,33 @@ class XMLParser if (isset($this->_xh['php_class'])) { $temp->_php_class = $this->_xh['php_class']; } - // check if we are inside an array or struct: - // if value just built is inside an array, let's move it into array on the stack - $vscount = count($this->_xh['valuestack']); - if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') { - $this->_xh['valuestack'][$vscount - 1]['values'][] = $temp; - } else { - $this->_xh['value'] = $temp; + $this->_xh['value'] = $temp; + } elseif ($rebuildXmlrpcvals < 0) { + if ($this->_xh['vt'] == Value::$xmlrpcDateTime) { + $this->_xh['value'] = (object)array( + 'xmlrpc_type' => 'datetime', + 'scalar' => $this->_xh['value'], + 'timestamp' => \PhpXmlRpc\Helper\Date::iso8601Decode($this->_xh['value']) + ); + } elseif ($this->_xh['vt'] == Value::$xmlrpcBase64) { + $this->_xh['value'] = (object)array( + 'xmlrpc_type' => 'base64', + 'scalar' => $this->_xh['value'] + ); } } else { - /// @todo this needs to treat correctly php-serialized objects, + /// @todo this should handle php-serialized objects, /// since std deserializing is done by php_xmlrpc_decode, /// which we will not be calling... - if (isset($this->_xh['php_class'])) { - } + //if (isset($this->_xh['php_class'])) { + //} + } - // check if we are inside an array or struct: - // if value just built is inside an array, let's move it into array on the stack - $vscount = count($this->_xh['valuestack']); - if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') { - $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value']; - } + // check if we are inside an array or struct: + // if value just built is inside an array, let's move it into array on the stack + $vscount = count($this->_xh['valuestack']); + if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') { + $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value']; } break; case 'BOOLEAN': @@ -392,7 +424,7 @@ class XMLParser $this->_xh['value'] = $this->_xh['ac']; } elseif ($name == 'DATETIME.ISO8601') { if (!preg_match('/^[0-9]{8}T[0-9]{2}:[0-9]{2}:[0-9]{2}$/', $this->_xh['ac'])) { - error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in DATETIME: ' . $this->_xh['ac']); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid value received in DATETIME: ' . $this->_xh['ac']); } $this->_xh['vt'] = Value::$xmlrpcDateTime; $this->_xh['value'] = $this->_xh['ac']; @@ -411,7 +443,7 @@ class XMLParser } else { // log if receiving something strange, even though we set the value to false anyway if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') != 0) { - error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in BOOLEAN: ' . $this->_xh['ac']); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid value received in BOOLEAN: ' . $this->_xh['ac']); } $this->_xh['value'] = false; } @@ -421,7 +453,7 @@ class XMLParser // NOTE: regexp could be much stricter than this... if (!preg_match('/^[+-eE0123456789 \t.]+$/', $this->_xh['ac'])) { /// @todo: find a better way of throwing an error than this! - error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in DOUBLE: ' . $this->_xh['ac']); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': non numeric value received in DOUBLE: ' . $this->_xh['ac']); $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND'; } else { // it's ok, add it on @@ -432,7 +464,7 @@ class XMLParser // we must check that only 0123456789- are characters here if (!preg_match('/^[+-]?[0123456789 \t]+$/', $this->_xh['ac'])) { /// @todo find a better way of throwing an error than this! - error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in INT: ' . $this->_xh['ac']); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': non numeric value received in INT: ' . $this->_xh['ac']); $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND'; } else { // it's ok, add it on @@ -451,7 +483,7 @@ class XMLParser $vscount = count($this->_xh['valuestack']); $this->_xh['valuestack'][$vscount - 1]['values'][$this->_xh['valuestack'][$vscount - 1]['name']] = $this->_xh['value']; } else { - error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside STRUCT in received xml'); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': missing VALUE inside STRUCT in received xml'); } break; case 'DATA': @@ -474,7 +506,7 @@ class XMLParser $this->_xh['params'][] = $this->_xh['value']; $this->_xh['pt'][] = $this->_xh['vt']; } else { - error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside PARAM in received xml'); + Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': missing VALUE inside PARAM in received xml'); } break; case 'METHODNAME': @@ -504,16 +536,29 @@ class XMLParser /** * Used in decoding xmlrpc requests/responses without rebuilding xmlrpc Values. + * @internal * @param resource $parser * @param string $name */ public function xmlrpc_ee_fast($parser, $name) { - $this->xmlrpc_ee($parser, $name, false); + $this->xmlrpc_ee($parser, $name, 0); + } + + /** + * Used in decoding xmlrpc requests/responses while building xmlrpc-extension Values (plain php for all but base64 and datetime). + * @internal + * @param resource $parser + * @param string $name + */ + public function xmlrpc_ee_epi($parser, $name) + { + $this->xmlrpc_ee($parser, $name, -1); } /** * xml parser handler function for character data. + * @internal * @param resource $parser * @param string $data */ @@ -532,6 +577,7 @@ class XMLParser /** * xml parser handler function for 'other stuff', ie. not char data or * element start/end tag. In fact it only gets called on unknown entities... + * @internal * @param $parser * @param string data */