From 86c373e86d8c6f5612e3283db65fb340a65bfe19 Mon Sep 17 00:00:00 2001 From: gggeek Date: Mon, 29 Jul 2019 11:37:32 +0000 Subject: [PATCH] WIP fix #71: allow parsing of large xml files --- NEWS | 7 +- demo/client/agesort.php | 2 +- src/Encoder.php | 27 ++----- src/Helper/Logger.php | 2 +- src/Helper/XMLParser.php | 163 +++++++++++++++++++++++++++++++++------ src/Request.php | 60 +++++--------- src/Server.php | 56 ++++++-------- 7 files changed, 193 insertions(+), 124 deletions(-) diff --git a/NEWS b/NEWS index fa06c12b..11c82835 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,11 @@ +XML-RPC for PHP version 4.4.0 - 2019/xx/yy + +* fixed: allow handling huge xml messages (>=10MB) + + XML-RPC for PHP version 4.3.2 - 2019/5/27 -* fixed: remove one php 7.2 warning when using the v3 compatibility layer +* fixed: remove one php 7.2 warning when using the v3 api * improved: the Travis tests are now run with all php versions from 5.6 to 7.3. We dropped tests with php 5.3, 5.4 and 5.5 diff --git a/demo/client/agesort.php b/demo/client/agesort.php index 90622d21..60cc4b80 100644 --- a/demo/client/agesort.php +++ b/demo/client/agesort.php @@ -15,7 +15,7 @@ PhpXmlRpc\Autoloader::register(); $inAr = array("Dave" => 24, "Edd" => 45, "Joe" => 37, "Fred" => 27); print "This is the input data:
";
-foreach($inAr as $key => $val) {
+foreach ($inAr as $key => $val) {
     print $key . ", " . $val . "\n";
 }
 print "
"; diff --git a/src/Encoder.php b/src/Encoder.php index ebf3060d..ab413f44 100644 --- a/src/Encoder.php +++ b/src/Encoder.php @@ -257,32 +257,16 @@ class Encoder } } - $parser = xml_parser_create(); - xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true); - // What if internal encoding is not in one of the 3 allowed? - // we use the broadest one, ie. utf8! + // What if internal encoding is not in one of the 3 allowed? We use the broadest one, ie. utf8! if (!in_array(PhpXmlRpc::$xmlrpc_internalencoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII'))) { - xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, 'UTF-8'); + $options = array(XML_OPTION_TARGET_ENCODING => 'UTF-8'); } else { - xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, PhpXmlRpc::$xmlrpc_internalencoding); + $options = array(XML_OPTION_TARGET_ENCODING => PhpXmlRpc::$xmlrpc_internalencoding); } - $xmlRpcParser = new XMLParser(); - xml_set_object($parser, $xmlRpcParser); + $xmlRpcParser = new XMLParser($options); + $xmlRpcParser->parse($xmlVal, XMLParser::RETURN_XMLRPCVALS, XMLParser::ACCEPT_REQUEST | XMLParser::ACCEPT_RESPONSE | XMLParser::ACCEPT_VALUE); - xml_set_element_handler($parser, 'xmlrpc_se_any', 'xmlrpc_ee'); - xml_set_character_data_handler($parser, 'xmlrpc_cd'); - xml_set_default_handler($parser, 'xmlrpc_dh'); - if (!xml_parse($parser, $xmlVal, 1)) { - $errstr = sprintf('XML error: %s at line %d, column %d', - xml_error_string(xml_get_error_code($parser)), - xml_get_current_line_number($parser), xml_get_current_column_number($parser)); - error_log($errstr); - xml_parser_free($parser); - - return false; - } - xml_parser_free($parser); if ($xmlRpcParser->_xh['isf'] > 1) { // test that $xmlrpc->_xh['value'] is an obj, too??? 
@@ -290,6 +274,7 @@ class Encoder return false; } + switch ($xmlRpcParser->_xh['rt']) { case 'methodresponse': $v = $xmlRpcParser->_xh['value']; diff --git a/src/Helper/Logger.php b/src/Helper/Logger.php index 85353ebf..56f633d2 100644 --- a/src/Helper/Logger.php +++ b/src/Helper/Logger.php @@ -28,7 +28,7 @@ class Logger * @param string $message * @param string $encoding */ - public function debugMessage($message, $encoding=null) + public function debugMessage($message, $encoding = null) { // US-ASCII is a warning for PHP and a fatal for HHVM if ($encoding == 'US-ASCII') { diff --git a/src/Helper/XMLParser.php b/src/Helper/XMLParser.php index b7d137f8..56c51774 100644 --- a/src/Helper/XMLParser.php +++ b/src/Helper/XMLParser.php @@ -10,27 +10,34 @@ use PhpXmlRpc\Value; */ class XMLParser { - // used to store state during parsing - // quick explanation of components: + const RETURN_XMLRPCVALS = 'xmlrpcvals'; + const RETURN_PHP = 'phpvals'; + + const ACCEPT_REQUEST = 1; + const ACCEPT_RESPONSE = 2; + const ACCEPT_VALUE = 4; + + // Used to store state during parsing. 
+ // Quick explanation of components: + // private: // ac - used to accumulate values - // stack - array with genealogy of xml elements names: - // used to validate nesting of xmlrpc elements + // stack - array with genealogy of xml elements names used to validate nesting of xmlrpc elements // valuestack - array used for parsing arrays and structs - // lv - used to indicate "looking for a value": implements - // the logic to allow values with no types to be strings - // isf - used to indicate a parsing fault (2) or xmlrpc response fault (1) + // lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings + // public: + // isf - used to indicate an xml parsing fault (3), invalid xmlrpc fault (2) or xmlrpc response fault (1) // isf_reason - used for storing xmlrpc response fault string // method - used to store method name // params - used to store parameters in method calls // pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values - // rt - 'methodcall or 'methodresponse' + // rt - 'methodcall', 'methodresponse' or 'value' public $_xh = array( 'ac' => '', 'stack' => array(), 'valuestack' => array(), 'isf' => 0, 'isf_reason' => '', - 'method' => false, // so we can check later if we got a methodname or not + 'method' => false, 'params' => array(), 'pt' => array(), 'rt' => '', @@ -60,27 +67,118 @@ class XMLParser 'EX:NIL' => array('VALUE'), // only used when extension activated ); + /** @var array $parsing_options */ + protected $parsing_options = array(); + /** @var int $accept */ + protected $accept = 3; // self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE; + /** @var int $maxChunkLength */ + protected $maxChunkLength = 4194304; // 4 MB + + /** + * @param array $options passed to the xml parser + */ + public function __construct(array $options = array()) + { + $this->parsing_options = $options; + } + + /** + * @param array $options passed to the xml parser + */ 
+ public function setParsingOptions(array $options) + { + $this->parsing_options = $options; + } + + /** + * @param string $data the xml payload; it is fed to the parser in chunks of at most $this->maxChunkLength bytes + * @param string $returnType self::RETURN_PHP selects the xmlrpc_ee_fast end-element handler, any other value selects xmlrpc_ee + * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE + * @return void the outcome is stored in $this->_xh ('isf' is set to 3 and 'isf_reason' filled in when the xml is not well formed) + */ + public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3) + { + $parser = xml_parser_create(); + + foreach ($this->parsing_options as $key => $val) { + xml_parser_set_option($parser, $key, $val); + } + + xml_set_object($parser, $this); + + if ($returnType == self::RETURN_PHP) { + xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_fast'); + } else { + xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee'); + } + + xml_set_character_data_handler($parser, 'xmlrpc_cd'); + xml_set_default_handler($parser, 'xmlrpc_dh'); + + $this->accept = $accept; + + $this->_xh = array( + 'ac' => '', + 'stack' => array(), + 'valuestack' => array(), + 'isf' => 0, + 'isf_reason' => '', + 'method' => false, // so we can check later if we got a methodname or not + 'params' => array(), + 'pt' => array(), + 'rt' => '', + ); + + $len = max(strlen($data), 1); // an empty payload still gets one (empty, final) chunk, so that the parser flags it as not well formed + for ($offset = 0; $offset < $len; $offset += $this->maxChunkLength) { + $chunk = substr($data, $offset, $this->maxChunkLength); + // error handling: xml not well formed (the last chunk is flagged as final so that truncated documents are detected) + if (!xml_parse($parser, $chunk, $offset + $this->maxChunkLength >= $len)) { + $errCode = xml_get_error_code($parser); + $errStr = sprintf('XML error %s: %s at line %d, column %d', $errCode, xml_error_string($errCode), + xml_get_current_line_number($parser), xml_get_current_column_number($parser)); + + $this->_xh['isf'] = 3; + $this->_xh['isf_reason'] = $errStr; + break; + } + } + + xml_parser_free($parser); + } + /** * xml parser handler function for opening element tags. 
+ * @param resource $parser + * @param string $name + * @param $attrs + * @param bool $acceptSingleVals DEPRECATED use the $accept parameter instead */ public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false) { // if invalid xmlrpc already detected, skip all processing if ($this->_xh['isf'] < 2) { + // check for correct element nesting - // top level element can only be of 2 types - /// @todo optimization creep: save this check into a bool variable, instead of using count() every time: - /// there is only a single top level element in xml anyway if (count($this->_xh['stack']) == 0) { - if ($name != 'METHODRESPONSE' && $name != 'METHODCALL' && ( - $name != 'VALUE' && !$acceptSingleVals) - ) { + // top level element can only be of 2 types + /// @todo optimization creep: save this check into a bool variable, instead of using count() every time: + /// there is only a single top level element in xml anyway + // BC + if ($acceptSingleVals === false) { + $accept = $this->accept; + } else { + $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE | self::ACCEPT_VALUE; + } + if (($name == 'METHODCALL' && ($accept & self::ACCEPT_REQUEST)) || + ($name == 'METHODRESPONSE' && ($accept & self::ACCEPT_RESPONSE)) || + ($name == 'VALUE' && ($accept & self::ACCEPT_VALUE))) { + $this->_xh['rt'] = strtolower($name); + } else { $this->_xh['isf'] = 2; - $this->_xh['isf_reason'] = 'missing top level xmlrpc element'; + $this->_xh['isf_reason'] = 'missing top level xmlrpc element. Found: ' . $name; return; - } else { - $this->_xh['rt'] = strtolower($name); } } else { // not top level element: see if parent is OK @@ -105,7 +203,7 @@ class XMLParser case 'I8': case 'EX:I8': if (PHP_INT_SIZE === 4) { - /// INVALID ELEMENT: RAISE ISF so that it is later recognized!!! + // INVALID ELEMENT: RAISE ISF so that it is later recognized!!! 
$this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode"; @@ -131,7 +229,7 @@ class XMLParser case 'STRUCT': case 'ARRAY': if ($this->_xh['vt'] != 'value') { - //two data elements inside a value: an error occurred! + // two data elements inside a value: an error occurred! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value"; @@ -151,7 +249,7 @@ class XMLParser break; case 'DATA': if ($this->_xh['vt'] != 'data') { - //two data elements inside a value: an error occurred! + // two data elements inside a value: an error occurred! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "found two data elements inside an array element"; @@ -171,7 +269,8 @@ class XMLParser $this->_xh['isf'] = 1; break; case 'MEMBER': - $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = ''; // set member name to null, in case we do not find in the xml later on + // set member name to null, in case we do not find in the xml later on + $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = ''; //$this->_xh['ac']=''; // Drop trough intentionally case 'PARAM': @@ -182,7 +281,7 @@ class XMLParser case 'EX:NIL': if (PhpXmlRpc::$xmlrpc_null_extension) { if ($this->_xh['vt'] != 'value') { - //two data elements inside a value: an error occurred! + // two data elements inside a value: an error occurred! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value"; @@ -211,7 +310,12 @@ class XMLParser } /** - * Used in decoding xml chunks that might represent single xmlrpc values. + * xml parser handler function for opening element tags. + * Used in decoding xml chunks that might represent single xmlrpc values as well as requests, responses. 
+ * @deprecated + * @param resource $parser + * @param string $name + * @param array $attrs */ public function xmlrpc_se_any($parser, $name, $attrs) { @@ -220,6 +324,9 @@ class XMLParser /** * xml parser handler function for close element tags. + * @param resource $parser + * @param string $name + * @param bool $rebuildXmlrpcvals */ public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = true) { @@ -397,6 +504,8 @@ class XMLParser /** * Used in decoding xmlrpc requests/responses without rebuilding xmlrpc Values. + * @param resource $parser + * @param string $name */ public function xmlrpc_ee_fast($parser, $name) { @@ -405,6 +514,8 @@ class XMLParser /** * xml parser handler function for character data. + * @param resource $parser + * @param string $data */ public function xmlrpc_cd($parser, $data) { @@ -421,6 +532,8 @@ class XMLParser /** * xml parser handler function for 'other stuff', ie. not char data or * element start/end tag. In fact it only gets called on unknown entities... + * @param resource $parser + * @param string $data */ public function xmlrpc_dh($parser, $data) { @@ -431,7 +544,7 @@ class XMLParser } } - return true; + //return true; } /** diff --git a/src/Request.php b/src/Request.php index b3b8112a..e3ca0484 100644 --- a/src/Request.php +++ b/src/Request.php @@ -150,18 +150,18 @@ class Request * because we cannot trust the caller to give us a valid pointer to an open file... * * @param resource $fp stream pointer + * @param bool $headersProcessed + * @param string $returnType * * @return Response - * - * @todo add 2nd & 3rd param to be passed to ParseResponse() ??? 
*/ - public function parseResponseFile($fp) + public function parseResponseFile($fp, $headersProcessed = false, $returnType = 'xmlrpcvals') { $ipd = ''; while ($data = fread($fp, 32768)) { $ipd .= $data; } - return $this->parseResponse($ipd); + return $this->parseResponse($ipd, $headersProcessed, $returnType); } /** @@ -261,57 +261,33 @@ class Request } } - $parser = xml_parser_create(); - xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true); - // G. Giunta 2005/02/13: PHP internally uses ISO-8859-1, so we have to tell - // the xml parser to give us back data in the expected charset. - // What if internal encoding is not in one of the 3 allowed? - // we use the broadest one, ie. utf8 + // PHP internally might use ISO-8859-1, so we have to tell the xml parser to give us back data in the expected charset. + // What if internal encoding is not in one of the 3 allowed? We use the broadest one, ie. utf8 // This allows to send data which is native in various charset, // by extending xmlrpc_encode_entities() and setting xmlrpc_internalencoding if (!in_array(PhpXmlRpc::$xmlrpc_internalencoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII'))) { - xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, 'UTF-8'); + $options = array(XML_OPTION_TARGET_ENCODING => 'UTF-8'); } else { - xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, PhpXmlRpc::$xmlrpc_internalencoding); + $options = array(XML_OPTION_TARGET_ENCODING => PhpXmlRpc::$xmlrpc_internalencoding); } - $xmlRpcParser = new XMLParser(); - xml_set_object($parser, $xmlRpcParser); + $xmlRpcParser = new XMLParser($options); + $xmlRpcParser->parse($data, $returnType, XMLParser::ACCEPT_RESPONSE); - if ($returnType == 'phpvals') { - xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_fast'); - } else { - xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee'); - } + // first error check: xml not well formed + if ($xmlRpcParser->_xh['isf'] > 2) { - xml_set_character_data_handler($parser, 'xmlrpc_cd'); - 
xml_set_default_handler($parser, 'xmlrpc_dh'); + // BC break: in the past for some cases we used the error message: 'XML error at line 1, check URL' + + $r = new Response(0, PhpXmlRpc::$xmlrpcerr['invalid_return'], + PhpXmlRpc::$xmlrpcstr['invalid_return'] . ' ' . $xmlRpcParser->_xh['isf_reason']); - // first error check: xml not well formed - if (!xml_parse($parser, $data, 1)) { - // thanks to Peter Kocks - if ((xml_get_current_line_number($parser)) == 1) { - $errStr = 'XML error at line 1, check URL'; - } else { - $errStr = sprintf('XML error: %s at line %d, column %d', - xml_error_string(xml_get_error_code($parser)), - xml_get_current_line_number($parser), xml_get_current_column_number($parser)); - } - error_log($errStr); - $r = new Response(0, PhpXmlRpc::$xmlrpcerr['invalid_return'], PhpXmlRpc::$xmlrpcstr['invalid_return'] . ' ' . $errStr); - xml_parser_free($parser); if ($this->debug) { - print $errStr; + print $xmlRpcParser->_xh['isf_reason']; } - $r->hdrs = $this->httpResponse['headers']; - $r->_cookies = $this->httpResponse['cookies']; - $r->raw_data = $this->httpResponse['raw_data']; - - return $r; } - xml_parser_free($parser); // second error check: xml well formed but not xml-rpc compliant - if ($xmlRpcParser->_xh['isf'] > 1) { + elseif ($xmlRpcParser->_xh['isf'] == 2) { if ($this->debug) { /// @todo echo something for user? } diff --git a/src/Server.php b/src/Server.php index 49a54836..9738e868 100644 --- a/src/Server.php +++ b/src/Server.php @@ -155,6 +155,9 @@ class Server static::$_xmlrpc_debuginfo .= $msg . "\n"; } + /** + * @param string $msg + */ public static function error_occurred($msg) { static::$_xmlrpcs_occurred_errors .= $msg . "\n"; @@ -453,7 +456,7 @@ class Server $reqEncoding = XMLParser::guessEncoding(isset($_SERVER['CONTENT_TYPE']) ? 
$_SERVER['CONTENT_TYPE'] : '', $data); - return; + return null; } /** @@ -490,50 +493,38 @@ class Server } } - $parser = xml_parser_create(); - xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true); - // G. Giunta 2005/02/13: PHP internally uses ISO-8859-1, so we have to tell - // the xml parser to give us back data in the expected charset - // What if internal encoding is not in one of the 3 allowed? - // we use the broadest one, ie. utf8 + // PHP internally might use ISO-8859-1, so we have to tell the xml parser to give us back data in the expected charset. + // What if internal encoding is not in one of the 3 allowed? We use the broadest one, ie. utf8 // This allows to send data which is native in various charset, // by extending xmlrpc_encode_entities() and setting xmlrpc_internalencoding if (!in_array(PhpXmlRpc::$xmlrpc_internalencoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII'))) { - xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, 'UTF-8'); + $options = array(XML_OPTION_TARGET_ENCODING => 'UTF-8'); } else { - xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, PhpXmlRpc::$xmlrpc_internalencoding); + $options = array(XML_OPTION_TARGET_ENCODING => PhpXmlRpc::$xmlrpc_internalencoding); } - $xmlRpcParser = new XMLParser(); - xml_set_object($parser, $xmlRpcParser); - - if ($this->functions_parameters_type != 'xmlrpcvals') { - xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_fast'); - } else { - xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee'); - } - xml_set_character_data_handler($parser, 'xmlrpc_cd'); - xml_set_default_handler($parser, 'xmlrpc_dh'); - if (!xml_parse($parser, $data, 1)) { - // return XML error as a faultCode + $xmlRpcParser = new XMLParser($options); + $xmlRpcParser->parse($data, $this->functions_parameters_type != 'xmlrpcvals' ? XMLParser::RETURN_PHP : XMLParser::RETURN_XMLRPCVALS, XMLParser::ACCEPT_REQUEST); + if ($xmlRpcParser->_xh['isf'] > 2) { + // (BC) we return XML error as a faultCode: the libxml/expat error number is recovered from the 'XML error NN: ...' reason string built by the parser + preg_match('/^XML error ([0-9]+)/', $xmlRpcParser->_xh['isf_reason'], $matches); $r 
= new Response(0, - PhpXmlRpc::$xmlrpcerrxml + xml_get_error_code($parser), - sprintf('XML error: %s at line %d, column %d', - xml_error_string(xml_get_error_code($parser)), - xml_get_current_line_number($parser), xml_get_current_column_number($parser))); - xml_parser_free($parser); + PhpXmlRpc::$xmlrpcerrxml + $matches[1], + $xmlRpcParser->_xh['isf_reason']); } elseif ($xmlRpcParser->_xh['isf']) { - xml_parser_free($parser); $r = new Response(0, PhpXmlRpc::$xmlrpcerr['invalid_request'], PhpXmlRpc::$xmlrpcstr['invalid_request'] . ' ' . $xmlRpcParser->_xh['isf_reason']); } else { - xml_parser_free($parser); // small layering violation in favor of speed and memory usage: // we should allow the 'execute' method handle this, but in the // most common scenario (xmlrpc values type server with some methods // registered as phpvals) that would mean a useless encode+decode pass - if ($this->functions_parameters_type != 'xmlrpcvals' || (isset($this->dmap[$xmlRpcParser->_xh['method']]['parameters_type']) && ($this->dmap[$xmlRpcParser->_xh['method']]['parameters_type'] == 'phpvals'))) { + if ($this->functions_parameters_type != 'xmlrpcvals' || + (isset($this->dmap[$xmlRpcParser->_xh['method']]['parameters_type']) && + ($this->dmap[$xmlRpcParser->_xh['method']]['parameters_type'] == 'phpvals') + ) + ) { if ($this->debug > 1) { $this->debugmsg("\n+++PARSED+++\n" . var_export($xmlRpcParser->_xh['params'], true) . 
"\n+++END+++"); } @@ -838,10 +829,10 @@ class Server /** * @param Server $server - * @param Request $req + * @param Request $req if called in plain php values mode, second param is missing * @return Response */ - public static function _xmlrpcs_listMethods($server, $req = null) // if called in plain php values mode, second param is missing + public static function _xmlrpcs_listMethods($server, $req = null) { $outAr = array(); foreach ($server->dmap as $key => $val) { @@ -1022,7 +1013,6 @@ class Server // this is a real dirty and simplistic hack, since we might have received a // base64 or datetime values, but they will be listed as strings here... - $numParams = count($call['params']); $pt = array(); $wrapper = new Wrapper(); foreach ($call['params'] as $val) { @@ -1040,7 +1030,7 @@ class Server /** * @param Server $server - * @param Request $req + * @param Request|array $req * @return Response */ public static function _xmlrpcs_multicall($server, $req) -- 2.47.0