From 8614a7f979e3166bff518a92240c4feae0c08ca9 Mon Sep 17 00:00:00 2001 From: gggeek <giunta.gaetano@gmail.com> Date: Mon, 16 Jan 2023 14:54:34 +0000 Subject: [PATCH] convert data to non-utf8 internalencodings if possible --- src/Helper/XMLParser.php | 106 ++++++++++++++++++++++++++++++--------- src/Request.php | 14 ++---- src/Server.php | 20 +++----- 3 files changed, 95 insertions(+), 45 deletions(-) diff --git a/src/Helper/XMLParser.php b/src/Helper/XMLParser.php index 98b94297..a931e664 100644 --- a/src/Helper/XMLParser.php +++ b/src/Helper/XMLParser.php @@ -11,8 +11,7 @@ use PhpXmlRpc\Value; * * @todo implement an interface to allow for alternative implementations * - make access to $_xh protected, return more high-level data structures - * - move $this->accept, $this->callbacks to an internal-use parsing-options config, along with the private - * parts of $_xh + * - move the private parts of $_xh to the internal-use parsing-options config * - add parseRequest, parseResponse, parseValue methods * @todo if iconv() or mb_string() are available, we could allow to convert the received xml to a custom charset encoding * while parsing, which is faster than doing it later by going over the rebuilt data structure @@ -91,14 +90,15 @@ class XMLParser 'EX:NIL' => array('VALUE'), // only used when extension activated ); - /** @var int[] $parsing_options */ + /** @var array $parsing_options */ protected $parsing_options = array(); + /** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */ - protected $accept = 3; + //protected $accept = 3; /** @var int $maxChunkLength 4 MB by default. 
Any value below 10MB should be good */ protected $maxChunkLength = 4194304; - /** @var \Callable[] */ - protected $callbacks = array(); + /** @var array */ + protected $current_parsing_options = array(); public function getLogger() { @@ -158,39 +158,51 @@ class XMLParser return; } - $prevAccept = $this->accept; - $this->accept = $accept; + //$prevAccept = $this->accept; + //$this->accept = $accept; + $this->current_parsing_options = array('accept' => $accept); - $this->callbacks = array(); + $mergedOptions = $this->parsing_options; foreach ($options as $key => $val) { + $mergedOptions[$key] = $val; + } + + foreach ($mergedOptions as $key => $val) { if (is_string($key)) { switch($key) { + case 'target_charset': + if (function_exists('mb_convert_encoding')) { + $this->current_parsing_options['target_charset'] = $val; + } else { + $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ": 'target_charset' option is unsupported without mbstring"); + } + break; + case 'methodname_callback': - if (!is_callable($val)) { + if (is_callable($val)) { + $this->current_parsing_options['methodname_callback'] = $val; + } else { //$this->_xh['isf'] = 4; //$this->_xh['isf_reason'] = "Callback passed as 'methodname_callback' is not callable"; //return; $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ": Callback passed as 'methodname_callback' is not callable"); - } else { - $this->callbacks['methodname'] = $val; } break; + default: $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . 
": unsupported option: $key"); } - unset($options[$key]); + unset($mergedOptions[$key]); } } // NB: we use '' instead of null to force charset detection from the xml declaration $parser = xml_parser_create(''); - foreach ($this->parsing_options as $key => $val) { - xml_parser_set_option($parser, $key, $val); - } - foreach ($options as $key => $val) { + foreach ($mergedOptions as $key => $val) { xml_parser_set_option($parser, $key, $val); } + // always set this, in case someone tries to disable it via options... xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 1); @@ -233,15 +245,15 @@ class XMLParser } } catch (\Exception $e) { xml_parser_free($parser); - $this->callbacks = array(); - $this->accept = $prevAccept; + $this->current_parsing_options = array(); + //$this->accept = $prevAccept; /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ? throw $e; } xml_parser_free($parser); - $this->callbacks = array(); - $this->accept = $prevAccept; + $this->current_parsing_options = array(); + //$this->accept = $prevAccept; } /** @@ -270,8 +282,9 @@ class XMLParser /// there is only a single top level element in xml anyway // BC if ($acceptSingleVals === false) { - $accept = $this->accept; + $accept = $this->current_parsing_options['accept']; } else { + //trigger_error('using argument $acceptSingleVals is deprecated', E_USER_DEPRECATED); $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE | self::ACCEPT_VALUE; } if (($name == 'METHODCALL' && ($accept & self::ACCEPT_REQUEST)) || @@ -472,6 +485,11 @@ class XMLParser $this->_xh['vt'] = Value::$xmlrpcString; } + // in case there is charset conversion required, do it here, to catch both cases of string values (NB: convert the value itself, not its 'vt' type tag) + if (isset($this->current_parsing_options['target_charset']) && $this->_xh['vt'] === Value::$xmlrpcString) { + $this->_xh['value'] = mb_convert_encoding($this->_xh['value'], $this->current_parsing_options['target_charset'], 'UTF-8'); + } + if ($rebuildXmlrpcvals > 0) { // build the xmlrpc val out of 
the data received, and substitute it $temp = new Value($this->_xh['value'], $this->_xh['vt']); @@ -616,8 +634,8 @@ class XMLParser $methodname = preg_replace('/^[\n\r\t ]+/', '', $this->_xh['ac']); $this->_xh['method'] = $methodname; // we allow the callback to f.e. give us back a mangled method name by manipulating $this - if (isset($this->callbacks['methodname'])) { - call_user_func($this->callbacks['methodname'], $methodname, $this, $parser); + if (isset($this->current_parsing_options['methodname_callback'])) { + call_user_func($this->current_parsing_options['methodname_callback'], $methodname, $this, $parser); } break; @@ -840,4 +858,44 @@ class XMLParser return false; } + + // BC layer + + public function __set($name, $value) + { + //trigger_error('setting property Response::' . $name . ' is deprecated', E_USER_DEPRECATED); + + switch ($name) { + case 'accept': + $this->current_parsing_options['accept'] = $value; + break; + default: + $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS); + trigger_error('Undefined property via __set(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING); + } + } + + public function __isset($name) + { + //trigger_error('checking property Response::' . $name . ' is deprecated', E_USER_DEPRECATED); + + switch ($name) { + case 'accept': + return isset($this->current_parsing_options['accept']); + default: + return false; + } + } + + public function __unset($name) + { + switch ($name) { + case 'accept': + unset($this->current_parsing_options['accept']); + break; + default: + $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS); + trigger_error('Undefined property via __unset(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . 
$trace[0]['line'], E_USER_WARNING); + } + } } diff --git a/src/Request.php b/src/Request.php index 37cba2be..9f5ad18e 100644 --- a/src/Request.php +++ b/src/Request.php @@ -315,8 +315,8 @@ class Request return new Response($data, 0, '', 'xml', $this->httpResponse); } + /// @todo move this block of code into the XMLParser if ($respEncoding != '') { - // Since parsing will fail if charset is not specified in the xml prologue, // the encoding is not UTF8 and there are non-ascii chars in the text, we try to work round that... // The following code might be better for mb_string enabled installs, but makes the lib about 200% slower... @@ -328,21 +328,17 @@ class Request if ($respEncoding == 'ISO-8859-1') { $data = utf8_encode($data); } else { - $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid charset encoding of received response: ' . $respEncoding); + $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': unsupported charset encoding of received response: ' . $respEncoding); } } } } - // PHP internally might use ISO-8859-1, so we have to tell the xml parser to give us back data in the expected charset. // What if internal encoding is not in one of the 3 allowed? We use the broadest one, ie. 
utf8 - // This allows to send data which is native in various charset, by extending xmlrpc_encode_entities() and - // setting xmlrpc_internalencoding - if (!in_array(PhpXmlRpc::$xmlrpc_internalencoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII'))) { - /// @todo emit a warning - $options = array(XML_OPTION_TARGET_ENCODING => 'UTF-8'); - } else { + if (in_array(PhpXmlRpc::$xmlrpc_internalencoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII'))) { $options = array(XML_OPTION_TARGET_ENCODING => PhpXmlRpc::$xmlrpc_internalencoding); + } else { + $options = array(XML_OPTION_TARGET_ENCODING => 'UTF-8', 'target_charset' => PhpXmlRpc::$xmlrpc_internalencoding); } $xmlRpcParser = $this->getParser(); diff --git a/src/Server.php b/src/Server.php index e5871f08..9596c7e4 100644 --- a/src/Server.php +++ b/src/Server.php @@ -531,11 +531,11 @@ class Server if ($this->response_charset_encoding == 'auto') { $respEncoding = ''; if (isset($_SERVER['HTTP_ACCEPT_CHARSET'])) { - // here we should check if we can match the client-requested encoding - // with the encodings we know we can generate. + // here we should check if we can match the client-requested encoding with the encodings we know we can generate. /// @todo we should parse q=0.x preferences instead of getting first charset specified... $clientAcceptedCharsets = explode(',', strtoupper($_SERVER['HTTP_ACCEPT_CHARSET'])); // Give preference to internal encoding +/// @todo if mbstring is enabled, we can support other charsets too! Add a method to the Encoder! 
$knownCharsets = array(PhpXmlRpc::$xmlrpc_internalencoding, 'UTF-8', 'ISO-8859-1', 'US-ASCII'); foreach ($knownCharsets as $charset) { foreach ($clientAcceptedCharsets as $accepted) { @@ -583,14 +583,14 @@ class Server { // decompose incoming XML into request structure + /// @todo move this block of code into the XMLParser if ($reqEncoding != '') { // Since parsing will fail if // - charset is not specified in the xml prologue, // - the encoding is not UTF8 and // - there are non-ascii chars in the text, // we try to work round that... - // The following code might be better for mb_string enabled installs, but - // makes the lib about 200% slower... + // The following code might be better for mb_string enabled installs, but it makes the lib about 200% slower... //if (!is_valid_charset($reqEncoding, array('UTF-8'))) if (!in_array($reqEncoding, array('UTF-8', 'US-ASCII')) && !XMLParser::hasEncoding($data)) { if (function_exists('mb_convert_encoding')) { @@ -599,21 +599,17 @@ class Server if ($reqEncoding == 'ISO-8859-1') { $data = utf8_encode($data); } else { - $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid charset encoding of received request: ' . $reqEncoding); + $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': unsupported charset encoding of received request: ' . $reqEncoding); } } } } - // PHP internally might use ISO-8859-1, so we have to tell the xml parser to give us back data in the expected charset. // What if internal encoding is not in one of the 3 allowed? We use the broadest one, ie. 
utf8 - // This allows to send data which is native in various charset, - // by extending xmlrpc_encode_entities() and setting xmlrpc_internalencoding - if (!in_array(PhpXmlRpc::$xmlrpc_internalencoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII'))) { - /// @todo emit a warning - $options = array(XML_OPTION_TARGET_ENCODING => 'UTF-8'); - } else { + if (in_array(PhpXmlRpc::$xmlrpc_internalencoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII'))) { $options = array(XML_OPTION_TARGET_ENCODING => PhpXmlRpc::$xmlrpc_internalencoding); + } else { + $options = array(XML_OPTION_TARGET_ENCODING => 'UTF-8', 'target_charset' => PhpXmlRpc::$xmlrpc_internalencoding); } // register a callback with the xml parser for when it finds the method name $options['methodname_callback'] = array($this, 'methodNameCallback'); -- 2.47.0