From 06696aa76517601ae1a308d44b11f814acd2bc80 Mon Sep 17 00:00:00 2001 From: gggeek Date: Tue, 17 Jan 2023 09:18:34 +0000 Subject: [PATCH] make value of received strings actually be converted to the desired charset; make decodeXml do the same; comments --- src/Encoder.php | 15 ++++++++------- src/Helper/XMLParser.php | 10 +++++++--- src/Request.php | 2 +- src/Server.php | 2 +- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/Encoder.php b/src/Encoder.php index 7691503a..bd5bac31 100644 --- a/src/Encoder.php +++ b/src/Encoder.php @@ -305,12 +305,14 @@ class Encoder * xmlrpc value into the appropriate object (a.k.a. deserialize). * * @param string $xmlVal - * @param array $options + * @param array $options unused atm * @return Value|Request|Response|false false on error, or an instance of either Value, Request or Response * * @todo is this a good name/class for this method? It does something quite different from 'decode' after all * (returning objects vs returns plain php values)... In fact, it belongs rather to a Parser class - * Feature creep -- we should allow an option to return php native types instead of PhpXmlRpc objects instances + * @todo feature creep -- we should allow an option to return php native types instead of PhpXmlRpc objects instances + * @todo feature creep -- allow source charset to be passed in as an option, in case the xml misses its declaration + * @todo feature creep -- allow expected type (val/req/resp) to be passed in as an option */ public function decodeXml($xmlVal, $options = array()) { @@ -319,7 +321,7 @@ class Encoder if ($valEncoding != '') { // Since parsing will fail if - // - charset is not specified in the xml prologue, + // - charset is not specified in the xml declaration, // - the encoding is not UTF8 and // - there are non-ascii chars in the text, // we try to work round that... @@ -339,11 +341,10 @@ class Encoder } // What if internal encoding is not in one of the 3 allowed? We use the broadest one, ie.
utf8! - if (!in_array(PhpXmlRpc::$xmlrpc_internalencoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII'))) { - /// @todo emit a warning - $parserOptions = array(XML_OPTION_TARGET_ENCODING => 'UTF-8'); - } else { + if (in_array(PhpXmlRpc::$xmlrpc_internalencoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII'))) { $parserOptions = array(XML_OPTION_TARGET_ENCODING => PhpXmlRpc::$xmlrpc_internalencoding); + } else { + $parserOptions = array(XML_OPTION_TARGET_ENCODING => 'UTF-8', 'target_charset' => PhpXmlRpc::$xmlrpc_internalencoding); } $xmlRpcParser = $this->getParser(); diff --git a/src/Helper/XMLParser.php b/src/Helper/XMLParser.php index a931e664..dd21736d 100644 --- a/src/Helper/XMLParser.php +++ b/src/Helper/XMLParser.php @@ -53,6 +53,7 @@ class XMLParser 'ac' => '', 'stack' => array(), 'valuestack' => array(), + 'isf' => 0, 'isf_reason' => '', 'value' => null, @@ -118,7 +119,7 @@ class XMLParser } /** - * @param int[] $options passed to the xml parser + * @param array $options passed to the xml parser */ public function __construct(array $options = array()) { @@ -131,7 +132,7 @@ class XMLParser * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE * @param array $options integer-key options are passed to the xml parser, in addition to the options received in * the constructor. String-key options are used independently - * @return void + * @return void the caller has to look into $this->_xh to find the results * @throws \Exception this can happen if a callback function is set and it does throw (ie. 
we do not catch exceptions) */ public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3, $options = array()) @@ -140,6 +141,7 @@ class XMLParser 'ac' => '', 'stack' => array(), 'valuestack' => array(), + 'isf' => 0, 'isf_reason' => '', 'value' => null, @@ -487,7 +489,7 @@ class XMLParser // in case there is charset conversion required, do it here, to catch both cases of string values if (isset($this->current_parsing_options['target_charset']) && $this->_xh['vt'] === Value::$xmlrpcString) { - $this->_xh['vt'] = mb_convert_encoding($this->_xh['vt'], $this->current_parsing_options['target_charset'], 'UTF-8'); + $this->_xh['value'] = mb_convert_encoding($this->_xh['value'], $this->current_parsing_options['target_charset'], 'UTF-8'); } if ($rebuildXmlrpcvals > 0) { @@ -833,6 +835,8 @@ class XMLParser * * @param string $xmlChunk * @return bool + * + * @todo rename to hasEncodingDeclaration */ public static function hasEncoding($xmlChunk) { diff --git a/src/Request.php b/src/Request.php index 9f5ad18e..c82bc122 100644 --- a/src/Request.php +++ b/src/Request.php @@ -317,7 +317,7 @@ class Request /// @todo move this block of code into the XMLParser if ($respEncoding != '') { - // Since parsing will fail if charset is not specified in the xml prologue, + // Since parsing will fail if charset is not specified in the xml declaration, // the encoding is not UTF8 and there are non-ascii chars in the text, we try to work round that... // The following code might be better for mb_string enabled installs, but makes the lib about 200% slower... 
//if (!is_valid_charset($respEncoding, array('UTF-8'))) diff --git a/src/Server.php b/src/Server.php index 9596c7e4..b4136d53 100644 --- a/src/Server.php +++ b/src/Server.php @@ -586,7 +586,7 @@ class Server /// @todo move this block of code into the XMLParser if ($reqEncoding != '') { // Since parsing will fail if - // - charset is not specified in the xml prologue, + // - charset is not specified in the xml declaration, // - the encoding is not UTF8 and // - there are non-ascii chars in the text, // we try to work round that... -- 2.47.0