From 1ca4b1e930fd6d697d8c6947fca4a02ce5725fa9 Mon Sep 17 00:00:00 2001 From: gggeek Date: Sun, 12 Jul 2015 12:34:53 +0100 Subject: [PATCH] Allow easier configuration to detect exotic charsets when the received payload does not declare them --- demo/server/server.php | 3 +++ src/Helper/XMLParser.php | 14 +++++++++----- src/PhpXmlRpc.php | 8 +++++++- tests/3LocalhostTest.php | 28 +++++++++++++++++++++++++++- tests/4LocalhostMultiTest.php | 2 +- 5 files changed, 47 insertions(+), 8 deletions(-) diff --git a/demo/server/server.php b/demo/server/server.php index 546d265..6c36717 100644 --- a/demo/server/server.php +++ b/demo/server/server.php @@ -956,6 +956,9 @@ $s->compress_response = true; if (isset($_GET['RESPONSE_ENCODING'])) { $s->response_charset_encoding = $_GET['RESPONSE_ENCODING']; } +if (isset($_GET['DETECT_ENCODINGS'])) { + PhpXmlRpc\PhpXmlRpc::$xmlrpc_detectencodings = $_GET['DETECT_ENCODINGS']; +} if (isset($_GET['EXCEPTION_HANDLING'])) { $s->exception_handling = $_GET['EXCEPTION_HANDLING']; } diff --git a/src/Helper/XMLParser.php b/src/Helper/XMLParser.php index b62bf48..2bd14e8 100644 --- a/src/Helper/XMLParser.php +++ b/src/Helper/XMLParser.php @@ -454,8 +454,10 @@ class XMLParser * * @param string $httpHeader the http Content-type header * @param string $xmlChunk xml content buffer - * @param string $encodingPrefs comma separated list of character encodings to be used as default (when mb extension is enabled) - * @return string + * @param string $encodingPrefs comma separated list of character encodings to be used as default (when mb extension is enabled). + * This can also be set globally using PhpXmlRpc::$xmlrpc_detectencodings + * @return string the encoding determined. Null if it can't be determined and mbstring is enabled, + * PhpXmlRpc::$xmlrpc_defencoding if it can't be determined and mbstring is not enabled * * @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of hand-detection!!! */ @@ -464,10 +466,10 @@ class XMLParser // discussion: see http://www.yale.edu/pclt/encoding/ // 1 - test if encoding is specified in HTTP HEADERS - //Details: + // Details: // LWS: (\13\10)?( |\t)+ // token: (any char but excluded stuff)+ - // quoted string: " (any char but double quotes and cointrol chars)* " + // quoted string: " (any char but double quotes and control chars)* " // header: Content-type = ...; charset=value(; ...)* // where value is of type token, no LWS allowed between 'charset' and value // Note: we do not check for invalid chars in VALUE: @@ -507,8 +509,10 @@ class XMLParser } // 4 - if mbstring is available, let it do the guesswork - // NB: we favour finding an encoding that is compatible with what we can process if (extension_loaded('mbstring')) { + if ($encodingPrefs == null && PhpXmlRpc::$xmlrpc_detectencodings != null) { + $encodingPrefs = PhpXmlRpc::$xmlrpc_detectencodings; + } if ($encodingPrefs) { $enc = mb_detect_encoding($xmlChunk, $encodingPrefs); } else { diff --git a/src/PhpXmlRpc.php b/src/PhpXmlRpc.php index 84597e6..0f39faf 100644 --- a/src/PhpXmlRpc.php +++ b/src/PhpXmlRpc.php @@ -60,9 +60,15 @@ class PhpXmlRpc // The charset encoding used by the server for received requests and // by the client for received responses when received charset cannot be determined - // or is not supported + // and mbstring extension is not enabled public static $xmlrpc_defencoding = "UTF-8"; + // The list of encodings used by the server for requests and by the client for responses + // to detect the charset of the received payload when + // - the charset cannot be determined by looking at http headers, xml declaration or BOM + // - mbstring extension is enabled + public static $xmlrpc_detectencodings = array(); + // The encoding used internally by PHP. // String values received as xml will be converted to this, and php strings will be converted to xml // as if having been coded with this. diff --git a/tests/3LocalhostTest.php b/tests/3LocalhostTest.php index d81c0d0..bf51721 100644 --- a/tests/3LocalhostTest.php +++ b/tests/3LocalhostTest.php @@ -206,13 +206,39 @@ class LocalhostTest extends PHPUnit_Framework_TestCase PhpXmlRpc\PhpXmlRpc::$xmlrpc_internalencoding = 'UTF-8'; // we have to set the encoding declaration either in the http header or xml prolog, as mb_detect_encoding - // (used on the server side) will fail recognizing these 2 + // (used on the server side) will fail recognizing these 2 charsets $v = $this->send(mb_convert_encoding(str_replace('_ENC_', 'UCS-4', $str), 'UCS-4', 'UTF-8')); $this->assertEquals($sendString, $v->scalarval()); $v = $this->send(mb_convert_encoding(str_replace('_ENC_', 'UTF-16', $str), 'UTF-16', 'UTF-8')); $this->assertEquals($sendString, $v->scalarval()); PhpXmlRpc\PhpXmlRpc::$xmlrpc_internalencoding = 'ISO-8859-1'; + } + public function testExoticCharsetsRequests2() + { + // note that we should disable this call also when mbstring is missing server-side + if (!function_exists('mb_convert_encoding')) { + $this->markTestSkipped('Miss mbstring extension to test exotic charsets'); + return; + } + $sendString = '安室奈美恵'; // No idea what this means :-) NB: NOT a valid ISO8859 string! + $str = ' + + examples.stringecho + + + '.$sendString.' + + +'; + + PhpXmlRpc\PhpXmlRpc::$xmlrpc_internalencoding = 'UTF-8'; + // no encoding declaration either in the http header or xml prolog, let mb_detect_encoding + // (used on the server side) sort it out + $this->client->path = $this->args['URI'].'?DETECT_ENCODINGS[]=EUC-JP&DETECT_ENCODINGS[]=UTF-8'; + $v = $this->send(mb_convert_encoding($str, 'EUC-JP', 'UTF-8')); + $this->assertEquals($sendString, $v->scalarval()); + PhpXmlRpc\PhpXmlRpc::$xmlrpc_internalencoding = 'ISO-8859-1'; } /*public function testLatin1Method() diff --git a/tests/4LocalhostMultiTest.php b/tests/4LocalhostMultiTest.php index 27eaafc..2489ae5 100644 --- a/tests/4LocalhostMultiTest.php +++ b/tests/4LocalhostMultiTest.php @@ -19,7 +19,7 @@ class LocalhostMultiTest extends LocalhostTest */ function _runtests() { - $unsafeMethods = array('testHttps', 'testCatchExceptions', 'testUtf8Method', 'testServerComments'); + $unsafeMethods = array('testHttps', 'testCatchExceptions', 'testUtf8Method', 'testServerComments', 'testExoticCharsetsRequests2'); foreach(get_class_methods('LocalhostTest') as $method) { if(strpos($method, 'test') === 0 && !in_array($method, $unsafeMethods)) -- 2.43.0