Fix: allow library to receive calls/responses where LATIN-1 charset is set in http...
authorgggeek <giunta.gaetano@gmail.com>
Sat, 18 Apr 2015 23:53:12 +0000 (00:53 +0100)
committergggeek <giunta.gaetano@gmail.com>
Sat, 18 Apr 2015 23:53:12 +0000 (00:53 +0100)
NEWS
lib/xmlrpc.inc
lib/xmlrpcs.inc

diff --git a/NEWS b/NEWS
index 5d36f47..d320809 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,11 @@
-XML-RPC for PHP version 3.0.1 - 201X/Y/Z
+XML-RPC for PHP version 3.0.1 - 2015/4/18
+
+This release corrects all bugs that have been reported and successfully reproduced since
+version 3.0.0:
+
+* the library does not correctly decode LATIN-1 requests/responses if the character set is only set in the xml prolog
+* the client cannot call remote methods which use LATIN-1 or UTF-8 characters in their names
+* the debugger sends incorrect requests when the payload includes LATIN-1 characters
 
 Taking baby steps to modern-world php, this release is now tested using Travis ( https://travis-ci.org/ ).
 
@@ -42,7 +49,7 @@ the library is still considered to be production quality.
 
 XML-RPC for PHP version 2.2.2 - 2009/03/16
 
-This release corrects all bugs that have been reported and sucesfully reproduced since
+This release corrects all bugs that have been reported and successfully reproduced since
 version 2.2.1.
 Regardless of the intimidating message about dropping PHP 4 support, it still does
 support that ancient, broken and insecure platform.
@@ -61,7 +68,7 @@ support that ancient, broken and insecure platform.
 
 XML-RPC for PHP version 2.2.1 - 2008/03/06
 
-This release corrects all bugs that have been reported and sucesfully reproduced.
+This release corrects all bugs that have been reported and successfully reproduced.
 It is the last release of the library that will support PHP 4.
 
 * fixed: work around bug in php 5.2.2 which broke support of HTTP_RAW_POST_DATA
@@ -152,7 +159,7 @@ CHANGELOG IN DETAIL:
 * documentation for single parameters of exposed methods can be added to the dispatch map
   (and turned into html docs in conjunction with a future release of the extras package)
 * full response payload is saved into xmlrpcresp object for further debugging
-* stricter parsing of incmoing xmlrpc messages: two more invalid cases are now detected
+* stricter parsing of incoming xmlrpc messages: two more invalid cases are now detected
   (double data element inside array and struct/array after scalar inside value element)
 * debugger can now generate code that wraps a remote method into php function (works for jsonrpc, too)
 * debugger has better support for being activated via a single GET call (for integration into other tools?)
@@ -247,7 +254,7 @@ HTTPS support:
    $xmlrpc_internalencoding was set to UTF-8
  * fixed bug in xmlrpc_server::echoInput() (and marked method as deprecated)
  * correctly set cookies/http headers into xmlrpcresp objects even when the
-   sned() method call fails for some reason
+   send() method call fails for some reason
  * added a benchmark file in the testsuite directory
 
 A couple of (private/protected) methods have been refactored, as well as a
index 117c7b8..a1d0ca8 100644 (file)
 \r
        // set to TRUE to enable encoding of php NULL values to <EX:NIL/> instead of <NIL/>\r
        $GLOBALS['xmlrpc_null_apache_encoding']=false;\r
-    $GLOBALS['xmlrpc_null_apache_encoding_ns']='http://ws.apache.org/xmlrpc/namespaces/extensions';\r
+       $GLOBALS['xmlrpc_null_apache_encoding_ns']='http://ws.apache.org/xmlrpc/namespaces/extensions';\r
 \r
        // used to store state during parsing\r
        // quick explanation of components:\r
                                        curl_setopt($curl, CURLOPT_SSLKEYPASSWD, $keypass);\r
                                }\r
 \r
-                // Upgrade transparently to more stringent check for versions of php which do not support otherwise.\r
-                // Doing it in constructor would be cleaner; doing it here saves us a couple of function calls\r
-                if($this->verifyhost == 1 && $info = curl_version() && version_compare($info['version'], '7.28.1') >= 0)\r
-                {\r
-                    $this->verifyhost = 2;\r
-                }\r
+                               // Upgrade transparently to more stringent check for versions of php which do not support otherwise.\r
+                               // Doing it in constructor would be cleaner; doing it here saves us a couple of function calls\r
+                               if($this->verifyhost == 1 && $info = curl_version() && version_compare($info['version'], '7.28.1') >= 0)\r
+                               {\r
+                                       $this->verifyhost = 2;\r
+                               }\r
                                // whether to verify cert's common name (CN); 0 for no, 1 to verify that it exists, and 2 to verify that it matches the hostname used\r
                                curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, $this->verifyhost);\r
                        }\r
                                        curl_close($curl);\r
                                }\r
                                $resp =& $msg->parseResponse($result, true, $this->return_type);\r
-                // if we got back a 302, we can not reuse the curl handle for later calls\r
-                if($resp->faultCode() == $GLOBALS['xmlrpcerr']['http_error'] && $keepalive)\r
-                {\r
-                    curl_close($curl);\r
-                    $this->xmlrpc_curl_handle = null;\r
-                }\r
+                               // if we got back a 302, we can not reuse the curl handle for later calls\r
+                               if($resp->faultCode() == $GLOBALS['xmlrpcerr']['http_error'] && $keepalive)\r
+                               {\r
+                                       curl_close($curl);\r
+                                       $this->xmlrpc_curl_handle = null;\r
+                               }\r
                        }\r
                        return $resp;\r
                }\r
@@ -2599,17 +2599,24 @@ xmlrpc_encode_entitites($this->errstr, $GLOBALS['xmlrpc_internalencoding'], $cha
                        $GLOBALS['_xh']['isf_reason']='';\r
                        $GLOBALS['_xh']['rt']=''; // 'methodcall or 'methodresponse'\r
 \r
-                       // if response charset encoding is not known / supported, try to use\r
-                       // the default encoding and parse the xml anyway, but log a warning...\r
-                       if (!in_array($resp_encoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII')))\r
-                       // the following code might be better for mb_string enabled installs, but\r
+                       // Since parsing will fail if charset is not specified in the xml prologue,\r
+                       // the encoding is not UTF8 and there are non-ascii chars in the text, we try to work round that...\r
+                       // The following code might be better for mb_string enabled installs, but\r
                        // makes the lib about 200% slower...\r
-                       //if (!is_valid_charset($resp_encoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII')))\r
-                       {\r
-                               error_log('XML-RPC: '.__METHOD__.': invalid charset encoding of received response: '.$resp_encoding);\r
-                               $resp_encoding = $GLOBALS['xmlrpc_defencoding'];\r
+                       //if (!is_valid_charset($resp_encoding, array('UTF-8')))\r
+                       if (!in_array($resp_encoding, array('UTF-8', 'US-ASCII')) && !has_encoding($data)) {\r
+                               if ($resp_encoding == 'ISO-8859-1') {\r
+                                       $data = utf8_encode($data);\r
+                               } else {\r
+                                       if (extension_loaded('mbstring')) {\r
+                                               $data = mb_convert_encoding($data, 'UTF-8', $resp_encoding);\r
+                                       } else {\r
+                                               error_log('XML-RPC: ' . __METHOD__ . ': invalid charset encoding of received request: ' . $resp_encoding);\r
+                                       }\r
+                               }\r
                        }\r
-                       $parser = xml_parser_create($resp_encoding);\r
+\r
+                       $parser = xml_parser_create();\r
                        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true);\r
                        // G. Giunta 2005/02/13: PHP internally uses ISO-8859-1, so we have to tell\r
                        // the xml parser to give us back data in the expected charset.\r
@@ -3566,8 +3573,28 @@ xmlrpc_encode_entitites($this->errstr, $GLOBALS['xmlrpc_internalencoding'], $cha
                $GLOBALS['_xh']['isf_reason'] = '';\r
                $GLOBALS['_xh']['method'] = false;\r
                $GLOBALS['_xh']['rt'] = '';\r
-               /// @todo 'guestimate' encoding\r
-               $parser = xml_parser_create();\r
+\r
+               // 'guestimate' encoding\r
+               $val_encoding = guess_encoding('', $xml_val);\r
+\r
+               // Since parsing will fail if charset is not specified in the xml prologue,\r
+               // the encoding is not UTF8 and there are non-ascii chars in the text, we try to work round that...\r
+               // The following code might be better for mb_string enabled installs, but\r
+               // makes the lib about 200% slower...\r
+               //if (!is_valid_charset($val_encoding, array('UTF-8')))\r
+               if (!in_array($val_encoding, array('UTF-8', 'US-ASCII')) && !has_encoding($xml_val)) {\r
+                       if ($val_encoding == 'ISO-8859-1') {\r
+                               $xml_val = utf8_encode($xml_val);\r
+                       } else {\r
+                               if (extension_loaded('mbstring')) {\r
+                                       $xml_val = mb_convert_encoding($xml_val, 'UTF-8', $val_encoding);\r
+                               } else {\r
+                                       error_log('XML-RPC: ' . __METHOD__ . ': invalid charset encoding of received request: ' . $val_encoding);\r
+                               }\r
+                       }\r
+               }\r
+\r
+        $parser = xml_parser_create();\r
                xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true);\r
                // What if internal encoding is not in one of the 3 allowed?\r
                // we use the broadest one, ie. utf8!\r
@@ -3779,6 +3806,43 @@ xmlrpc_encode_entitites($this->errstr, $GLOBALS['xmlrpc_internalencoding'], $cha
                }\r
        }\r
 \r
+       /**\r
+        * Helper function: checks if an xml chunk has a charset declaration (a leading BOM or an encoding attribute in the xml declaration)\r
+        * Only the presence of a declaration is reported: the declared charset itself is not returned.\r
+        * @param string $xmlChunk the xml text to inspect; every check is anchored at its very first bytes\r
+        * @return bool true if a BOM or an encoding declaration is found, false otherwise\r
+        */\r
+       function has_encoding($xmlChunk)\r
+       {\r
+               // scan the first bytes of the data for a BOM pattern: the 4-byte ones first, then the 2-byte UTF-16 ones, then the 3-byte UTF-8 one\r
+               //       (source: http://www.w3.org/TR/2000/REC-xml-20001006)\r
+               if (preg_match('/^(\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk))\r
+               {\r
+                       return true;\r
+               }\r
+               elseif (preg_match('/^(\xFE\xFF|\xFF\xFE)/', $xmlChunk))\r
+               {\r
+                       return true;\r
+               }\r
+               elseif (preg_match('/^(\xEF\xBB\xBF)/', $xmlChunk))\r
+               {\r
+                       return true;\r
+               }\r
+\r
+               // test if encoding is specified in the xml declaration (matched only at the very start of the chunk, with version coming before encoding)\r
+               // Details:\r
+               // SPACE:                (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+\r
+               // EQ:                  SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*\r
+               if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .\r
+                       '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",\r
+                       $xmlChunk, $matches))\r
+               {\r
+                       return true;\r
+               }\r
+\r
+               return false;\r
+       }\r
+\r
        /**\r
        * Checks if a given charset encoding is present in a list of encodings or\r
        * if it is a valid subset of any encoding in the list\r
index 180595c..6dd64a5 100644 (file)
                        $GLOBALS['_xh']['rt']='';\r
 \r
                        // decompose incoming XML into request structure\r
+\r
                        if ($req_encoding != '')\r
                        {\r
-                               if (!in_array($req_encoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII')))\r
-                               // the following code might be better for mb_string enabled installs, but\r
-                               // makes the lib about 200% slower...\r
-                               //if (!is_valid_charset($req_encoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII')))\r
-                               {\r
-                                       error_log('XML-RPC: '.__METHOD__.': invalid charset encoding of received request: '.$req_encoding);\r
-                                       $req_encoding = $GLOBALS['xmlrpc_defencoding'];\r
-                               }\r
-                               /// @BUG this will fail on PHP 5 if charset is not specified in the xml prologue,\r
-                               // the encoding is not UTF8 and there are non-ascii chars in the text...\r
-                               /// @todo use an empty string for php 5 ???\r
-                               $parser = xml_parser_create($req_encoding);\r
-                       }\r
-                       else\r
-                       {\r
-                               $parser = xml_parser_create();\r
+                // Since parsing will fail if charset is not specified in the xml prologue,\r
+                // the encoding is not UTF8 and there are non-ascii chars in the text, we try to work round that...\r
+                // The following code might be better for mb_string enabled installs, but\r
+                // makes the lib about 200% slower...\r
+                //if (!is_valid_charset($req_encoding, array('UTF-8')))\r
+                if (!in_array($req_encoding, array('UTF-8', 'US-ASCII')) && !has_encoding($data)) {\r
+                    if ($req_encoding == 'ISO-8859-1') {\r
+                        $data = utf8_encode($data);\r
+                    } else {\r
+                        if (extension_loaded('mbstring')) {\r
+                            $data = mb_convert_encoding($data, 'UTF-8', $req_encoding);\r
+                        } else {\r
+                            error_log('XML-RPC: ' . __METHOD__ . ': invalid charset encoding of received request: ' . $req_encoding);\r
+                        }\r
+                    }\r
+                }\r
                        }\r
 \r
+                       $parser = xml_parser_create();\r
                        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true);\r
                        // G. Giunta 2005/02/13: PHP internally uses ISO-8859-1, so we have to tell\r
                        // the xml parser to give us back data in the expected charset\r