Fix: allow library to receive calls/responses where LATIN-1 charset is set in http...
[plcapi.git] / lib / xmlrpc.inc
index 117c7b8..a1d0ca8 100644 (file)
 \r
        // set to TRUE to enable encoding of php NULL values to <EX:NIL/> instead of <NIL/>\r
        $GLOBALS['xmlrpc_null_apache_encoding']=false;\r
-    $GLOBALS['xmlrpc_null_apache_encoding_ns']='http://ws.apache.org/xmlrpc/namespaces/extensions';\r
+       $GLOBALS['xmlrpc_null_apache_encoding_ns']='http://ws.apache.org/xmlrpc/namespaces/extensions';\r
 \r
        // used to store state during parsing\r
        // quick explanation of components:\r
                                        curl_setopt($curl, CURLOPT_SSLKEYPASSWD, $keypass);\r
                                }\r
 \r
-                // Upgrade transparently to more stringent check for versions of php which do not support otherwise.\r
-                // Doing it in constructor would be cleaner; doing it here saves us a couple of function calls\r
-                if($this->verifyhost == 1 && $info = curl_version() && version_compare($info['version'], '7.28.1') >= 0)\r
-                {\r
-                    $this->verifyhost = 2;\r
-                }\r
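+                               // Upgrade transparently to the more stringent check on curl versions (>= 7.28.1) which do not support the value 1.\r
+                               // Doing it in the constructor would be cleaner; doing it here saves us a couple of function calls.\r
+                               // NOTE(review): '&&' binds tighter than '=', so $info receives a boolean and $info['version'] is read\r
+                               // before $info is set; the assignment needs its own parentheses for this check to ever succeed.\r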
+                               if($this->verifyhost == 1 && $info = curl_version() && version_compare($info['version'], '7.28.1') >= 0)\r
+                               {\r
+                                       $this->verifyhost = 2;\r
+                               }\r
                                // whether to verify cert's common name (CN); 0 for no, 1 to verify that it exists, and 2 to verify that it matches the hostname used\r
                                curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, $this->verifyhost);\r
                        }\r
                                        curl_close($curl);\r
                                }\r
                                $resp =& $msg->parseResponse($result, true, $this->return_type);\r
-                // if we got back a 302, we can not reuse the curl handle for later calls\r
-                if($resp->faultCode() == $GLOBALS['xmlrpcerr']['http_error'] && $keepalive)\r
-                {\r
-                    curl_close($curl);\r
-                    $this->xmlrpc_curl_handle = null;\r
-                }\r
+                               // if we got back a 302, we can not reuse the curl handle for later calls\r
+                               if($resp->faultCode() == $GLOBALS['xmlrpcerr']['http_error'] && $keepalive)\r
+                               {\r
+                                       curl_close($curl);\r
+                                       $this->xmlrpc_curl_handle = null;\r
+                               }\r
                        }\r
                        return $resp;\r
                }\r
@@ -2599,17 +2599,24 @@ xmlrpc_encode_entitites($this->errstr, $GLOBALS['xmlrpc_internalencoding'], $cha
                        $GLOBALS['_xh']['isf_reason']='';\r
                        $GLOBALS['_xh']['rt']=''; // 'methodcall or 'methodresponse'\r
 \r
-                       // if response charset encoding is not known / supported, try to use\r
-                       // the default encoding and parse the xml anyway, but log a warning...\r
-                       if (!in_array($resp_encoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII')))\r
-                       // the following code might be better for mb_string enabled installs, but\r
+                       // Parsing fails when the charset is not specified in the xml prologue, the encoding is not UTF-8\r
+                       // and the text contains non-ascii chars: to work around that, we try to convert the data to UTF-8 beforehand.\r
+                       // The following code might be better for mb_string enabled installs, but\r
                        // makes the lib about 200% slower...\r
-                       //if (!is_valid_charset($resp_encoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII')))\r
-                       {\r
-                               error_log('XML-RPC: '.__METHOD__.': invalid charset encoding of received response: '.$resp_encoding);\r
-                               $resp_encoding = $GLOBALS['xmlrpc_defencoding'];\r
+                       //if (!is_valid_charset($resp_encoding, array('UTF-8')))\r
+                       if (!in_array($resp_encoding, array('UTF-8', 'US-ASCII')) && !has_encoding($data)) {\r
+                               if ($resp_encoding == 'ISO-8859-1') {\r
+                                       $data = utf8_encode($data);\r
+                               } else {\r
+                                       if (extension_loaded('mbstring')) {\r
+                                               $data = mb_convert_encoding($data, 'UTF-8', $resp_encoding);\r
+                                       } else {\r
+                                               error_log('XML-RPC: ' . __METHOD__ . ': invalid charset encoding of received request: ' . $resp_encoding);\r
+                                       }\r
+                               }\r
                        }\r
-                       $parser = xml_parser_create($resp_encoding);\r
+\r
+                       $parser = xml_parser_create();\r
                        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true);\r
                        // G. Giunta 2005/02/13: PHP internally uses ISO-8859-1, so we have to tell\r
                        // the xml parser to give us back data in the expected charset.\r
@@ -3566,8 +3573,28 @@ xmlrpc_encode_entitites($this->errstr, $GLOBALS['xmlrpc_internalencoding'], $cha
                $GLOBALS['_xh']['isf_reason'] = '';\r
                $GLOBALS['_xh']['method'] = false;\r
                $GLOBALS['_xh']['rt'] = '';\r
-               /// @todo 'guestimate' encoding\r
-               $parser = xml_parser_create();\r
+\r
+               // 'guestimate' encoding\r
+               $val_encoding = guess_encoding('', $xml_val);\r
+\r
+               // Parsing fails when the charset is not specified in the xml prologue, the encoding is not UTF-8\r
+               // and the text contains non-ascii chars: to work around that, we try to convert the data to UTF-8 beforehand.\r
+               // The following code might be better for mb_string enabled installs, but\r
+               // makes the lib about 200% slower...\r
+               //if (!is_valid_charset($val_encoding, array('UTF-8')))\r
+               if (!in_array($val_encoding, array('UTF-8', 'US-ASCII')) && !has_encoding($xml_val)) {\r
+                       if ($val_encoding == 'ISO-8859-1') {\r
+                               $xml_val = utf8_encode($xml_val);\r
+                       } else {\r
+                               if (extension_loaded('mbstring')) {\r
+                                       $xml_val = mb_convert_encoding($xml_val, 'UTF-8', $val_encoding);\r
+                               } else {\r
+                                       error_log('XML-RPC: ' . __METHOD__ . ': invalid charset encoding of received request: ' . $val_encoding);\r
+                               }\r
+                       }\r
+               }\r
+\r
+        $parser = xml_parser_create();\r
                xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true);\r
                // What if internal encoding is not in one of the 3 allowed?\r
                // we use the broadest one, ie. utf8!\r
@@ -3779,6 +3806,43 @@ xmlrpc_encode_entitites($this->errstr, $GLOBALS['xmlrpc_internalencoding'], $cha
                }\r
        }\r
 \r
+       /**\r
+        * Helper function: checks if an xml chunk has a charset declaration (BOM or in the xml declaration)\r
+        *\r
+        * Only the presence of a declaration is reported; the declared charset itself is not returned.\r
+        * All checks are anchored at the start of the chunk, so only its beginning is examined.\r
+        *\r
+        * @param string $xmlChunk the xml text to inspect\r
+        * @return bool true if a BOM or an encoding pseudo-attribute in the xml declaration is found, false otherwise\r
+        */\r
+       function has_encoding($xmlChunk)\r
+       {\r
+               // scan the first bytes of the data for a BOM pattern: 4-byte (UCS-4) forms first, then 2-byte (UTF-16), then UTF-8\r
+               //       (source: http://www.w3.org/TR/2000/REC-xml-20001006)\r
+               if (preg_match('/^(\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk))\r
+               {\r
+                       return true;\r
+               }\r
+               elseif (preg_match('/^(\xFE\xFF|\xFF\xFE)/', $xmlChunk))\r
+               {\r
+                       return true;\r
+               }\r
+               elseif (preg_match('/^(\xEF\xBB\xBF)/', $xmlChunk))\r
+               {\r
+                       return true;\r
+               }\r
+\r
+               // test if encoding is specified in the xml declaration (which must be the very first thing in the chunk)\r
+               // Details (productions from the xml spec; the regexp below uses \s in their place):\r
+               // SPACE:                (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+\r
+               // EQ:                  SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*\r
+               if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .\r
+                       '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",\r
+                       $xmlChunk, $matches))\r
+               {\r
+                       return true;\r
+               }\r
+\r
+               return false;\r
+       }\r
+\r
        /**\r
        * Checks if a given charset encoding is present in a list of encodings or\r
        * if it is a valid subset of any encoding in the list\r