\r
// set to TRUE to enable encoding of php NULL values to <EX:NIL/> instead of <NIL/>\r
$GLOBALS['xmlrpc_null_apache_encoding']=false;\r
- $GLOBALS['xmlrpc_null_apache_encoding_ns']='http://ws.apache.org/xmlrpc/namespaces/extensions';\r
+ $GLOBALS['xmlrpc_null_apache_encoding_ns']='http://ws.apache.org/xmlrpc/namespaces/extensions';\r
\r
// used to store state during parsing\r
// quick explanation of components:\r
curl_setopt($curl, CURLOPT_SSLKEYPASSWD, $keypass);\r
}\r
\r
- // Upgrade transparently to more stringent check for versions of php which do not support otherwise.\r
- // Doing it in constructor would be cleaner; doing it here saves us a couple of function calls\r
- if($this->verifyhost == 1 && $info = curl_version() && version_compare($info['version'], '7.28.1') >= 0)\r
- {\r
- $this->verifyhost = 2;\r
- }\r
+ // Upgrade transparently to more stringent check for versions of php which do not support otherwise.\r
+ // Doing it in constructor would be cleaner; doing it here saves us a couple of function calls\r
+ if($this->verifyhost == 1 && $info = curl_version() && version_compare($info['version'], '7.28.1') >= 0)\r
+ {\r
+ $this->verifyhost = 2;\r
+ }\r
// whether to verify cert's common name (CN); 0 for no, 1 to verify that it exists, and 2 to verify that it matches the hostname used\r
curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, $this->verifyhost);\r
}\r
curl_close($curl);\r
}\r
$resp =& $msg->parseResponse($result, true, $this->return_type);\r
- // if we got back a 302, we can not reuse the curl handle for later calls\r
- if($resp->faultCode() == $GLOBALS['xmlrpcerr']['http_error'] && $keepalive)\r
- {\r
- curl_close($curl);\r
- $this->xmlrpc_curl_handle = null;\r
- }\r
+ // if we got back a 302, we can not reuse the curl handle for later calls\r
+ if($resp->faultCode() == $GLOBALS['xmlrpcerr']['http_error'] && $keepalive)\r
+ {\r
+ curl_close($curl);\r
+ $this->xmlrpc_curl_handle = null;\r
+ }\r
}\r
return $resp;\r
}\r
$GLOBALS['_xh']['isf_reason']='';\r
$GLOBALS['_xh']['rt']=''; // 'methodcall or 'methodresponse'\r
\r
- // if response charset encoding is not known / supported, try to use\r
- // the default encoding and parse the xml anyway, but log a warning...\r
- if (!in_array($resp_encoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII')))\r
- // the following code might be better for mb_string enabled installs, but\r
+ // Since parsing will fail if charset is not specified in the xml prologue,\r
+ // the encoding is not UTF8 and there are non-ascii chars in the text, we try to work round that...\r
+ // The following code might be better for mb_string enabled installs, but\r
// makes the lib about 200% slower...\r
- //if (!is_valid_charset($resp_encoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII')))\r
- {\r
- error_log('XML-RPC: '.__METHOD__.': invalid charset encoding of received response: '.$resp_encoding);\r
- $resp_encoding = $GLOBALS['xmlrpc_defencoding'];\r
+ //if (!is_valid_charset($resp_encoding, array('UTF-8')))\r
+ if (!in_array($resp_encoding, array('UTF-8', 'US-ASCII')) && !has_encoding($data)) {\r
+ if ($resp_encoding == 'ISO-8859-1') {\r
+ $data = utf8_encode($data);\r
+ } else {\r
+ if (extension_loaded('mbstring')) {\r
+ $data = mb_convert_encoding($data, 'UTF-8', $resp_encoding);\r
+ } else {\r
+ error_log('XML-RPC: ' . __METHOD__ . ': invalid charset encoding of received request: ' . $resp_encoding);\r
+ }\r
+ }\r
}\r
- $parser = xml_parser_create($resp_encoding);\r
+\r
+ $parser = xml_parser_create();\r
xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true);\r
// G. Giunta 2005/02/13: PHP internally uses ISO-8859-1, so we have to tell\r
// the xml parser to give us back data in the expected charset.\r
$GLOBALS['_xh']['isf_reason'] = '';\r
$GLOBALS['_xh']['method'] = false;\r
$GLOBALS['_xh']['rt'] = '';\r
- /// @todo 'guestimate' encoding\r
- $parser = xml_parser_create();\r
+\r
+ // 'guestimate' encoding\r
+ $val_encoding = guess_encoding('', $xml_val);\r
+\r
+ // Since parsing will fail if charset is not specified in the xml prologue,\r
+ // the encoding is not UTF8 and there are non-ascii chars in the text, we try to work round that...\r
+ // The following code might be better for mb_string enabled installs, but\r
+ // makes the lib about 200% slower...\r
+ //if (!is_valid_charset($val_encoding, array('UTF-8')))\r
+ if (!in_array($val_encoding, array('UTF-8', 'US-ASCII')) && !has_encoding($xml_val)) {\r
+ if ($val_encoding == 'ISO-8859-1') {\r
+ $xml_val = utf8_encode($xml_val);\r
+ } else {\r
+ if (extension_loaded('mbstring')) {\r
+ $xml_val = mb_convert_encoding($xml_val, 'UTF-8', $val_encoding);\r
+ } else {\r
+ error_log('XML-RPC: ' . __METHOD__ . ': invalid charset encoding of received request: ' . $val_encoding);\r
+ }\r
+ }\r
+ }\r
+\r
+ $parser = xml_parser_create();\r
xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true);\r
// What if internal encoding is not in one of the 3 allowed?\r
// we use the broadest one, ie. utf8!\r
}\r
}\r
\r
+ /**\r
+ * Helper function: checks if an xml chunk has a charset declaration (BOM or in the xml declaration)\r
+ *\r
+ * Only the very start of the chunk is inspected: every pattern used is anchored with '^',\r
+ * so a BOM or an xml declaration preceded by any other byte (even whitespace) is not detected.\r
+ * The check succeeds when the chunk begins with either:\r
+ * - one of the 4-byte sequences 00 00 FE FF, FF FE 00 00, 00 00 FF FE, FE FF 00 00,\r
+ * - one of the 2-byte sequences FE FF, FF FE,\r
+ * - the 3-byte sequence EF BB BF, or\r
+ * - an xml declaration carrying a quoted version followed by a quoted encoding pseudo-attribute.\r
+ * The declared encoding name itself is not returned, only whether one is present\r
+ * (the regexp captures are collected in a local variable which is never read).\r
+ *\r
+ * @param string $xmlChunk the raw xml text to inspect\r
+ * @return bool true if a BOM or an encoding declaration is found at the start of the chunk, false otherwise\r
+ */\r
+ function has_encoding($xmlChunk)\r
+ {\r
+ // scan the first bytes of the data for a UTF-16 (or other) BOM pattern\r
+ // (source: http://www.w3.org/TR/2000/REC-xml-20001006)\r
+ if (preg_match('/^(\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk))\r
+ {\r
+ return true;\r
+ }\r
+ elseif (preg_match('/^(\xFE\xFF|\xFF\xFE)/', $xmlChunk))\r
+ {\r
+ return true;\r
+ }\r
+ elseif (preg_match('/^(\xEF\xBB\xBF)/', $xmlChunk))\r
+ {\r
+ return true;\r
+ }\r
+\r
+ // test if encoding is specified in the xml declaration\r
+ // Details:\r
+ // SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+\r
+ // EQ: SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*\r
+ if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .\r
+ '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",\r
+ $xmlChunk, $matches))\r
+ {\r
+ return true;\r
+ }\r
+\r
+ return false;\r
+ }\r
+\r
/**\r
* Checks if a given charset encoding is present in a list of encodings or\r
* if it is a valid subset of any encoding in the list\r