From: gggeek Date: Sat, 18 Apr 2015 23:53:12 +0000 (+0100) Subject: Fix: allow library to receive calls/responses where LATIN-1 charset is set in http... X-Git-Tag: 3.0.1~4 X-Git-Url: http://git.onelab.eu/?p=plcapi.git;a=commitdiff_plain;h=f45909a92b268970729c121269af62d71eb7cd47 Fix: allow library to receive calls/responses where LATIN-1 charset is set in http headers, not xml prolog --- diff --git a/NEWS b/NEWS index 5d36f47..d320809 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,11 @@ -XML-RPC for PHP version 3.0.1 - 201X/Y/Z +XML-RPC for PHP version 3.0.1 - 2015/4/18 + +This release corrects all bugs that have been reported and successfully reproduced since +version 3.0.0: + +* the library does not correctly decode LATIN-1 requests/responses if the character set is only set in the http headers, not in the xml prolog +* the client can not call remote methods which use LATIN-1 or UTF8 characters in their names +* the debugger sends incorrect requests when the payload includes LATIN-1 characters Taking baby steps to modern-world php, this release is now tested using Travis ( https://travis-ci.org/ ). @@ -42,7 +49,7 @@ the library is still considered to be production quality. XML-RPC for PHP version 2.2.2 - 2009/03/16 -This release corrects all bugs that have been reported and sucesfully reproduced since +This release corrects all bugs that have been reported and successfully reproduced since version 2.2.1. Regardless of the intimidating message about dropping PHP 4 support, it still does support that ancient, broken and insecure platform. @@ -61,7 +68,7 @@ support that ancient, broken and insecure platform. XML-RPC for PHP version 2.2.1 - 2008/03/06 -This release corrects all bugs that have been reported and sucesfully reproduced. +This release corrects all bugs that have been reported and successfully reproduced. It is the last release of the library that will support PHP 4. 
* fixed: work around bug in php 5.2.2 which broke support of HTTP_RAW_POST_DATA @@ -152,7 +159,7 @@ CHANGELOG IN DETAIL: * documentation for single parameters of exposed methods can be added to the dispatch map (and turned into html docs in conjunction with a future release of the extras package) * full response payload is saved into xmlrpcresp object for further debugging -* stricter parsing of incmoing xmlrpc messages: two more invalid cases are now detected +* stricter parsing of incoming xmlrpc messages: two more invalid cases are now detected (double data element inside array and struct/array after scalar inside value element) * debugger can now generate code that wraps a remote method into php function (works for jsonrpc, too) * debugger has better support for being activated via a single GET call (for integration into other tools?) @@ -247,7 +254,7 @@ HTTPS support: $xmlrpc_internalencoding was set to UTF-8 * fixed bug in xmlrpc_server::echoInput() (and marked method as deprecated) * correctly set cookies/http headers into xmlrpcresp objects even when the - sned() method call fails for some reason + send() method call fails for some reason * added a benchmark file in the testsuite directory A couple of (private/protected) methods have been refactored, as well as a diff --git a/lib/xmlrpc.inc b/lib/xmlrpc.inc index 117c7b8..a1d0ca8 100644 --- a/lib/xmlrpc.inc +++ b/lib/xmlrpc.inc @@ -225,7 +225,7 @@ // set to TRUE to enable encoding of php NULL values to instead of $GLOBALS['xmlrpc_null_apache_encoding']=false; - $GLOBALS['xmlrpc_null_apache_encoding_ns']='http://ws.apache.org/xmlrpc/namespaces/extensions'; + $GLOBALS['xmlrpc_null_apache_encoding_ns']='http://ws.apache.org/xmlrpc/namespaces/extensions'; // used to store state during parsing // quick explanation of components: @@ -1622,12 +1622,12 @@ curl_setopt($curl, CURLOPT_SSLKEYPASSWD, $keypass); } - // Upgrade transparently to more stringent check for versions of php which do not support otherwise. 
- // Doing it in constructor would be cleaner; doing it here saves us a couple of function calls - if($this->verifyhost == 1 && $info = curl_version() && version_compare($info['version'], '7.28.1') >= 0) - { - $this->verifyhost = 2; - } + // Upgrade transparently to more stringent check for versions of php which do not support otherwise. + // Doing it in constructor would be cleaner; doing it here saves us a couple of function calls + if($this->verifyhost == 1 && $info = curl_version() && version_compare($info['version'], '7.28.1') >= 0) + { + $this->verifyhost = 2; + } // whether to verify cert's common name (CN); 0 for no, 1 to verify that it exists, and 2 to verify that it matches the hostname used curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, $this->verifyhost); } @@ -1707,12 +1707,12 @@ curl_close($curl); } $resp =& $msg->parseResponse($result, true, $this->return_type); - // if we got back a 302, we can not reuse the curl handle for later calls - if($resp->faultCode() == $GLOBALS['xmlrpcerr']['http_error'] && $keepalive) - { - curl_close($curl); - $this->xmlrpc_curl_handle = null; - } + // if we got back a 302, we can not reuse the curl handle for later calls + if($resp->faultCode() == $GLOBALS['xmlrpcerr']['http_error'] && $keepalive) + { + curl_close($curl); + $this->xmlrpc_curl_handle = null; + } } return $resp; } @@ -2599,17 +2599,24 @@ xmlrpc_encode_entitites($this->errstr, $GLOBALS['xmlrpc_internalencoding'], $cha $GLOBALS['_xh']['isf_reason']=''; $GLOBALS['_xh']['rt']=''; // 'methodcall or 'methodresponse' - // if response charset encoding is not known / supported, try to use - // the default encoding and parse the xml anyway, but log a warning... 
- if (!in_array($resp_encoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII'))) - // the following code might be better for mb_string enabled installs, but + // Since parsing will fail if charset is not specified in the xml prologue, + // the encoding is not UTF8 and there are non-ascii chars in the text, we try to work round that... + // The following code might be better for mb_string enabled installs, but // makes the lib about 200% slower... - //if (!is_valid_charset($resp_encoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII'))) - { - error_log('XML-RPC: '.__METHOD__.': invalid charset encoding of received response: '.$resp_encoding); - $resp_encoding = $GLOBALS['xmlrpc_defencoding']; + //if (!is_valid_charset($resp_encoding, array('UTF-8'))) + if (!in_array($resp_encoding, array('UTF-8', 'US-ASCII')) && !has_encoding($data)) { + if ($resp_encoding == 'ISO-8859-1') { + $data = utf8_encode($data); + } else { + if (extension_loaded('mbstring')) { + $data = mb_convert_encoding($data, 'UTF-8', $resp_encoding); + } else { + error_log('XML-RPC: ' . __METHOD__ . ': invalid charset encoding of received request: ' . $resp_encoding); + } + } } - $parser = xml_parser_create($resp_encoding); + + $parser = xml_parser_create(); xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true); // G. Giunta 2005/02/13: PHP internally uses ISO-8859-1, so we have to tell // the xml parser to give us back data in the expected charset. @@ -3566,8 +3573,28 @@ xmlrpc_encode_entitites($this->errstr, $GLOBALS['xmlrpc_internalencoding'], $cha $GLOBALS['_xh']['isf_reason'] = ''; $GLOBALS['_xh']['method'] = false; $GLOBALS['_xh']['rt'] = ''; - /// @todo 'guestimate' encoding - $parser = xml_parser_create(); + + // 'guestimate' encoding + $val_encoding = guess_encoding('', $xml_val); + + // Since parsing will fail if charset is not specified in the xml prologue, + // the encoding is not UTF8 and there are non-ascii chars in the text, we try to work round that... 
+ // The following code might be better for mb_string enabled installs, but + // makes the lib about 200% slower... + //if (!is_valid_charset($val_encoding, array('UTF-8'))) + if (!in_array($val_encoding, array('UTF-8', 'US-ASCII')) && !has_encoding($xml_val)) { + if ($val_encoding == 'ISO-8859-1') { + $xml_val = utf8_encode($xml_val); + } else { + if (extension_loaded('mbstring')) { + $xml_val = mb_convert_encoding($xml_val, 'UTF-8', $val_encoding); + } else { + error_log('XML-RPC: ' . __METHOD__ . ': invalid charset encoding of received request: ' . $val_encoding); + } + } + } + + $parser = xml_parser_create(); xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true); // What if internal encoding is not in one of the 3 allowed? // we use the broadest one, ie. utf8! @@ -3779,6 +3806,43 @@ xmlrpc_encode_entitites($this->errstr, $GLOBALS['xmlrpc_internalencoding'], $cha } } + /** + * Helper function: checks if an xml chunk has a charset declaration (BOM or in the xml declaration) + * + * @param string $xmlChunk + * @return bool + */ + function has_encoding($xmlChunk) + { + // scan the first bytes of the data for a UTF-16 (or other) BOM pattern + // (source: http://www.w3.org/TR/2000/REC-xml-20001006) + if (preg_match('/^(\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) + { + return true; + } + elseif (preg_match('/^(\xFE\xFF|\xFF\xFE)/', $xmlChunk)) + { + return true; + } + elseif (preg_match('/^(\xEF\xBB\xBF)/', $xmlChunk)) + { + return true; + } + + // test if encoding is specified in the xml declaration + // Details: + // SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+ + // EQ: SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]* + if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" . + '\s+encoding\s*=\s*' . 
"((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/", + $xmlChunk, $matches)) + { + return true; + } + + return false; + } + /** * Checks if a given charset encoding is present in a list of encodings or * if it is a valid subset of any encoding in the list diff --git a/lib/xmlrpcs.inc b/lib/xmlrpcs.inc index 180595c..6dd64a5 100644 --- a/lib/xmlrpcs.inc +++ b/lib/xmlrpcs.inc @@ -928,26 +928,28 @@ $GLOBALS['_xh']['rt']=''; // decompose incoming XML into request structure + if ($req_encoding != '') { - if (!in_array($req_encoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII'))) - // the following code might be better for mb_string enabled installs, but - // makes the lib about 200% slower... - //if (!is_valid_charset($req_encoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII'))) - { - error_log('XML-RPC: '.__METHOD__.': invalid charset encoding of received request: '.$req_encoding); - $req_encoding = $GLOBALS['xmlrpc_defencoding']; - } - /// @BUG this will fail on PHP 5 if charset is not specified in the xml prologue, - // the encoding is not UTF8 and there are non-ascii chars in the text... - /// @todo use an empty string for php 5 ??? - $parser = xml_parser_create($req_encoding); - } - else - { - $parser = xml_parser_create(); + // Since parsing will fail if charset is not specified in the xml prologue, + // the encoding is not UTF8 and there are non-ascii chars in the text, we try to work round that... + // The following code might be better for mb_string enabled installs, but + // makes the lib about 200% slower... + //if (!is_valid_charset($req_encoding, array('UTF-8'))) + if (!in_array($req_encoding, array('UTF-8', 'US-ASCII')) && !has_encoding($data)) { + if ($req_encoding == 'ISO-8859-1') { + $data = utf8_encode($data); + } else { + if (extension_loaded('mbstring')) { + $data = mb_convert_encoding($data, 'UTF-8', $req_encoding); + } else { + error_log('XML-RPC: ' . __METHOD__ . ': invalid charset encoding of received request: ' . 
$req_encoding); + } + } + } } + $parser = xml_parser_create(); xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true); // G. Giunta 2005/02/13: PHP internally uses ISO-8859-1, so we have to tell // the xml parser to give us back data in the expected charset