X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=src%2FEncoder.php;h=ebf3060d4996160c753b6b5d1cf2229eaa73688d;hb=7fbd194478e080d624bab075d189bf3ba215325d;hp=636ef35920cf3835deb4f03a1ba517a95c1ae141;hpb=b081e4541f5728a5f18d15832f0b72e2e133aad2;p=plcapi.git diff --git a/src/Encoder.php b/src/Encoder.php index 636ef35..ebf3060 100644 --- a/src/Encoder.php +++ b/src/Encoder.php @@ -4,6 +4,10 @@ namespace PhpXmlRpc; use PhpXmlRpc\Helper\XMLParser; +/** + * A helper class to easily convert between Value objects and php native values + * @todo implement an interface + */ class Encoder { /** @@ -33,13 +37,13 @@ class Encoder switch ($xmlrpcVal->kindOf()) { case 'scalar': if (in_array('extension_api', $options)) { - reset($xmlrpcVal->me); - list($typ, $val) = each($xmlrpcVal->me); + $val = reset($xmlrpcVal->me); + $typ = key($xmlrpcVal->me); switch ($typ) { case 'dateTime.iso8601': $xmlrpcVal->scalar = $val; $xmlrpcVal->type = 'datetime'; - $xmlrpcVal->timestamp = \PhpXmlRpc\Helper\Date::iso8601_decode($val); + $xmlrpcVal->timestamp = \PhpXmlRpc\Helper\Date::iso8601Decode($val); return $xmlrpcVal; case 'base64': @@ -60,26 +64,24 @@ class Encoder $out = strtotime($out); } if (is_int($out)) { - $result = new \Datetime(); + $result = new \DateTime(); $result->setTimestamp($out); return $result; - } elseif (is_a($out, 'Datetime')) { + } elseif (is_a($out, 'DateTimeInterface')) { return $out; } } return $xmlrpcVal->scalarval(); case 'array': - $size = $xmlrpcVal->arraysize(); $arr = array(); - for ($i = 0; $i < $size; $i++) { - $arr[] = $this->decode($xmlrpcVal->arraymem($i), $options); + foreach($xmlrpcVal as $value) { + $arr[] = $this->decode($value, $options); } return $arr; case 'struct': - $xmlrpcVal->structreset(); // If user said so, try to rebuild php objects for specific struct vals. /// @todo should we raise a warning for class not found? // shall we check for proper subclass of xmlrpc value instead of @@ -88,14 +90,14 @@ class Encoder && class_exists($xmlrpcVal->_php_class) ) { $obj = @new $xmlrpcVal->_php_class(); - while (list($key, $value) = $xmlrpcVal->structeach()) { + foreach ($xmlrpcVal as $key => $value) { $obj->$key = $this->decode($value, $options); } return $obj; } else { $arr = array(); - while (list($key, $value) = $xmlrpcVal->structeach()) { + foreach ($xmlrpcVal as $key => $value) { $arr[$key] = $this->decode($value, $options); } @@ -105,7 +107,7 @@ class Encoder $paramCount = $xmlrpcVal->getNumParams(); $arr = array(); for ($i = 0; $i < $paramCount; $i++) { - $arr[] = $this->decode($xmlrpcVal->getParam($i)); + $arr[] = $this->decode($xmlrpcVal->getParam($i), $options); } return $arr; @@ -155,7 +157,7 @@ class Encoder // case 'array': // PHP arrays can be encoded to either xmlrpc structs or arrays, - // depending on wheter they are hashes or plain 0..n integer indexed + // depending on whether they are hashes or plain 0..n integer indexed // A shorter one-liner would be // $tmp = array_diff(array_keys($phpVal), range(0, count($phpVal)-1)); // but execution time skyrockets! @@ -178,12 +180,11 @@ class Encoder case 'object': if (is_a($phpVal, 'PhpXmlRpc\Value')) { $xmlrpcVal = $phpVal; - } elseif (is_a($phpVal, 'DateTime')) { + } elseif (is_a($phpVal, 'DateTimeInterface')) { $xmlrpcVal = new Value($phpVal->format('Ymd\TH:i:s'), Value::$xmlrpcStruct); } else { $arr = array(); - reset($phpVal); - while (list($k, $v) = each($phpVal)) { + foreach($phpVal as $k => $v) { $arr[$k] = $this->encode($v, $options); } $xmlrpcVal = new Value($arr, Value::$xmlrpcStruct); @@ -209,11 +210,11 @@ class Encoder } else { $xmlrpcVal = new Value(); } + break; // catch "user function", "unknown type" default: // giancarlo pinerolo - // it has to return - // an empty object in case, not a boolean. + // it has to return an empty object in case, not a boolean. $xmlrpcVal = new Value(); break; } @@ -225,15 +226,37 @@ class Encoder * Convert the xml representation of a method response, method request or single * xmlrpc value into the appropriate object (a.k.a. deserialize). * + * Q: is this a good name for this method? It does something quite different from 'decode' after all (returning objects vs returns plain php values)... + * * @param string $xmlVal * @param array $options * * @return mixed false on error, or an instance of either Value, Request or Response */ - public function decode_xml($xmlVal, $options = array()) + public function decodeXml($xmlVal, $options = array()) { + // 'guestimate' encoding + $valEncoding = XMLParser::guessEncoding('', $xmlVal); + if ($valEncoding != '') { + + // Since parsing will fail if charset is not specified in the xml prologue, + // the encoding is not UTF8 and there are non-ascii chars in the text, we try to work round that... + // The following code might be better for mb_string enabled installs, but + // makes the lib about 200% slower... + //if (!is_valid_charset($valEncoding, array('UTF-8')) + if (!in_array($valEncoding, array('UTF-8', 'US-ASCII')) && !XMLParser::hasEncoding($xmlVal)) { + if ($valEncoding == 'ISO-8859-1') { + $xmlVal = utf8_encode($xmlVal); + } else { + if (extension_loaded('mbstring')) { + $xmlVal = mb_convert_encoding($xmlVal, 'UTF-8', $valEncoding); + } else { + error_log('XML-RPC: ' . __METHOD__ . ': invalid charset encoding of xml text: ' . $valEncoding); + } + } + } + } - /// @todo 'guestimate' encoding $parser = xml_parser_create(); xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true); // What if internal encoding is not in one of the 3 allowed? @@ -269,10 +292,12 @@ class Encoder } switch ($xmlRpcParser->_xh['rt']) { case 'methodresponse': - $v = &$xmlRpcParser->_xh['value']; + $v = $xmlRpcParser->_xh['value']; if ($xmlRpcParser->_xh['isf'] == 1) { - $vc = $v->structmem('faultCode'); - $vs = $v->structmem('faultString'); + /** @var Value $vc */ + $vc = $v['faultCode']; + /** @var Value $vs */ + $vs = $v['faultString']; $r = new Response(0, $vc->scalarval(), $vs->scalarval()); } else { $r = new Response($v); @@ -280,12 +305,12 @@ class Encoder return $r; case 'methodcall': - $m = new Request($xmlRpcParser->_xh['method']); + $req = new Request($xmlRpcParser->_xh['method']); for ($i = 0; $i < count($xmlRpcParser->_xh['params']); $i++) { - $m->addParam($xmlRpcParser->_xh['params'][$i]); + $req->addParam($xmlRpcParser->_xh['params'][$i]); } - return $m; + return $req; case 'value': return $xmlRpcParser->_xh['value']; default: @@ -293,88 +318,4 @@ class Encoder } } - /** - * xml charset encoding guessing helper function. - * Tries to determine the charset encoding of an XML chunk received over HTTP. - * NB: according to the spec (RFC 3023), if text/xml content-type is received over HTTP without a content-type, - * we SHOULD assume it is strictly US-ASCII. But we try to be more tolerant of unconforming (legacy?) clients/servers, - * which will be most probably using UTF-8 anyway... - * - * @param string $httpHeader the http Content-type header - * @param string $xmlChunk xml content buffer - * @param string $encodingPrefs comma separated list of character encodings to be used as default (when mb extension is enabled) - * @return string - * - * @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of hand-detection!!! - */ - public static function guess_encoding($httpHeader = '', $xmlChunk = '', $encodingPrefs = null) - { - // discussion: see http://www.yale.edu/pclt/encoding/ - // 1 - test if encoding is specified in HTTP HEADERS - - //Details: - // LWS: (\13\10)?( |\t)+ - // token: (any char but excluded stuff)+ - // quoted string: " (any char but double quotes and cointrol chars)* " - // header: Content-type = ...; charset=value(; ...)* - // where value is of type token, no LWS allowed between 'charset' and value - // Note: we do not check for invalid chars in VALUE: - // this had better be done using pure ereg as below - // Note 2: we might be removing whitespace/tabs that ought to be left in if - // the received charset is a quoted string. But nobody uses such charset names... - - /// @todo this test will pass if ANY header has charset specification, not only Content-Type. Fix it? - $matches = array(); - if (preg_match('/;\s*charset\s*=([^;]+)/i', $httpHeader, $matches)) { - return strtoupper(trim($matches[1], " \t\"")); - } - - // 2 - scan the first bytes of the data for a UTF-16 (or other) BOM pattern - // (source: http://www.w3.org/TR/2000/REC-xml-20001006) - // NOTE: actually, according to the spec, even if we find the BOM and determine - // an encoding, we should check if there is an encoding specified - // in the xml declaration, and verify if they match. - /// @todo implement check as described above? - /// @todo implement check for first bytes of string even without a BOM? (It sure looks harder than for cases WITH a BOM) - if (preg_match('/^(\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) { - return 'UCS-4'; - } elseif (preg_match('/^(\xFE\xFF|\xFF\xFE)/', $xmlChunk)) { - return 'UTF-16'; - } elseif (preg_match('/^(\xEF\xBB\xBF)/', $xmlChunk)) { - return 'UTF-8'; - } - - // 3 - test if encoding is specified in the xml declaration - // Details: - // SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+ - // EQ: SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]* - if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" . - '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/", - $xmlChunk, $matches)) { - return strtoupper(substr($matches[2], 1, -1)); - } - - // 4 - if mbstring is available, let it do the guesswork - // NB: we favour finding an encoding that is compatible with what we can process - if (extension_loaded('mbstring')) { - if ($encodingPrefs) { - $enc = mb_detect_encoding($xmlChunk, $encodingPrefs); - } else { - $enc = mb_detect_encoding($xmlChunk); - } - // NB: mb_detect likes to call it ascii, xml parser likes to call it US_ASCII... - // IANA also likes better US-ASCII, so go with it - if ($enc == 'ASCII') { - $enc = 'US-' . $enc; - } - - return $enc; - } else { - // no encoding specified: as per HTTP1.1 assume it is iso-8859-1? - // Both RFC 2616 (HTTP 1.1) and 1945 (HTTP 1.0) clearly state that for text/xxx content types - // this should be the standard. And we should be getting text/xml as request and response. - // BUT we have to be backward compatible with the lib, which always used UTF-8 as default... - return PhpXmlRpc::$xmlrpc_defencoding; - } - } }