X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=src%2FEncoder.php;h=220ce88165a353f1f15faf99e3d39497e2ef9a19;hb=9d4cfc9508b03a579dd62a28c0e9e428b5052f9b;hp=f464f8224aad462c4686d55b00cd9dcde55b83df;hpb=64b37dfbd32a7858e729e03eb276a2eaa0f19ccd;p=plcapi.git diff --git a/src/Encoder.php b/src/Encoder.php index f464f82..220ce88 100644 --- a/src/Encoder.php +++ b/src/Encoder.php @@ -4,10 +4,13 @@ namespace PhpXmlRpc; use PhpXmlRpc\Helper\XMLParser; +/** + * A helper class to easily convert between Value objects and php native values + */ class Encoder { /** - * Takes an xmlrpc value in PHP xmlrpcval object format and translates it into native PHP types. + * Takes an xmlrpc value in object format and translates it into native PHP types. * * Works with xmlrpc requests objects as input, too. * @@ -23,103 +26,96 @@ class Encoder * * @author Dan Libby (dan@libby.com) * - * @param Value|Request $xmlrpc_val + * @param Value|Request $xmlrpcVal * @param array $options if 'decode_php_objs' is set in the options array, xmlrpc structs can be decoded into php objects; if 'dates_as_objects' is set xmlrpc datetimes are decoded as php DateTime objects (standard is + * * @return mixed */ - function decode($xmlrpc_val, $options=array()) + public function decode($xmlrpcVal, $options = array()) { - switch($xmlrpc_val->kindOf()) - { + switch ($xmlrpcVal->kindOf()) { case 'scalar': - if (in_array('extension_api', $options)) - { - reset($xmlrpc_val->me); - list($typ,$val) = each($xmlrpc_val->me); - switch ($typ) - { + if (in_array('extension_api', $options)) { + reset($xmlrpcVal->me); + list($typ, $val) = each($xmlrpcVal->me); + switch ($typ) { case 'dateTime.iso8601': - $xmlrpc_val->scalar = $val; - $xmlrpc_val->type = 'datetime'; - $xmlrpc_val->timestamp = \PhpXmlRpc\Helper\Date::iso8601_decode($val); - return $xmlrpc_val; + $xmlrpcVal->scalar = $val; + $xmlrpcVal->type = 'datetime'; + $xmlrpcVal->timestamp = \PhpXmlRpc\Helper\Date::iso8601Decode($val); + + return $xmlrpcVal; case 'base64': - $xmlrpc_val->scalar = $val; - $xmlrpc_val->type = $typ; - return $xmlrpc_val; + $xmlrpcVal->scalar = $val; + $xmlrpcVal->type = $typ; + + return $xmlrpcVal; default: - return $xmlrpc_val->scalarval(); + return $xmlrpcVal->scalarval(); } } - if (in_array('dates_as_objects', $options) && $xmlrpc_val->scalartyp() == 'dateTime.iso8601') - { + if (in_array('dates_as_objects', $options) && $xmlrpcVal->scalartyp() == 'dateTime.iso8601') { // we return a Datetime object instead of a string - // since now the constructor of xmlrpcval accepts safely strings, ints and datetimes, + // since now the constructor of xmlrpc value accepts safely strings, ints and datetimes, // we cater to all 3 cases here - $out = $xmlrpc_val->scalarval(); - if (is_string($out)) - { + $out = $xmlrpcVal->scalarval(); + if (is_string($out)) { $out = strtotime($out); } - if (is_int($out)) - { + if (is_int($out)) { $result = new \Datetime(); $result->setTimestamp($out); + return $result; - } - elseif (is_a($out, 'Datetime')) - { + } elseif (is_a($out, 'Datetime')) { return $out; } } - return $xmlrpc_val->scalarval(); + + return $xmlrpcVal->scalarval(); case 'array': - $size = $xmlrpc_val->arraysize(); $arr = array(); - for($i = 0; $i < $size; $i++) - { - $arr[] = $this->decode($xmlrpc_val->arraymem($i), $options); + foreach($xmlrpcVal as $value) { + $arr[] = $this->decode($value, $options); } + return $arr; case 'struct': - $xmlrpc_val->structreset(); // If user said so, try to rebuild php objects for specific struct vals. /// @todo should we raise a warning for class not found? - // shall we check for proper subclass of xmlrpcval instead of + // shall we check for proper subclass of xmlrpc value instead of // presence of _php_class to detect what we can do? - if (in_array('decode_php_objs', $options) && $xmlrpc_val->_php_class != '' - && class_exists($xmlrpc_val->_php_class)) - { - $obj = @new $xmlrpc_val->_php_class; - while(list($key,$value)=$xmlrpc_val->structeach()) - { + if (in_array('decode_php_objs', $options) && $xmlrpcVal->_php_class != '' + && class_exists($xmlrpcVal->_php_class) + ) { + $obj = @new $xmlrpcVal->_php_class(); + foreach ($xmlrpcVal as $key => $value) { $obj->$key = $this->decode($value, $options); } + return $obj; - } - else - { + } else { $arr = array(); - while(list($key,$value)=$xmlrpc_val->structeach()) - { + foreach ($xmlrpcVal as $key => $value) { $arr[$key] = $this->decode($value, $options); } + return $arr; } case 'msg': - $paramcount = $xmlrpc_val->getNumParams(); + $paramCount = $xmlrpcVal->getNumParams(); $arr = array(); - for($i = 0; $i < $paramcount; $i++) - { - $arr[] = $this->decode($xmlrpc_val->getParam($i)); + for ($i = 0; $i < $paramCount; $i++) { + $arr[] = $this->decode($xmlrpcVal->getParam($i), $options); } + return $arr; - } + } } /** * Takes native php types and encodes them into xmlrpc PHP object format. - * It will not re-encode xmlrpcval objects. + * It will not re-encode xmlrpc value objects. * * Feature creep -- could support more types via optional type argument * (string => datetime support has been added, ??? => base64 not yet) @@ -130,141 +126,142 @@ class Encoder * * @author Dan Libby (dan@libby.com) * - * @param mixed $php_val the value to be converted into an xmlrpcval object + * @param mixed $phpVal the value to be converted into an xmlrpc value object * @param array $options can include 'encode_php_objs', 'auto_dates', 'null_extension' or 'extension_api' - * @return xmlrpcval + * + * @return \PhpXmlrpc\Value */ - function encode($php_val, $options=array()) + public function encode($phpVal, $options = array()) { - $type = gettype($php_val); - switch($type) - { + $type = gettype($phpVal); + switch ($type) { case 'string': - if (in_array('auto_dates', $options) && preg_match('/^[0-9]{8}T[0-9]{2}:[0-9]{2}:[0-9]{2}$/', $php_val)) - $xmlrpc_val = new Value($php_val, Value::$xmlrpcDateTime); - else - $xmlrpc_val = new Value($php_val, Value::$xmlrpcString); + if (in_array('auto_dates', $options) && preg_match('/^[0-9]{8}T[0-9]{2}:[0-9]{2}:[0-9]{2}$/', $phpVal)) { + $xmlrpcVal = new Value($phpVal, Value::$xmlrpcDateTime); + } else { + $xmlrpcVal = new Value($phpVal, Value::$xmlrpcString); + } break; case 'integer': - $xmlrpc_val = new Value($php_val, Value::$xmlrpcInt); + $xmlrpcVal = new Value($phpVal, Value::$xmlrpcInt); break; case 'double': - $xmlrpc_val = new Value($php_val, Value::$xmlrpcDouble); + $xmlrpcVal = new Value($phpVal, Value::$xmlrpcDouble); break; - // - // Add support for encoding/decoding of booleans, since they are supported in PHP + // + // Add support for encoding/decoding of booleans, since they are supported in PHP case 'boolean': - $xmlrpc_val = new Value($php_val, Value::$xmlrpcBoolean); + $xmlrpcVal = new Value($phpVal, Value::$xmlrpcBoolean); break; - // + // case 'array': // PHP arrays can be encoded to either xmlrpc structs or arrays, // depending on wheter they are hashes or plain 0..n integer indexed // A shorter one-liner would be - // $tmp = array_diff(array_keys($php_val), range(0, count($php_val)-1)); + // $tmp = array_diff(array_keys($phpVal), range(0, count($phpVal)-1)); // but execution time skyrockets! $j = 0; $arr = array(); $ko = false; - foreach($php_val as $key => $val) - { + foreach ($phpVal as $key => $val) { $arr[$key] = $this->encode($val, $options); - if(!$ko && $key !== $j) - { + if (!$ko && $key !== $j) { $ko = true; } $j++; } - if($ko) - { - $xmlrpc_val = new Value($arr, Value::$xmlrpcStruct); - } - else - { - $xmlrpc_val = new Value($arr, Value::$xmlrpcArray); + if ($ko) { + $xmlrpcVal = new Value($arr, Value::$xmlrpcStruct); + } else { + $xmlrpcVal = new Value($arr, Value::$xmlrpcArray); } break; case 'object': - if(is_a($php_val, 'xmlrpcval')) - { - $xmlrpc_val = $php_val; - } - else if(is_a($php_val, 'DateTime')) - { - $xmlrpc_val = new Value($php_val->format('Ymd\TH:i:s'), Value::$xmlrpcStruct); - } - else - { + if (is_a($phpVal, 'PhpXmlRpc\Value')) { + $xmlrpcVal = $phpVal; + } elseif (is_a($phpVal, 'DateTime')) { + $xmlrpcVal = new Value($phpVal->format('Ymd\TH:i:s'), Value::$xmlrpcStruct); + } else { $arr = array(); - reset($php_val); - while(list($k,$v) = each($php_val)) - { + reset($phpVal); + while (list($k, $v) = each($phpVal)) { $arr[$k] = $this->encode($v, $options); } - $xmlrpc_val = new Value($arr, Value::$xmlrpcStruct); - if (in_array('encode_php_objs', $options)) - { - // let's save original class name into xmlrpcval: + $xmlrpcVal = new Value($arr, Value::$xmlrpcStruct); + if (in_array('encode_php_objs', $options)) { + // let's save original class name into xmlrpc value: // might be useful later on... - $xmlrpc_val->_php_class = get_class($php_val); + $xmlrpcVal->_php_class = get_class($phpVal); } } break; case 'NULL': - if (in_array('extension_api', $options)) - { - $xmlrpc_val = new Value('', Value::$xmlrpcString); - } - else if (in_array('null_extension', $options)) - { - $xmlrpc_val = new Value('', Value::$xmlrpcNull); - } - else - { - $xmlrpc_val = new Value(); + if (in_array('extension_api', $options)) { + $xmlrpcVal = new Value('', Value::$xmlrpcString); + } elseif (in_array('null_extension', $options)) { + $xmlrpcVal = new Value('', Value::$xmlrpcNull); + } else { + $xmlrpcVal = new Value(); } break; case 'resource': - if (in_array('extension_api', $options)) - { - $xmlrpc_val = new Value((int)$php_val, Value::$xmlrpcInt); - } - else - { - $xmlrpc_val = new Value(); + if (in_array('extension_api', $options)) { + $xmlrpcVal = new Value((int)$phpVal, Value::$xmlrpcInt); + } else { + $xmlrpcVal = new Value(); } // catch "user function", "unknown type" default: // giancarlo pinerolo // it has to return // an empty object in case, not a boolean. - $xmlrpc_val = new Value(); + $xmlrpcVal = new Value(); break; - } - return $xmlrpc_val; + } + + return $xmlrpcVal; } /** * Convert the xml representation of a method response, method request or single - * xmlrpc value into the appropriate object (a.k.a. deserialize) - * @param string $xml_val + * xmlrpc value into the appropriate object (a.k.a. deserialize). + * + * @param string $xmlVal * @param array $options + * * @return mixed false on error, or an instance of either Value, Request or Response */ - function decode_xml($xml_val, $options=array()) + public function decodeXml($xmlVal, $options = array()) { + // 'guestimate' encoding + $valEncoding = XMLParser::guessEncoding('', $xmlVal); + if ($valEncoding != '') { + + // Since parsing will fail if charset is not specified in the xml prologue, + // the encoding is not UTF8 and there are non-ascii chars in the text, we try to work round that... + // The following code might be better for mb_string enabled installs, but + // makes the lib about 200% slower... + //if (!is_valid_charset($valEncoding, array('UTF-8')) + if (!in_array($valEncoding, array('UTF-8', 'US-ASCII')) && !XMLParser::hasEncoding($xmlVal)) { + if ($valEncoding == 'ISO-8859-1') { + $xmlVal = utf8_encode($xmlVal); + } else { + if (extension_loaded('mbstring')) { + $xmlVal = mb_convert_encoding($xmlVal, 'UTF-8', $valEncoding); + } else { + error_log('XML-RPC: ' . __METHOD__ . ': invalid charset encoding of xml text: ' . $valEncoding); + } + } + } + } - /// @todo 'guestimate' encoding $parser = xml_parser_create(); xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true); // What if internal encoding is not in one of the 3 allowed? // we use the broadest one, ie. utf8! - if (!in_array(PhpXmlRpc::$xmlrpc_internalencoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII'))) - { + if (!in_array(PhpXmlRpc::$xmlrpc_internalencoding, array('UTF-8', 'ISO-8859-1', 'US-ASCII'))) { xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, 'UTF-8'); - } - else - { + } else { xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, PhpXmlRpc::$xmlrpc_internalencoding); } @@ -274,43 +271,42 @@ class Encoder xml_set_element_handler($parser, 'xmlrpc_se_any', 'xmlrpc_ee'); xml_set_character_data_handler($parser, 'xmlrpc_cd'); xml_set_default_handler($parser, 'xmlrpc_dh'); - if(!xml_parse($parser, $xml_val, 1)) - { + if (!xml_parse($parser, $xmlVal, 1)) { $errstr = sprintf('XML error: %s at line %d, column %d', - xml_error_string(xml_get_error_code($parser)), - xml_get_current_line_number($parser), xml_get_current_column_number($parser)); + xml_error_string(xml_get_error_code($parser)), + xml_get_current_line_number($parser), xml_get_current_column_number($parser)); error_log($errstr); xml_parser_free($parser); + return false; } xml_parser_free($parser); - if ($xmlRpcParser->_xh['isf'] > 1) // test that $xmlrpc->_xh['value'] is an obj, too??? - { + if ($xmlRpcParser->_xh['isf'] > 1) { + // test that $xmlrpc->_xh['value'] is an obj, too??? + error_log($xmlRpcParser->_xh['isf_reason']); + return false; } - switch ($xmlRpcParser->_xh['rt']) - { + switch ($xmlRpcParser->_xh['rt']) { case 'methodresponse': - $v =& $xmlRpcParser->_xh['value']; - if ($xmlRpcParser->_xh['isf'] == 1) - { - $vc = $v->structmem('faultCode'); - $vs = $v->structmem('faultString'); + $v = &$xmlRpcParser->_xh['value']; + if ($xmlRpcParser->_xh['isf'] == 1) { + $vc = $v['faultCode']; + $vs = $v['faultString']; $r = new Response(0, $vc->scalarval(), $vs->scalarval()); - } - else - { + } else { $r = new Response($v); } + return $r; case 'methodcall': - $m = new Request($xmlRpcParser->_xh['method']); - for($i=0; $i < count($xmlRpcParser->_xh['params']); $i++) - { - $m->addParam($xmlRpcParser->_xh['params'][$i]); + $req = new Request($xmlRpcParser->_xh['method']); + for ($i = 0; $i < count($xmlRpcParser->_xh['params']); $i++) { + $req->addParam($xmlRpcParser->_xh['params'][$i]); } - return $m; + + return $req; case 'value': return $xmlRpcParser->_xh['value']; default: @@ -318,103 +314,4 @@ class Encoder } } - -/** - * xml charset encoding guessing helper function. - * Tries to determine the charset encoding of an XML chunk received over HTTP. - * NB: according to the spec (RFC 3023), if text/xml content-type is received over HTTP without a content-type, - * we SHOULD assume it is strictly US-ASCII. But we try to be more tolerant of unconforming (legacy?) clients/servers, - * which will be most probably using UTF-8 anyway... - * - * @param string $httpheader the http Content-type header - * @param string $xmlchunk xml content buffer - * @param string $encoding_prefs comma separated list of character encodings to be used as default (when mb extension is enabled) - * @return string - * - * @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of hand-detection!!! - */ -function guess_encoding($httpheader='', $xmlchunk='', $encoding_prefs=null) -{ - // discussion: see http://www.yale.edu/pclt/encoding/ - // 1 - test if encoding is specified in HTTP HEADERS - - //Details: - // LWS: (\13\10)?( |\t)+ - // token: (any char but excluded stuff)+ - // quoted string: " (any char but double quotes and cointrol chars)* " - // header: Content-type = ...; charset=value(; ...)* - // where value is of type token, no LWS allowed between 'charset' and value - // Note: we do not check for invalid chars in VALUE: - // this had better be done using pure ereg as below - // Note 2: we might be removing whitespace/tabs that ought to be left in if - // the received charset is a quoted string. But nobody uses such charset names... - - /// @todo this test will pass if ANY header has charset specification, not only Content-Type. Fix it? - $matches = array(); - if(preg_match('/;\s*charset\s*=([^;]+)/i', $httpheader, $matches)) - { - return strtoupper(trim($matches[1], " \t\"")); - } - - // 2 - scan the first bytes of the data for a UTF-16 (or other) BOM pattern - // (source: http://www.w3.org/TR/2000/REC-xml-20001006) - // NOTE: actually, according to the spec, even if we find the BOM and determine - // an encoding, we should check if there is an encoding specified - // in the xml declaration, and verify if they match. - /// @todo implement check as described above? - /// @todo implement check for first bytes of string even without a BOM? (It sure looks harder than for cases WITH a BOM) - if(preg_match('/^(\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlchunk)) - { - return 'UCS-4'; - } - elseif(preg_match('/^(\xFE\xFF|\xFF\xFE)/', $xmlchunk)) - { - return 'UTF-16'; - } - elseif(preg_match('/^(\xEF\xBB\xBF)/', $xmlchunk)) - { - return 'UTF-8'; - } - - // 3 - test if encoding is specified in the xml declaration - // Details: - // SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+ - // EQ: SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]* - if (preg_match('/^<\?xml\s+version\s*=\s*'. "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))". - '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/", - $xmlchunk, $matches)) - { - return strtoupper(substr($matches[2], 1, -1)); - } - - // 4 - if mbstring is available, let it do the guesswork - // NB: we favour finding an encoding that is compatible with what we can process - if(extension_loaded('mbstring')) - { - if($encoding_prefs) - { - $enc = mb_detect_encoding($xmlchunk, $encoding_prefs); - } - else - { - $enc = mb_detect_encoding($xmlchunk); - } - // NB: mb_detect likes to call it ascii, xml parser likes to call it US_ASCII... - // IANA also likes better US-ASCII, so go with it - if($enc == 'ASCII') - { - $enc = 'US-'.$enc; - } - return $enc; - } - else - { - // no encoding specified: as per HTTP1.1 assume it is iso-8859-1? - // Both RFC 2616 (HTTP 1.1) and 1945 (HTTP 1.0) clearly state that for text/xxx content types - // this should be the standard. And we should be getting text/xml as request and response. - // BUT we have to be backward compatible with the lib, which always used UTF-8 as default... - return PhpXmlRpc::$xmlrpc_defencoding; - } } - -} \ No newline at end of file