X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=src%2FHelper%2FXMLParser.php;h=f6e79a9a0fa62fc02c88883e5384e2c39e688ed3;hb=b337d292eb5b5656d27a2fc9ab6796be300c59a3;hp=b4798ccf97c21b4b7435b2f2abd53b88d2cd915a;hpb=b0a92b19241515e3484421bfc08ec13a54a2eb1c;p=plcapi.git
diff --git a/src/Helper/XMLParser.php b/src/Helper/XMLParser.php
index b4798ccf..f6e79a9a 100644
--- a/src/Helper/XMLParser.php
+++ b/src/Helper/XMLParser.php
@@ -7,30 +7,48 @@ use PhpXmlRpc\Value;
/**
* Deals with parsing the XML.
+ * @see http://xmlrpc.com/spec.md
+ *
+ * @todo implement an interface to allow for alternative implementations
+ * - make access to $_xh protected, return more high-level data structures
+ * - add parseRequest, parseResponse, parseValue methods
+ * @todo if iconv() or the mbstring functions are available, we could allow converting the received xml to a custom charset encoding
+ * while parsing, which is faster than doing it later by going over the rebuilt data structure
*/
class XMLParser
{
- // used to store state during parsing
- // quick explanation of components:
- // ac - used to accumulate values
- // stack - array with genealogy of xml elements names:
- // used to validate nesting of xmlrpc elements
- // valuestack - array used for parsing arrays and structs
- // lv - used to indicate "looking for a value": implements
- // the logic to allow values with no types to be strings
- // isf - used to indicate a parsing fault (2) or xmlrpc response fault (1)
- // isf_reason - used for storing xmlrpc response fault string
- // method - used to store method name
- // params - used to store parameters in method calls
- // pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values
- // rt - 'methodcall or 'methodresponse'
+ const RETURN_XMLRPCVALS = 'xmlrpcvals';
+ const RETURN_EPIVALS = 'epivals';
+ const RETURN_PHP = 'phpvals';
+
+ const ACCEPT_REQUEST = 1;
+ const ACCEPT_RESPONSE = 2;
+ const ACCEPT_VALUE = 4;
+ const ACCEPT_FAULT = 8;
+
+ // Used to store state during parsing and to pass parsing results to callers.
+ // Quick explanation of components:
+ // private:
+ // ac - used to accumulate values
+ // stack - array with genealogy of xml elements names used to validate nesting of xmlrpc elements
+ // valuestack - array used for parsing arrays and structs
+ // lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings
+ // public:
+ // isf - used to indicate an xml parsing fault (3), invalid xmlrpc fault (2) or xmlrpc response fault (1)
+ // isf_reason - used for storing xmlrpc response fault string
+ // value - used to store the value in responses
+ // method - used to store method name in requests
+ // params - used to store parameters in requests
+ // pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values
+ // rt - 'methodcall', 'methodresponse', 'value' or 'fault' (the last one used only in EPI emulation mode)
public $_xh = array(
'ac' => '',
'stack' => array(),
'valuestack' => array(),
'isf' => 0,
'isf_reason' => '',
- 'method' => false, // so we can check later if we got a methodname or not
+ 'value' => null,
+ 'method' => false,
'params' => array(),
'pt' => array(),
'rt' => '',
@@ -41,6 +59,7 @@ class XMLParser
'BOOLEAN' => array('VALUE'),
'I4' => array('VALUE'),
'I8' => array('VALUE'),
+ 'EX:I8' => array('VALUE'),
'INT' => array('VALUE'),
'STRING' => array('VALUE'),
'DOUBLE' => array('VALUE'),
@@ -59,27 +78,132 @@ class XMLParser
'EX:NIL' => array('VALUE'), // only used when extension activated
);
+ /** @var array $parsing_options */
+ protected $parsing_options = array();
+ /** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */
+ protected $accept = 3;
+ /** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */
+ protected $maxChunkLength = 4194304;
+
+ /**
+ * @param array $options parser options (option => value), applied via xml_parser_set_option() on every call to parse()
+ */
+ public function __construct(array $options = array())
+ {
+ $this->parsing_options = $options;
+ }
+
+ /**
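+ * @param string $data the xml to parse. Results, and any error (isf, isf_reason), are stored in $this->_xh
+ * @param string $returnType one of self::RETURN_XMLRPCVALS, self::RETURN_EPIVALS, self::RETURN_PHP
+ * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE, self::ACCEPT_FAULT
+ * @param array $options parser options (option => value) for this call; applied after, and overriding, the ones passed to the constructor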
+ */
+ public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3, $options = array())
+ {
+ $this->_xh = array(
+ 'ac' => '',
+ 'stack' => array(),
+ 'valuestack' => array(),
+ 'isf' => 0,
+ 'isf_reason' => '',
+ 'value' => null,
+ 'method' => false, // so we can check later if we got a methodname or not
+ 'params' => array(),
+ 'pt' => array(),
+ 'rt' => '',
+ );
+
+ $len = strlen($data);
+
+ // we test for empty documents here to save on resource allocation and simplify the chunked-parsing loop below
+ if ($len == 0) {
+ $this->_xh['isf'] = 3;
+ $this->_xh['isf_reason'] = 'XML error 5: empty document';
+ return;
+ }
+
+ $parser = xml_parser_create();
+
+ foreach ($this->parsing_options as $key => $val) {
+ xml_parser_set_option($parser, $key, $val);
+ }
+ foreach ($options as $key => $val) {
+ xml_parser_set_option($parser, $key, $val);
+ }
+ // always set this, in case someone tries to disable it via options...
+ xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 1);
+
+ xml_set_object($parser, $this);
+
+ switch($returnType) {
+ case self::RETURN_PHP:
+ xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_fast');
+ break;
+ case self::RETURN_EPIVALS:
+ xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_epi');
+ break;
+ default:
+ xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee');
+ }
+
+ xml_set_character_data_handler($parser, 'xmlrpc_cd');
+ xml_set_default_handler($parser, 'xmlrpc_dh');
+
+ $this->accept = $accept;
+
+ // @see ticket #70 - we have to parse big xml documents in chunks to avoid errors
+ for ($offset = 0; $offset < $len; $offset += $this->maxChunkLength) {
+ $chunk = substr($data, $offset, $this->maxChunkLength);
+ // error handling: xml not well formed
+ if (!xml_parse($parser, $chunk, $offset + $this->maxChunkLength >= $len)) {
+ $errCode = xml_get_error_code($parser);
+ $errStr = sprintf('XML error %s: %s at line %d, column %d', $errCode, xml_error_string($errCode),
+ xml_get_current_line_number($parser), xml_get_current_column_number($parser));
+
+ $this->_xh['isf'] = 3;
+ $this->_xh['isf_reason'] = $errStr;
+ break;
+ }
+ }
+
+ xml_parser_free($parser);
+ }
+
/**
* xml parser handler function for opening element tags.
+ * @internal
+ * @param resource $parser
+ * @param string $name
+ * @param $attrs
+ * @param bool $acceptSingleVals DEPRECATED use the $accept parameter instead
*/
public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false)
{
// if invalid xmlrpc already detected, skip all processing
if ($this->_xh['isf'] < 2) {
+
// check for correct element nesting
- // top level element can only be of 2 types
- /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
- /// there is only a single top level element in xml anyway
if (count($this->_xh['stack']) == 0) {
- if ($name != 'METHODRESPONSE' && $name != 'METHODCALL' && (
- $name != 'VALUE' && !$acceptSingleVals)
- ) {
+ // top level element must be one of the types allowed by the $accept bitmask
+ /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
+ /// there is only a single top level element in xml anyway
+ // BC
+ if ($acceptSingleVals === false) {
+ $accept = $this->accept;
+ } else {
+ $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE | self::ACCEPT_VALUE;
+ }
+ if (($name == 'METHODCALL' && ($accept & self::ACCEPT_REQUEST)) ||
+ ($name == 'METHODRESPONSE' && ($accept & self::ACCEPT_RESPONSE)) ||
+ ($name == 'VALUE' && ($accept & self::ACCEPT_VALUE)) ||
+ ($name == 'FAULT' && ($accept & self::ACCEPT_FAULT))) {
+ $this->_xh['rt'] = strtolower($name);
+ } else {
$this->_xh['isf'] = 2;
- $this->_xh['isf_reason'] = 'missing top level xmlrpc element';
+ $this->_xh['isf_reason'] = 'missing top level xmlrpc element. Found: ' . $name;
return;
- } else {
- $this->_xh['rt'] = strtolower($name);
}
} else {
// not top level element: see if parent is OK
@@ -101,8 +225,17 @@ class XMLParser
$this->_xh['lv'] = 1;
$this->_xh['php_class'] = null;
break;
- case 'I4':
case 'I8':
+ case 'EX:I8':
+ if (PHP_INT_SIZE === 4) {
+ // INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
+ $this->_xh['isf'] = 2;
+ $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode";
+
+ return;
+ }
+ // fall through voluntarily
+ case 'I4':
case 'INT':
case 'STRING':
case 'BOOLEAN':
@@ -110,7 +243,7 @@ class XMLParser
case 'DATETIME.ISO8601':
case 'BASE64':
if ($this->_xh['vt'] != 'value') {
- //two data elements inside a value: an error occurred!
+ // two data elements inside a value: an error occurred!
$this->_xh['isf'] = 2;
$this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";
@@ -121,7 +254,7 @@ class XMLParser
case 'STRUCT':
case 'ARRAY':
if ($this->_xh['vt'] != 'value') {
- //two data elements inside a value: an error occurred!
+ // two data elements inside a value: an error occurred!
$this->_xh['isf'] = 2;
$this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";
@@ -141,7 +274,7 @@ class XMLParser
break;
case 'DATA':
if ($this->_xh['vt'] != 'data') {
- //two data elements inside a value: an error occurred!
+ // two data elements inside a value: an error occurred!
$this->_xh['isf'] = 2;
$this->_xh['isf_reason'] = "found two data elements inside an array element";
@@ -161,7 +294,8 @@ class XMLParser
$this->_xh['isf'] = 1;
break;
case 'MEMBER':
- $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = ''; // set member name to null, in case we do not find in the xml later on
+ // set member name to an empty string, in case we do not find it in the xml later on
+ $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = '';
//$this->_xh['ac']='';
// Drop trough intentionally
case 'PARAM':
@@ -172,7 +306,7 @@ class XMLParser
case 'EX:NIL':
if (PhpXmlRpc::$xmlrpc_null_extension) {
if ($this->_xh['vt'] != 'value') {
- //two data elements inside a value: an error occurred!
+ // two data elements inside a value: an error occurred!
$this->_xh['isf'] = 2;
$this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";
@@ -184,7 +318,7 @@ class XMLParser
// we do not support the extension, so
// drop through intentionally
default:
- /// INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
+ // INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
$this->_xh['isf'] = 2;
$this->_xh['isf_reason'] = "found not-xmlrpc xml element $name";
break;
@@ -201,7 +335,12 @@ class XMLParser
}
/**
- * Used in decoding xml chunks that might represent single xmlrpc values.
+ * xml parser handler function for opening element tags.
+ * Used in decoding xml chunks that might represent single xmlrpc values as well as requests, responses.
+ * @deprecated
+ * @param resource $parser
+ * @param $name
+ * @param $attrs
*/
public function xmlrpc_se_any($parser, $name, $attrs)
{
@@ -210,8 +349,12 @@ class XMLParser
/**
* xml parser handler function for close element tags.
+ * @internal
+ * @param resource $parser
+ * @param string $name
+ * @param int $rebuildXmlrpcvals >0 for rebuilding xmlrpcvals, 0 for rebuilding php values, -1 for xmlrpc-extension compatibility
*/
- public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = true)
+ public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = 1)
{
if ($this->_xh['isf'] < 2) {
// push this element name from stack
@@ -228,7 +371,7 @@ class XMLParser
$this->_xh['vt'] = Value::$xmlrpcString;
}
- if ($rebuildXmlrpcvals) {
+ if ($rebuildXmlrpcvals > 0) {
// build the xmlrpc val out of the data received, and substitute it
$temp = new Value($this->_xh['value'], $this->_xh['vt']);
// in case we got info about underlying php class, save it
@@ -236,32 +379,39 @@ class XMLParser
if (isset($this->_xh['php_class'])) {
$temp->_php_class = $this->_xh['php_class'];
}
- // check if we are inside an array or struct:
- // if value just built is inside an array, let's move it into array on the stack
- $vscount = count($this->_xh['valuestack']);
- if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
- $this->_xh['valuestack'][$vscount - 1]['values'][] = $temp;
- } else {
- $this->_xh['value'] = $temp;
+ $this->_xh['value'] = $temp;
+ } elseif ($rebuildXmlrpcvals < 0) {
+ if ($this->_xh['vt'] == Value::$xmlrpcDateTime) {
+ $this->_xh['value'] = (object)array(
+ 'xmlrpc_type' => 'datetime',
+ 'scalar' => $this->_xh['value'],
+ 'timestamp' => \PhpXmlRpc\Helper\Date::iso8601Decode($this->_xh['value'])
+ );
+ } elseif ($this->_xh['vt'] == Value::$xmlrpcBase64) {
+ $this->_xh['value'] = (object)array(
+ 'xmlrpc_type' => 'base64',
+ 'scalar' => $this->_xh['value']
+ );
}
} else {
- /// @todo this needs to treat correctly php-serialized objects,
+ /// @todo this should handle php-serialized objects,
/// since std deserializing is done by php_xmlrpc_decode,
/// which we will not be calling...
- if (isset($this->_xh['php_class'])) {
- }
+ //if (isset($this->_xh['php_class'])) {
+ //}
+ }
- // check if we are inside an array or struct:
- // if value just built is inside an array, let's move it into array on the stack
- $vscount = count($this->_xh['valuestack']);
- if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
- $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value'];
- }
+ // check if we are inside an array or struct:
+ // if value just built is inside an array, let's move it into array on the stack
+ $vscount = count($this->_xh['valuestack']);
+ if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
+ $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value'];
}
break;
case 'BOOLEAN':
case 'I4':
case 'I8':
+ case 'EX:I8':
case 'INT':
case 'STRING':
case 'DOUBLE':
@@ -274,7 +424,7 @@ class XMLParser
$this->_xh['value'] = $this->_xh['ac'];
} elseif ($name == 'DATETIME.ISO8601') {
if (!preg_match('/^[0-9]{8}T[0-9]{2}:[0-9]{2}:[0-9]{2}$/', $this->_xh['ac'])) {
- error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in DATETIME: ' . $this->_xh['ac']);
+ Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid value received in DATETIME: ' . $this->_xh['ac']);
}
$this->_xh['vt'] = Value::$xmlrpcDateTime;
$this->_xh['value'] = $this->_xh['ac'];
@@ -293,7 +443,7 @@ class XMLParser
} else {
// log if receiving something strange, even though we set the value to false anyway
if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') != 0) {
- error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in BOOLEAN: ' . $this->_xh['ac']);
+ Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid value received in BOOLEAN: ' . $this->_xh['ac']);
}
$this->_xh['value'] = false;
}
@@ -303,18 +453,18 @@ class XMLParser
// NOTE: regexp could be much stricter than this...
if (!preg_match('/^[+-eE0123456789 \t.]+$/', $this->_xh['ac'])) {
/// @todo: find a better way of throwing an error than this!
- error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in DOUBLE: ' . $this->_xh['ac']);
+ Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': non numeric value received in DOUBLE: ' . $this->_xh['ac']);
$this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
} else {
// it's ok, add it on
$this->_xh['value'] = (double)$this->_xh['ac'];
}
} else {
- // we have an I4/INT
+ // we have an I4/I8/INT
// we must check that only 0123456789- are characters here
if (!preg_match('/^[+-]?[0123456789 \t]+$/', $this->_xh['ac'])) {
/// @todo find a better way of throwing an error than this!
- error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in INT: ' . $this->_xh['ac']);
+ Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': non numeric value received in INT: ' . $this->_xh['ac']);
$this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
} else {
// it's ok, add it on
@@ -333,7 +483,7 @@ class XMLParser
$vscount = count($this->_xh['valuestack']);
$this->_xh['valuestack'][$vscount - 1]['values'][$this->_xh['valuestack'][$vscount - 1]['name']] = $this->_xh['value'];
} else {
- error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside STRUCT in received xml');
+ Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': missing VALUE inside STRUCT in received xml');
}
break;
case 'DATA':
@@ -356,7 +506,7 @@ class XMLParser
$this->_xh['params'][] = $this->_xh['value'];
$this->_xh['pt'][] = $this->_xh['vt'];
} else {
- error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside PARAM in received xml');
+ Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': missing VALUE inside PARAM in received xml');
}
break;
case 'METHODNAME':
@@ -386,14 +536,31 @@ class XMLParser
/**
* Used in decoding xmlrpc requests/responses without rebuilding xmlrpc Values.
+ * @internal
+ * @param resource $parser
+ * @param string $name
*/
public function xmlrpc_ee_fast($parser, $name)
{
- $this->xmlrpc_ee($parser, $name, false);
+ $this->xmlrpc_ee($parser, $name, 0);
+ }
+
+ /**
+ * Used in decoding xmlrpc requests/responses while building xmlrpc-extension Values (plain php for all but base64 and datetime).
+ * @internal
+ * @param resource $parser
+ * @param string $name
+ */
+ public function xmlrpc_ee_epi($parser, $name)
+ {
+ $this->xmlrpc_ee($parser, $name, -1);
}
/**
* xml parser handler function for character data.
+ * @internal
+ * @param resource $parser
+ * @param string $data
*/
public function xmlrpc_cd($parser, $data)
{
@@ -410,6 +577,9 @@ class XMLParser
/**
* xml parser handler function for 'other stuff', ie. not char data or
* element start/end tag. In fact it only gets called on unknown entities...
+ * @internal
+ * @param $parser
+ * @param string $data
*/
public function xmlrpc_dh($parser, $data)
{
@@ -420,7 +590,7 @@ class XMLParser
}
}
- return true;
+ //return true;
}
/**