From: gggeek Date: Sun, 22 Jan 2023 16:40:07 +0000 (+0000) Subject: allow the library to use a stricter mode when parsing invalid xml X-Git-Tag: 4.10.0~107 X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=0e4ac06681f2ae6e83072bca1d69622330dac7cd;p=plcapi.git allow the library to use a stricter mode when parsing invalid xml --- diff --git a/NEWS.md b/NEWS.md index 01224a20..02d949eb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -18,12 +18,17 @@ In order to enable this, you should set `PhpXmlRpc\PhpXmlRpc::$xmlrpc_return_datetimes = true`. NB: since the xml-rpc spec mandates that no Timezone is used on the wire for dateTime values, the DateTime objects created by the library will be set to the default php timzeone, set using the 'date.timezone' ini setting. + NB: if the received strings are not parseable as dates, NULL will be returned instead of an object. + +* new: allow the library to be stricter in parsing the received xml: by setting + `PhpXmlRpc\PhpXmlRpc::$xmlrpc_reject_invalid_values = true`, incoming xml which has data not conforming to the expected + format for value elements of type date, int, float, double, base64 will be rejected instead of passed on to the application * new: it is now possible to tell the library to allow non-standard formats for received datetime value, such as f.e. datetimes with a timezone specifier, by setting a custom value to `PhpXmlRpc\PhpXmlRpc::$xmlrpc_datetime_format`. Note that the regex used by default to validate the incoming date strings has been tightened not to accept clearly - invalid dates such as a month '13', day '32' or hour '25'. The vale '60' is now allowed for seconds, as required by - the ISO 8601 standard for leap seconds. + invalid dates such as a month '13', day '32' or hour '25'. The value '60' is now allowed for seconds, as required for + leap seconds by the ISO 8601 standard. * fixed: when calling `Client::multicall()` with `$client->return_type = 'xml'`, we would be always falling back to non-multicall requests @@ -41,7 +46,8 @@ closer to supporting DIC patterns * new: when calling `Wrapper::wrapXmlrpcMethod` and `wrapXmlrpcServer`, it is possible to pass 'throw_on_fault' as option - to argument `$extraOptions`. This will make the generated function throw on errors instead of returning a Response object + to argument `$extraOptions`. This will make the generated function throw on http errors and xml-rpc faults instead of + returning a Response object * new: when calling `Wrapper::wrapXmlrpcMethod`, `wrapXmlrpcServer`, `wrapPhpFunction` and `wrapPhpClass` it is possible to pass 'encode_nulls' as option to argument `$extraOptions`. This will make the generated code emit a '' @@ -64,8 +70,8 @@ * improved: removed usage of `extension_loaded` in favour of `function_exists` when checking for mbstring. This allows for mbstring functions to be polyfilled -* improved: the code generated by the various code-generating methods of `Wrapper` are formatted slightly better, and - include more phpdoc blocks too +* improved: the code generated by the various code-generating methods of `Wrapper` are formatted better, and include + more phpdoc blocks too * improved: made the `Wrapper` and `Client` classes easy to subclass for use by the PhpJsonRpc library diff --git a/src/Helper/XMLParser.php b/src/Helper/XMLParser.php index 1666a82a..fefecc09 100644 --- a/src/Helper/XMLParser.php +++ b/src/Helper/XMLParser.php @@ -98,7 +98,7 @@ class XMLParser //protected $accept = 3; /** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */ protected $maxChunkLength = 4194304; - /** @var array */ + /** @var array supported keys: accept, target_charset, methodname_callback, xmlrpc_null_extension, xmlrpc_return_datetimes */ protected $current_parsing_options = array(); public function getLogger() @@ -120,7 +120,12 @@ class XMLParser /** * @param array $options integer keys: options passed to the xml parser - * string keys: target_charset, methodname_callback, xmlrpc_null_extension, xmlrpc_return_datetimes + * string keys: + * - target_charset (string) + * - methodname_callback (callable) + * - xmlrpc_null_extension (bool) + * - xmlrpc_return_datetimes (bool) + * - xmlrpc_reject_invalid_values (bool) */ public function __construct(array $options = array()) { @@ -132,7 +137,9 @@ class XMLParser * @param string $returnType self::RETURN_XMLRPCVALS, self::RETURN_PHP, self::RETURN_EPIVALS * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE * @param array $options integer-key options are passed to the xml parser, string-key options are used independently. - * These options are the options received in the constructor. + * These options are added to options received in the constructor. + * Note that if options xmlrpc_null_extension, xmlrpc_return_datetimes and xmlrpc_reject_invalid_values + * are not set, the default settings from PhpXmlRpc\PhpXmlRpc are used * @return void the caller has to look into $this->_xh to find the results * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions) * @@ -197,6 +204,7 @@ class XMLParser case 'xmlrpc_null_extension': case 'xmlrpc_return_datetimes': + case 'xmlrpc_reject_invalid_values': $this->current_parsing_options[$key] = $val; break; @@ -213,6 +221,9 @@ class XMLParser if (!isset($this->current_parsing_options['xmlrpc_return_datetimes'])) { $this->current_parsing_options['xmlrpc_return_datetimes'] = PhpXmlRpc::$xmlrpc_return_datetimes; } + if (!isset($this->current_parsing_options['xmlrpc_reject_invalid_values'])) { + $this->current_parsing_options['xmlrpc_reject_invalid_values'] = PhpXmlRpc::$xmlrpc_reject_invalid_values; + } // NB: we use '' instead of null to force charset detection from the xml declaration $parser = xml_parser_create(''); @@ -544,72 +555,119 @@ class XMLParser } break; + case 'STRING': + $this->_xh['vt'] = strtolower($name); + $this->_xh['lv'] = 3; // indicate we've found a value + $this->_xh['value'] = $this->_xh['ac']; + break; + case 'BOOLEAN': + $this->_xh['vt'] = strtolower($name); + $this->_xh['lv'] = 3; // indicate we've found a value + // We translate boolean 1 or 0 into PHP constants true or false. Strings 'true' and 'false' are accepted, + // even though the spec never mentions them (see e.g. Blogger api docs) + // NB: this simple checks helps a lot sanitizing input, i.e. no security problems around here + if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') == 0) { + $this->_xh['value'] = true; + } else { + // log if receiving something strange, even though we set the value to false anyway + if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') != 0) { + $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid data received in BOOLEAN value: ' . $this->_xh['ac']); + if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) + { + $this->_xh['isf'] = 2; + $this->_xh['isf_reason'] = 'Invalid data received in BOOLEAN value: ' . $this->_xh['ac']; + return; + } + } + $this->_xh['value'] = false; + } + break; + case 'I4': case 'I8': case 'EX:I8': case 'INT': - case 'STRING': - case 'DOUBLE': - case 'DATETIME.ISO8601': - case 'BASE64': $this->_xh['vt'] = strtolower($name); - /// @todo: optimization creep - remove the if/elseif cycle below - /// since the case() in which we are already did that - if ($name == 'STRING') { - $this->_xh['value'] = $this->_xh['ac']; - } elseif ($name == 'DATETIME.ISO8601') { - if (!preg_match(PhpXmlRpc::$xmlrpc_datetime_format, $this->_xh['ac'])) { - $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid value received in DATETIME: ' . $this->_xh['ac']); + $this->_xh['lv'] = 3; // indicate we've found a value + // we must check that only 0123456789- are characters here + if (!preg_match('/^[+-]?[0123456789 \t]+$/', $this->_xh['ac'])) { + $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': non numeric data received in INT: ' . $this->_xh['ac']); + if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) + { + $this->_xh['isf'] = 2; + $this->_xh['isf_reason'] = 'Non numeric data received in INT value: ' . $this->_xh['ac']; + return; } - $this->_xh['vt'] = Value::$xmlrpcDateTime; - if ($this->current_parsing_options['xmlrpc_return_datetimes']) { - $this->_xh['value'] = new \DateTime($this->_xh['ac']); - } else { - $this->_xh['value'] = $this->_xh['ac']; + /// @todo: find a better way of reporting an error value than this! Use NaN? + $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND'; + } else { + // it's ok, add it on + $this->_xh['value'] = (int)$this->_xh['ac']; + } + break; + + case 'DOUBLE': + $this->_xh['vt'] = strtolower($name); + $this->_xh['lv'] = 3; // indicate we've found a value + // we must check that only 0123456789-. are characters here + // NOTE: regexp could be much stricter than this... + if (!preg_match('/^[+-eE0123456789 \t.]+$/', $this->_xh['ac'])) { + $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': non numeric data received in DOUBLE value: ' . $this->_xh['ac']); + if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) + { + $this->_xh['isf'] = 2; + $this->_xh['isf_reason'] = 'Non numeric data received in DOUBLE value: ' . $this->_xh['ac']; + return; } - } elseif ($name == 'BASE64') { - /// @todo check for failure of base64 decoding / catch warnings - $this->_xh['value'] = base64_decode($this->_xh['ac']); - } elseif ($name == 'BOOLEAN') { - // special case here: we translate boolean 1 or 0 into PHP constants true or false. - // Strings 'true' and 'false' are accepted, even though the spec never mentions them (see e.g. - // Blogger api docs) - // NB: this simple checks helps a lot sanitizing input, i.e. no security problems around here - if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') == 0) { - $this->_xh['value'] = true; - } else { - // log if receiving something strange, even though we set the value to false anyway - if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') != 0) { - $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid value received in BOOLEAN: ' . $this->_xh['ac']); - } - $this->_xh['value'] = false; + + $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND'; + } else { + // it's ok, add it on + $this->_xh['value'] = (double)$this->_xh['ac']; + } + break; + + case 'DATETIME.ISO8601': + $this->_xh['vt'] = strtolower($name); + $this->_xh['lv'] = 3; // indicate we've found a value + if (!preg_match(PhpXmlRpc::$xmlrpc_datetime_format, $this->_xh['ac'])) { + $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid data received in DATETIME value: ' . $this->_xh['ac']); + if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) + { + $this->_xh['isf'] = 2; + $this->_xh['isf_reason'] = 'Invalid data received in DATETIME value: ' . $this->_xh['ac']; + return; } - } elseif ($name == 'DOUBLE') { - // we have a DOUBLE - // we must check that only 0123456789-. are characters here - // NOTE: regexp could be much stricter than this... - if (!preg_match('/^[+-eE0123456789 \t.]+$/', $this->_xh['ac'])) { - /// @todo: find a better way of throwing an error than this! - $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': non numeric value received in DOUBLE: ' . $this->_xh['ac']); - $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND'; - } else { - // it's ok, add it on - $this->_xh['value'] = (double)$this->_xh['ac']; + } + $this->_xh['vt'] = Value::$xmlrpcDateTime; + if ($this->current_parsing_options['xmlrpc_return_datetimes']) { + try { + $this->_xh['value'] = new \DateTime($this->_xh['ac']); + } catch(\Exception $e) { + // q: what to do? we can not guarantee that a valid date can be created. Return null or throw? + $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': ' . $e->getMessage()); + $this->_xh['value'] = null; } } else { - // we have an I4/I8/INT - // we must check that only 0123456789- are characters here - if (!preg_match('/^[+-]?[0123456789 \t]+$/', $this->_xh['ac'])) { - /// @todo find a better way of throwing an error than this! - $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': non numeric value received in INT: ' . $this->_xh['ac']); - $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND'; - } else { - // it's ok, add it on - $this->_xh['value'] = (int)$this->_xh['ac']; - } + $this->_xh['value'] = $this->_xh['ac']; } + break; + + case 'BASE64': + $this->_xh['vt'] = strtolower($name); $this->_xh['lv'] = 3; // indicate we've found a value + /// @todo check: should we silence warnings here? + $v = base64_decode($this->_xh['ac']); + if ($v === false) { + $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid data received in BASE64 value'); + if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) { + $this->_xh['isf'] = 2; + $this->_xh['isf_reason'] = 'Invalid data received in BASE64 value'; + return; + } + } + $this->_xh['value'] = $v; break; case 'NAME': @@ -620,8 +678,13 @@ class XMLParser // add to array in the stack the last element built, unless no VALUE was found if ($this->_xh['vt']) { $vscount = count($this->_xh['valuestack']); + if (!isset($this->_xh['valuestack'][$vscount - 1]['name'])) { + /// @todo handle the case of the NAME element actually following the VALUE in the xml!!! + $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': missing NAME inside STRUCT in received xml'); + } $this->_xh['valuestack'][$vscount - 1]['values'][$this->_xh['valuestack'][$vscount - 1]['name']] = $this->_xh['value']; } else { + /// @todo return a parsing error? $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': missing VALUE inside STRUCT in received xml'); } break; @@ -647,6 +710,7 @@ class XMLParser $this->_xh['params'][] = $this->_xh['value']; $this->_xh['pt'][] = $this->_xh['vt']; } else { + /// @todo return a parsing error? $this->getLogger()->errorLog('XML-RPC: ' . __METHOD__ . ': missing VALUE inside PARAM in received xml'); } break; @@ -668,9 +732,13 @@ class XMLParser $this->_xh['value'] = null; $this->_xh['lv'] = 3; break; + } else if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) { + $this->_xh['isf'] = 2; + $this->_xh['isf_reason'] = 'Invalid NIL value received. Support for NIL can be enabled via \\PhpXmlRpc\\PhpXmlRpc::$xmlrpc_null_extension'; + return; } + // drop through intentionally if nil extension not enabled - // drop through intentionally if nil extension not enabled case 'PARAMS': case 'FAULT': case 'METHODCALL': @@ -680,7 +748,7 @@ class XMLParser default: // End of INVALID ELEMENT // Should we add an assert here for unreachable code? When an invalid element is found in xmlrpc_se, - // + // $this->_xh['isf'] is set to 2 break; } } diff --git a/src/PhpXmlRpc.php b/src/PhpXmlRpc.php index 22c4b64a..c6c01b3b 100644 --- a/src/PhpXmlRpc.php +++ b/src/PhpXmlRpc.php @@ -137,10 +137,19 @@ class PhpXmlRpc /** * @var bool - * Set to TRUE to make the library use DateTime objects instead of strings for all values parsed from incoming XML + * Set to TRUE to make the library use DateTime objects instead of strings for all values parsed from incoming XML. + * NB: if the received strings are not parseable as dates, NULL will be returned! */ public static $xmlrpc_return_datetimes = false; + /** + * @var bool + * Set to TRUE to make the library reject incoming xml which uses invalid data from xml-rpc Value elements, such + * as base64 strings which can not be decoded, dateTime strings which do not represent a valid date, invalid bools, + * floats and inteers + */ + public static $xmlrpc_reject_invalid_values = false; + /** * @var bool * Set to TRUE to enable encoding of php NULL values to instead of