WIP introduce namespaces; move all global functions and variables to classes
[plcapi.git] / src / Helper / XMLParser.php
1 <?php
2
3 namespace PhpXmlRpc\Helper;
4
5 use PhpXmlRpc\PhpXmlRpc;
6 use PhpXmlRpc\Value;
7
/**
 * Deals with parsing the XML.
 *
 * The methods of this class have the signatures of the callbacks used by an
 * event-driven xml parser (start element, end element, character data and
 * default handler). While the callbacks are invoked, the state of the parsing
 * is accumulated in the public member $_xh, from which the caller can later
 * retrieve method name, parameters, decoded value and error condition.
 *
 * NB: element names are compared against upper-case literals, which assumes
 * that the xml parser delivering the events has case folding enabled.
 */
class XMLParser
{
    // used to store state during parsing
    // quick explanation of components:
    //   ac - used to accumulate values (character data of the current element)
    //   stack - array with genealogy of xml elements names:
    //           used to validate nesting of xmlrpc elements
    //   valuestack - array used for parsing arrays and structs
    //   lv - used to indicate "looking for a value": implements
    //        the logic to allow values with no types to be strings
    //        (set to 1 on VALUE start, 0 on any other element start, 3 when a complete value has been found)
    //   isf - used to indicate a parsing fault (2) or xmlrpcresp fault (1)
    //   isf_reason - used for storing xmlrpcresp fault string
    //   method - used to store method name
    //   params - used to store parameters in method calls
    //   pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values
    //   rt - 'methodcall' or 'methodresponse' ('value' when parsing a single value)
    // the following components are not part of the initial state, they get added while parsing:
    //   vt - type of the value being parsed: 'value' (no type element found yet), 'data' (inside an array,
    //        waiting for the DATA element), null, or the name of an xmlrpc type
    //   value - the last value which has been completely parsed
    //   php_class - name of the php class found in the PHP_CLASS attribute of the last closed STRUCT/ARRAY element
    public $_xh = array(
        'ac' => '',
        'stack' => array(),
        'valuestack' => array(),
        'isf' => 0,
        'isf_reason' => '',
        'method' => false, // so we can check later if we got a methodname or not
        'params' => array(),
        'pt' => array(),
        'rt' => ''
    );

    // for every known (non top level) xmlrpc element, the list of elements which can be its direct parent
    public $xmlrpc_valid_parents = array(
        'VALUE' => array('MEMBER', 'DATA', 'PARAM', 'FAULT'),
        'BOOLEAN' => array('VALUE'),
        'I4' => array('VALUE'),
        'INT' => array('VALUE'),
        'STRING' => array('VALUE'),
        'DOUBLE' => array('VALUE'),
        'DATETIME.ISO8601' => array('VALUE'),
        'BASE64' => array('VALUE'),
        'MEMBER' => array('STRUCT'),
        'NAME' => array('MEMBER'),
        'DATA' => array('ARRAY'),
        'ARRAY' => array('VALUE'),
        'STRUCT' => array('VALUE'),
        'PARAM' => array('PARAMS'),
        'METHODNAME' => array('METHODCALL'),
        'PARAMS' => array('METHODCALL', 'METHODRESPONSE'),
        'FAULT' => array('METHODRESPONSE'),
        'NIL' => array('VALUE'), // only used when extension activated
        'EX:NIL' => array('VALUE') // only used when extension activated
    );

    /**
     * xml parser handler function for opening element tags.
     *
     * Validates the nesting of the element, then prepares the parsing state
     * for the data which is expected inside of it. On any error, 'isf' is set
     * to 2 and 'isf_reason' to a description of the problem.
     *
     * @param mixed $parser the xml parser invoking the handler (unused)
     * @param string $name name of the element being opened
     * @param array $attrs attributes of the element; only PHP_CLASS is looked at, for STRUCT and ARRAY
     * @param bool $accept_single_vals when true, a VALUE element is accepted as top level element,
     *                                 besides METHODCALL and METHODRESPONSE
     */
    public function xmlrpc_se($parser, $name, $attrs, $accept_single_vals = false)
    {
        // if invalid xmlrpc already detected, skip all processing
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // check for correct element nesting
        /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
        ///       there is only a single top level element in xml anyway
        if (count($this->_xh['stack']) == 0) {
            // top level element can only be of 2 types, plus VALUE when single values are allowed.
            // NB: VALUE has to be rejected when $accept_single_vals is false, and any other element has to be
            // rejected here regardless of $accept_single_vals
            $is_envelope = ($name == 'METHODRESPONSE' || $name == 'METHODCALL');
            $is_single_value = ($accept_single_vals && $name == 'VALUE');
            if (!$is_envelope && !$is_single_value) {
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = 'missing top level xmlrpc element';

                return;
            }
            $this->_xh['rt'] = strtolower($name);
        } else {
            // not top level element: see if parent is OK
            $parent = end($this->_xh['stack']);
            if (!array_key_exists($name, $this->xmlrpc_valid_parents)
                || !in_array($parent, $this->xmlrpc_valid_parents[$name], true)
            ) {
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "xmlrpc element $name cannot be child of $parent";

                return;
            }
        }

        switch ($name) {
            // optimize for speed switch cases: most common cases first
            case 'VALUE':
                /// @todo we could check for 2 VALUE elements inside a MEMBER or PARAM element
                $this->_xh['vt'] = 'value'; // indicator: no value found yet
                $this->_xh['ac'] = '';
                $this->_xh['lv'] = 1;
                $this->_xh['php_class'] = null;
                break;
            case 'I4':
            case 'INT':
            case 'STRING':
            case 'BOOLEAN':
            case 'DOUBLE':
            case 'DATETIME.ISO8601':
            case 'BASE64':
                if ($this->_xh['vt'] != 'value') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                    return;
                }
                $this->_xh['ac'] = ''; // reset the accumulator
                break;
            case 'STRUCT':
            case 'ARRAY':
                if ($this->_xh['vt'] != 'value') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                    return;
                }
                // create an empty array to hold child values, and push it onto appropriate stack
                $cur_val = array(
                    'values' => array(),
                    'type' => $name,
                );
                // check for out-of-band information to rebuild php objs
                // and in case it is found, save it
                if (isset($attrs['PHP_CLASS'])) {
                    $cur_val['php_class'] = $attrs['PHP_CLASS'];
                }
                $this->_xh['valuestack'][] = $cur_val;
                $this->_xh['vt'] = 'data'; // be prepared for a data element next
                break;
            case 'DATA':
                if ($this->_xh['vt'] != 'data') {
                    // two data elements inside an array: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "found two data elements inside an array element";

                    return;
                }
                break;
            case 'METHODCALL':
            case 'METHODRESPONSE':
            case 'PARAMS':
                // valid elements that add little to processing
                break;
            case 'METHODNAME':
            case 'NAME':
                /// @todo we could check for 2 NAME elements inside a MEMBER element
                $this->_xh['ac'] = '';
                break;
            case 'FAULT':
                $this->_xh['isf'] = 1;
                break;
            case 'MEMBER':
                // set member name to an empty string, in case we do not find it in the xml later on
                $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = '';
                // Drop through intentionally
            case 'PARAM':
                // clear value type, so we can check later if no value has been passed for this param/member
                $this->_xh['vt'] = null;
                break;
            case 'NIL':
            case 'EX:NIL':
                if (PhpXmlRpc::$xmlrpc_null_extension) {
                    if ($this->_xh['vt'] != 'value') {
                        // two data elements inside a value: an error occurred!
                        $this->_xh['isf'] = 2;
                        $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                        return;
                    }
                    $this->_xh['ac'] = ''; // reset the accumulator
                    break;
                }
                // we do not support the <NIL/> extension, so
                // drop through intentionally
            default:
                /// INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "found not-xmlrpc xml element $name";
                break;
        }

        // Save current element name to stack, to validate nesting
        $this->_xh['stack'][] = $name;

        /// @todo optimization creep: move this inside the big switch() above
        if ($name != 'VALUE') {
            $this->_xh['lv'] = 0;
        }
    }

    /**
     * Used in decoding xml chunks that might represent single xmlrpc values:
     * same as xmlrpc_se, but a VALUE element is accepted as top level element.
     *
     * @param mixed $parser the xml parser invoking the handler (unused)
     * @param string $name name of the element being opened
     * @param array $attrs attributes of the element
     */
    public function xmlrpc_se_any($parser, $name, $attrs)
    {
        $this->xmlrpc_se($parser, $name, $attrs, true);
    }

    /**
     * xml parser handler function for close element tags.
     *
     * Converts the character data accumulated for the element into a php value
     * (or a Value object) and stores it where its parent element expects to find it.
     *
     * @param mixed $parser the xml parser invoking the handler (unused)
     * @param string $name name of the element being closed
     * @param bool $rebuild_xmlrpcvals when true, every VALUE gets wrapped into a Value object;
     *                                 when false, plain php values are kept
     */
    public function xmlrpc_ee($parser, $name, $rebuild_xmlrpcvals = true)
    {
        // if invalid xmlrpc already detected, skip all processing
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // pop this element name from stack
        // NB: if XML validates, correct opening/closing is guaranteed and
        // we do not have to check for $name == the popped element.
        // we also checked for proper nesting at start of elements...
        array_pop($this->_xh['stack']);

        switch ($name) {
            case 'VALUE':
                // This if() detects if no scalar was inside <VALUE></VALUE>
                if ($this->_xh['vt'] == 'value') {
                    $this->_xh['value'] = $this->_xh['ac'];
                    $this->_xh['vt'] = Value::$xmlrpcString;
                }

                if ($rebuild_xmlrpcvals) {
                    // build the xmlrpc val out of the data received, and substitute it
                    $temp = new Value($this->_xh['value'], $this->_xh['vt']);
                    // in case we got info about underlying php class, save it
                    // in the object we're rebuilding
                    if (isset($this->_xh['php_class'])) {
                        $temp->_php_class = $this->_xh['php_class'];
                    }
                    // check if we are inside an array or struct:
                    // if value just built is inside an array, let's move it into array on the stack
                    $vscount = count($this->_xh['valuestack']);
                    if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
                        $this->_xh['valuestack'][$vscount - 1]['values'][] = $temp;
                    } else {
                        $this->_xh['value'] = $temp;
                    }
                } else {
                    /// @todo this needs to treat correctly php-serialized objects (ie. $this->_xh['php_class']),
                    /// since std deserializing is done by php_xmlrpc_decode,
                    /// which we will not be calling...

                    // check if we are inside an array or struct:
                    // if value just built is inside an array, let's move it into array on the stack
                    $vscount = count($this->_xh['valuestack']);
                    if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
                        $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value'];
                    }
                }
                break;
            case 'BOOLEAN':
            case 'I4':
            case 'INT':
            case 'STRING':
            case 'DOUBLE':
            case 'DATETIME.ISO8601':
            case 'BASE64':
                $this->_xh['vt'] = strtolower($name);
                /// @todo: optimization creep - remove the if/elseif cycle below
                /// since the case() in which we are already did that
                if ($name == 'STRING') {
                    $this->_xh['value'] = $this->_xh['ac'];
                } elseif ($name == 'DATETIME.ISO8601') {
                    // an invalid date is logged, but its value is kept as received
                    if (!preg_match('/^[0-9]{8}T[0-9]{2}:[0-9]{2}:[0-9]{2}$/', $this->_xh['ac'])) {
                        error_log('XML-RPC: invalid value received in DATETIME: ' . $this->_xh['ac']);
                    }
                    $this->_xh['vt'] = Value::$xmlrpcDateTime;
                    $this->_xh['value'] = $this->_xh['ac'];
                } elseif ($name == 'BASE64') {
                    /// @todo check for failure of base64 decoding / catch warnings
                    $this->_xh['value'] = base64_decode($this->_xh['ac']);
                } elseif ($name == 'BOOLEAN') {
                    // special case here: we translate boolean 1 or 0 into PHP
                    // constants true or false.
                    // Strings 'true' and 'false' are accepted, even though the
                    // spec never mentions them (see eg. Blogger api docs)
                    // NB: this simple checks helps a lot sanitizing input, ie no
                    // security problems around here
                    if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') == 0) {
                        $this->_xh['value'] = true;
                    } else {
                        // log if receiving something strange, even though we set the value to false anyway
                        if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') != 0) {
                            error_log('XML-RPC: invalid value received in BOOLEAN: ' . $this->_xh['ac']);
                        }
                        $this->_xh['value'] = false;
                    }
                } elseif ($name == 'DOUBLE') {
                    // we have a DOUBLE
                    // we must check that only 0123456789+-.eE<space><tab> are characters here.
                    // NB: the hyphen has to be escaped: an unescaped '+-e' inside the character class is the
                    // range of all characters from '+' to 'e', which includes uppercase letters and punctuation
                    // NOTE: regexp could be much stricter than this...
                    if (!preg_match('/^[+\-eE0123456789 \t.]+$/', $this->_xh['ac'])) {
                        /// @todo: find a better way of throwing an error than this!
                        error_log('XML-RPC: non numeric value received in DOUBLE: ' . $this->_xh['ac']);
                        $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
                    } else {
                        // it's ok, add it on
                        $this->_xh['value'] = (double)$this->_xh['ac'];
                    }
                } else {
                    // we have an I4/INT
                    // we must check that only 0123456789-<space> are characters here
                    if (!preg_match('/^[+-]?[0123456789 \t]+$/', $this->_xh['ac'])) {
                        /// @todo find a better way of throwing an error than this!
                        error_log('XML-RPC: non numeric value received in INT: ' . $this->_xh['ac']);
                        $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
                    } else {
                        // it's ok, add it on
                        $this->_xh['value'] = (int)$this->_xh['ac'];
                    }
                }
                $this->_xh['lv'] = 3; // indicate we've found a value
                break;
            case 'NAME':
                $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = $this->_xh['ac'];
                break;
            case 'MEMBER':
                // add to array in the stack the last element built,
                // unless no VALUE was found
                if ($this->_xh['vt']) {
                    $vscount = count($this->_xh['valuestack']);
                    $member_name = $this->_xh['valuestack'][$vscount - 1]['name'];
                    $this->_xh['valuestack'][$vscount - 1]['values'][$member_name] = $this->_xh['value'];
                } else {
                    error_log('XML-RPC: missing VALUE inside STRUCT in received xml');
                }
                break;
            case 'DATA':
                $this->_xh['vt'] = null; // reset this to check for 2 data elements in a row - even if they're empty
                break;
            case 'STRUCT':
            case 'ARRAY':
                // fetch out of stack array of values, and promote it to current value
                $curr_val = array_pop($this->_xh['valuestack']);
                $this->_xh['value'] = $curr_val['values'];
                $this->_xh['vt'] = strtolower($name);
                if (isset($curr_val['php_class'])) {
                    $this->_xh['php_class'] = $curr_val['php_class'];
                }
                break;
            case 'PARAM':
                // add to array of params the current value,
                // unless no VALUE was found
                if ($this->_xh['vt']) {
                    $this->_xh['params'][] = $this->_xh['value'];
                    $this->_xh['pt'][] = $this->_xh['vt'];
                } else {
                    error_log('XML-RPC: missing VALUE inside PARAM in received xml');
                }
                break;
            case 'METHODNAME':
                // NB: only leading whitespace is removed from the method name
                $this->_xh['method'] = preg_replace('/^[\n\r\t ]+/', '', $this->_xh['ac']);
                break;
            case 'NIL':
            case 'EX:NIL':
                if (PhpXmlRpc::$xmlrpc_null_extension) {
                    $this->_xh['vt'] = 'null';
                    $this->_xh['value'] = null;
                    $this->_xh['lv'] = 3;
                    break;
                }
                // drop through intentionally if nil extension not enabled
            case 'PARAMS':
            case 'FAULT':
            case 'METHODCALL':
            case 'METHODRESPONSE':
                // nothing to do when closing these elements
                break;
            default:
                // End of INVALID ELEMENT!
                // shall we add an assert here for unreachable code???
                break;
        }
    }

    /**
     * Used in decoding xmlrpc requests/responses without rebuilding xmlrpc Values:
     * same as xmlrpc_ee, but plain php values are kept instead of Value objects.
     *
     * @param mixed $parser the xml parser invoking the handler (unused)
     * @param string $name name of the element being closed
     */
    public function xmlrpc_ee_fast($parser, $name)
    {
        $this->xmlrpc_ee($parser, $name, false);
    }

    /**
     * xml parser handler function for character data.
     *
     * Appends the received text to the accumulator, unless a parsing error
     * has already been detected or a complete value has already been found.
     *
     * @param mixed $parser the xml parser invoking the handler (unused)
     * @param string $data the chunk of character data
     */
    public function xmlrpc_cd($parser, $data)
    {
        // skip processing if xml fault already detected
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // "lookforvalue==3" means that we've found an entire value
        // and should discard any further character data
        if ($this->_xh['lv'] != 3) {
            // NB: we always initialize the accumulator before starting parsing, no need to check if it is set
            $this->_xh['ac'] .= $data;
        }
    }

    /**
     * xml parser handler function for 'other stuff', ie. not char data or
     * element start/end tag. In fact it only gets called on unknown entities...
     *
     * Data which looks like an entity reference (ie. '&...;') is appended as-is to the accumulator.
     *
     * @param mixed $parser the xml parser invoking the handler (unused)
     * @param string $data the data received from the parser
     * @return bool always true
     */
    public function xmlrpc_dh($parser, $data)
    {
        // skip processing if xml fault already detected
        if ($this->_xh['isf'] < 2) {
            if (substr($data, 0, 1) == '&' && substr($data, -1, 1) == ';') {
                $this->_xh['ac'] .= $data;
            }
        }

        return true;
    }
}