Source for file Parser.php

Documentation is available at Parser.php

  1. <?php
  2. //
  3. // +----------------------------------------------------------------------+
  4. // | PHP Version 4 |
  5. // +----------------------------------------------------------------------+
  6. // | Copyright (c) 1997-2004 The PHP Group |
  7. // +----------------------------------------------------------------------+
  8. // | This source file is subject to version 3.0 of the PHP license, |
  9. // | that is bundled with this package in the file LICENSE, and is |
  10. // | available at through the world-wide-web at |
  11. // | http://www.php.net/license/3_0.txt. |
  12. // | If you did not receive a copy of the PHP license and are unable to |
  13. // | obtain it through the world-wide-web, please send a note to |
  14. // | license@php.net so we can mail you a copy immediately. |
  15. // +----------------------------------------------------------------------+
  16. // | Author: Stig Bakken <ssb@fast.no> |
  17. // | Tomas V.V.Cox <cox@idecnet.com> |
  18. // | Stephan Schmidt <schst@php-tools.net> |
  19. // +----------------------------------------------------------------------+
  20. //
  21. // $Id: Parser.php,v 1.1 2006/07/12 17:57:59 ron Exp $
  22.  
  23.  
  24.  
  25. /**
  26. * XML Parser class.
  27. *
  28. * This is an XML parser based on PHP's "xml" extension,
  29. * based on the bundled expat library.
  30. *
  31. * @category XML
  32. * @package XML_Parser
  33. * @author Stig Bakken <ssb@fast.no>
  34. * @author Tomas V.V.Cox <cox@idecnet.com>
  35. * @author Stephan Schmidt <schst@php-tools.net>
  36. */
  37.  
  38. /**
  39. * uses PEAR's error handling
  40. */
  41. require_once 'PEAR.php';
  42.  
  43. /**
  44. * resource could not be created
  45. */
  46. define('XML_PARSER_ERROR_NO_RESOURCE', 200);
  47.  
  48. /**
  49. * unsupported mode
  50. */
  51. define('XML_PARSER_ERROR_UNSUPPORTED_MODE', 201);
  52.  
  53. /**
  54. * invalid encoding was given
  55. */
  56. define('XML_PARSER_ERROR_INVALID_ENCODING', 202);
  57.  
  58. /**
  59. * specified file could not be read
  60. */
  61. define('XML_PARSER_ERROR_FILE_NOT_READABLE', 203);
  62.  
  63. /**
  64. * invalid input
  65. */
  66. define('XML_PARSER_ERROR_INVALID_INPUT', 204);
  67.  
  68. /**
  69. * remote file cannot be retrieved in safe mode
  70. */
  71. define('XML_PARSER_ERROR_REMOTE', 205);
  72.  
  73. /**
  74. * XML Parser class.
  75. *
  76. * This is an XML parser based on PHP's "xml" extension,
  77. * based on the bundled expat library.
  78. *
  79. * Notes:
  80. * - It requires PHP 4.0.4pl1 or greater
  81. * - From revision 1.17, the function names used by the 'func' mode
  82. * are in the format "xmltag_$elem", for example: use "xmltag_name"
  83. * to handle the <name></name> tags of your xml file.
  84. *
  85. * @category XML
  86. * @package XML_Parser
  87. * @author Stig Bakken <ssb@fast.no>
  88. * @author Tomas V.V.Cox <cox@idecnet.com>
  89. * @author Stephan Schmidt <schst@php-tools.net>
  90. * @todo create XML_Parser_Namespace to parse documents with namespaces
  91. * @todo create XML_Parser_Pull
  92. * @todo Tests that need to be made:
  93. * - mixing character encodings
  94. * - a test using all expat handlers
  95. * - options (folding, output charset)
  96. * - different parsing modes
  97. */
  98. class XML_Parser extends PEAR
  99. {
  100. // {{{ properties
  101.  
  102.  
  103. /**
  104. * XML parser handle
  105. *
  106. * @var resource
  107. * @see xml_parser_create()
  108. */
  109. var $parser;
  110.  
  111. /**
  112. * File handle if parsing from a file
  113. *
  114. * @var resource
  115. */
  116. var $fp;
  117.  
  118. /**
  119. * Whether to do case folding
  120. *
  121. * If set to true, all tag and attribute names will
  122. * be converted to UPPER CASE.
  123. *
  124. * @var boolean
  125. */
  126. var $folding = true;
  127.  
  128. /**
  129. * Mode of operation, one of "event" or "func"
  130. *
  131. * @var string
  132. */
  133. var $mode;
  134.  
  135. /**
  136. * Mapping from expat handler function to class method.
  137. *
  138. * @var array
  139. */
  140. var $handler = array(
  141. 'character_data_handler' => 'cdataHandler',
  142. 'default_handler' => 'defaultHandler',
  143. 'processing_instruction_handler' => 'piHandler',
  144. 'unparsed_entity_decl_handler' => 'unparsedHandler',
  145. 'notation_decl_handler' => 'notationHandler',
  146. 'external_entity_ref_handler' => 'entityrefHandler'
  147. );
  148.  
  149. /**
  150. * source encoding
  151. *
  152. * @var string
  153. */
  154. var $srcenc;
  155.  
  156. /**
  157. * target encoding
  158. *
  159. * @var string
  160. */
  161. var $tgtenc;
  162.  
  163. /**
  164. * handler object
  165. *
  166. * @var object
  167. */
  168. var $_handlerObj;
  169.  
  170. // }}}
  171. // {{{ constructor
  172.  
  173.  
    /**
     * PHP4-style constructor.
     *
     * Needed for PHP4 compatibility, where the method named after the
     * class is called when a new instance is created. It only forwards
     * its arguments to __construct().
     *
     * @param string $srcenc source charset encoding, use NULL (default) to use
     *                       whatever the document specifies
     * @param string $mode   how this parser object should work, "event" for
     *                       startelement/endelement-type events, "func"
     *                       to have it call functions named after elements
     * @param string $tgtenc a valid target encoding
     */
    function XML_Parser($srcenc = null, $mode = 'event', $tgtenc = null)
    {
        XML_Parser::__construct($srcenc, $mode, $tgtenc);
    }
  191. // }}}
  192.  
  193.  
    /**
     * PHP5 constructor
     *
     * Calls the PEAR base constructor with 'XML_Parser_Error' (presumably
     * the error class PEAR should use -- confirm against PEAR.php) and
     * stores the three settings on the instance. No parser resource is
     * created here; that happens in _create().
     *
     * @param string $srcenc source charset encoding, use NULL (default) to use
     *                       whatever the document specifies
     * @param string $mode   how this parser object should work, "event" for
     *                       startelement/endelement-type events, "func"
     *                       to have it call functions named after elements
     * @param string $tgtenc a valid target encoding
     */
    function __construct($srcenc = null, $mode = 'event', $tgtenc = null)
    {
        $this->PEAR('XML_Parser_Error');

        // the mode is stored unchecked; it is validated in _initHandlers()
        $this->mode   = $mode;
        $this->srcenc = $srcenc;
        $this->tgtenc = $tgtenc;
    }
  212. // }}}
  213.  
  214.  
  215. /**
  216. * Sets the mode of the parser.
  217. *
  218. * Possible modes are:
  219. * - func
  220. * - event
  221. *
  222. * You can set the mode using the second parameter
  223. * in the constructor.
  224. *
  225. * This method is only needed, when switching to a new
  226. * mode at a later point.
  227. *
  228. * @access public
  229. * @param string mode, either 'func' or 'event'
  230. * @return boolean|object true on success, PEAR_Error otherwise
  231. */
  232. function setMode($mode)
  233. {
  234. if ($mode != 'func' && $mode != 'event') {
  235. $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
  236. }
  237.  
  238. $this->mode = $mode;
  239. return true;
  240. }
  241.  
    /**
     * Sets the object, that will handle the XML events
     *
     * This allows you to create a handler object independent of the
     * parser object that you are using and easily switch the underlying
     * parser.
     *
     * If no object is set, XML_Parser assumes that you
     * extend this class and handle the events in $this
     * (see the fallback in _initHandlers()).
     *
     * @access public
     * @param  object   object to handle the events; stored by reference
     * @return boolean  will always return true
     * @since  v1.2.0beta3
     */
    function setHandlerObj(&$obj)
    {
        // keep a reference, not a copy, so the caller's object receives the events
        $this->_handlerObj = &$obj;
        return true;
    }
  262.  
  263. /**
  264. * Init the element handlers
  265. *
  266. * @access private
  267. */
  268. function _initHandlers()
  269. {
  270. if (!is_resource($this->parser)) {
  271. return false;
  272. }
  273.  
  274. if (!is_object($this->_handlerObj)) {
  275. $this->_handlerObj = &$this;
  276. }
  277. switch ($this->mode) {
  278.  
  279. case 'func':
  280. xml_set_object($this->parser, $this->_handlerObj);
  281. xml_set_element_handler($this->parser, array(&$this, 'funcStartHandler'), array(&$this, 'funcEndHandler'));
  282. break;
  283.  
  284. case 'event':
  285. xml_set_object($this->parser, $this->_handlerObj);
  286. xml_set_element_handler($this->parser, 'startHandler', 'endHandler');
  287. break;
  288. default:
  289. return $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
  290. break;
  291. }
  292.  
  293.  
  294. /**
  295. * set additional handlers for character data, entities, etc.
  296. */
  297. foreach ($this->handler as $xml_func => $method) {
  298. if (method_exists($this->_handlerObj, $method)) {
  299. $xml_func = 'xml_set_' . $xml_func;
  300. $xml_func($this->parser, $method);
  301. }
  302. }
  303. }
  304.  
  305. // {{{ _create()
  306.  
  307.  
  308. /**
  309. * create the XML parser resource
  310. *
  311. * Has been moved from the constructor to avoid
  312. * problems with object references.
  313. *
  314. * Furthermore it allows us returning an error
  315. * if something fails.
  316. *
  317. * @access private
  318. * @return boolean|object true on success, PEAR_Error otherwise
  319. *
  320. * @see xml_parser_create
  321. */
  322. function _create()
  323. {
  324. if ($this->srcenc === null) {
  325. $xp = @xml_parser_create();
  326. } else {
  327. $xp = @xml_parser_create($this->srcenc);
  328. }
  329. if (is_resource($xp)) {
  330. if ($this->tgtenc !== null) {
  331. if (!@xml_parser_set_option($xp, XML_OPTION_TARGET_ENCODING,
  332. $this->tgtenc)) {
  333. return $this->raiseError('invalid target encoding', XML_PARSER_ERROR_INVALID_ENCODING);
  334. }
  335. }
  336. $this->parser = $xp;
  337. $result = $this->_initHandlers($this->mode);
  338. if ($this->isError($result)) {
  339. return $result;
  340. }
  341. xml_parser_set_option($xp, XML_OPTION_CASE_FOLDING, $this->folding);
  342.  
  343. return true;
  344. }
  345. return $this->raiseError('Unable to create XML parser resource.', XML_PARSER_ERROR_NO_RESOURCE);
  346. }
  347.  
  348. // }}}
  349. // {{{ reset()
  350.  
  351.  
  352. /**
  353. * Reset the parser.
  354. *
  355. * This allows you to use one parser instance
  356. * to parse multiple XML documents.
  357. *
  358. * @access public
  359. * @return boolean|object true on success, PEAR_Error otherwise
  360. */
  361. function reset()
  362. {
  363. $result = $this->_create();
  364. if ($this->isError( $result )) {
  365. return $result;
  366. }
  367. return true;
  368. }
  369.  
  370. // }}}
  371. // {{{ setInputFile()
  372.  
  373.  
  374. /**
  375. * Sets the input xml file to be parsed
  376. *
  377. * @param string Filename (full path)
  378. * @return resource fopen handle of the given file
  379. * @throws XML_Parser_Error
  380. * @see setInput(), setInputString(), parse()
  381. * @access public
  382. */
  383. function setInputFile($file)
  384. {
  385. /**
  386. * check, if file is a remote file
  387. */
  388. if (eregi('^(http|ftp)://', substr($file, 0, 10))) {
  389. if (!ini_get('allow_url_fopen')) {
  390. return $this->raiseError('Remote files cannot be parsed, as safe mode is enabled.', XML_PARSER_ERROR_REMOTE);
  391. }
  392. }
  393. $fp = @fopen($file, 'rb');
  394. if (is_resource($fp)) {
  395. $this->fp = $fp;
  396. return $fp;
  397. }
  398. return $this->raiseError('File could not be opened.', XML_PARSER_ERROR_FILE_NOT_READABLE);
  399. }
  400.  
  401. // }}}
  402. // {{{ setInputString()
    /**
     * XML_Parser::setInputString()
     *
     * Sets the xml input from a string.
     *
     * The string is stored in $this->fp, the same property that holds the
     * file handle for file input; parse() tells the two apart with
     * is_resource().
     *
     * @param  string $data a string containing the XML document
     * @return null
     ***/
    function setInputString($data)
    {
        $this->fp = $data;
        return null;
    }
  416. // }}}
  417. // {{{ setInput()
  418.  
  419.  
  420. /**
  421. * Sets the file handle to use with parse().
  422. *
  423. * You should use setInputFile() or setInputString() if you
  424. * pass a string
  425. *
  426. * @param mixed $fp Can be either a resource returned from fopen(),
  427. * a URL, a local filename or a string.
  428. * @access public
  429. * @see parse()
  430. * @uses setInputString(), setInputFile()
  431. */
  432. function setInput($fp)
  433. {
  434. if (is_resource($fp)) {
  435. $this->fp = $fp;
  436. return true;
  437. }
  438. // see if it's an absolute URL (has a scheme at the beginning)
  439. elseif (eregi('^[a-z]+://', substr($fp, 0, 10))) {
  440. return $this->setInputFile($fp);
  441. }
  442. // see if it's a local file
  443. elseif (file_exists($fp)) {
  444. return $this->setInputFile($fp);
  445. }
  446. // it must be a string
  447. else {
  448. $this->fp = $fp;
  449. return true;
  450. }
  451.  
  452. return $this->raiseError('Illegal input format', XML_PARSER_ERROR_INVALID_INPUT);
  453. }
  454.  
  455. // }}}
  456. // {{{ parse()
  457.  
  458.  
  459. /**
  460. * Central parsing function.
  461. *
  462. * @return true|object PEAR error returns true on success, or a PEAR_Error otherwise
  463. * @access public
  464. */
  465. function parse()
  466. {
  467. /**
  468. * reset the parser
  469. */
  470. $result = $this->reset();
  471. if ($this->isError($result)) {
  472. return $result;
  473. }
  474. // if $this->fp was fopened previously
  475. if (is_resource($this->fp)) {
  476. while ($data = fread($this->fp, 4096)) {
  477. if (!$this->_parseString($data, feof($this->fp))) {
  478. $error = &$this->raiseError();
  479. $this->free();
  480. return $error;
  481. }
  482. }
  483. // otherwise, $this->fp must be a string
  484. } else {
  485. if (!$this->_parseString($this->fp, true)) {
  486. $error = &$this->raiseError();
  487. $this->free();
  488. return $error;
  489. }
  490. }
  491. $this->free();
  492.  
  493. return true;
  494. }
  495.  
  496. /**
  497. * XML_Parser::_parseString()
  498. *
  499. * @param string $data
  500. * @param boolean $eof
  501. * @return bool
  502. * @access private
  503. * @see parseString()
  504. ***/
  505. function _parseString($data, $eof = false)
  506. {
  507. return xml_parse($this->parser, $data, $eof);
  508. }
  509. // }}}
  510. // {{{ parseString()
  511.  
  512.  
  513. /**
  514. * XML_Parser::parseString()
  515. *
  516. * Parses a string.
  517. *
  518. * @param string $data XML data
  519. * @param boolean $eof If set and TRUE, data is the last piece of data sent in this parser
  520. * @throws XML_Parser_Error
  521. * @return Pear Error|true true on success or a PEAR Error
  522. * @see _parseString()
  523. */
  524. function parseString($data, $eof = false)
  525. {
  526. if (!isset($this->parser) || !is_resource($this->parser)) {
  527. $this->reset();
  528. }
  529. if (!$this->_parseString($data, $eof)) {
  530. $error = &$this->raiseError();
  531. $this->free();
  532. return $error;
  533. }
  534.  
  535. if ($eof === true) {
  536. $this->free();
  537. }
  538. return true;
  539. }
  540. /**
  541. * XML_Parser::free()
  542. *
  543. * Free the internal resources associated with the parser
  544. *
  545. * @return null
  546. ***/
  547. function free()
  548. {
  549. if (isset($this->parser) && is_resource($this->parser)) {
  550. xml_parser_free($this->parser);
  551. unset( $this->parser );
  552. }
  553. if (isset($this->fp) && is_resource($this->fp)) {
  554. fclose($this->fp);
  555. }
  556. unset($this->fp);
  557. return null;
  558. }
  559. /**
  560. * XML_Parser::raiseError()
  561. *
  562. * Throws a XML_Parser_Error
  563. *
  564. * @param string $msg the error message
  565. * @param integer $ecode the error message code
  566. * @return XML_Parser_Error
  567. ***/
  568. function raiseError($msg = null, $ecode = 0)
  569. {
  570. $msg = !is_null($msg) ? $msg : $this->parser;
  571. $err = &new XML_Parser_Error($msg, $ecode);
  572. return parent::raiseError($err);
  573. }
  574. // }}}
  575. // {{{ funcStartHandler()
  576.  
  577.  
  578. function funcStartHandler($xp, $elem, $attribs)
  579. {
  580. $func = 'xmltag_' . $elem;
  581. if (strchr($func, '.')) {
  582. $func = str_replace('.', '_', $func);
  583. }
  584. if (method_exists($this->_handlerObj, $func)) {
  585. call_user_func(array(&$this->_handlerObj, $func), $xp, $elem, $attribs);
  586. } elseif (method_exists($this->_handlerObj, 'xmltag')) {
  587. call_user_func(array(&$this->_handlerObj, 'xmltag'), $xp, $elem, $attribs);
  588. }
  589. }
  590.  
  591. // }}}
  592. // {{{ funcEndHandler()
  593.  
  594.  
  595. function funcEndHandler($xp, $elem)
  596. {
  597. $func = 'xmltag_' . $elem . '_';
  598. if (strchr($func, '.')) {
  599. $func = str_replace('.', '_', $func);
  600. }
  601. if (method_exists($this->_handlerObj, $func)) {
  602. call_user_func(array(&$this->_handlerObj, $func), $xp, $elem);
  603. } elseif (method_exists($this->_handlerObj, 'xmltag_')) {
  604. call_user_func(array(&$this->_handlerObj, 'xmltag_'), $xp, $elem);
  605. }
  606. }
  607.  
  608. // }}}
  609. // {{{ startHandler()
  610.  
  611.  
  612. /**
  613. *
  614. * @abstract
  615. */
  616. function startHandler($xp, $elem, &$attribs)
  617. {
  618. return NULL;
  619. }
  620.  
  621. // }}}
  622. // {{{ endHandler()
  623.  
  624.  
  625. /**
  626. *
  627. * @abstract
  628. */
  629. function endHandler($xp, $elem)
  630. {
  631. return NULL;
  632. }
  633.  
  634.  
  635. // }}}
  636.  
  637. }
  638.  
  639. /**
  640. * error class, replaces PEAR_Error
  641. *
  642. * An instance of this class will be returned
  643. * if an error occurs inside XML_Parser.
  644. *
  645. * There are three advantages over using the standard PEAR_Error:
  646. * - All messages will be prefixed
  647. * - check for XML_Parser error, using is_a( $error, 'XML_Parser_Error' )
  648. * - messages can be generated from the xml_parser resource
  649. *
  650. * @package XML_Parser
  651. * @access public
  652. * @see PEAR_Error
  653. */
  654. class XML_Parser_Error extends PEAR_Error
  655. {
  656. // {{{ properties
  657.  
  658.  
  659. /**
  660. * prefix for all messages
  661. *
  662. * @var string
  663. */
  664. var $error_message_prefix = 'XML_Parser: ';
  665.  
  666. // }}}
  667. // {{{ constructor()
  668. /**
  669. * construct a new error instance
  670. *
  671. * You may either pass a message or an xml_parser resource as first
  672. * parameter. If a resource has been passed, the last error that
  673. * happened will be retrieved and returned.
  674. *
  675. * @access public
  676. * @param string|resource message or parser resource
  677. * @param integer error code
  678. * @param integer error handling
  679. * @param integer error level
  680. */
  681. function XML_Parser_Error($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
  682. {
  683. if (is_resource($msgorparser)) {
  684. $code = xml_get_error_code($msgorparser);
  685. $msgorparser = sprintf('%s at XML input line %d:%d',
  686. xml_error_string($code),
  687. xml_get_current_line_number($msgorparser),
  688. xml_get_current_column_number($msgorparser));
  689. }
  690. $this->PEAR_Error($msgorparser, $code, $mode, $level);
  691. }
  692. // }}}
  693.  
  694. }
  695. ?>

Documentation generated on Mon, 04 Dec 2006 11:09:46 -0500 by phpDocumentor 1.3.0RC3