690 lines
19 KiB
PHP
690 lines
19 KiB
PHP
<?php
|
|
//
|
|
// +----------------------------------------------------------------------+
|
|
// | PHP Version 4 |
|
|
// +----------------------------------------------------------------------+
|
|
// | Copyright (c) 1997-2004 The PHP Group |
|
|
// +----------------------------------------------------------------------+
|
|
// | This source file is subject to version 3.0 of the PHP license, |
|
|
// | that is bundled with this package in the file LICENSE, and is |
|
|
// | available at through the world-wide-web at |
|
|
// | http://www.php.net/license/3_0.txt. |
|
|
// | If you did not receive a copy of the PHP license and are unable to |
|
|
// | obtain it through the world-wide-web, please send a note to |
|
|
// | license@php.net so we can mail you a copy immediately. |
|
|
// +----------------------------------------------------------------------+
|
|
// | Author: Stig Bakken <ssb@fast.no> |
|
|
// | Tomas V.V.Cox <cox@idecnet.com> |
|
|
// | Stephan Schmidt <schst@php-tools.net> |
|
|
// +----------------------------------------------------------------------+
|
|
//
|
|
// $Id: Parser.php,v 1.28 2006/12/01 16:23:22 schst Exp $
|
|
|
|
/**
|
|
* XML Parser class.
|
|
*
|
|
* This is an XML parser based on PHP's "xml" extension,
|
|
* based on the bundled expat library.
|
|
*
|
|
* @category XML
|
|
* @package XML_Parser
|
|
* @author Stig Bakken <ssb@fast.no>
|
|
* @author Tomas V.V.Cox <cox@idecnet.com>
|
|
* @author Stephan Schmidt <schst@php-tools.net>
|
|
*/
|
|
|
|
/**
|
|
* uses PEAR's error handling
|
|
*/
|
|
require_once 'PEAR.php';
|
|
|
|
/**
 * Error code: the XML parser resource could not be created.
 */
define('XML_PARSER_ERROR_NO_RESOURCE', 200);

/**
 * Error code: the requested parsing mode is not supported.
 */
define('XML_PARSER_ERROR_UNSUPPORTED_MODE', 201);

/**
 * Error code: an invalid source or target encoding was given.
 */
define('XML_PARSER_ERROR_INVALID_ENCODING', 202);

/**
 * Error code: the specified file could not be opened for reading.
 */
define('XML_PARSER_ERROR_FILE_NOT_READABLE', 203);

/**
 * Error code: the supplied input is not usable.
 */
define('XML_PARSER_ERROR_INVALID_INPUT', 204);

/**
 * Error code: a remote file cannot be retrieved
 * (raised by setInputFile() when allow_url_fopen is off).
 */
define('XML_PARSER_ERROR_REMOTE', 205);
|
|
|
|
/**
|
|
* XML Parser class.
|
|
*
|
|
* This is an XML parser based on PHP's "xml" extension,
|
|
* based on the bundled expat library.
|
|
*
|
|
* Notes:
|
|
* - It requires PHP 4.0.4pl1 or greater
|
|
* - From revision 1.17, the function names used by the 'func' mode
|
|
* are in the format "xmltag_$elem", for example: use "xmltag_name"
|
|
* to handle the <name></name> tags of your xml file.
|
|
*
|
|
* @category XML
|
|
* @package XML_Parser
|
|
* @author Stig Bakken <ssb@fast.no>
|
|
* @author Tomas V.V.Cox <cox@idecnet.com>
|
|
* @author Stephan Schmidt <schst@php-tools.net>
|
|
* @todo create XML_Parser_Namespace to parse documents with namespaces
|
|
* @todo create XML_Parser_Pull
|
|
* @todo Tests that need to be made:
|
|
* - mixing character encodings
|
|
* - a test using all expat handlers
|
|
* - options (folding, output charset)
|
|
* - different parsing modes
|
|
*/
|
|
class XML_Parser extends PEAR
|
|
{
|
|
// {{{ properties
|
|
|
|
/**
 * Handle of the underlying XML parser.
 *
 * Assigned by _create() and released again by free().
 *
 * @var resource
 * @see xml_parser_create()
 */
var $parser;

/**
 * Input to parse.
 *
 * Holds a stream handle when the input was set through setInputFile()
 * or a resource was passed to setInput(); holds the raw XML string when
 * the input was set through setInputString().
 *
 * @var resource|string
 */
var $fp;

/**
 * Case folding switch.
 *
 * When true, all tag and attribute names are converted
 * to UPPER CASE. The value is handed to the parser as
 * XML_OPTION_CASE_FOLDING when the parser is created.
 *
 * @var boolean
 */
var $folding = true;

/**
 * Operating mode, either "event" or "func".
 *
 * @var string
 */
var $mode;

/**
 * Optional expat handlers.
 *
 * Keys are the suffixes of the xml_set_*() registration functions,
 * values are the names of the methods that get registered when the
 * handler object implements them.
 *
 * @var array
 */
var $handler = array(
    'character_data_handler'         => 'cdataHandler',
    'default_handler'                => 'defaultHandler',
    'processing_instruction_handler' => 'piHandler',
    'unparsed_entity_decl_handler'   => 'unparsedHandler',
    'notation_decl_handler'          => 'notationHandler',
    'external_entity_ref_handler'    => 'entityrefHandler'
);

/**
 * Source encoding; null lets the parser use its default.
 *
 * @var string
 */
var $srcenc;

/**
 * Target encoding; null leaves the parser's default untouched.
 *
 * @var string
 */
var $tgtenc;

/**
 * Object that receives the XML events.
 *
 * Falls back to the parser object itself when nothing
 * has been set before the handlers are initialised.
 *
 * @var object
 */
var $_handlerObj;

/**
 * Source encodings accepted by the parser.
 *
 * Used to tell an invalid source encoding apart from
 * other reasons why the parser could not be created.
 *
 * @var array
 */
var $_validEncodings = array('ISO-8859-1', 'UTF-8', 'US-ASCII');
|
|
|
|
// }}}
|
|
// {{{ constructor
|
|
|
|
/**
 * PHP 4 style constructor.
 *
 * PHP 4 does not know __construct(); this method carries the class
 * name so that creating an instance there still runs the real
 * constructor. All arguments are forwarded unchanged.
 *
 * @param string $srcenc source charset encoding, use NULL (default) to use
 *                       whatever the document specifies
 * @param string $mode   how this parser object should work, "event" for
 *                       startelement/endelement-type events, "func"
 *                       to have it call functions named after elements
 * @param string $tgtenc a valid target encoding
 */
function XML_Parser($srcenc = null, $mode = 'event', $tgtenc = null)
{
    // Qualified call on purpose: always run XML_Parser's own constructor.
    XML_Parser::__construct($srcenc, $mode, $tgtenc);
}
|
|
// }}}
|
|
|
|
/**
 * PHP 5 constructor.
 *
 * Registers XML_Parser_Error as the error class with the PEAR base
 * class and remembers the settings. The parser resource itself is not
 * created here; that happens later in _create().
 *
 * @param string $srcenc source charset encoding, use NULL (default) to use
 *                       whatever the document specifies
 * @param string $mode   how this parser object should work, "event" for
 *                       startelement/endelement-type events, "func"
 *                       to have it call functions named after elements
 * @param string $tgtenc a valid target encoding
 */
function __construct($srcenc = null, $mode = 'event', $tgtenc = null)
{
    $this->PEAR('XML_Parser_Error');

    $this->srcenc = $srcenc;
    $this->tgtenc = $tgtenc;
    $this->mode   = $mode;
}
|
|
// }}}
|
|
|
|
/**
 * Sets the mode of the parser.
 *
 * Possible modes are:
 * - func
 * - event
 *
 * The mode can also be chosen with the second constructor parameter;
 * this method is only needed when switching to another mode later on.
 *
 * An unsupported mode is rejected and the current mode is left
 * untouched.
 *
 * @access public
 * @param  string $mode either 'func' or 'event'
 * @return boolean|object true on success, PEAR_Error otherwise
 */
function setMode($mode)
{
    if ($mode !== 'func' && $mode !== 'event') {
        // The error has to be returned; otherwise the invalid mode
        // would be stored below and the caller would be told "true".
        return $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
    }

    $this->mode = $mode;
    return true;
}
|
|
|
|
/**
 * Registers the object that receives the XML events.
 *
 * With a separate handler object the event handling code does not
 * depend on the parser object in use, so the underlying parser can be
 * exchanged easily.
 *
 * When no handler object has been registered, XML_Parser assumes that
 * this class has been extended and delivers the events to $this.
 *
 * @access public
 * @param  object $obj object to handle the events (stored by reference)
 * @return boolean always true
 * @since  v1.2.0beta3
 */
function setHandlerObj(&$obj)
{
    // Keep a reference so that PHP 4 does not work on a copy.
    $this->_handlerObj = &$obj;

    return true;
}
|
|
|
|
/**
 * Registers the element handlers and the optional handlers
 * on the parser resource.
 *
 * Returns false when no parser resource is available and a PEAR_Error
 * for an unsupported mode; otherwise nothing is returned.
 *
 * @access private
 */
function _initHandlers()
{
    if ( ! is_resource($this->parser)) {
        return false;
    }

    // Without an explicit handler object the parser handles its own events.
    if ( ! is_object($this->_handlerObj)) {
        $this->_handlerObj = &$this;
    }

    // Loose comparison, exactly like the switch statement it replaces.
    $isFuncMode  = ($this->mode == 'func');
    $isEventMode = ! $isFuncMode && ($this->mode == 'event');
    if ( ! $isFuncMode && ! $isEventMode) {
        return $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
    }

    xml_set_object($this->parser, $this->_handlerObj);
    if ($isFuncMode) {
        // In 'func' mode the parser object dispatches to xmltag_* methods itself.
        $onStart = array(&$this, 'funcStartHandler');
        $onEnd   = array(&$this, 'funcEndHandler');
        xml_set_element_handler($this->parser, $onStart, $onEnd);
    } else {
        xml_set_element_handler($this->parser, 'startHandler', 'endHandler');
    }

    // Additional handlers (character data, entities, ...) are only
    // registered when the handler object implements them.
    foreach ($this->handler as $suffix => $method) {
        if ( ! method_exists($this->_handlerObj, $method)) {
            continue;
        }
        $register = 'xml_set_' . $suffix;
        $register($this->parser, $method);
    }
}
|
|
|
|
// {{{ _create()
|
|
|
|
/**
 * Creates the XML parser resource.
 *
 * This has been moved out of the constructor to avoid problems with
 * object references, and so that an error can be returned when
 * something fails.
 *
 * On success the resource is stored in $this->parser, the handlers are
 * registered and the case folding option is applied.
 *
 * @access private
 * @return boolean|object true on success, PEAR_Error otherwise
 *
 * @see xml_parser_create
 */
function _create()
{
    if ($this->srcenc === null) {
        $xp = @xml_parser_create();
    } else {
        $xp = @xml_parser_create($this->srcenc);
    }

    if ( ! is_resource($xp)) {
        // Only blame the source encoding when one was actually requested;
        // with the default encoding the failure has another cause.
        if ($this->srcenc !== null
            && ! in_array(strtoupper($this->srcenc), $this->_validEncodings)) {
            return $this->raiseError('invalid source encoding', XML_PARSER_ERROR_INVALID_ENCODING);
        }
        return $this->raiseError('Unable to create XML parser resource.', XML_PARSER_ERROR_NO_RESOURCE);
    }

    if ($this->tgtenc !== null
        && ! @xml_parser_set_option($xp, XML_OPTION_TARGET_ENCODING, $this->tgtenc)) {
        // The handle has not been stored on $this yet, so free() would
        // never see it: release it here instead of leaking it.
        xml_parser_free($xp);
        return $this->raiseError('invalid target encoding', XML_PARSER_ERROR_INVALID_ENCODING);
    }

    $this->parser = $xp;

    // _initHandlers() reads the mode from $this->mode; it takes no arguments.
    $result = $this->_initHandlers();
    if ($this->isError($result)) {
        return $result;
    }

    xml_parser_set_option($xp, XML_OPTION_CASE_FOLDING, $this->folding);
    return true;
}
|
|
|
|
// }}}
|
|
// {{{ reset()
|
|
|
|
/**
 * Resets the parser by creating a fresh parser resource.
 *
 * This makes it possible to parse several XML documents
 * with one parser instance.
 *
 * @access public
 * @return boolean|object true on success, PEAR_Error otherwise
 */
function reset()
{
    $created = $this->_create();

    // Hand an error through untouched, report plain success otherwise.
    return $this->isError($created) ? $created : true;
}
|
|
|
|
// }}}
|
|
// {{{ setInputFile()
|
|
|
|
/**
 * Sets the input xml file to be parsed.
 *
 * The file is opened in binary read mode and the handle is kept in
 * $this->fp for parse(). http:// and ftp:// sources are refused when
 * the allow_url_fopen setting is off.
 *
 * @param  string $file Filename (full path) or URL
 * @return resource|object fopen handle of the given file,
 *                         PEAR_Error if it cannot be opened
 * @throws XML_Parser_Error
 * @see    setInput(), setInputString(), parse()
 * @access public
 */
function setInputFile($file)
{
    // preg_match() replaces eregi(), which is no longer available
    // in current PHP versions; the pattern is still case-insensitive.
    $isRemote = preg_match('/^(http|ftp):\/\//i', substr($file, 0, 10));
    if ($isRemote && ! ini_get('allow_url_fopen')) {
        return $this->raiseError('Remote files cannot be parsed, as safe mode is enabled.', XML_PARSER_ERROR_REMOTE);
    }

    $fp = @fopen($file, 'rb');
    if (is_resource($fp)) {
        $this->fp = $fp;
        return $fp;
    }

    return $this->raiseError('File could not be opened.', XML_PARSER_ERROR_FILE_NOT_READABLE);
}
|
|
|
|
// }}}
|
|
// {{{ setInputString()
|
|
|
|
/**
 * XML_Parser::setInputString()
 *
 * Uses the given string as the XML input. The string is stored in
 * $this->fp, the same property that otherwise holds the file handle.
 *
 * @param  string $data a string containing the XML document
 * @return null
 **/
function setInputString($data)
{
    $this->fp = $data;

    return null;
}
|
|
|
|
// }}}
|
|
// {{{ setInput()
|
|
|
|
/**
 * Sets the input to use with parse().
 *
 * Prefer setInputFile() or setInputString() when you already know
 * what kind of string you are passing.
 *
 * Detection order: a resource is used as is; a string starting with a
 * URL scheme or naming an existing local file is opened through
 * setInputFile(); any other string is taken as the XML document
 * itself. Everything else is rejected.
 *
 * @param  mixed $fp Can be either a resource returned from fopen(),
 *                   a URL, a local filename or a string.
 * @return boolean|resource|object true or the opened file handle on
 *                   success, PEAR_Error otherwise
 * @access public
 * @see    parse()
 * @uses   setInputString(), setInputFile()
 */
function setInput($fp)
{
    if (is_resource($fp)) {
        $this->fp = $fp;
        return true;
    }

    // Arrays, objects, numbers etc. can be neither a location nor a
    // document; report them instead of feeding them to substr()/file_exists().
    if ( ! is_string($fp)) {
        return $this->raiseError('Illegal input format', XML_PARSER_ERROR_INVALID_INPUT);
    }

    // An absolute URL (scheme at the beginning) or an existing local file
    // is opened as a file. preg_match() replaces eregi(), which is no
    // longer available in current PHP versions.
    if (preg_match('/^[a-z]+:\/\//i', substr($fp, 0, 10)) || file_exists($fp)) {
        return $this->setInputFile($fp);
    }

    // Anything else must be the XML document itself.
    $this->fp = $fp;
    return true;
}
|
|
|
|
// }}}
|
|
// {{{ parse()
|
|
|
|
/**
 * Central parsing function.
 *
 * Creates a fresh parser, feeds it the input set through setInput(),
 * setInputFile() or setInputString() and releases the parser and the
 * input again, whether parsing succeeded or not.
 *
 * A stream is read in chunks of 4096 bytes. The last chunk, which may
 * be empty, is always passed to the parser with the "final" flag set,
 * so errors that only show at the end of the document are reported no
 * matter how long the stream is.
 *
 * @return true|object true on success, or a PEAR_Error otherwise
 * @access public
 */
function parse()
{
    // Start from a fresh parser resource.
    $result = $this->reset();
    if ($this->isError($result)) {
        return $result;
    }

    if (is_resource($this->fp)) {
        // $this->fp was fopened previously.
        do {
            $data = fread($this->fp, 4096);
            if ($data === false) {
                // A failed read ends the input just like EOF does.
                $data = '';
            }
            // Compare against '' explicitly: a chunk such as "0" is data.
            $eof = ($data === '') || feof($this->fp);

            if ( ! $this->_parseString($data, $eof)) {
                // Build the error before free() discards the parser it reads from.
                $error = $this->raiseError();
                $this->free();
                return $error;
            }
        } while ( ! $eof);
    } else {
        // Otherwise $this->fp must be a string holding the whole document.
        if ( ! $this->_parseString($this->fp, true)) {
            $error = $this->raiseError();
            $this->free();
            return $error;
        }
    }

    $this->free();

    return true;
}
|
|
|
|
/**
 * XML_Parser::_parseString()
 *
 * Passes one piece of data straight to xml_parse() on the current
 * parser resource and returns its result.
 *
 * @param  string  $data piece of the XML document
 * @param  boolean $eof  true if this is the last piece
 * @return bool
 * @access private
 * @see    parseString()
 **/
function _parseString($data, $eof = false)
{
    $outcome = xml_parse($this->parser, $data, $eof);

    return $outcome;
}
|
|
|
|
// }}}
|
|
// {{{ parseString()
|
|
|
|
/**
 * XML_Parser::parseString()
 *
 * Parses a string. A parser resource is created on demand, so the
 * method can be called repeatedly to feed a document piece by piece.
 * The parser is released after the last piece or after an error.
 *
 * @param  string  $data XML data
 * @param  boolean $eof  If set and TRUE, data is the last piece of data sent in this parser
 * @throws XML_Parser_Error
 * @return Pear Error|true true on success or a PEAR Error
 * @see    _parseString()
 */
function parseString($data, $eof = false)
{
    if ( ! isset($this->parser) || ! is_resource($this->parser)) {
        // Without a usable parser there is nothing to feed the data to,
        // so a failed reset has to be reported to the caller.
        $result = $this->reset();
        if ($this->isError($result)) {
            return $result;
        }
    }

    if ( ! $this->_parseString($data, $eof)) {
        // Build the error before free() discards the parser it reads from.
        $error = $this->raiseError();
        $this->free();
        return $error;
    }

    if ($eof === true) {
        $this->free();
    }
    return true;
}
|
|
|
|
/**
 * XML_Parser::free()
 *
 * Releases the parser resource and closes the input stream, if any.
 * Afterwards both $this->parser (when it was a resource) and
 * $this->fp are unset.
 *
 * @return null
 **/
function free()
{
    $hasParser = isset($this->parser) && is_resource($this->parser);
    if ($hasParser) {
        xml_parser_free($this->parser);
        unset($this->parser);
    }

    // Only streams need closing; string input is simply dropped below.
    $hasStream = isset($this->fp) && is_resource($this->fp);
    if ($hasStream) {
        fclose($this->fp);
    }
    unset($this->fp);

    return null;
}
|
|
|
|
/**
 * XML_Parser::raiseError()
 *
 * Creates an XML_Parser_Error and passes it on to PEAR's raiseError().
 *
 * When no message is given, the current parser resource is handed to
 * XML_Parser_Error instead, which derives the message from the parser.
 * Call this before free() in that case.
 *
 * @param  string  $msg   the error message
 * @param  integer $ecode the error message code
 * @return XML_Parser_Error
 **/
function raiseError($msg = null, $ecode = 0)
{
    $msg = !is_null($msg) ? $msg : $this->parser;

    // Plain "new": assigning the result of new by reference is no
    // longer valid syntax in current PHP versions.
    $err = new XML_Parser_Error($msg, $ecode);

    return parent::raiseError($err);
}
|
|
|
|
// }}}
|
|
// {{{ funcStartHandler()
|
|
|
|
/**
 * Start element handler used in 'func' mode.
 *
 * Calls xmltag_<element>() on the handler object, with '.', '-' and
 * ':' in the element name replaced by '_'. When that method does not
 * exist, the generic xmltag() method is called if there is one.
 *
 * @param resource $xp      parser resource
 * @param string   $elem    element name
 * @param array    $attribs attributes of the element
 */
function funcStartHandler($xp, $elem, $attribs)
{
    $specific = 'xmltag_' . str_replace(array('.', '-', ':'), '_', $elem);

    $method = null;
    if (method_exists($this->_handlerObj, $specific)) {
        $method = $specific;
    } elseif (method_exists($this->_handlerObj, 'xmltag')) {
        $method = 'xmltag';
    }

    if ($method !== null) {
        call_user_func(array(&$this->_handlerObj, $method), $xp, $elem, $attribs);
    }
}
|
|
|
|
// }}}
|
|
// {{{ funcEndHandler()
|
|
|
|
/**
 * End element handler used in 'func' mode.
 *
 * Calls xmltag_<element>_() on the handler object, with '.', '-' and
 * ':' in the element name replaced by '_'. When that method does not
 * exist, the generic xmltag_() method is called if there is one.
 *
 * @param resource $xp   parser resource
 * @param string   $elem element name
 */
function funcEndHandler($xp, $elem)
{
    $specific = 'xmltag_' . str_replace(array('.', '-', ':'), '_', $elem) . '_';

    $method = null;
    if (method_exists($this->_handlerObj, $specific)) {
        $method = $specific;
    } elseif (method_exists($this->_handlerObj, 'xmltag_')) {
        $method = 'xmltag_';
    }

    if ($method !== null) {
        call_user_func(array(&$this->_handlerObj, $method), $xp, $elem);
    }
}
|
|
|
|
// }}}
|
|
// {{{ startHandler()
|
|
|
|
/**
 * Start element handler used in 'event' mode.
 *
 * Does nothing here; meant to be overridden.
 *
 * @abstract
 * @param resource $xp      parser resource
 * @param string   $elem    element name
 * @param array    $attribs attributes of the element
 */
function startHandler($xp, $elem, &$attribs)
{
    return null;
}
|
|
|
|
// }}}
|
|
// {{{ endHandler()
|
|
|
|
/**
 * End element handler used in 'event' mode.
 *
 * Does nothing here; meant to be overridden.
 *
 * @abstract
 * @param resource $xp   parser resource
 * @param string   $elem element name
 */
function endHandler($xp, $elem)
{
    return null;
}
|
|
|
|
|
|
// }}}
|
|
}
|
|
|
|
/**
 * error class, replaces PEAR_Error
 *
 * An instance of this class will be returned
 * if an error occurs inside XML_Parser.
 *
 * There are three advantages over using the standard PEAR_Error:
 * - All messages will be prefixed
 * - check for XML_Parser error, using is_a( $error, 'XML_Parser_Error' )
 * - messages can be generated from the xml_parser resource
 *
 * @package XML_Parser
 * @access  public
 * @see     PEAR_Error
 */
class XML_Parser_Error extends PEAR_Error
{
    // {{{ properties

    /**
     * prefix for all messages
     *
     * @var string
     */
    var $error_message_prefix = 'XML_Parser: ';

    // }}}
    // {{{ constructor()

    /**
     * construct a new error instance
     *
     * You may either pass a message or an xml_parser resource as first
     * parameter. If a resource has been passed, the last error that
     * happened will be retrieved and returned, together with the line
     * and column the parser had reached.
     *
     * @access public
     * @param  string|resource $msgorparser message or parser resource
     * @param  integer         $code        error code
     * @param  integer         $mode        error handling
     * @param  integer         $level       error level
     */
    function __construct($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
    {
        if (is_resource($msgorparser)) {
            // The code reported by the parser replaces the one passed in.
            $code        = xml_get_error_code($msgorparser);
            $msgorparser = sprintf('%s at XML input line %d:%d',
                                   xml_error_string($code),
                                   xml_get_current_line_number($msgorparser),
                                   xml_get_current_column_number($msgorparser));
        }
        $this->PEAR_Error($msgorparser, $code, $mode, $level);
    }

    /**
     * PHP 4 style constructor.
     *
     * PHP 4 does not know __construct(); this method carries the class
     * name so that creating an instance there still runs the real
     * constructor. All arguments are forwarded unchanged.
     *
     * @access public
     * @param  string|resource $msgorparser message or parser resource
     * @param  integer         $code        error code
     * @param  integer         $mode        error handling
     * @param  integer         $level       error level
     */
    function XML_Parser_Error($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
    {
        XML_Parser_Error::__construct($msgorparser, $code, $mode, $level);
    }

    // }}}
}
|
|
?>
|