2010-04-08 08:03:04 +04:00
< ? php
2010-08-23 10:21:41 +04:00
namespace Symfony\Component\Yaml ;
2010-04-08 08:03:04 +04:00
/*
 * This file is part of the symfony package.
 * (c) Fabien Potencier <fabien.potencier@symfony-project.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
/**
 * Parser parses YAML strings to convert them to PHP arrays.
 *
 * @package    symfony
 * @subpackage yaml
 * @author     Fabien Potencier <fabien.potencier@symfony-project.com>
 */
class Parser
{
// Line offset of this document; added to the current line index when reporting line numbers.
protected $offset = 0 ;
// Lines of the YAML input being parsed (filled by parse()).
protected $lines = array ();
// Zero-based index into $lines of the current line; -1 before the first line is read.
protected $currentLineNb = - 1 ;
// Text of the line at $currentLineNb.
protected $currentLine = '' ;
// Values stored under YAML anchors (&name), keyed by anchor name; nested parsers share this array by reference.
protected $refs = array ();
/**
 * Creates a parser.
 *
 * @param integer $offset Line offset of the YAML document; it is added to the
 *                        line numbers reported in error messages
 */
public function __construct($offset = 0)
{
    $this->offset = $offset;
}
/**
 * Parses a YAML string to a PHP value.
 *
 * The input is cleaned up, split into lines and consumed line by line. Each
 * significant line is either a sequence item ("- ..."), a mapping entry
 * ("key: ...") or, for a one-line document, an inline value. Nested blocks
 * are handed to a new Parser instance that shares the anchor table ($refs).
 *
 * @param string $value A YAML string
 *
 * @return mixed A PHP value (null when the document yields no data)
 *
 * @throws ParserException If the YAML is not valid
 */
public function parse($value)
{
    $this->currentLineNb = -1;
    $this->currentLine = '';
    $this->lines = explode("\n", $this->cleanup($value));

    $data = array();
    while ($this->moveToNextLine())
    {
        if ($this->isCurrentLineEmpty())
        {
            continue;
        }

        // tab?
        if (preg_match('#^\t+#', $this->currentLine))
        {
            throw new ParserException(sprintf('A YAML file cannot contain tabs as indentation at line %d (%s).', $this->getRealCurrentLineNb() + 1, $this->currentLine));
        }

        // $isRef: anchor name to register for this line's value (or false)
        // $isInPlace: name of the alias merged through "<<: *name" (or false)
        // $isProcessed: array produced by a "<<" merge with an inline/embedded value (or false)
        $isRef = $isInPlace = $isProcessed = false;
        if (preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+?))?\s*$#', $this->currentLine, $values))
        {
            // sequence item
            if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#', $values['value'], $matches))
            {
                $isRef = $matches['ref'];
                $values['value'] = $matches['value'];
            }

            // array
            if (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#'))
            {
                // no inline value: the item is the embedded block that follows
                $c = $this->getRealCurrentLineNb() + 1;
                $parser = new Parser($c);
                $parser->refs =& $this->refs;
                $data[] = $parser->parse($this->getNextEmbedBlock());
            }
            else
            {
                if (isset($values['leadspaces'])
                    && ' ' == $values['leadspaces']
                    && preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{].*?) *\:(\s+(?P<value>.+?))?\s*$#', $values['value'], $matches))
                {
                    // this is a compact notation element, add to next block and parse
                    $c = $this->getRealCurrentLineNb();
                    $parser = new Parser($c);
                    $parser->refs =& $this->refs;

                    $block = $values['value'];
                    // isNextLineIndented() returns true when the next line is NOT deeper,
                    // so the negation means "continuation lines follow"
                    if (!$this->isNextLineIndented())
                    {
                        $block .= "\n".$this->getNextEmbedBlock($this->getCurrentLineIndentation() + 2);
                    }

                    $data[] = $parser->parse($block);
                }
                else
                {
                    $data[] = $this->parseValue($values['value']);
                }
            }
        }
        else if (preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"].*?) *\:(\s+(?P<value>.+?))?\s*$#', $this->currentLine, $values))
        {
            // mapping entry
            $key = Inline::parseScalar($values['key']);

            if ('<<' === $key)
            {
                if (isset($values['value']) && '*' === substr($values['value'], 0, 1))
                {
                    // merge key pointing to an alias
                    $isInPlace = substr($values['value'], 1);
                    if (!array_key_exists($isInPlace, $this->refs))
                    {
                        throw new ParserException(sprintf('Reference "%s" does not exist at line %s (%s).', $isInPlace, $this->getRealCurrentLineNb() + 1, $this->currentLine));
                    }
                }
                else
                {
                    // merge key with an inline value or an embedded block
                    if (isset($values['value']) && $values['value'] !== '')
                    {
                        $value = $values['value'];
                    }
                    else
                    {
                        $value = $this->getNextEmbedBlock();
                    }
                    $c = $this->getRealCurrentLineNb() + 1;
                    $parser = new Parser($c);
                    $parser->refs =& $this->refs;
                    $parsed = $parser->parse($value);

                    $merged = array();
                    if (!is_array($parsed))
                    {
                        throw new ParserException(sprintf('YAML merge keys used with a scalar value instead of an array at line %s (%s)', $this->getRealCurrentLineNb() + 1, $this->currentLine));
                    }
                    else if (isset($parsed[0]))
                    {
                        // Numeric array, merge individual elements
                        // (reversed so that earlier items take precedence over later ones)
                        foreach (array_reverse($parsed) as $parsedItem)
                        {
                            if (!is_array($parsedItem))
                            {
                                throw new ParserException(sprintf('Merge items must be arrays at line %s (%s).', $this->getRealCurrentLineNb() + 1, $parsedItem));
                            }
                            $merged = array_merge($parsedItem, $merged);
                        }
                    }
                    else
                    {
                        // Associative array, merge
                        // (the original read an undefined $merge variable here)
                        $merged = array_merge($merged, $parsed);
                    }

                    $isProcessed = $merged;
                }
            }
            else if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#', $values['value'], $matches))
            {
                $isRef = $matches['ref'];
                $values['value'] = $matches['value'];
            }

            if ($isProcessed)
            {
                // Merge keys
                $data = $isProcessed;
            }
            // hash
            else if (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#'))
            {
                // if next line is less indented or equal, then it means that the current value is null
                if ($this->isNextLineIndented())
                {
                    $data[$key] = null;
                }
                else
                {
                    $c = $this->getRealCurrentLineNb() + 1;
                    $parser = new Parser($c);
                    $parser->refs =& $this->refs;
                    $data[$key] = $parser->parse($this->getNextEmbedBlock());
                }
            }
            else
            {
                if ($isInPlace)
                {
                    $data = $this->refs[$isInPlace];
                }
                else
                {
                    $data[$key] = $this->parseValue($values['value']);
                }
            }
        }
        else
        {
            // 1-liner followed by newline
            if (2 == count($this->lines) && empty($this->lines[1]))
            {
                $value = Inline::load($this->lines[0]);
                if (is_array($value))
                {
                    $first = reset($value);
                    // the first element may be a nested array (or false for an
                    // empty array); only strings can be aliases
                    if (is_string($first) && '*' === substr($first, 0, 1))
                    {
                        $data = array();
                        foreach ($value as $alias)
                        {
                            $data[] = $this->refs[substr($alias, 1)];
                        }
                        $value = $data;
                    }
                }

                return $value;
            }

            // neither pattern matched: report a PCRE failure if there was one,
            // otherwise a generic parse error
            switch (preg_last_error())
            {
                case PREG_INTERNAL_ERROR:
                    $error = 'Internal PCRE error on line';
                    break;
                case PREG_BACKTRACK_LIMIT_ERROR:
                    $error = 'pcre.backtrack_limit reached on line';
                    break;
                case PREG_RECURSION_LIMIT_ERROR:
                    $error = 'pcre.recursion_limit reached on line';
                    break;
                case PREG_BAD_UTF8_ERROR:
                    $error = 'Malformed UTF-8 data on line';
                    break;
                case PREG_BAD_UTF8_OFFSET_ERROR:
                    $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point on line';
                    break;
                default:
                    $error = 'Unable to parse line';
            }

            throw new ParserException(sprintf('%s %d (%s).', $error, $this->getRealCurrentLineNb() + 1, $this->currentLine));
        }

        if ($isRef)
        {
            // register the value just added under its anchor name
            $this->refs[$isRef] = end($data);
        }
    }

    return empty($data) ? null : $data;
}
/**
 * Returns the current line index shifted by the document offset.
 *
 * @return integer The current line number (offset included)
 */
protected function getRealCurrentLineNb()
{
    $lineIndex = $this->currentLineNb;
    $shift = $this->offset;

    return $lineIndex + $shift;
}
/**
 * Counts the space characters at the start of the current line.
 *
 * @return integer The current line indentation
 */
protected function getCurrentLineIndentation()
{
    $line = $this->currentLine;
    $withoutIndent = ltrim($line, ' ');

    return strlen($line) - strlen($withoutIndent);
}
/**
 * Returns the next embed block of YAML.
 *
 * Advances to the next line and collects it plus every following line that
 * is indented at least as much, with that indentation removed. Blank lines
 * inside the block are kept; comment-only lines are dropped. Collection
 * stops at the first non-indented line, which is left as the next line to
 * be read.
 *
 * @param integer $indentation The indent level at which the block is to be read, or null
 *                             to use the indentation of the block's first line
 *
 * @return string A YAML string
 *
 * @throws ParserException If the block is not indented, or if a line is indented
 *                         less than the block without being at column 0
 */
protected function getNextEmbedBlock($indentation = null)
{
    $this->moveToNextLine();

    if (null === $indentation)
    {
        $newIndent = $this->getCurrentLineIndentation();

        if (!$this->isCurrentLineEmpty() && 0 == $newIndent)
        {
            throw new ParserException(sprintf('Indentation problem at line %d (%s)', $this->getRealCurrentLineNb() + 1, $this->currentLine));
        }
    }
    else
    {
        $newIndent = $indentation;
    }

    $data = array(substr($this->currentLine, $newIndent));

    while ($this->moveToNextLine())
    {
        if ($this->isCurrentLineEmpty())
        {
            // keep blank lines (they matter inside block scalars), skip comments
            if ($this->isCurrentLineBlank())
            {
                $data[] = substr($this->currentLine, $newIndent);
            }

            continue;
        }

        // from here on the line is neither blank nor a comment, so no
        // separate "empty line" check is needed
        $indent = $this->getCurrentLineIndentation();

        if ($indent >= $newIndent)
        {
            $data[] = substr($this->currentLine, $newIndent);
        }
        else if (0 == $indent)
        {
            // end of the block: give the line back to the caller
            $this->moveToPreviousLine();

            break;
        }
        else
        {
            throw new ParserException(sprintf('Indentation problem at line %d (%s)', $this->getRealCurrentLineNb() + 1, $this->currentLine));
        }
    }

    return implode("\n", $data);
}
/**
 * Advances the parser to the next line, if there is one.
 *
 * @return Boolean true when the parser moved, false when it already was on the last line
 */
protected function moveToNextLine()
{
    $lastIndex = count($this->lines) - 1;

    if ($this->currentLineNb < $lastIndex)
    {
        $this->currentLineNb++;
        $this->currentLine = $this->lines[$this->currentLineNb];

        return true;
    }

    return false;
}
/**
 * Steps the parser back by one line.
 *
 * No bounds check is performed; callers only use it right after a
 * successful move forward.
 */
protected function moveToPreviousLine()
{
    $this->currentLineNb--;
    $this->currentLine = $this->lines[$this->currentLineNb];
}
/**
 * Parses a YAML value.
 *
 * Handles three forms: an alias ("*name", optionally followed by a
 * comment), a block scalar header ("|" or ">" with optional modifiers), and
 * anything else, which is delegated to Inline::load().
 *
 * @param string $value A YAML value
 *
 * @return mixed A PHP value
 *
 * @throws ParserException If an alias refers to an unknown anchor
 */
protected function parseValue($value)
{
    if ('*' === substr($value, 0, 1))
    {
        // A comment starts at the first "#" preceded by a space. Cut there and
        // drop any further padding, so the alias name is correct however many
        // spaces separate it from the comment.
        if (false !== $pos = strpos($value, ' #'))
        {
            $value = rtrim(substr($value, 1, $pos - 1), ' ');
        }
        else
        {
            $value = substr($value, 1);
        }

        if (!array_key_exists($value, $this->refs))
        {
            throw new ParserException(sprintf('Reference "%s" does not exist (%s).', $value, $this->currentLine));
        }

        return $this->refs[$value];
    }

    if (preg_match('/^(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?$/', $value, $matches))
    {
        $modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : '';

        // $modifiers may be '', '+', '-' or mix a sign with digits; cast before
        // abs() because abs() rejects non-numeric strings on PHP 8
        return $this->parseFoldedScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), abs((int) $modifiers));
    }
    else
    {
        return Inline::load($value);
    }
}
/**
 * Parses a folded scalar.
 *
 * Reads the lines following a "|" or ">" header. With "|" lines are joined
 * by newlines; with ">" they are joined by spaces, except around lines
 * whose indentation differs from the previous one. The indicator controls
 * trailing newlines: '' keeps exactly one, '+' keeps all, '-' removes all.
 *
 * @param string  $separator   The separator that was used to begin this folded scalar (| or >)
 * @param string  $indicator   The indicator that was used to begin this folded scalar (+ or -)
 * @param integer $indentation The indentation that was used to begin this folded scalar
 *                             (0 to take it from the first text line)
 *
 * @return string The text value
 */
protected function parseFoldedScalar($separator, $indicator = '', $indentation = 0)
{
    $separator = '|' == $separator ? "\n" : ' ';
    $text = '';

    $notEOF = $this->moveToNextLine();

    // leading blank lines each contribute a newline
    while ($notEOF && $this->isCurrentLineBlank())
    {
        $text .= "\n";

        $notEOF = $this->moveToNextLine();
    }

    if (!$notEOF)
    {
        return '';
    }

    if (!preg_match('#^(?P<indent>'.($indentation ? str_repeat(' ', $indentation) : ' +').')(?P<text>.*)$#', $this->currentLine, $matches))
    {
        // first content line is not indented: the scalar is empty
        $this->moveToPreviousLine();

        return '';
    }

    $textIndent = $matches['indent'];
    // null means "no continuation line seen yet". The original used 0 and a
    // loose comparison against the indent string, whose result changed in
    // PHP 8 and broke folding of the second line.
    $previousIndent = null;

    $text .= $matches['text'].$separator;
    while ($this->currentLineNb + 1 < count($this->lines))
    {
        $this->moveToNextLine();

        if (preg_match('#^(?P<indent> {'.strlen($textIndent).',})(?P<text>.+)$#', $this->currentLine, $matches))
        {
            if (' ' == $separator && null !== $previousIndent && $previousIndent !== $matches['indent'])
            {
                // indentation changed: the previous line ends with a newline, not a space
                $text = substr($text, 0, -1)."\n";
            }
            $previousIndent = $matches['indent'];

            // lines indented deeper than the block keep their extra indent and end with a newline
            $diff = strlen($matches['indent']) - strlen($textIndent);
            $text .= str_repeat(' ', $diff).$matches['text'].($diff ? "\n" : $separator);
        }
        else if (preg_match('#^(?P<text> *)$#', $this->currentLine, $matches))
        {
            // blank line inside the scalar
            $text .= preg_replace('#^ {1,'.strlen($textIndent).'}#', '', $matches['text'])."\n";
        }
        else
        {
            // less indented line: end of the scalar
            $this->moveToPreviousLine();

            break;
        }
    }

    if (' ' == $separator)
    {
        // replace last separator by a newline
        $text = preg_replace('/ (\n*)$/', "\n\$1", $text);
    }

    switch ($indicator)
    {
        case '':
            $text = preg_replace('#\n+$#s', "\n", $text);
            break;
        case '+':
            break;
        case '-':
            $text = preg_replace('#\n+$#s', '', $text);
            break;
    }

    return $text;
}
/**
 * Compares the indentation of the next significant line with the current one.
 *
 * Despite its name, this returns true when the next line that is neither
 * blank nor a comment is indented LESS than or EQUAL to the current line.
 * It returns false when that line is indented deeper, or when the end of
 * the input is reached first.
 *
 * The parser is stepped back by a single line before returning, so skipped
 * blank/comment lines stay consumed; when the end of the input is reached
 * the position is not restored.
 *
 * @return Boolean true if the next significant line is not deeper than the current one, false otherwise
 */
protected function isNextLineIndented()
{
    $baseIndent = $this->getCurrentLineIndentation();

    // advance to the next line that is neither blank nor a comment
    do
    {
        $moved = $this->moveToNextLine();
    }
    while ($moved && $this->isCurrentLineEmpty());

    if (!$moved)
    {
        return false;
    }

    $notDeeper = $this->getCurrentLineIndentation() <= $baseIndent;

    $this->moveToPreviousLine();

    return $notDeeper;
}
/**
 * Tells whether the current line carries no YAML content.
 *
 * The blank check runs first, so the comment check is only made on lines
 * that contain something other than spaces.
 *
 * @return Boolean true if the current line is blank or is a comment line, false otherwise
 */
protected function isCurrentLineEmpty()
{
    if ($this->isCurrentLineBlank())
    {
        return true;
    }

    return $this->isCurrentLineComment();
}
/**
 * Tells whether the current line is empty or made of spaces only.
 *
 * @return Boolean true if the current line is blank, false otherwise
 */
protected function isCurrentLineBlank()
{
    $withoutSpaces = trim($this->currentLine, ' ');

    return $withoutSpaces == '';
}
/**
 * Returns true if the current line is a comment line.
 *
 * A comment line is one whose first character after the leading spaces is
 * "#". A blank line is not a comment.
 *
 * @return Boolean Returns true if the current line is a comment line, false otherwise
 */
protected function isCurrentLineComment()
{
    // checking explicitly the first char of the trim is faster than loops or strpos
    $ltrimmedLine = ltrim($this->currentLine, ' ');

    // guard against an empty string: reading offset 0 of '' raises a warning
    return '' !== $ltrimmedLine && '#' === $ltrimmedLine[0];
}
/**
 * Cleanups a YAML string to be parsed.
 *
 * Normalises line endings to "\n", makes sure the string ends with a
 * newline, then removes a leading "%YAML" directive line and any leading
 * comment or "---" lines. The parser offset is increased by the number of
 * removed lines so that reported line numbers still refer to the input.
 *
 * @param string $value The input YAML string
 *
 * @return string A cleaned up YAML string
 */
protected function cleanup($value)
{
    $value = str_replace(array("\r\n", "\r"), "\n", $value);

    if (!preg_match('#\n$#', $value))
    {
        $value .= "\n";
    }

    // strip YAML header
    // (no "s" modifier: with it the greedy ".*" would run to the last newline
    // and remove the whole document instead of the directive line only)
    $count = 0;
    $value = preg_replace('#^\%YAML[: ][\d\.]+.*\n#', '', $value, -1, $count);
    $this->offset += $count;

    // remove leading comments and/or ---
    $trimmedValue = preg_replace('#^((\#.*?\n)|(\-\-\-.*?\n))*#s', '', $value, -1, $count);
    if ($count == 1)
    {
        // items have been removed, update the offset
        $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
        $value = $trimmedValue;
    }

    return $value;
}
}