2015-07-15 23:05:46 +06:00
|
|
|
<?php
|
|
|
|
namespace GraphQL\Language;
|
|
|
|
|
2016-10-21 16:39:57 +07:00
|
|
|
use GraphQL\Error\SyntaxError;
|
2017-07-10 19:50:26 +07:00
|
|
|
use GraphQL\Utils\Utils;
|
2018-02-08 14:58:08 +01:00
|
|
|
use GraphQL\Utils\BlockString;
|
2015-07-15 23:05:46 +06:00
|
|
|
|
2016-10-17 02:53:50 +07:00
|
|
|
/**
|
|
|
|
* A Lexer is a stateful stream generator in that every time
|
|
|
|
* it is advanced, it returns the next token in the Source. Assuming the
|
|
|
|
* source lexes, the final Token emitted by the lexer will be of kind
|
|
|
|
* EOF, after which the lexer will repeatedly return the same EOF token
|
|
|
|
* whenever called.
|
2017-06-24 22:42:55 +07:00
|
|
|
*
|
|
|
|
* Algorithm is O(N) both on memory and time
|
2016-10-17 02:53:50 +07:00
|
|
|
*/
|
2015-07-15 23:05:46 +06:00
|
|
|
class Lexer
|
|
|
|
{
|
|
|
|
/**
 * The source document this lexer reads from.
 *
 * @var Source
 */
public $source;

/**
 * Options array handed to the constructor, stored unchanged.
 *
 * @var array
 */
public $options;

/**
 * The previously focused non-ignored token.
 *
 * @var Token
 */
public $lastToken;

/**
 * The currently focused non-ignored token.
 *
 * @var Token
 */
public $token;

/**
 * The (1-indexed) line containing the current token.
 *
 * @var int
 */
public $line;

/**
 * The character offset at which the current line begins.
 *
 * @var int
 */
public $lineStart;

/**
 * Cursor position counted in decoded UTF-8 characters.
 *
 * @var int
 */
private $position;

/**
 * Cursor position counted in bytes; used to index into the source body.
 *
 * @var int
 */
private $byteStreamPosition;
|
|
|
|
|
|
|
|
/**
 * Creates a lexer positioned at the very beginning of the given source.
 *
 * The current and the previous token both start out as one shared
 * start-of-file token; line counting starts at 1 and both cursors
 * (character and byte) start at 0.
 *
 * @param Source $source
 * @param array  $options
 */
public function __construct(Source $source, array $options = [])
{
    $this->source  = $source;
    $this->options = $options;

    $sof = new Token(Token::SOF, 0, 0, 0, 0, null);

    $this->token     = $sof;
    $this->lastToken = $sof;

    $this->line      = 1;
    $this->lineStart = 0;

    $this->position           = 0;
    $this->byteStreamPosition = 0;
}
|
|
|
|
|
|
|
|
/**
 * Moves the lexer forward by one non-ignored token.
 *
 * The token that was current becomes $lastToken, the result of
 * lookahead() becomes the current token and is returned.
 *
 * @return Token
 */
public function advance()
{
    $this->lastToken = $this->token;
    $this->token     = $this->lookahead();

    return $this->token;
}
|
2016-10-17 02:53:50 +07:00
|
|
|
|
2018-02-08 19:33:54 +01:00
|
|
|
/**
 * Returns the next non-comment token following the current one without
 * changing $token or $lastToken.
 *
 * Tokens are chained through their ->next property: a token that was
 * already lexed is reused, otherwise it is read from the source and
 * stored on its predecessor. Once the current token is EOF, that same
 * token is returned.
 *
 * @return Token
 */
public function lookahead()
{
    $current = $this->token;

    if ($current->kind === Token::EOF) {
        return $current;
    }

    do {
        if (!$current->next) {
            // Not lexed yet: read it and remember it on the predecessor.
            $current->next = $this->readToken($current);
        }
        $current = $current->next;
    } while ($current->kind === Token::COMMENT);

    return $current;
}
|
|
|
|
|
|
|
|
/**
 * Legacy alias of advance(); raises an E_USER_DEPRECATED notice on every call.
 *
 * @deprecated use advance() instead
 * @return Token
 */
public function nextToken()
{
    $notice = __METHOD__ . ' is deprecated in favor of advance()';
    trigger_error($notice, E_USER_DEPRECATED);

    return $this->advance();
}
|
|
|
|
|
|
|
|
/**
 * Lexes the token that begins at or after the current cursor position.
 *
 * Leading whitespace is skipped first. The first remaining character
 * decides which kind of token is produced: single-character punctuators
 * are returned directly, while comments, names, numbers and strings are
 * delegated to the dedicated read*() methods after the cursor has been
 * moved back onto that first character. When the end of the source has
 * been reached an EOF token is returned.
 *
 * @param Token $prev token that precedes the one being read
 * @return Token
 * @throws SyntaxError on a disallowed control character or any character
 *                     that cannot start a token
 */
private function readToken(Token $prev)
{
    $bodyLength = $this->source->length;

    $this->positionAfterWhitespace();

    $start  = $this->position;
    $line   = $this->line;
    $column = 1 + $start - $this->lineStart;

    if ($start >= $bodyLength) {
        return new Token(Token::EOF, $bodyLength, $bodyLength, $line, $column, $prev);
    }

    // Consume the first character of the token.
    list (, $code, $bytes) = $this->readChar(true);

    // SourceCharacter: tab, line feed and carriage return are the only
    // characters below U+0020 that are accepted.
    $isAllowedControl = $code === 0x0009 || $code === 0x000A || $code === 0x000D;
    if ($code < 0x0020 && !$isAllowedControl) {
        throw new SyntaxError(
            $this->source,
            $start,
            'Cannot contain the invalid character ' . Utils::printCharCode($code)
        );
    }

    // Punctuators that are exactly one character long.
    $punctuators = [
        33  => Token::BANG,      // !
        36  => Token::DOLLAR,    // $
        40  => Token::PAREN_L,   // (
        41  => Token::PAREN_R,   // )
        58  => Token::COLON,     // :
        61  => Token::EQUALS,    // =
        64  => Token::AT,        // @
        91  => Token::BRACKET_L, // [
        93  => Token::BRACKET_R, // ]
        123 => Token::BRACE_L,   // {
        124 => Token::PIPE,      // |
        125 => Token::BRACE_R,   // }
    ];
    if (array_key_exists($code, $punctuators)) {
        return new Token($punctuators[$code], $start, $start + 1, $line, $column, $prev);
    }

    // _ A-Z a-z
    $isNameStart = $code === 95
        || ($code >= 65 && $code <= 90)
        || ($code >= 97 && $code <= 122);
    // - 0-9
    $isNumberStart = $code === 45 || ($code >= 48 && $code <= 57);

    if ($code === 35) { // #
        $this->moveStringCursor(-1, -1 * $bytes);

        return $this->readComment($line, $column, $prev);
    }

    if ($code === 46) { // .
        // Only "..." is a valid token; anything else falls through to the error below.
        list (, $second) = $this->readChar(true);
        list (, $third)  = $this->readChar(true);

        if ($second === 46 && $third === 46) {
            return new Token(Token::SPREAD, $start, $start + 3, $line, $column, $prev);
        }
    } elseif ($isNameStart) {
        $this->moveStringCursor(-1, -1 * $bytes);

        return $this->readName($line, $column, $prev);
    } elseif ($isNumberStart) {
        $this->moveStringCursor(-1, -1 * $bytes);

        return $this->readNumber($line, $column, $prev);
    } elseif ($code === 34) { // "
        // Peek at the two bytes after the opening quote to tell """ from ".
        list (, $nextCode)     = $this->readChar(false, $this->byteStreamPosition);
        list (, $nextNextCode) = $this->readChar(false, $this->byteStreamPosition + 1);

        // Step back onto the opening quote before delegating.
        $this->moveStringCursor(-1, -1 * $bytes);

        if ($nextCode === 34 && $nextNextCode === 34) {
            return $this->readBlockString($line, $column, $prev);
        }

        return $this->readString($line, $column, $prev);
    }

    if ($code === 39) {
        $errMessage = "Unexpected single quote character ('), did you mean to use ". 'a double quote (")?';
    } else {
        $errMessage = 'Cannot parse the unexpected character ' . Utils::printCharCode($code) . '.';
    }

    throw new SyntaxError(
        $this->source,
        $start,
        $errMessage
    );
}
|
|
|
|
|
|
|
|
/**
 * Reads a name token starting at the current cursor position.
 *
 * Grammar: [_A-Za-z][_0-9A-Za-z]*
 *
 * This method accepts digits in every position, the first one included;
 * it collects characters until one falls outside the set above and leaves
 * the cursor on that character.
 *
 * @param int   $line
 * @param int   $col
 * @param Token $prev
 * @return Token
 */
private function readName($line, $col, Token $prev)
{
    $start = $this->position;
    $name  = '';

    list ($char, $code) = $this->readChar();

    while (true) {
        $isNameChar = $code === 95                // _
            || ($code >= 48 && $code <= 57)       // 0-9
            || ($code >= 65 && $code <= 90)       // A-Z
            || ($code >= 97 && $code <= 122);     // a-z

        if (!$code || !$isNameChar) {
            break;
        }

        $name .= $char;
        $this->moveStringCursor(1, 1);
        list ($char, $code) = $this->readChar();
    }

    return new Token(Token::NAME, $start, $this->position, $line, $col, $prev, $name);
}
|
|
|
|
|
|
|
|
/**
 * Reads a numeric token starting at the current cursor position.
 *
 * The token is a FLOAT when a fraction or an exponent is present and an
 * INT otherwise.
 *
 * Int:   -?(0|[1-9][0-9]*)
 * Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
 *
 * @param int   $line
 * @param int   $col
 * @param Token $prev
 * @return Token
 * @throws SyntaxError when a digit directly follows a leading zero, or
 *                     when digits are required but missing
 */
private function readNumber($line, $col, Token $prev)
{
    $start   = $this->position;
    $literal = '';
    $isFloat = false;

    list ($char, $code) = $this->readChar();

    // Optional sign.
    if ($code === 45) { // -
        $literal .= $char;
        $this->moveStringCursor(1, 1);
        list ($char, $code) = $this->readChar();
    }

    // Integer part: either a lone zero or a run of digits.
    if ($code === 48) { // 0
        $literal .= $char;
        $this->moveStringCursor(1, 1);
        list ($char, $code) = $this->readChar();

        // guard against leading zero's
        if ($code >= 48 && $code <= 57) {
            throw new SyntaxError($this->source, $this->position, "Invalid number, unexpected digit after 0: " . Utils::printCharCode($code));
        }
    } else {
        $literal .= $this->readDigits();
        list ($char, $code) = $this->readChar();
    }

    // Optional fraction.
    if ($code === 46) { // .
        $isFloat = true;
        $this->moveStringCursor(1, 1);

        $literal .= $char;
        $literal .= $this->readDigits();
        list ($char, $code) = $this->readChar();
    }

    // Optional exponent.
    if ($code === 69 || $code === 101) { // E e
        $isFloat  = true;
        $literal .= $char;
        $this->moveStringCursor(1, 1);
        list ($char, $code) = $this->readChar();

        if ($code === 43 || $code === 45) { // + -
            $literal .= $char;
            $this->moveStringCursor(1, 1);
        }
        $literal .= $this->readDigits();
    }

    $kind = $isFloat ? Token::FLOAT : Token::INT;

    return new Token($kind, $start, $this->position, $line, $col, $prev, $literal);
}
|
|
|
|
|
2016-04-23 19:43:10 +06:00
|
|
|
/**
 * Collects the run of decimal digits found at the cursor and leaves the
 * cursor on the first character after them.
 *
 * @return string the digits that were read (at least one)
 * @throws SyntaxError when the character at the cursor is not a digit
 */
private function readDigits()
{
    list ($char, $code) = $this->readChar();

    if (!($code >= 48 && $code <= 57)) { // not 0 - 9
        // Past the last character there is no code to report.
        if ($this->position > $this->source->length - 1) {
            $code = null;
        }

        throw new SyntaxError(
            $this->source,
            $this->position,
            'Invalid number, expected digit but got: ' . Utils::printCharCode($code)
        );
    }

    $digits = '';
    while ($code >= 48 && $code <= 57) { // 0 - 9
        $digits .= $char;
        $this->moveStringCursor(1, 1);
        list ($char, $code) = $this->readChar();
    }

    return $digits;
}
|
|
|
|
|
|
|
|
/**
 * Reads a double-quoted string token; the cursor must be on the opening quote.
 *
 * Plain characters are gathered in $chunk and flushed into $value whenever
 * an escape sequence or the closing quote is reached. Supported escapes are
 * \" \/ \\ \b \f \n \r \t and \uXXXX (exactly four hexadecimal digits).
 *
 * @param int   $line
 * @param int   $col
 * @param Token $prev
 * @return Token
 * @throws SyntaxError on an invalid character, an invalid escape sequence,
 *                     or when a line terminator or the end of the source is
 *                     reached before the closing quote
 */
private function readString($line, $col, Token $prev)
{
    $start = $this->position;

    // Skip leading quote and read first string char:
    list ($char, $code, $bytes) = $this->moveStringCursor(1, 1)->readChar();

    $chunk = '';
    $value = '';

    while (
        $code !== null &&
        // not LineTerminator
        $code !== 10 && $code !== 13
    ) {
        // Closing Quote (")
        if ($code === 34) {
            $value .= $chunk;

            // Skip quote
            $this->moveStringCursor(1, 1);

            return new Token(
                Token::STRING,
                $start,
                $this->position,
                $line,
                $col,
                $prev,
                $value
            );
        }

        $this->assertValidStringCharacterCode($code, $this->position);
        $this->moveStringCursor(1, $bytes);

        if ($code === 92) { // \
            $value .= $chunk;
            list (, $code) = $this->readChar(true);

            switch ($code) {
                case 34:
                    $value .= '"';
                    break;
                case 47:
                    $value .= '/';
                    break;
                case 92:
                    $value .= '\\';
                    break;
                case 98:
                    $value .= chr(8); // \b (backspace)
                    break;
                case 102:
                    $value .= "\f";
                    break;
                case 110:
                    $value .= "\n";
                    break;
                case 114:
                    $value .= "\r";
                    break;
                case 116:
                    $value .= "\t";
                    break;
                case 117:
                    $position = $this->position;
                    list ($hex) = $this->readChars(4, true);

                    // Anchored on both ends: $hex can hold more than four
                    // bytes when the input contains multi-byte or malformed
                    // sequences, and hexdec() silently skips non-hex bytes,
                    // so a partial match must not be accepted.
                    if (!preg_match('/\A[0-9a-fA-F]{4}\z/', $hex)) {
                        throw new SyntaxError(
                            $this->source,
                            $position - 1,
                            'Invalid character escape sequence: \\u' . $hex
                        );
                    }
                    $code = hexdec($hex);
                    $this->assertValidStringCharacterCode($code, $position - 2);
                    $value .= Utils::chr($code);
                    break;
                default:
                    throw new SyntaxError(
                        $this->source,
                        $this->position - 1,
                        'Invalid character escape sequence: \\' . Utils::chr($code)
                    );
            }
            $chunk = '';
        } else {
            $chunk .= $char;
        }

        list ($char, $code, $bytes) = $this->readChar();
    }

    throw new SyntaxError(
        $this->source,
        $this->position,
        'Unterminated string.'
    );
}
|
2015-07-15 23:05:46 +06:00
|
|
|
|
2018-02-08 14:58:08 +01:00
|
|
|
/**
 * Reads a block string token; the cursor must be on the first of the three
 * opening quotes.
 *
 * """("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
 *
 * The raw content between the delimiters is collected (with \""" turned
 * into """) and passed through BlockString::value() to form the token value.
 *
 * @param int   $line
 * @param int   $col
 * @param Token $prev
 * @return Token
 * @throws SyntaxError on an invalid character or when the source ends
 *                     before the closing triple quote
 */
private function readBlockString($line, $col, Token $prev)
{
    $start = $this->position;

    // Step over the opening quotes and look at the first content character.
    $this->moveStringCursor(3, 3);
    list ($char, $code, $bytes) = $this->readChar();

    $pending = '';
    $raw     = '';

    while ($code !== null) {
        // Closing Triple-Quote (""")
        if ($code === 34) {
            // Peek at the two bytes following this quote without moving.
            list (, $second) = $this->readChar(false, $this->byteStreamPosition + 1);
            list (, $third)  = $this->readChar(false, $this->byteStreamPosition + 2);

            if ($second === 34 && $third === 34) {
                $raw .= $pending;

                // Consume all three closing quotes.
                $this->moveStringCursor(3, 3);

                return new Token(
                    Token::BLOCK_STRING,
                    $start,
                    $this->position,
                    $line,
                    $col,
                    $prev,
                    BlockString::value($raw)
                );
            }
        }

        $this->assertValidBlockStringCharacterCode($code, $this->position);
        $this->moveStringCursor(1, $bytes);

        // Peek at the three bytes after the character just consumed.
        list (, $first)  = $this->readChar(false, $this->byteStreamPosition);
        list (, $second) = $this->readChar(false, $this->byteStreamPosition + 1);
        list (, $third)  = $this->readChar(false, $this->byteStreamPosition + 2);

        // Escape Triple-Quote (\""")
        $isEscapedTripleQuote = $code === 92
            && $first === 34
            && $second === 34
            && $third === 34;

        if ($isEscapedTripleQuote) {
            // Consume the three quotes that belong to the escape.
            $this->moveStringCursor(3, 3);
            $raw    .= $pending . '"""';
            $pending = '';
        } else {
            $pending .= $char;
        }

        list ($char, $code, $bytes) = $this->readChar();
    }

    throw new SyntaxError(
        $this->source,
        $this->position,
        'Unterminated string.'
    );
}
|
|
|
|
|
2016-04-23 19:43:10 +06:00
|
|
|
/**
 * Rejects character codes that may not appear inside a quoted string:
 * everything below U+0020 except tab (U+0009).
 *
 * @param int|null $code     character code to check
 * @param int      $position position reported in the error
 * @throws SyntaxError when the code is not allowed
 */
private function assertValidStringCharacterCode($code, $position)
{
    // SourceCharacter
    $isBelowSpace = $code < 0x0020;
    $isTab        = $code === 0x0009;

    if ($isBelowSpace && !$isTab) {
        throw new SyntaxError(
            $this->source,
            $position,
            'Invalid character within String: ' . Utils::printCharCode($code)
        );
    }
}
|
|
|
|
|
2018-02-08 14:58:08 +01:00
|
|
|
/**
 * Rejects character codes that may not appear inside a block string:
 * everything below U+0020 except tab (U+0009), line feed (U+000A) and
 * carriage return (U+000D).
 *
 * @param int|null $code     character code to check
 * @param int      $position position reported in the error
 * @throws SyntaxError when the code is not allowed
 */
private function assertValidBlockStringCharacterCode($code, $position)
{
    // SourceCharacter
    $isBelowSpace = $code < 0x0020;
    $isPermitted  = $code === 0x0009 || $code === 0x000A || $code === 0x000D;

    if ($isBelowSpace && !$isPermitted) {
        throw new SyntaxError(
            $this->source,
            $position,
            'Invalid character within String: ' . Utils::printCharCode($code)
        );
    }
}
|
|
|
|
|
2015-07-15 23:05:46 +06:00
|
|
|
/**
 * Advances the cursor past ignorable characters and stops on the first
 * character that is not one of them (or at the end of the source).
 *
 * Skipped: tab, space, comma, the byte order mark (U+FEFF) and line
 * terminators. Each LF, CR or CR+LF increments $line and sets $lineStart
 * to the position right after the terminator.
 */
private function positionAfterWhitespace()
{
    while ($this->position < $this->source->length) {
        list (, $code, $bytes) = $this->readChar();

        // tab | space | comma | BOM
        if (in_array($code, [9, 32, 44, 0xFEFF], true)) {
            $this->moveStringCursor(1, $bytes);
            continue;
        }

        // Anything that is not a line terminator ends the skipping.
        if ($code !== 10 && $code !== 13) {
            return;
        }

        $this->moveStringCursor(1, $bytes);

        if ($code === 13) { // carriage return
            list (, $following, $followingBytes) = $this->readChar();

            if ($following === 10) { // lf after cr counts as the same line break
                $this->moveStringCursor(1, $followingBytes);
            }
        }

        $this->line++;
        $this->lineStart = $this->position;
    }
}
|
2016-10-17 02:53:50 +07:00
|
|
|
|
|
|
|
/**
 * Reads a comment token; the cursor must be on the leading '#'.
 *
 * #[\u0009\u0020-\uFFFF]*
 *
 * Characters after the '#' are appended to the value one by one. Reading
 * stops at the first character that is neither a tab nor above U+001F, or
 * at the end of the source; that terminating character (if any) has
 * already been appended to the value, while the cursor is left on it.
 *
 * @param int   $line
 * @param int   $col
 * @param Token $prev
 * @return Token
 */
private function readComment($line, $col, Token $prev)
{
    $start = $this->position;
    $text  = '';

    // The '#' under the cursor is a single byte.
    $width = 1;

    while (true) {
        $this->moveStringCursor(1, $width);
        list ($char, $code, $width) = $this->readChar();
        $text .= $char;

        if (!$code) {
            break;
        }

        // SourceCharacter but not LineTerminator
        if ($code <= 0x001F && $code !== 0x0009) {
            break;
        }
    }

    return new Token(Token::COMMENT, $start, $this->position, $line, $col, $prev, $text);
}
|
2017-06-24 22:42:55 +07:00
|
|
|
|
|
|
|
/**
 * Reads one UTF-8 character from the source body.
 *
 * The byte length of the character is derived from its first byte only
 * (< 128: 1 byte, < 224: 2 bytes, < 240: 3 bytes, otherwise 4 bytes).
 * When there is no byte at the requested offset the result is
 * ['', null, 0].
 *
 * @param bool     $advance            when true, the cursor is moved past
 *                                     the character that was read
 * @param int|null $byteStreamPosition byte offset to read from; null means
 *                                     the current byte cursor
 * @return array [string $char, int|null $code, int $bytes]
 */
private function readChar($advance = false, $byteStreamPosition = null)
{
    if ($byteStreamPosition === null) {
        $byteStreamPosition = $this->byteStreamPosition;
    }

    $code           = null;
    $utf8char       = '';
    $bytes          = 0;
    $positionOffset = 0;

    if (isset($this->source->body[$byteStreamPosition])) {
        $ord = ord($this->source->body[$byteStreamPosition]);

        if ($ord < 128) {
            $bytes = 1;
        } elseif ($ord < 224) {
            $bytes = 2;
        } elseif ($ord < 240) {
            $bytes = 3;
        } else {
            $bytes = 4;
        }

        // substr() stops at the end of the body, so a multi-byte sequence
        // that is cut off by the end of the source does not trigger an
        // "uninitialized string offset" diagnostic.
        $utf8char       = substr($this->source->body, $byteStreamPosition, $bytes);
        $positionOffset = 1;
        $code           = $bytes === 1 ? $ord : Utils::ord($utf8char);
    }

    if ($advance) {
        $this->moveStringCursor($positionOffset, $bytes);
    }

    return [$utf8char, $code, $bytes];
}
|
|
|
|
|
|
|
|
/**
 * Reads up to $numberOfChars consecutive UTF-8 characters from the source body.
 *
 * Fewer characters are returned when the source ends early. With $advance
 * set, the character cursor is still moved by $numberOfChars, while the
 * byte cursor is moved by the number of bytes reported for the characters
 * that were read.
 *
 * @param int      $numberOfChars
 * @param bool     $advance            when true, the cursor is moved as
 *                                     described above
 * @param int|null $byteStreamPosition byte offset to start from; null means
 *                                     the current byte cursor
 * @return array [string $chars, int $totalBytes]
 */
private function readChars($numberOfChars, $advance = false, $byteStreamPosition = null)
{
    $result     = '';
    $totalBytes = 0;

    // Explicit null check: an offset of 0 is a valid starting point and
    // must not fall back to the current cursor.
    $byteOffset = $byteStreamPosition === null
        ? $this->byteStreamPosition
        : $byteStreamPosition;

    for ($i = 0; $i < $numberOfChars; $i++) {
        list ($char, , $bytes) = $this->readChar(false, $byteOffset);

        $totalBytes += $bytes;
        $byteOffset += $bytes;
        $result     .= $char;
    }

    if ($advance) {
        $this->moveStringCursor($numberOfChars, $totalBytes);
    }

    return [$result, $totalBytes];
}
|
|
|
|
|
|
|
|
/**
 * Shifts both cursors by the given (possibly negative) amounts.
 *
 * @param int $positionOffset   number of characters to move
 * @param int $byteStreamOffset number of bytes to move
 * @return $this
 */
private function moveStringCursor($positionOffset, $byteStreamOffset)
{
    $this->position           = $this->position + $positionOffset;
    $this->byteStreamPosition = $this->byteStreamPosition + $byteStreamOffset;

    return $this;
}
|
2015-07-15 23:05:46 +06:00
|
|
|
}
|