mirror of
https://github.com/retailcrm/graphql-php.git
synced 2025-02-06 07:49:24 +03:00
Reworked lexer: tokens are now represented as double linked list
This commit is contained in:
parent
f91cbf3409
commit
3eeb4d450b
@ -2,28 +2,55 @@
|
||||
namespace GraphQL\Language\AST;
|
||||
|
||||
use GraphQL\Language\Source;
|
||||
use GraphQL\Language\Token;
|
||||
|
||||
/**
|
||||
* Contains a range of UTF-8 character offsets and token references that
|
||||
* identify the region of the source from which the AST derived.
|
||||
*/
|
||||
class Location
|
||||
{
|
||||
/**
|
||||
* The character offset at which this Node begins.
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $start;
|
||||
|
||||
/**
|
||||
* The character offset at which this Node ends.
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $end;
|
||||
|
||||
/**
|
||||
* The Token at which this Node begins.
|
||||
*
|
||||
* @var Token
|
||||
*/
|
||||
public $startToken;
|
||||
|
||||
/**
|
||||
* The Token at which this Node ends.
|
||||
*
|
||||
* @var Token
|
||||
*/
|
||||
public $endToken;
|
||||
|
||||
/**
|
||||
* The Source document the AST represents.
|
||||
*
|
||||
* @var Source|null
|
||||
*/
|
||||
public $source;
|
||||
|
||||
public function __construct($start, $end, Source $source = null)
|
||||
public function __construct(Token $startToken, Token $endToken, Source $source = null)
|
||||
{
|
||||
$this->start = $start;
|
||||
$this->end = $end;
|
||||
$this->startToken = $startToken;
|
||||
$this->endToken = $endToken;
|
||||
$this->start = $startToken->start;
|
||||
$this->end = $endToken->end;
|
||||
$this->source = $source;
|
||||
}
|
||||
}
|
||||
|
@ -4,51 +4,106 @@ namespace GraphQL\Language;
|
||||
use GraphQL\SyntaxError;
|
||||
use GraphQL\Utils;
|
||||
|
||||
// language/lexer.js
|
||||
|
||||
/**
|
||||
* A Lexer is a stateful stream generator in that every time
|
||||
* it is advanced, it returns the next token in the Source. Assuming the
|
||||
* source lexes, the final Token emitted by the lexer will be of kind
|
||||
* EOF, after which the lexer will repeatedly return the same EOF token
|
||||
* whenever called.
|
||||
*/
|
||||
class Lexer
|
||||
{
|
||||
/**
|
||||
* @var int
|
||||
*/
|
||||
private $prevPosition;
|
||||
|
||||
/**
|
||||
* @var Source
|
||||
*/
|
||||
private $source;
|
||||
public $source;
|
||||
|
||||
public function __construct(Source $source)
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
public $options;
|
||||
|
||||
/**
|
||||
* The previously focused non-ignored token.
|
||||
*
|
||||
* @var Token
|
||||
*/
|
||||
public $lastToken;
|
||||
|
||||
/**
|
||||
* The currently focused non-ignored token.
|
||||
*
|
||||
* @var Token
|
||||
*/
|
||||
public $token;
|
||||
|
||||
/**
|
||||
* The (1-indexed) line containing the current token.
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $line;
|
||||
|
||||
/**
|
||||
* The character offset at which the current line begins.
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $lineStart;
|
||||
|
||||
public function __construct(Source $source, array $options = [])
|
||||
{
|
||||
$this->prevPosition = 0;
|
||||
$startOfFileToken = new Token(Token::SOF, 0, 0, 0, 0, null);
|
||||
|
||||
$this->source = $source;
|
||||
$this->options = $options;
|
||||
$this->lastToken = $startOfFileToken;
|
||||
$this->token = $startOfFileToken;
|
||||
$this->line = 1;
|
||||
$this->lineStart = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param int|null $resetPosition
|
||||
* @return Token
|
||||
*/
|
||||
public function nextToken($resetPosition = null)
|
||||
public function advance()
|
||||
{
|
||||
$token = $this->readToken($resetPosition === null ? $this->prevPosition : $resetPosition);
|
||||
$this->prevPosition = $token->end;
|
||||
$token = $this->lastToken = $this->token;
|
||||
|
||||
if ($token->kind !== Token::EOF) {
|
||||
do {
|
||||
$token = $token->next = $this->readToken($token);
|
||||
} while ($token->kind === Token::COMMENT);
|
||||
$this->token = $token;
|
||||
}
|
||||
return $token;
|
||||
}
|
||||
|
||||
/**
 * Deprecated alias of advance().
 *
 * Raises an E_USER_DEPRECATED notice and then delegates to advance(),
 * returning whatever advance() returns.
 *
 * @deprecated Call advance() instead.
 * @return Token
 */
public function nextToken()
{
    $notice = __METHOD__ . ' is deprecated in favor of advance()';
    trigger_error($notice, E_USER_DEPRECATED);

    return $this->advance();
}
|
||||
|
||||
/**
|
||||
* @param Token $prev
|
||||
* @return Token
|
||||
* @throws SyntaxError
|
||||
*/
|
||||
private function readToken($fromPosition)
|
||||
private function readToken(Token $prev)
|
||||
{
|
||||
$body = $this->source->body;
|
||||
$bodyLength = $this->source->length;
|
||||
|
||||
$position = $this->positionAfterWhitespace($body, $fromPosition);
|
||||
$position = $this->positionAfterWhitespace($prev->end);
|
||||
$line = $this->line;
|
||||
$col = 1 + $position - $this->lineStart;
|
||||
|
||||
if ($position >= $bodyLength) {
|
||||
return new Token(Token::EOF, $position, $position);
|
||||
return new Token(Token::EOF, $bodyLength, $bodyLength, $line, $col, $prev);
|
||||
}
|
||||
|
||||
$code = Utils::charCodeAt($body, $position);
|
||||
@ -60,36 +115,38 @@ class Lexer
|
||||
|
||||
switch ($code) {
|
||||
// !
|
||||
case 33: return new Token(Token::BANG, $position, $position + 1);
|
||||
case 33: return new Token(Token::BANG, $position, $position + 1, $line, $col, $prev);
|
||||
// #
|
||||
case 35: return $this->readComment($position, $line, $col, $prev);
|
||||
// $
|
||||
case 36: return new Token(Token::DOLLAR, $position, $position + 1);
|
||||
case 36: return new Token(Token::DOLLAR, $position, $position + 1, $line, $col, $prev);
|
||||
// (
|
||||
case 40: return new Token(Token::PAREN_L, $position, $position + 1);
|
||||
case 40: return new Token(Token::PAREN_L, $position, $position + 1, $line, $col, $prev);
|
||||
// )
|
||||
case 41: return new Token(Token::PAREN_R, $position, $position + 1);
|
||||
case 41: return new Token(Token::PAREN_R, $position, $position + 1, $line, $col, $prev);
|
||||
// .
|
||||
case 46:
|
||||
if (Utils::charCodeAt($body, $position+1) === 46 &&
|
||||
Utils::charCodeAt($body, $position+2) === 46) {
|
||||
return new Token(Token::SPREAD, $position, $position + 3);
|
||||
return new Token(Token::SPREAD, $position, $position + 3, $line, $col, $prev);
|
||||
}
|
||||
break;
|
||||
// :
|
||||
case 58: return new Token(Token::COLON, $position, $position + 1);
|
||||
case 58: return new Token(Token::COLON, $position, $position + 1, $line, $col, $prev);
|
||||
// =
|
||||
case 61: return new Token(Token::EQUALS, $position, $position + 1);
|
||||
case 61: return new Token(Token::EQUALS, $position, $position + 1, $line, $col, $prev);
|
||||
// @
|
||||
case 64: return new Token(Token::AT, $position, $position + 1);
|
||||
case 64: return new Token(Token::AT, $position, $position + 1, $line, $col, $prev);
|
||||
// [
|
||||
case 91: return new Token(Token::BRACKET_L, $position, $position + 1);
|
||||
case 91: return new Token(Token::BRACKET_L, $position, $position + 1, $line, $col, $prev);
|
||||
// ]
|
||||
case 93: return new Token(Token::BRACKET_R, $position, $position + 1);
|
||||
case 93: return new Token(Token::BRACKET_R, $position, $position + 1, $line, $col, $prev);
|
||||
// {
|
||||
case 123: return new Token(Token::BRACE_L, $position, $position + 1);
|
||||
case 123: return new Token(Token::BRACE_L, $position, $position + 1, $line, $col, $prev);
|
||||
// |
|
||||
case 124: return new Token(Token::PIPE, $position, $position + 1);
|
||||
case 124: return new Token(Token::PIPE, $position, $position + 1, $line, $col, $prev);
|
||||
// }
|
||||
case 125: return new Token(Token::BRACE_R, $position, $position + 1);
|
||||
case 125: return new Token(Token::BRACE_R, $position, $position + 1, $line, $col, $prev);
|
||||
// A-Z
|
||||
case 65: case 66: case 67: case 68: case 69: case 70: case 71: case 72:
|
||||
case 73: case 74: case 75: case 76: case 77: case 78: case 79: case 80:
|
||||
@ -102,15 +159,15 @@ class Lexer
|
||||
case 105: case 106: case 107: case 108: case 109: case 110: case 111:
|
||||
case 112: case 113: case 114: case 115: case 116: case 117: case 118:
|
||||
case 119: case 120: case 121: case 122:
|
||||
return $this->readName($position);
|
||||
return $this->readName($position, $line, $col, $prev);
|
||||
// -
|
||||
case 45:
|
||||
// 0-9
|
||||
case 48: case 49: case 50: case 51: case 52:
|
||||
case 53: case 54: case 55: case 56: case 57:
|
||||
return $this->readNumber($position, $code);
|
||||
return $this->readNumber($position, $code, $line, $col, $prev);
|
||||
// "
|
||||
case 34: return $this->readString($position);
|
||||
case 34: return $this->readString($position, $line, $col, $prev);
|
||||
}
|
||||
|
||||
throw new SyntaxError($this->source, $position, 'Unexpected character ' . Utils::printCharCode($code));
|
||||
@ -120,10 +177,14 @@ class Lexer
|
||||
* Reads an alphanumeric + underscore name from the source.
|
||||
*
|
||||
* [_A-Za-z][_0-9A-Za-z]*
|
||||
*
|
||||
* @param int $position
|
||||
* @param int $line
|
||||
* @param int $col
|
||||
* @param Token $prev
|
||||
* @return Token
|
||||
*/
|
||||
private function readName($position)
|
||||
private function readName($position, $line, $col, Token $prev)
|
||||
{
|
||||
$body = $this->source->body;
|
||||
$bodyLength = $this->source->length;
|
||||
@ -141,7 +202,15 @@ class Lexer
|
||||
) {
|
||||
++$end;
|
||||
}
|
||||
return new Token(Token::NAME, $position, $end, mb_substr($body, $position, $end - $position, 'UTF-8'));
|
||||
return new Token(
|
||||
Token::NAME,
|
||||
$position,
|
||||
$end,
|
||||
$line,
|
||||
$col,
|
||||
$prev,
|
||||
mb_substr($body, $position, $end - $position, 'UTF-8')
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -151,12 +220,15 @@ class Lexer
|
||||
* Int: -?(0|[1-9][0-9]*)
|
||||
* Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
|
||||
*
|
||||
* @param $start
|
||||
* @param $firstCode
|
||||
* @param int $start
|
||||
* @param string $firstCode
|
||||
* @param int $line
|
||||
* @param int $col
|
||||
* @param Token $prev
|
||||
* @return Token
|
||||
* @throws SyntaxError
|
||||
*/
|
||||
private function readNumber($start, $firstCode)
|
||||
private function readNumber($start, $firstCode, $line, $col, Token $prev)
|
||||
{
|
||||
$code = $firstCode;
|
||||
$body = $this->source->body;
|
||||
@ -199,6 +271,9 @@ class Lexer
|
||||
$isFloat ? Token::FLOAT : Token::INT,
|
||||
$start,
|
||||
$position,
|
||||
$line,
|
||||
$col,
|
||||
$prev,
|
||||
mb_substr($body, $start, $position - $start, 'UTF-8')
|
||||
);
|
||||
}
|
||||
@ -225,11 +300,14 @@ class Lexer
|
||||
}
|
||||
|
||||
/**
|
||||
* @param $start
|
||||
* @param int $start
|
||||
* @param int $line
|
||||
* @param int $col
|
||||
* @param Token $prev
|
||||
* @return Token
|
||||
* @throws SyntaxError
|
||||
*/
|
||||
private function readString($start)
|
||||
private function readString($start, $line, $col, Token $prev)
|
||||
{
|
||||
$body = $this->source->body;
|
||||
$bodyLength = $this->source->length;
|
||||
@ -263,7 +341,7 @@ class Lexer
|
||||
case 114: $value .= "\r"; break;
|
||||
case 116: $value .= "\t"; break;
|
||||
case 117:
|
||||
$hex = mb_substr($body, $position + 1, 4);
|
||||
$hex = mb_substr($body, $position + 1, 4, 'UTF-8');
|
||||
if (!preg_match('/[0-9a-fA-F]{4}/', $hex)) {
|
||||
throw new SyntaxError($this->source, $position, 'Invalid character escape sequence: \\u' . $hex);
|
||||
}
|
||||
@ -285,7 +363,7 @@ class Lexer
|
||||
}
|
||||
|
||||
$value .= mb_substr($body, $chunkStart, $position - $chunkStart, 'UTF-8');
|
||||
return new Token(Token::STRING, $start, $position + 1, $value);
|
||||
return new Token(Token::STRING, $start, $position + 1, $line, $col, $prev, $value);
|
||||
}
|
||||
|
||||
private function assertValidStringCharacterCode($code, $position)
|
||||
@ -305,43 +383,73 @@ class Lexer
|
||||
* or commented character, then returns the position of that character for
|
||||
* lexing.
|
||||
*
|
||||
* @param $body
|
||||
* @param $startPosition
|
||||
* @return int
|
||||
*/
|
||||
private function positionAfterWhitespace($body, $startPosition)
|
||||
private function positionAfterWhitespace($startPosition)
|
||||
{
|
||||
$bodyLength = mb_strlen($body, 'UTF-8');
|
||||
$body = $this->source->body;
|
||||
$bodyLength = $this->source->length;
|
||||
$position = $startPosition;
|
||||
|
||||
while ($position < $bodyLength) {
|
||||
$code = Utils::charCodeAt($body, $position);
|
||||
|
||||
// Skip whitespace
|
||||
if (
|
||||
$code === 0xFEFF || // BOM
|
||||
$code === 0x0009 || // tab
|
||||
$code === 0x0020 || // space
|
||||
$code === 0x000A || // new line
|
||||
$code === 0x000D || // carriage return
|
||||
$code === 0x002C
|
||||
) {
|
||||
++$position;
|
||||
// Skip comments
|
||||
} else if ($code === 35) { // #
|
||||
++$position;
|
||||
while (
|
||||
$position < $bodyLength &&
|
||||
($code = Utils::charCodeAt($body, $position)) &&
|
||||
// SourceCharacter but not LineTerminator
|
||||
($code > 0x001F || $code === 0x0009) && $code !== 0x000A && $code !== 0x000D
|
||||
) {
|
||||
++$position;
|
||||
// tab | space | comma | BOM
|
||||
if ($code === 9 || $code === 32 || $code === 44 || $code === 0xFEFF) {
|
||||
$position++;
|
||||
} else if ($code === 10) { // new line
|
||||
$position++;
|
||||
$this->line++;
|
||||
$this->lineStart = $position;
|
||||
} else if ($code === 13) { // carriage return
|
||||
if (Utils::charCodeAt($body, $position + 1) === 10) {
|
||||
$position += 2;
|
||||
} else {
|
||||
$position ++;
|
||||
}
|
||||
$this->line++;
|
||||
$this->lineStart = $position;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return $position;
|
||||
}
|
||||
|
||||
/**
 * Reads a comment token from the source file.
 *
 * #[\u0009\u0020-\uFFFF]*
 *
 * Scanning begins at the `#` located at $start and advances until a character
 * is met that is neither a tab nor above U+001F (or no character code is
 * returned). The token spans [$start, $position) and its value is the comment
 * text without the leading `#`.
 *
 * @param int   $start Character offset of the leading `#`.
 * @param int   $line  Line number to record on the token.
 * @param int   $col   Column number to record on the token.
 * @param Token $prev  Token preceding this comment in the linked list.
 * @return Token
 */
private function readComment($start, $line, $col, Token $prev)
{
    $body = $this->source->body;
    $position = $start;

    do {
        $code = Utils::charCodeAt($body, ++$position);
    } while (
        $code !== null &&
        // SourceCharacter but not LineTerminator
        ($code > 0x001F || $code === 0x0009)
    );

    // The comment text occupies offsets ($start + 1) .. ($position - 1).
    // A length of ($position - $start + 1) over-reads by two characters and
    // drags the line terminator (and the character after it) into the value.
    $valueStart = $start + 1;
    $valueLength = $position - $valueStart;

    return new Token(
        Token::COMMENT,
        $start,
        $position,
        $line,
        $col,
        $prev,
        mb_substr($body, $valueStart, $valueLength, 'UTF-8')
    );
}
|
||||
}
|
||||
|
@ -1,33 +1,45 @@
|
||||
<?php
|
||||
namespace GraphQL\Language;
|
||||
|
||||
// language/lexer.js
|
||||
|
||||
/**
|
||||
* Represents a range of characters represented by a lexical token
|
||||
* within a Source.
|
||||
*/
|
||||
class Token
|
||||
{
|
||||
const EOF = 1;
|
||||
const BANG = 2;
|
||||
const DOLLAR = 3;
|
||||
const PAREN_L = 4;
|
||||
const PAREN_R = 5;
|
||||
const SPREAD = 6;
|
||||
const COLON = 7;
|
||||
const EQUALS = 8;
|
||||
const AT = 9;
|
||||
const BRACKET_L = 10;
|
||||
const BRACKET_R = 11;
|
||||
const BRACE_L = 12;
|
||||
const PIPE = 13;
|
||||
const BRACE_R = 14;
|
||||
const NAME = 15;
|
||||
const INT = 17;
|
||||
const FLOAT = 18;
|
||||
const STRING = 19;
|
||||
// Each kind of token.
|
||||
const SOF = '<SOF>';
|
||||
const EOF = '<EOF>';
|
||||
const BANG = '!';
|
||||
const DOLLAR = '$';
|
||||
const PAREN_L = '(';
|
||||
const PAREN_R = ')';
|
||||
const SPREAD = '...';
|
||||
const COLON = ':';
|
||||
const EQUALS = '=';
|
||||
const AT = '@';
|
||||
const BRACKET_L = '[';
|
||||
const BRACKET_R = ']';
|
||||
const BRACE_L = '{';
|
||||
const PIPE = '|';
|
||||
const BRACE_R = '}';
|
||||
const NAME = 'Name';
|
||||
const INT = 'Int';
|
||||
const FLOAT = 'Float';
|
||||
const STRING = 'String';
|
||||
const COMMENT = 'Comment';
|
||||
|
||||
/**
|
||||
* @param $kind
|
||||
* @return mixed
|
||||
*/
|
||||
public static function getKindDescription($kind)
|
||||
{
|
||||
$description = array();
|
||||
$description[self::EOF] = 'EOF';
|
||||
trigger_error('Deprecated as of 16.10.2016 ($kind itself contains description string now)', E_USER_DEPRECATED);
|
||||
|
||||
$description = [];
|
||||
$description[self::SOF] = '<SOF>';
|
||||
$description[self::EOF] = '<EOF>';
|
||||
$description[self::BANG] = '!';
|
||||
$description[self::DOLLAR] = '$';
|
||||
$description[self::PAREN_L] = '(';
|
||||
@ -45,35 +57,84 @@ class Token
|
||||
$description[self::INT] = 'Int';
|
||||
$description[self::FLOAT] = 'Float';
|
||||
$description[self::STRING] = 'String';
|
||||
$description[self::COMMENT] = 'Comment';
|
||||
|
||||
return $description[$kind];
|
||||
}
|
||||
|
||||
/**
|
||||
* @var int
|
||||
* The kind of Token (see one of constants above).
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
public $kind;
|
||||
|
||||
/**
|
||||
* The character offset at which this Node begins.
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $start;
|
||||
|
||||
/**
|
||||
* The character offset at which this Node ends.
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $end;
|
||||
|
||||
/**
|
||||
* The 1-indexed line number on which this Token appears.
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $line;
|
||||
|
||||
/**
|
||||
* The 1-indexed column number at which this Token begins.
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $column;
|
||||
|
||||
/**
|
||||
* @var string|null
|
||||
*/
|
||||
public $value;
|
||||
|
||||
public function __construct($kind, $start, $end, $value = null)
|
||||
/**
|
||||
* Tokens exist as nodes in a double-linked-list amongst all tokens
|
||||
* including ignored tokens. <SOF> is always the first node and <EOF>
|
||||
* the last.
|
||||
*
|
||||
* @var Token
|
||||
*/
|
||||
public $prev;
|
||||
|
||||
/**
|
||||
* @var Token
|
||||
*/
|
||||
public $next;
|
||||
|
||||
/**
 * Token constructor.
 *
 * Positional arguments are cast to int; the forward link ($next) always
 * starts out as null and is expected to be assigned by whoever appends the
 * following token.
 *
 * @param string      $kind     One of the Token kind constants.
 * @param int         $start    Character offset at which the token begins.
 * @param int         $end      Character offset at which the token ends.
 * @param int         $line     Line number on which the token appears.
 * @param int         $column   Column number at which the token begins.
 * @param Token|null  $previous Preceding token in the linked list, if any.
 * @param string|null $value    Token value, or null when the kind has none.
 */
public function __construct($kind, $start, $end, $line, $column, Token $previous = null, $value = null)
{
    // Kind and value are stored exactly as given.
    $this->kind = $kind;
    $this->value = $value;

    // Offsets and line/column information are normalised to integers.
    $this->start = (int) $start;
    $this->end = (int) $end;
    $this->line = (int) $line;
    $this->column = (int) $column;

    // Linked-list pointers: only the backward link is known at creation time.
    $this->prev = $previous;
    $this->next = null;
}
|
||||
|
||||
@ -82,6 +143,19 @@ class Token
|
||||
*/
|
||||
public function getDescription()
{
    // The kind constant is itself the printable description, so no lookup
    // through the deprecated getKindDescription() is performed.
    //
    // The value is appended unless it is null or an empty string. A plain
    // truthiness test is avoided because PHP treats the string "0" as falsy,
    // which would drop the value of an Int token such as `0`.
    $hasValue = $this->value !== null && $this->value !== '';

    if (!$hasValue) {
        return $this->kind;
    }

    return $this->kind . ' "' . $this->value . '"';
}
|
||||
|
||||
/**
 * Exports the token's kind, value, line and column as an associative array.
 *
 * Offsets and linked-list neighbours are not included.
 *
 * @return array
 */
public function toArray()
{
    $exported = [];

    $exported['kind'] = $this->kind;
    $exported['value'] = $this->value;
    $exported['line'] = $this->line;
    $exported['column'] = $this->column;

    return $exported;
}
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ class LexerTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
try {
|
||||
$char = Utils::chr(0x0007);
|
||||
$this->lexErr($char);
|
||||
$this->lexOne($char);
|
||||
$this->fail('Expected exception not thrown');
|
||||
} catch (SyntaxError $error) {
|
||||
$msg = mb_substr($error->getMessage(),0, 53);
|
||||
@ -33,13 +33,36 @@ class LexerTest extends \PHPUnit_Framework_TestCase
|
||||
public function testAcceptsBomHeader()
|
||||
{
|
||||
$bom = Utils::chr(0xFEFF);
|
||||
$this->assertEquals(new Token(Token::NAME, 2, 5, 'foo'), $this->lexOne($bom . ' foo'));
|
||||
$expected = [
|
||||
'kind' => Token::NAME,
|
||||
'start' => 2,
|
||||
'end' => 5,
|
||||
'value' => 'foo'
|
||||
];
|
||||
|
||||
$this->assertArraySubset($expected, (array) $this->lexOne($bom . ' foo'));
|
||||
}
|
||||
|
||||
/**
|
||||
* @it skips whitespace
|
||||
* @it records line and column
|
||||
*/
|
||||
public function testSkipsWhitespaces()
|
||||
public function testRecordsLineAndColumn()
|
||||
{
|
||||
$expected = [
|
||||
'kind' => Token::NAME,
|
||||
'start' => 8,
|
||||
'end' => 11,
|
||||
'line' => 4,
|
||||
'column' => 3,
|
||||
'value' => 'foo'
|
||||
];
|
||||
$this->assertArraySubset($expected, (array) $this->lexOne("\n \r\n \r foo\n"));
|
||||
}
|
||||
|
||||
/**
|
||||
* @it skips whitespace and comments
|
||||
*/
|
||||
public function testSkipsWhitespacesAndComments()
|
||||
{
|
||||
$example1 = '
|
||||
|
||||
@ -47,17 +70,36 @@ class LexerTest extends \PHPUnit_Framework_TestCase
|
||||
|
||||
|
||||
';
|
||||
$this->assertEquals(new Token(Token::NAME, 6, 9, 'foo'), $this->lexOne($example1));
|
||||
$expected = [
|
||||
'kind' => Token::NAME,
|
||||
'start' => 6,
|
||||
'end' => 9,
|
||||
'value' => 'foo'
|
||||
];
|
||||
$this->assertArraySubset($expected, (array) $this->lexOne($example1));
|
||||
|
||||
$example2 = '
|
||||
#comment
|
||||
foo#comment
|
||||
';
|
||||
|
||||
$this->assertEquals(new Token(Token::NAME, 18, 21, 'foo'), $this->lexOne($example2));
|
||||
$expected = [
|
||||
'kind' => Token::NAME,
|
||||
'start' => 18,
|
||||
'end' => 21,
|
||||
'value' => 'foo'
|
||||
];
|
||||
$this->assertArraySubset($expected, (array) $this->lexOne($example2));
|
||||
|
||||
$expected = [
|
||||
'kind' => Token::NAME,
|
||||
'start' => 3,
|
||||
'end' => 6,
|
||||
'value' => 'foo'
|
||||
];
|
||||
|
||||
$example3 = ',,,foo,,,';
|
||||
$this->assertEquals(new Token(Token::NAME, 3, 6, 'foo'), $this->lexOne($example3));
|
||||
$this->assertArraySubset($expected, (array) $this->lexOne($example3));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -72,7 +114,7 @@ class LexerTest extends \PHPUnit_Framework_TestCase
|
||||
|
||||
";
|
||||
try {
|
||||
$this->lexErr($example);
|
||||
$this->lexOne($example);
|
||||
$this->fail('Expected exception not thrown');
|
||||
} catch (SyntaxError $e) {
|
||||
$this->assertEquals(
|
||||
@ -92,17 +134,63 @@ class LexerTest extends \PHPUnit_Framework_TestCase
|
||||
*/
|
||||
public function testLexesStrings()
|
||||
{
|
||||
$this->assertEquals(new Token(Token::STRING, 0, 8, 'simple'), $this->lexOne('"simple"'));
|
||||
$this->assertEquals(new Token(Token::STRING, 0, 15, ' white space '), $this->lexOne('" white space "'));
|
||||
$this->assertEquals(new Token(Token::STRING, 0, 10, 'quote "'), $this->lexOne('"quote \\""'));
|
||||
$this->assertEquals(new Token(Token::STRING, 0, 25, 'escaped \n\r\b\t\f'), $this->lexOne('"escaped \\\\n\\\\r\\\\b\\\\t\\\\f"'));
|
||||
$this->assertEquals(new Token(Token::STRING, 0, 16, 'slashes \\ \/'), $this->lexOne('"slashes \\\\ \\\\/"'));
|
||||
$this->assertArraySubset([
|
||||
'kind' => Token::STRING,
|
||||
'start' => 0,
|
||||
'end' => 8,
|
||||
'value' => 'simple'
|
||||
], (array) $this->lexOne('"simple"'));
|
||||
|
||||
$this->assertEquals(new Token(Token::STRING, 0, 13, 'unicode яуц'), $this->lexOne('"unicode яуц"'));
|
||||
|
||||
$this->assertArraySubset([
|
||||
'kind' => Token::STRING,
|
||||
'start' => 0,
|
||||
'end' => 15,
|
||||
'value' => ' white space '
|
||||
], (array) $this->lexOne('" white space "'));
|
||||
|
||||
$this->assertArraySubset([
|
||||
'kind' => Token::STRING,
|
||||
'start' => 0,
|
||||
'end' => 10,
|
||||
'value' => 'quote "'
|
||||
], (array) $this->lexOne('"quote \\""'));
|
||||
|
||||
$this->assertArraySubset([
|
||||
'kind' => Token::STRING,
|
||||
'start' => 0,
|
||||
'end' => 25,
|
||||
'value' => 'escaped \n\r\b\t\f'
|
||||
], (array) $this->lexOne('"escaped \\\\n\\\\r\\\\b\\\\t\\\\f"'));
|
||||
|
||||
$this->assertArraySubset([
|
||||
'kind' => Token::STRING,
|
||||
'start' => 0,
|
||||
'end' => 16,
|
||||
'value' => 'slashes \\ \/'
|
||||
], (array) $this->lexOne('"slashes \\\\ \\\\/"'));
|
||||
|
||||
$this->assertArraySubset([
|
||||
'kind' => Token::STRING,
|
||||
'start' => 0,
|
||||
'end' => 13,
|
||||
'value' => 'unicode яуц'
|
||||
], (array) $this->lexOne('"unicode яуц"'));
|
||||
|
||||
$unicode = json_decode('"\u1234\u5678\u90AB\uCDEF"');
|
||||
$this->assertEquals(new Token(Token::STRING, 0, 34, 'unicode ' . $unicode), $this->lexOne('"unicode \u1234\u5678\u90AB\uCDEF"'));
|
||||
$this->assertEquals(new Token(Token::STRING, 0, 26, $unicode), $this->lexOne('"\u1234\u5678\u90AB\uCDEF"'));
|
||||
$this->assertArraySubset([
|
||||
'kind' => Token::STRING,
|
||||
'start' => 0,
|
||||
'end' => 34,
|
||||
'value' => 'unicode ' . $unicode
|
||||
], (array) $this->lexOne('"unicode \u1234\u5678\u90AB\uCDEF"'));
|
||||
|
||||
$this->assertArraySubset([
|
||||
'kind' => Token::STRING,
|
||||
'start' => 0,
|
||||
'end' => 26,
|
||||
'value' => $unicode
|
||||
], (array) $this->lexOne('"\u1234\u5678\u90AB\uCDEF"'));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -112,7 +200,7 @@ class LexerTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
$run = function($num, $str, $expectedMessage) {
|
||||
try {
|
||||
$this->lexErr($str);
|
||||
$this->lexOne($str);
|
||||
$this->fail('Expected exception not thrown in example: ' . $num);
|
||||
} catch (SyntaxError $e) {
|
||||
$this->assertEquals($expectedMessage, $e->getMessage(), "Test case $num failed");
|
||||
@ -139,69 +227,69 @@ class LexerTest extends \PHPUnit_Framework_TestCase
|
||||
*/
|
||||
public function testLexesNumbers()
|
||||
{
|
||||
$this->assertEquals(
|
||||
new Token(Token::INT, 0, 1, '4'),
|
||||
$this->lexOne('4')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::INT, 'start' => 0, 'end' => 1, 'value' => '4'],
|
||||
(array) $this->lexOne('4')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::FLOAT, 0, 5, '4.123'),
|
||||
$this->lexOne('4.123')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::FLOAT, 'start' => 0, 'end' => 5, 'value' => '4.123'],
|
||||
(array) $this->lexOne('4.123')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::INT, 0, 2, '-4'),
|
||||
$this->lexOne('-4')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::INT, 'start' => 0, 'end' => 2, 'value' => '-4'],
|
||||
(array) $this->lexOne('-4')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::INT, 0, 1, '9'),
|
||||
$this->lexOne('9')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::INT, 'start' => 0, 'end' => 1, 'value' => '9'],
|
||||
(array) $this->lexOne('9')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::INT, 0, 1, '0'),
|
||||
$this->lexOne('0')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::INT, 'start' => 0, 'end' => 1, 'value' => '0'],
|
||||
(array) $this->lexOne('0')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::FLOAT, 0, 6, '-4.123'),
|
||||
$this->lexOne('-4.123')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::FLOAT, 'start' => 0, 'end' => 6, 'value' => '-4.123'],
|
||||
(array) $this->lexOne('-4.123')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::FLOAT, 0, 5, '0.123'),
|
||||
$this->lexOne('0.123')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::FLOAT, 'start' => 0, 'end' => 5, 'value' => '0.123'],
|
||||
(array) $this->lexOne('0.123')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::FLOAT, 0, 5, '123e4'),
|
||||
$this->lexOne('123e4')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::FLOAT, 'start' => 0, 'end' => 5, 'value' => '123e4'],
|
||||
(array) $this->lexOne('123e4')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::FLOAT, 0, 5, '123E4'),
|
||||
$this->lexOne('123E4')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::FLOAT, 'start' => 0, 'end' => 5, 'value' => '123E4'],
|
||||
(array) $this->lexOne('123E4')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::FLOAT, 0, 6, '123e-4'),
|
||||
$this->lexOne('123e-4')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::FLOAT, 'start' => 0, 'end' => 6, 'value' => '123e-4'],
|
||||
(array) $this->lexOne('123e-4')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::FLOAT, 0, 6, '123e+4'),
|
||||
$this->lexOne('123e+4')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::FLOAT, 'start' => 0, 'end' => 6, 'value' => '123e+4'],
|
||||
(array) $this->lexOne('123e+4')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::FLOAT, 0, 8, '-1.123e4'),
|
||||
$this->lexOne('-1.123e4')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::FLOAT, 'start' => 0, 'end' => 8, 'value' => '-1.123e4'],
|
||||
(array) $this->lexOne('-1.123e4')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::FLOAT, 0, 8, '-1.123E4'),
|
||||
$this->lexOne('-1.123E4')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::FLOAT, 'start' => 0, 'end' => 8, 'value' => '-1.123E4'],
|
||||
(array) $this->lexOne('-1.123E4')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::FLOAT, 0, 9, '-1.123e-4'),
|
||||
$this->lexOne('-1.123e-4')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::FLOAT, 'start' => 0, 'end' => 9, 'value' => '-1.123e-4'],
|
||||
(array) $this->lexOne('-1.123e-4')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::FLOAT, 0, 9, '-1.123e+4'),
|
||||
$this->lexOne('-1.123e+4')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::FLOAT, 'start' => 0, 'end' => 9, 'value' => '-1.123e+4'],
|
||||
(array) $this->lexOne('-1.123e+4')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::FLOAT, 0, 11, '-1.123e4567'),
|
||||
$this->lexOne('-1.123e4567')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::FLOAT, 'start' => 0, 'end' => 11, 'value' => '-1.123e4567'],
|
||||
(array) $this->lexOne('-1.123e4567')
|
||||
);
|
||||
}
|
||||
|
||||
@ -234,57 +322,57 @@ class LexerTest extends \PHPUnit_Framework_TestCase
|
||||
*/
|
||||
public function testLexesPunctuation()
|
||||
{
|
||||
$this->assertEquals(
|
||||
new Token(Token::BANG, 0, 1, null),
|
||||
$this->lexOne('!')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::BANG, 'start' => 0, 'end' => 1, 'value' => null],
|
||||
(array) $this->lexOne('!')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::DOLLAR, 0, 1, null),
|
||||
$this->lexOne('$')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::DOLLAR, 'start' => 0, 'end' => 1, 'value' => null],
|
||||
(array) $this->lexOne('$')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::PAREN_L, 0, 1, null),
|
||||
$this->lexOne('(')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::PAREN_L, 'start' => 0, 'end' => 1, 'value' => null],
|
||||
(array) $this->lexOne('(')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::PAREN_R, 0, 1, null),
|
||||
$this->lexOne(')')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::PAREN_R, 'start' => 0, 'end' => 1, 'value' => null],
|
||||
(array) $this->lexOne(')')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::SPREAD, 0, 3, null),
|
||||
$this->lexOne('...')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::SPREAD, 'start' => 0, 'end' => 3, 'value' => null],
|
||||
(array) $this->lexOne('...')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::COLON, 0, 1, null),
|
||||
$this->lexOne(':')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::COLON, 'start' => 0, 'end' => 1, 'value' => null],
|
||||
(array) $this->lexOne(':')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::EQUALS, 0, 1, null),
|
||||
$this->lexOne('=')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::EQUALS, 'start' => 0, 'end' => 1, 'value' => null],
|
||||
(array) $this->lexOne('=')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::AT, 0, 1, null),
|
||||
$this->lexOne('@')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::AT, 'start' => 0, 'end' => 1, 'value' => null],
|
||||
(array) $this->lexOne('@')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::BRACKET_L, 0, 1, null),
|
||||
$this->lexOne('[')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::BRACKET_L, 'start' => 0, 'end' => 1, 'value' => null],
|
||||
(array) $this->lexOne('[')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::BRACKET_R, 0, 1, null),
|
||||
$this->lexOne(']')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::BRACKET_R, 'start' => 0, 'end' => 1, 'value' => null],
|
||||
(array) $this->lexOne(']')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::BRACE_L, 0, 1, null),
|
||||
$this->lexOne('{')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::BRACE_L, 'start' => 0, 'end' => 1, 'value' => null],
|
||||
(array) $this->lexOne('{')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::PIPE, 0, 1, null),
|
||||
$this->lexOne('|')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::PIPE, 'start' => 0, 'end' => 1, 'value' => null],
|
||||
(array) $this->lexOne('|')
|
||||
);
|
||||
$this->assertEquals(
|
||||
new Token(Token::BRACE_R, 0, 1, null),
|
||||
$this->lexOne('}')
|
||||
$this->assertArraySubset(
|
||||
['kind' => Token::BRACE_R, 'start' => 0, 'end' => 1, 'value' => null],
|
||||
(array) $this->lexOne('}')
|
||||
);
|
||||
}
|
||||
|
||||
@ -318,16 +406,58 @@ class LexerTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
$q = 'a-b';
|
||||
$lexer = new Lexer(new Source($q));
|
||||
$this->assertEquals(new Token(Token::NAME, 0, 1, 'a'), $lexer->nextToken());
|
||||
$this->assertArraySubset(['kind' => Token::NAME, 'start' => 0, 'end' => 1, 'value' => 'a'], (array) $lexer->advance());
|
||||
|
||||
try {
|
||||
$lexer->nextToken();
|
||||
$lexer->advance();
|
||||
$this->fail('Expected exception not thrown');
|
||||
} catch (SyntaxError $err) {
|
||||
$this->assertEquals('Syntax Error GraphQL (1:3) Invalid number, expected digit but got: "b"'."\n\n1: a-b\n ^\n", $err->getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @it produces double linked list of tokens, including comments
|
||||
*/
|
||||
public function testDoubleLinkedList()
|
||||
{
|
||||
$lexer = new Lexer(new Source('{
|
||||
#comment
|
||||
field
|
||||
}'));
|
||||
|
||||
$startToken = $lexer->token;
|
||||
do {
|
||||
$endToken = $lexer->advance();
|
||||
// Lexer advances over ignored comment tokens to make writing parsers
|
||||
// easier, but will include them in the linked list result.
|
||||
$this->assertNotEquals('Comment', $endToken->kind);
|
||||
} while ($endToken->kind !== '<EOF>');
|
||||
|
||||
$this->assertEquals(null, $startToken->prev);
|
||||
$this->assertEquals(null, $endToken->next);
|
||||
|
||||
$tokens = [];
|
||||
for ($tok = $startToken; $tok; $tok = $tok->next) {
|
||||
if (!empty($tokens)) {
|
||||
// Tokens are double-linked, prev should point to last seen token.
|
||||
$this->assertSame($tokens[count($tokens) - 1], $tok->prev);
|
||||
}
|
||||
$tokens[] = $tok;
|
||||
}
|
||||
|
||||
$this->assertEquals([
|
||||
'<SOF>',
|
||||
'{',
|
||||
'Comment',
|
||||
'Name',
|
||||
'}',
|
||||
'<EOF>'
|
||||
], Utils::map($tokens, function ($tok) {
|
||||
return $tok->kind;
|
||||
}));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $body
|
||||
* @return Token
|
||||
@ -335,16 +465,6 @@ class LexerTest extends \PHPUnit_Framework_TestCase
|
||||
private function lexOne($body)
|
||||
{
|
||||
$lexer = new Lexer(new Source($body));
|
||||
return $lexer->nextToken();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param $body
|
||||
* @return Token
|
||||
*/
|
||||
private function lexErr($body)
|
||||
{
|
||||
$lexer = new Lexer(new Source($body));
|
||||
return $lexer->nextToken();
|
||||
return $lexer->advance();
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user