Reworked lexer: tokens are now represented as a doubly linked list

This commit is contained in:
vladar 2016-10-17 02:53:50 +07:00
parent f91cbf3409
commit 3eeb4d450b
4 changed files with 540 additions and 211 deletions

View File

@ -2,28 +2,55 @@
namespace GraphQL\Language\AST;
use GraphQL\Language\Source;
use GraphQL\Language\Token;
/**
* Contains a range of UTF-8 character offsets and token references that
* identify the region of the source from which the AST derived.
*/
class Location
{
/**
* The character offset at which this Node begins.
*
* @var int
*/
public $start;
/**
* The character offset at which this Node ends.
*
* @var int
*/
public $end;
/**
* The Token at which this Node begins.
*
* @var Token
*/
public $startToken;
/**
* The Token at which this Node ends.
*
* @var Token
*/
public $endToken;
/**
* The Source document the AST represents.
*
* @var Source|null
*/
public $source;
public function __construct($start, $end, Source $source = null)
public function __construct(Token $startToken, Token $endToken, Source $source = null)
{
$this->start = $start;
$this->end = $end;
$this->startToken = $startToken;
$this->endToken = $endToken;
$this->start = $startToken->start;
$this->end = $endToken->end;
$this->source = $source;
}
}

View File

@ -4,51 +4,106 @@ namespace GraphQL\Language;
use GraphQL\SyntaxError;
use GraphQL\Utils;
// language/lexer.js
/**
* A Lexer is a stateful stream generator in that every time
* it is advanced, it returns the next token in the Source. Assuming the
* source lexes, the final Token emitted by the lexer will be of kind
* EOF, after which the lexer will repeatedly return the same EOF token
* whenever called.
*/
class Lexer
{
/**
* @var int
*/
private $prevPosition;
/**
* @var Source
*/
private $source;
public $source;
public function __construct(Source $source)
/**
* @var array
*/
public $options;
/**
* The previously focused non-ignored token.
*
* @var Token
*/
public $lastToken;
/**
* The currently focused non-ignored token.
*
* @var Token
*/
public $token;
/**
* The (1-indexed) line containing the current token.
*
* @var int
*/
public $line;
/**
* The character offset at which the current line begins.
*
* @var int
*/
public $lineStart;
public function __construct(Source $source, array $options = [])
{
$this->prevPosition = 0;
$startOfFileToken = new Token(Token::SOF, 0, 0, 0, 0, null);
$this->source = $source;
$this->options = $options;
$this->lastToken = $startOfFileToken;
$this->token = $startOfFileToken;
$this->line = 1;
$this->lineStart = 0;
}
/**
* @param int|null $resetPosition
* @return Token
*/
public function nextToken($resetPosition = null)
public function advance()
{
$token = $this->readToken($resetPosition === null ? $this->prevPosition : $resetPosition);
$this->prevPosition = $token->end;
$token = $this->lastToken = $this->token;
if ($token->kind !== Token::EOF) {
do {
$token = $token->next = $this->readToken($token);
} while ($token->kind === Token::COMMENT);
$this->token = $token;
}
return $token;
}
/**
* @param int $fromPosition
* @return Token
*/
public function nextToken()
{
    // Backward-compatibility shim: raise a deprecation notice, then defer
    // entirely to advance(), whose result is returned unchanged.
    $notice = __METHOD__ . ' is deprecated in favor of advance()';
    trigger_error($notice, E_USER_DEPRECATED);

    return $this->advance();
}
/**
* @param Token $prev
* @return Token
* @throws SyntaxError
*/
private function readToken($fromPosition)
private function readToken(Token $prev)
{
$body = $this->source->body;
$bodyLength = $this->source->length;
$position = $this->positionAfterWhitespace($body, $fromPosition);
$position = $this->positionAfterWhitespace($prev->end);
$line = $this->line;
$col = 1 + $position - $this->lineStart;
if ($position >= $bodyLength) {
return new Token(Token::EOF, $position, $position);
return new Token(Token::EOF, $bodyLength, $bodyLength, $line, $col, $prev);
}
$code = Utils::charCodeAt($body, $position);
@ -60,36 +115,38 @@ class Lexer
switch ($code) {
// !
case 33: return new Token(Token::BANG, $position, $position + 1);
case 33: return new Token(Token::BANG, $position, $position + 1, $line, $col, $prev);
// #
case 35: return $this->readComment($position, $line, $col, $prev);
// $
case 36: return new Token(Token::DOLLAR, $position, $position + 1);
case 36: return new Token(Token::DOLLAR, $position, $position + 1, $line, $col, $prev);
// (
case 40: return new Token(Token::PAREN_L, $position, $position + 1);
case 40: return new Token(Token::PAREN_L, $position, $position + 1, $line, $col, $prev);
// )
case 41: return new Token(Token::PAREN_R, $position, $position + 1);
case 41: return new Token(Token::PAREN_R, $position, $position + 1, $line, $col, $prev);
// .
case 46:
if (Utils::charCodeAt($body, $position+1) === 46 &&
Utils::charCodeAt($body, $position+2) === 46) {
return new Token(Token::SPREAD, $position, $position + 3);
return new Token(Token::SPREAD, $position, $position + 3, $line, $col, $prev);
}
break;
// :
case 58: return new Token(Token::COLON, $position, $position + 1);
case 58: return new Token(Token::COLON, $position, $position + 1, $line, $col, $prev);
// =
case 61: return new Token(Token::EQUALS, $position, $position + 1);
case 61: return new Token(Token::EQUALS, $position, $position + 1, $line, $col, $prev);
// @
case 64: return new Token(Token::AT, $position, $position + 1);
case 64: return new Token(Token::AT, $position, $position + 1, $line, $col, $prev);
// [
case 91: return new Token(Token::BRACKET_L, $position, $position + 1);
case 91: return new Token(Token::BRACKET_L, $position, $position + 1, $line, $col, $prev);
// ]
case 93: return new Token(Token::BRACKET_R, $position, $position + 1);
case 93: return new Token(Token::BRACKET_R, $position, $position + 1, $line, $col, $prev);
// {
case 123: return new Token(Token::BRACE_L, $position, $position + 1);
case 123: return new Token(Token::BRACE_L, $position, $position + 1, $line, $col, $prev);
// |
case 124: return new Token(Token::PIPE, $position, $position + 1);
case 124: return new Token(Token::PIPE, $position, $position + 1, $line, $col, $prev);
// }
case 125: return new Token(Token::BRACE_R, $position, $position + 1);
case 125: return new Token(Token::BRACE_R, $position, $position + 1, $line, $col, $prev);
// A-Z
case 65: case 66: case 67: case 68: case 69: case 70: case 71: case 72:
case 73: case 74: case 75: case 76: case 77: case 78: case 79: case 80:
@ -102,15 +159,15 @@ class Lexer
case 105: case 106: case 107: case 108: case 109: case 110: case 111:
case 112: case 113: case 114: case 115: case 116: case 117: case 118:
case 119: case 120: case 121: case 122:
return $this->readName($position);
return $this->readName($position, $line, $col, $prev);
// -
case 45:
// 0-9
case 48: case 49: case 50: case 51: case 52:
case 53: case 54: case 55: case 56: case 57:
return $this->readNumber($position, $code);
return $this->readNumber($position, $code, $line, $col, $prev);
// "
case 34: return $this->readString($position);
case 34: return $this->readString($position, $line, $col, $prev);
}
throw new SyntaxError($this->source, $position, 'Unexpected character ' . Utils::printCharCode($code));
@ -120,10 +177,14 @@ class Lexer
* Reads an alphanumeric + underscore name from the source.
*
* [_A-Za-z][_0-9A-Za-z]*
*
* @param int $position
* @param int $line
* @param int $col
* @param Token $prev
* @return Token
*/
private function readName($position)
private function readName($position, $line, $col, Token $prev)
{
$body = $this->source->body;
$bodyLength = $this->source->length;
@ -141,7 +202,15 @@ class Lexer
) {
++$end;
}
return new Token(Token::NAME, $position, $end, mb_substr($body, $position, $end - $position, 'UTF-8'));
return new Token(
Token::NAME,
$position,
$end,
$line,
$col,
$prev,
mb_substr($body, $position, $end - $position, 'UTF-8')
);
}
/**
@ -151,12 +220,15 @@ class Lexer
* Int: -?(0|[1-9][0-9]*)
* Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
*
* @param $start
* @param $firstCode
* @param int $start
* @param string $firstCode
* @param int $line
* @param int $col
* @param Token $prev
* @return Token
* @throws SyntaxError
*/
private function readNumber($start, $firstCode)
private function readNumber($start, $firstCode, $line, $col, Token $prev)
{
$code = $firstCode;
$body = $this->source->body;
@ -199,6 +271,9 @@ class Lexer
$isFloat ? Token::FLOAT : Token::INT,
$start,
$position,
$line,
$col,
$prev,
mb_substr($body, $start, $position - $start, 'UTF-8')
);
}
@ -225,11 +300,14 @@ class Lexer
}
/**
* @param $start
* @param int $start
* @param int $line
* @param int $col
* @param Token $prev
* @return Token
* @throws SyntaxError
*/
private function readString($start)
private function readString($start, $line, $col, Token $prev)
{
$body = $this->source->body;
$bodyLength = $this->source->length;
@ -263,7 +341,7 @@ class Lexer
case 114: $value .= "\r"; break;
case 116: $value .= "\t"; break;
case 117:
$hex = mb_substr($body, $position + 1, 4);
$hex = mb_substr($body, $position + 1, 4, 'UTF-8');
if (!preg_match('/[0-9a-fA-F]{4}/', $hex)) {
throw new SyntaxError($this->source, $position, 'Invalid character escape sequence: \\u' . $hex);
}
@ -285,7 +363,7 @@ class Lexer
}
$value .= mb_substr($body, $chunkStart, $position - $chunkStart, 'UTF-8');
return new Token(Token::STRING, $start, $position + 1, $value);
return new Token(Token::STRING, $start, $position + 1, $line, $col, $prev, $value);
}
private function assertValidStringCharacterCode($code, $position)
@ -305,43 +383,73 @@ class Lexer
* or commented character, then returns the position of that character for
* lexing.
*
* @param $body
* @param $startPosition
* @return int
*/
private function positionAfterWhitespace($body, $startPosition)
private function positionAfterWhitespace($startPosition)
{
$bodyLength = mb_strlen($body, 'UTF-8');
$body = $this->source->body;
$bodyLength = $this->source->length;
$position = $startPosition;
while ($position < $bodyLength) {
$code = Utils::charCodeAt($body, $position);
// Skip whitespace
if (
$code === 0xFEFF || // BOM
$code === 0x0009 || // tab
$code === 0x0020 || // space
$code === 0x000A || // new line
$code === 0x000D || // carriage return
$code === 0x002C
) {
++$position;
// Skip comments
} else if ($code === 35) { // #
++$position;
while (
$position < $bodyLength &&
($code = Utils::charCodeAt($body, $position)) &&
// SourceCharacter but not LineTerminator
($code > 0x001F || $code === 0x0009) && $code !== 0x000A && $code !== 0x000D
) {
++$position;
// tab | space | comma | BOM
if ($code === 9 || $code === 32 || $code === 44 || $code === 0xFEFF) {
$position++;
} else if ($code === 10) { // new line
$position++;
$this->line++;
$this->lineStart = $position;
} else if ($code === 13) { // carriage return
if (Utils::charCodeAt($body, $position + 1) === 10) {
$position += 2;
} else {
$position ++;
}
$this->line++;
$this->lineStart = $position;
} else {
break;
}
}
return $position;
}
/**
 * Reads a comment token from the source file.
 *
 * #[\u0009\u0020-\uFFFF]*
 *
 * Scanning begins on the character after the `#` located at $start and
 * stops at the first character whose code is null, or is at or below
 * U+001F and is not a tab. The token spans [$start, $position) and its
 * value is the comment text without the leading `#` and without the
 * terminating character.
 *
 * @param int $start Character offset of the `#` that opens the comment.
 * @param int $line Line number stored on the token.
 * @param int $col Column number stored on the token.
 * @param Token $prev Token that precedes this one in the linked list.
 * @return Token
 */
private function readComment($start, $line, $col, Token $prev)
{
    $body = $this->source->body;
    $position = $start;

    // NOTE(review): termination at end of input relies on what
    // Utils::charCodeAt() yields past the last character - confirm there.
    do {
        $code = Utils::charCodeAt($body, ++$position);
    } while (
        $code !== null &&
        // SourceCharacter but not LineTerminator
        ($code > 0x001F || $code === 0x0009)
    );

    // The text starts one character after '#' and ends just before the
    // terminator at $position, i.e. it is ($position - $start - 1) characters
    // long. The previous "+ 1" leaked the terminator and the following
    // character into the value.
    $text = mb_substr($body, $start + 1, $position - $start - 1, 'UTF-8');

    return new Token(
        Token::COMMENT,
        $start,
        $position,
        $line,
        $col,
        $prev,
        $text
    );
}
}

View File

@ -1,33 +1,45 @@
<?php
namespace GraphQL\Language;
// language/lexer.js
/**
* Represents a range of characters represented by a lexical token
* within a Source.
*/
class Token
{
const EOF = 1;
const BANG = 2;
const DOLLAR = 3;
const PAREN_L = 4;
const PAREN_R = 5;
const SPREAD = 6;
const COLON = 7;
const EQUALS = 8;
const AT = 9;
const BRACKET_L = 10;
const BRACKET_R = 11;
const BRACE_L = 12;
const PIPE = 13;
const BRACE_R = 14;
const NAME = 15;
const INT = 17;
const FLOAT = 18;
const STRING = 19;
// Each kind of token.
const SOF = '<SOF>';
const EOF = '<EOF>';
const BANG = '!';
const DOLLAR = '$';
const PAREN_L = '(';
const PAREN_R = ')';
const SPREAD = '...';
const COLON = ':';
const EQUALS = '=';
const AT = '@';
const BRACKET_L = '[';
const BRACKET_R = ']';
const BRACE_L = '{';
const PIPE = '|';
const BRACE_R = '}';
const NAME = 'Name';
const INT = 'Int';
const FLOAT = 'Float';
const STRING = 'String';
const COMMENT = 'Comment';
/**
* @param $kind
* @return mixed
*/
public static function getKindDescription($kind)
{
$description = array();
$description[self::EOF] = 'EOF';
trigger_error('Deprecated as of 16.10.2016 ($kind itself contains description string now)', E_USER_DEPRECATED);
$description = [];
$description[self::SOF] = '<SOF>';
$description[self::EOF] = '<EOF>';
$description[self::BANG] = '!';
$description[self::DOLLAR] = '$';
$description[self::PAREN_L] = '(';
@ -45,35 +57,84 @@ class Token
$description[self::INT] = 'Int';
$description[self::FLOAT] = 'Float';
$description[self::STRING] = 'String';
$description[self::COMMENT] = 'Comment';
return $description[$kind];
}
/**
* @var int
* The kind of Token (see one of constants above).
*
* @var string
*/
public $kind;
/**
* The character offset at which this Node begins.
*
* @var int
*/
public $start;
/**
* The character offset at which this Node ends.
*
* @var int
*/
public $end;
/**
* The 1-indexed line number on which this Token appears.
*
* @var int
*/
public $line;
/**
* The 1-indexed column number at which this Token begins.
*
* @var int
*/
public $column;
/**
* @var string|null
*/
public $value;
public function __construct($kind, $start, $end, $value = null)
/**
* Tokens exist as nodes in a double-linked-list amongst all tokens
* including ignored tokens. <SOF> is always the first node and <EOF>
* the last.
*
* @var Token
*/
public $prev;
/**
* @var Token
*/
public $next;
/**
 * Builds a token and links it backwards to its predecessor.
 *
 * Only the backward link is set here: `next` always starts as null and the
 * predecessor's own `next` is left untouched by this constructor.
 *
 * @param string $kind One of the kind constants declared on this class.
 * @param int $start Character offset at which the token begins (cast to int).
 * @param int $end Character offset at which the token ends (cast to int).
 * @param int $line Line number of the token (cast to int).
 * @param int $column Column number of the token (cast to int).
 * @param Token|null $previous Preceding token in the list, if any.
 * @param string|null $value Textual value carried by the token, if any.
 */
public function __construct($kind, $start, $end, $line, $column, Token $previous = null, $value = null)
{
    // What the token is and what it carries.
    $this->kind = $kind;
    $this->value = $value;

    // Where it sits in the source; positions are normalised to integers.
    $this->start = (int) $start;
    $this->end = (int) $end;
    $this->line = (int) $line;
    $this->column = (int) $column;

    // Linked-list pointers.
    $this->prev = $previous;
    $this->next = null;
}
@ -82,6 +143,19 @@ class Token
*/
public function getDescription()
{
return self::getKindDescription($this->kind) . ($this->value ? ' "' . $this->value . '"' : '');
return $this->kind . ($this->value ? ' "' . $this->value . '"' : '');
}
/**
 * Exports the token's kind, value, line and column as an associative array.
 *
 * Offsets (`start`/`end`) and the `prev`/`next` links are not part of the
 * export.
 *
 * @return array
 */
public function toArray()
{
    $export = [];
    $export['kind'] = $this->kind;
    $export['value'] = $this->value;
    $export['line'] = $this->line;
    $export['column'] = $this->column;

    return $export;
}
}

View File

@ -16,7 +16,7 @@ class LexerTest extends \PHPUnit_Framework_TestCase
{
try {
$char = Utils::chr(0x0007);
$this->lexErr($char);
$this->lexOne($char);
$this->fail('Expected exception not thrown');
} catch (SyntaxError $error) {
$msg = mb_substr($error->getMessage(),0, 53);
@ -33,13 +33,36 @@ class LexerTest extends \PHPUnit_Framework_TestCase
public function testAcceptsBomHeader()
{
$bom = Utils::chr(0xFEFF);
$this->assertEquals(new Token(Token::NAME, 2, 5, 'foo'), $this->lexOne($bom . ' foo'));
$expected = [
'kind' => Token::NAME,
'start' => 2,
'end' => 5,
'value' => 'foo'
];
$this->assertArraySubset($expected, (array) $this->lexOne($bom . ' foo'));
}
/**
* @it skips whitespace
* @it records line and column
*/
public function testSkipsWhitespaces()
public function testRecordsLineAndColumn()
{
    // Input layout (character offsets):
    //   0: \n    -> line 2 begins at offset 1
    //   2: \r\n  -> line 3 begins at offset 4
    //   5: \r    -> line 4 begins at offset 6
    //   6-7: two spaces, 8-10: "foo"
    // Two spaces before "foo" are required for start 8 / column 3; with a
    // single space the name would sit at offset 7, column 2.
    $source = "\n \r\n \r  foo\n";

    $expected = [
        'kind' => Token::NAME,
        'start' => 8,
        'end' => 11,
        'line' => 4,
        'column' => 3,
        'value' => 'foo'
    ];

    $this->assertArraySubset($expected, (array) $this->lexOne($source));
}
/**
* @it skips whitespace and comments
*/
public function testSkipsWhitespacesAndComments()
{
$example1 = '
@ -47,17 +70,36 @@ class LexerTest extends \PHPUnit_Framework_TestCase
';
$this->assertEquals(new Token(Token::NAME, 6, 9, 'foo'), $this->lexOne($example1));
$expected = [
'kind' => Token::NAME,
'start' => 6,
'end' => 9,
'value' => 'foo'
];
$this->assertArraySubset($expected, (array) $this->lexOne($example1));
$example2 = '
#comment
foo#comment
';
$this->assertEquals(new Token(Token::NAME, 18, 21, 'foo'), $this->lexOne($example2));
$expected = [
'kind' => Token::NAME,
'start' => 18,
'end' => 21,
'value' => 'foo'
];
$this->assertArraySubset($expected, (array) $this->lexOne($example2));
$expected = [
'kind' => Token::NAME,
'start' => 3,
'end' => 6,
'value' => 'foo'
];
$example3 = ',,,foo,,,';
$this->assertEquals(new Token(Token::NAME, 3, 6, 'foo'), $this->lexOne($example3));
$this->assertArraySubset($expected, (array) $this->lexOne($example3));
}
/**
@ -72,7 +114,7 @@ class LexerTest extends \PHPUnit_Framework_TestCase
";
try {
$this->lexErr($example);
$this->lexOne($example);
$this->fail('Expected exception not thrown');
} catch (SyntaxError $e) {
$this->assertEquals(
@ -92,17 +134,63 @@ class LexerTest extends \PHPUnit_Framework_TestCase
*/
public function testLexesStrings()
{
$this->assertEquals(new Token(Token::STRING, 0, 8, 'simple'), $this->lexOne('"simple"'));
$this->assertEquals(new Token(Token::STRING, 0, 15, ' white space '), $this->lexOne('" white space "'));
$this->assertEquals(new Token(Token::STRING, 0, 10, 'quote "'), $this->lexOne('"quote \\""'));
$this->assertEquals(new Token(Token::STRING, 0, 25, 'escaped \n\r\b\t\f'), $this->lexOne('"escaped \\\\n\\\\r\\\\b\\\\t\\\\f"'));
$this->assertEquals(new Token(Token::STRING, 0, 16, 'slashes \\ \/'), $this->lexOne('"slashes \\\\ \\\\/"'));
$this->assertArraySubset([
'kind' => Token::STRING,
'start' => 0,
'end' => 8,
'value' => 'simple'
], (array) $this->lexOne('"simple"'));
$this->assertEquals(new Token(Token::STRING, 0, 13, 'unicode яуц'), $this->lexOne('"unicode яуц"'));
$this->assertArraySubset([
'kind' => Token::STRING,
'start' => 0,
'end' => 15,
'value' => ' white space '
], (array) $this->lexOne('" white space "'));
$this->assertArraySubset([
'kind' => Token::STRING,
'start' => 0,
'end' => 10,
'value' => 'quote "'
], (array) $this->lexOne('"quote \\""'));
$this->assertArraySubset([
'kind' => Token::STRING,
'start' => 0,
'end' => 25,
'value' => 'escaped \n\r\b\t\f'
], (array) $this->lexOne('"escaped \\\\n\\\\r\\\\b\\\\t\\\\f"'));
$this->assertArraySubset([
'kind' => Token::STRING,
'start' => 0,
'end' => 16,
'value' => 'slashes \\ \/'
], (array) $this->lexOne('"slashes \\\\ \\\\/"'));
$this->assertArraySubset([
'kind' => Token::STRING,
'start' => 0,
'end' => 13,
'value' => 'unicode яуц'
], (array) $this->lexOne('"unicode яуц"'));
$unicode = json_decode('"\u1234\u5678\u90AB\uCDEF"');
$this->assertEquals(new Token(Token::STRING, 0, 34, 'unicode ' . $unicode), $this->lexOne('"unicode \u1234\u5678\u90AB\uCDEF"'));
$this->assertEquals(new Token(Token::STRING, 0, 26, $unicode), $this->lexOne('"\u1234\u5678\u90AB\uCDEF"'));
$this->assertArraySubset([
'kind' => Token::STRING,
'start' => 0,
'end' => 34,
'value' => 'unicode ' . $unicode
], (array) $this->lexOne('"unicode \u1234\u5678\u90AB\uCDEF"'));
$this->assertArraySubset([
'kind' => Token::STRING,
'start' => 0,
'end' => 26,
'value' => $unicode
], (array) $this->lexOne('"\u1234\u5678\u90AB\uCDEF"'));
}
/**
@ -112,7 +200,7 @@ class LexerTest extends \PHPUnit_Framework_TestCase
{
$run = function($num, $str, $expectedMessage) {
try {
$this->lexErr($str);
$this->lexOne($str);
$this->fail('Expected exception not thrown in example: ' . $num);
} catch (SyntaxError $e) {
$this->assertEquals($expectedMessage, $e->getMessage(), "Test case $num failed");
@ -139,69 +227,69 @@ class LexerTest extends \PHPUnit_Framework_TestCase
*/
public function testLexesNumbers()
{
$this->assertEquals(
new Token(Token::INT, 0, 1, '4'),
$this->lexOne('4')
$this->assertArraySubset(
['kind' => Token::INT, 'start' => 0, 'end' => 1, 'value' => '4'],
(array) $this->lexOne('4')
);
$this->assertEquals(
new Token(Token::FLOAT, 0, 5, '4.123'),
$this->lexOne('4.123')
$this->assertArraySubset(
['kind' => Token::FLOAT, 'start' => 0, 'end' => 5, 'value' => '4.123'],
(array) $this->lexOne('4.123')
);
$this->assertEquals(
new Token(Token::INT, 0, 2, '-4'),
$this->lexOne('-4')
$this->assertArraySubset(
['kind' => Token::INT, 'start' => 0, 'end' => 2, 'value' => '-4'],
(array) $this->lexOne('-4')
);
$this->assertEquals(
new Token(Token::INT, 0, 1, '9'),
$this->lexOne('9')
$this->assertArraySubset(
['kind' => Token::INT, 'start' => 0, 'end' => 1, 'value' => '9'],
(array) $this->lexOne('9')
);
$this->assertEquals(
new Token(Token::INT, 0, 1, '0'),
$this->lexOne('0')
$this->assertArraySubset(
['kind' => Token::INT, 'start' => 0, 'end' => 1, 'value' => '0'],
(array) $this->lexOne('0')
);
$this->assertEquals(
new Token(Token::FLOAT, 0, 6, '-4.123'),
$this->lexOne('-4.123')
$this->assertArraySubset(
['kind' => Token::FLOAT, 'start' => 0, 'end' => 6, 'value' => '-4.123'],
(array) $this->lexOne('-4.123')
);
$this->assertEquals(
new Token(Token::FLOAT, 0, 5, '0.123'),
$this->lexOne('0.123')
$this->assertArraySubset(
['kind' => Token::FLOAT, 'start' => 0, 'end' => 5, 'value' => '0.123'],
(array) $this->lexOne('0.123')
);
$this->assertEquals(
new Token(Token::FLOAT, 0, 5, '123e4'),
$this->lexOne('123e4')
$this->assertArraySubset(
['kind' => Token::FLOAT, 'start' => 0, 'end' => 5, 'value' => '123e4'],
(array) $this->lexOne('123e4')
);
$this->assertEquals(
new Token(Token::FLOAT, 0, 5, '123E4'),
$this->lexOne('123E4')
$this->assertArraySubset(
['kind' => Token::FLOAT, 'start' => 0, 'end' => 5, 'value' => '123E4'],
(array) $this->lexOne('123E4')
);
$this->assertEquals(
new Token(Token::FLOAT, 0, 6, '123e-4'),
$this->lexOne('123e-4')
$this->assertArraySubset(
['kind' => Token::FLOAT, 'start' => 0, 'end' => 6, 'value' => '123e-4'],
(array) $this->lexOne('123e-4')
);
$this->assertEquals(
new Token(Token::FLOAT, 0, 6, '123e+4'),
$this->lexOne('123e+4')
$this->assertArraySubset(
['kind' => Token::FLOAT, 'start' => 0, 'end' => 6, 'value' => '123e+4'],
(array) $this->lexOne('123e+4')
);
$this->assertEquals(
new Token(Token::FLOAT, 0, 8, '-1.123e4'),
$this->lexOne('-1.123e4')
$this->assertArraySubset(
['kind' => Token::FLOAT, 'start' => 0, 'end' => 8, 'value' => '-1.123e4'],
(array) $this->lexOne('-1.123e4')
);
$this->assertEquals(
new Token(Token::FLOAT, 0, 8, '-1.123E4'),
$this->lexOne('-1.123E4')
$this->assertArraySubset(
['kind' => Token::FLOAT, 'start' => 0, 'end' => 8, 'value' => '-1.123E4'],
(array) $this->lexOne('-1.123E4')
);
$this->assertEquals(
new Token(Token::FLOAT, 0, 9, '-1.123e-4'),
$this->lexOne('-1.123e-4')
$this->assertArraySubset(
['kind' => Token::FLOAT, 'start' => 0, 'end' => 9, 'value' => '-1.123e-4'],
(array) $this->lexOne('-1.123e-4')
);
$this->assertEquals(
new Token(Token::FLOAT, 0, 9, '-1.123e+4'),
$this->lexOne('-1.123e+4')
$this->assertArraySubset(
['kind' => Token::FLOAT, 'start' => 0, 'end' => 9, 'value' => '-1.123e+4'],
(array) $this->lexOne('-1.123e+4')
);
$this->assertEquals(
new Token(Token::FLOAT, 0, 11, '-1.123e4567'),
$this->lexOne('-1.123e4567')
$this->assertArraySubset(
['kind' => Token::FLOAT, 'start' => 0, 'end' => 11, 'value' => '-1.123e4567'],
(array) $this->lexOne('-1.123e4567')
);
}
@ -234,57 +322,57 @@ class LexerTest extends \PHPUnit_Framework_TestCase
*/
public function testLexesPunctuation()
{
$this->assertEquals(
new Token(Token::BANG, 0, 1, null),
$this->lexOne('!')
$this->assertArraySubset(
['kind' => Token::BANG, 'start' => 0, 'end' => 1, 'value' => null],
(array) $this->lexOne('!')
);
$this->assertEquals(
new Token(Token::DOLLAR, 0, 1, null),
$this->lexOne('$')
$this->assertArraySubset(
['kind' => Token::DOLLAR, 'start' => 0, 'end' => 1, 'value' => null],
(array) $this->lexOne('$')
);
$this->assertEquals(
new Token(Token::PAREN_L, 0, 1, null),
$this->lexOne('(')
$this->assertArraySubset(
['kind' => Token::PAREN_L, 'start' => 0, 'end' => 1, 'value' => null],
(array) $this->lexOne('(')
);
$this->assertEquals(
new Token(Token::PAREN_R, 0, 1, null),
$this->lexOne(')')
$this->assertArraySubset(
['kind' => Token::PAREN_R, 'start' => 0, 'end' => 1, 'value' => null],
(array) $this->lexOne(')')
);
$this->assertEquals(
new Token(Token::SPREAD, 0, 3, null),
$this->lexOne('...')
$this->assertArraySubset(
['kind' => Token::SPREAD, 'start' => 0, 'end' => 3, 'value' => null],
(array) $this->lexOne('...')
);
$this->assertEquals(
new Token(Token::COLON, 0, 1, null),
$this->lexOne(':')
$this->assertArraySubset(
['kind' => Token::COLON, 'start' => 0, 'end' => 1, 'value' => null],
(array) $this->lexOne(':')
);
$this->assertEquals(
new Token(Token::EQUALS, 0, 1, null),
$this->lexOne('=')
$this->assertArraySubset(
['kind' => Token::EQUALS, 'start' => 0, 'end' => 1, 'value' => null],
(array) $this->lexOne('=')
);
$this->assertEquals(
new Token(Token::AT, 0, 1, null),
$this->lexOne('@')
$this->assertArraySubset(
['kind' => Token::AT, 'start' => 0, 'end' => 1, 'value' => null],
(array) $this->lexOne('@')
);
$this->assertEquals(
new Token(Token::BRACKET_L, 0, 1, null),
$this->lexOne('[')
$this->assertArraySubset(
['kind' => Token::BRACKET_L, 'start' => 0, 'end' => 1, 'value' => null],
(array) $this->lexOne('[')
);
$this->assertEquals(
new Token(Token::BRACKET_R, 0, 1, null),
$this->lexOne(']')
$this->assertArraySubset(
['kind' => Token::BRACKET_R, 'start' => 0, 'end' => 1, 'value' => null],
(array) $this->lexOne(']')
);
$this->assertEquals(
new Token(Token::BRACE_L, 0, 1, null),
$this->lexOne('{')
$this->assertArraySubset(
['kind' => Token::BRACE_L, 'start' => 0, 'end' => 1, 'value' => null],
(array) $this->lexOne('{')
);
$this->assertEquals(
new Token(Token::PIPE, 0, 1, null),
$this->lexOne('|')
$this->assertArraySubset(
['kind' => Token::PIPE, 'start' => 0, 'end' => 1, 'value' => null],
(array) $this->lexOne('|')
);
$this->assertEquals(
new Token(Token::BRACE_R, 0, 1, null),
$this->lexOne('}')
$this->assertArraySubset(
['kind' => Token::BRACE_R, 'start' => 0, 'end' => 1, 'value' => null],
(array) $this->lexOne('}')
);
}
@ -318,16 +406,58 @@ class LexerTest extends \PHPUnit_Framework_TestCase
{
$q = 'a-b';
$lexer = new Lexer(new Source($q));
$this->assertEquals(new Token(Token::NAME, 0, 1, 'a'), $lexer->nextToken());
$this->assertArraySubset(['kind' => Token::NAME, 'start' => 0, 'end' => 1, 'value' => 'a'], (array) $lexer->advance());
try {
$lexer->nextToken();
$lexer->advance();
$this->fail('Expected exception not thrown');
} catch (SyntaxError $err) {
$this->assertEquals('Syntax Error GraphQL (1:3) Invalid number, expected digit but got: "b"'."\n\n1: a-b\n ^\n", $err->getMessage());
}
}
/**
 * @it produces double linked list of tokens, including comments
 */
public function testDoubleLinkedList()
{
    // Written with explicit "\n" escapes so the lexed text does not depend on
    // how this method happens to be indented.
    $lexer = new Lexer(new Source("{\n#comment\nfield\n}"));

    $startToken = $lexer->token;
    do {
        $endToken = $lexer->advance();
        // Lexer advances over ignored comment tokens to make writing parsers
        // easier, but will include them in the linked list result.
        $this->assertNotEquals(Token::COMMENT, $endToken->kind);
    } while ($endToken->kind !== Token::EOF);

    // assertNull is strict: assertEquals(null, ...) would also accept other
    // falsy values such as false, 0 or an empty string.
    $this->assertNull($startToken->prev);
    $this->assertNull($endToken->next);

    $tokens = [];
    for ($tok = $startToken; $tok; $tok = $tok->next) {
        if (!empty($tokens)) {
            // Tokens are double-linked, prev should point to last seen token.
            $this->assertSame($tokens[count($tokens) - 1], $tok->prev);
        }
        $tokens[] = $tok;
    }

    // Walking the list from <SOF> must surface the comment that advance()
    // skipped over.
    $expectedKinds = [
        Token::SOF,
        Token::BRACE_L,
        Token::COMMENT,
        Token::NAME,
        Token::BRACE_R,
        Token::EOF
    ];
    $actualKinds = Utils::map($tokens, function ($tok) {
        return $tok->kind;
    });

    $this->assertEquals($expectedKinds, $actualKinds);
}
/**
* @param string $body
* @return Token
@ -335,16 +465,6 @@ class LexerTest extends \PHPUnit_Framework_TestCase
private function lexOne($body)
{
$lexer = new Lexer(new Source($body));
return $lexer->nextToken();
}
/**
* @param $body
* @return Token
*/
private function lexErr($body)
{
$lexer = new Lexer(new Source($body));
return $lexer->nextToken();
return $lexer->advance();
}
}