Reworked lexer: tokens are now represented as double linked list

This commit is contained in:
vladar 2016-10-17 02:53:50 +07:00
parent f91cbf3409
commit 3eeb4d450b
4 changed files with 540 additions and 211 deletions

View File

@ -2,28 +2,55 @@
namespace GraphQL\Language\AST; namespace GraphQL\Language\AST;
use GraphQL\Language\Source; use GraphQL\Language\Source;
use GraphQL\Language\Token;
/**
* Contains a range of UTF-8 character offsets and token references that
* identify the region of the source from which the AST derived.
*/
class Location class Location
{ {
/** /**
* The character offset at which this Node begins.
*
* @var int * @var int
*/ */
public $start; public $start;
/** /**
* The character offset at which this Node ends.
*
* @var int * @var int
*/ */
public $end; public $end;
/** /**
* The Token at which this Node begins.
*
* @var Token
*/
public $startToken;
/**
* The Token at which this Node ends.
*
* @var Token
*/
public $endToken;
/**
* The Source document the AST represents.
*
* @var Source|null * @var Source|null
*/ */
public $source; public $source;
public function __construct($start, $end, Source $source = null) public function __construct(Token $startToken, Token $endToken, Source $source = null)
{ {
$this->start = $start; $this->startToken = $startToken;
$this->end = $end; $this->endToken = $endToken;
$this->start = $startToken->start;
$this->end = $endToken->end;
$this->source = $source; $this->source = $source;
} }
} }

View File

@ -4,51 +4,106 @@ namespace GraphQL\Language;
use GraphQL\SyntaxError; use GraphQL\SyntaxError;
use GraphQL\Utils; use GraphQL\Utils;
// language/lexer.js /**
* A Lexer is a stateful stream generator in that every time
* it is advanced, it returns the next token in the Source. Assuming the
* source lexes, the final Token emitted by the lexer will be of kind
* EOF, after which the lexer will repeatedly return the same EOF token
* whenever called.
*/
class Lexer class Lexer
{ {
/**
* @var int
*/
private $prevPosition;
/** /**
* @var Source * @var Source
*/ */
private $source; public $source;
public function __construct(Source $source) /**
* @var array
*/
public $options;
/**
* The previously focused non-ignored token.
*
* @var Token
*/
public $lastToken;
/**
* The currently focused non-ignored token.
*
* @var Token
*/
public $token;
/**
* The (1-indexed) line containing the current token.
*
* @var int
*/
public $line;
/**
* The character offset at which the current line begins.
*
* @var int
*/
public $lineStart;
public function __construct(Source $source, array $options = [])
{ {
$this->prevPosition = 0; $startOfFileToken = new Token(Token::SOF, 0, 0, 0, 0, null);
$this->source = $source; $this->source = $source;
$this->options = $options;
$this->lastToken = $startOfFileToken;
$this->token = $startOfFileToken;
$this->line = 1;
$this->lineStart = 0;
} }
/** /**
* @param int|null $resetPosition
* @return Token * @return Token
*/ */
public function nextToken($resetPosition = null) public function advance()
{ {
$token = $this->readToken($resetPosition === null ? $this->prevPosition : $resetPosition); $token = $this->lastToken = $this->token;
$this->prevPosition = $token->end;
if ($token->kind !== Token::EOF) {
do {
$token = $token->next = $this->readToken($token);
} while ($token->kind === Token::COMMENT);
$this->token = $token;
}
return $token; return $token;
} }
/** /**
* @param int $fromPosition * @return Token
*/
public function nextToken()
{
    // Kept only as a deprecated alias: raise the deprecation notice first,
    // then hand over to advance() and return whatever it returns.
    $notice = __METHOD__ . ' is deprecated in favor of advance()';
    trigger_error($notice, E_USER_DEPRECATED);

    return $this->advance();
}
/**
* @param Token $prev
* @return Token * @return Token
* @throws SyntaxError * @throws SyntaxError
*/ */
private function readToken($fromPosition) private function readToken(Token $prev)
{ {
$body = $this->source->body; $body = $this->source->body;
$bodyLength = $this->source->length; $bodyLength = $this->source->length;
$position = $this->positionAfterWhitespace($body, $fromPosition); $position = $this->positionAfterWhitespace($prev->end);
$line = $this->line;
$col = 1 + $position - $this->lineStart;
if ($position >= $bodyLength) { if ($position >= $bodyLength) {
return new Token(Token::EOF, $position, $position); return new Token(Token::EOF, $bodyLength, $bodyLength, $line, $col, $prev);
} }
$code = Utils::charCodeAt($body, $position); $code = Utils::charCodeAt($body, $position);
@ -60,36 +115,38 @@ class Lexer
switch ($code) { switch ($code) {
// ! // !
case 33: return new Token(Token::BANG, $position, $position + 1); case 33: return new Token(Token::BANG, $position, $position + 1, $line, $col, $prev);
// #
case 35: return $this->readComment($position, $line, $col, $prev);
// $ // $
case 36: return new Token(Token::DOLLAR, $position, $position + 1); case 36: return new Token(Token::DOLLAR, $position, $position + 1, $line, $col, $prev);
// ( // (
case 40: return new Token(Token::PAREN_L, $position, $position + 1); case 40: return new Token(Token::PAREN_L, $position, $position + 1, $line, $col, $prev);
// ) // )
case 41: return new Token(Token::PAREN_R, $position, $position + 1); case 41: return new Token(Token::PAREN_R, $position, $position + 1, $line, $col, $prev);
// . // .
case 46: case 46:
if (Utils::charCodeAt($body, $position+1) === 46 && if (Utils::charCodeAt($body, $position+1) === 46 &&
Utils::charCodeAt($body, $position+2) === 46) { Utils::charCodeAt($body, $position+2) === 46) {
return new Token(Token::SPREAD, $position, $position + 3); return new Token(Token::SPREAD, $position, $position + 3, $line, $col, $prev);
} }
break; break;
// : // :
case 58: return new Token(Token::COLON, $position, $position + 1); case 58: return new Token(Token::COLON, $position, $position + 1, $line, $col, $prev);
// = // =
case 61: return new Token(Token::EQUALS, $position, $position + 1); case 61: return new Token(Token::EQUALS, $position, $position + 1, $line, $col, $prev);
// @ // @
case 64: return new Token(Token::AT, $position, $position + 1); case 64: return new Token(Token::AT, $position, $position + 1, $line, $col, $prev);
// [ // [
case 91: return new Token(Token::BRACKET_L, $position, $position + 1); case 91: return new Token(Token::BRACKET_L, $position, $position + 1, $line, $col, $prev);
// ] // ]
case 93: return new Token(Token::BRACKET_R, $position, $position + 1); case 93: return new Token(Token::BRACKET_R, $position, $position + 1, $line, $col, $prev);
// { // {
case 123: return new Token(Token::BRACE_L, $position, $position + 1); case 123: return new Token(Token::BRACE_L, $position, $position + 1, $line, $col, $prev);
// | // |
case 124: return new Token(Token::PIPE, $position, $position + 1); case 124: return new Token(Token::PIPE, $position, $position + 1, $line, $col, $prev);
// } // }
case 125: return new Token(Token::BRACE_R, $position, $position + 1); case 125: return new Token(Token::BRACE_R, $position, $position + 1, $line, $col, $prev);
// A-Z // A-Z
case 65: case 66: case 67: case 68: case 69: case 70: case 71: case 72: case 65: case 66: case 67: case 68: case 69: case 70: case 71: case 72:
case 73: case 74: case 75: case 76: case 77: case 78: case 79: case 80: case 73: case 74: case 75: case 76: case 77: case 78: case 79: case 80:
@ -102,15 +159,15 @@ class Lexer
case 105: case 106: case 107: case 108: case 109: case 110: case 111: case 105: case 106: case 107: case 108: case 109: case 110: case 111:
case 112: case 113: case 114: case 115: case 116: case 117: case 118: case 112: case 113: case 114: case 115: case 116: case 117: case 118:
case 119: case 120: case 121: case 122: case 119: case 120: case 121: case 122:
return $this->readName($position); return $this->readName($position, $line, $col, $prev);
// - // -
case 45: case 45:
// 0-9 // 0-9
case 48: case 49: case 50: case 51: case 52: case 48: case 49: case 50: case 51: case 52:
case 53: case 54: case 55: case 56: case 57: case 53: case 54: case 55: case 56: case 57:
return $this->readNumber($position, $code); return $this->readNumber($position, $code, $line, $col, $prev);
// " // "
case 34: return $this->readString($position); case 34: return $this->readString($position, $line, $col, $prev);
} }
throw new SyntaxError($this->source, $position, 'Unexpected character ' . Utils::printCharCode($code)); throw new SyntaxError($this->source, $position, 'Unexpected character ' . Utils::printCharCode($code));
@ -120,10 +177,14 @@ class Lexer
* Reads an alphanumeric + underscore name from the source. * Reads an alphanumeric + underscore name from the source.
* *
* [_A-Za-z][_0-9A-Za-z]* * [_A-Za-z][_0-9A-Za-z]*
*
* @param int $position * @param int $position
* @param int $line
* @param int $col
* @param Token $prev
* @return Token * @return Token
*/ */
private function readName($position) private function readName($position, $line, $col, Token $prev)
{ {
$body = $this->source->body; $body = $this->source->body;
$bodyLength = $this->source->length; $bodyLength = $this->source->length;
@ -141,7 +202,15 @@ class Lexer
) { ) {
++$end; ++$end;
} }
return new Token(Token::NAME, $position, $end, mb_substr($body, $position, $end - $position, 'UTF-8')); return new Token(
Token::NAME,
$position,
$end,
$line,
$col,
$prev,
mb_substr($body, $position, $end - $position, 'UTF-8')
);
} }
/** /**
@ -151,12 +220,15 @@ class Lexer
* Int: -?(0|[1-9][0-9]*) * Int: -?(0|[1-9][0-9]*)
* Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)? * Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
* *
* @param $start * @param int $start
* @param $firstCode * @param string $firstCode
* @param int $line
* @param int $col
* @param Token $prev
* @return Token * @return Token
* @throws SyntaxError * @throws SyntaxError
*/ */
private function readNumber($start, $firstCode) private function readNumber($start, $firstCode, $line, $col, Token $prev)
{ {
$code = $firstCode; $code = $firstCode;
$body = $this->source->body; $body = $this->source->body;
@ -199,6 +271,9 @@ class Lexer
$isFloat ? Token::FLOAT : Token::INT, $isFloat ? Token::FLOAT : Token::INT,
$start, $start,
$position, $position,
$line,
$col,
$prev,
mb_substr($body, $start, $position - $start, 'UTF-8') mb_substr($body, $start, $position - $start, 'UTF-8')
); );
} }
@ -225,11 +300,14 @@ class Lexer
} }
/** /**
* @param $start * @param int $start
* @param int $line
* @param int $col
* @param Token $prev
* @return Token * @return Token
* @throws SyntaxError * @throws SyntaxError
*/ */
private function readString($start) private function readString($start, $line, $col, Token $prev)
{ {
$body = $this->source->body; $body = $this->source->body;
$bodyLength = $this->source->length; $bodyLength = $this->source->length;
@ -263,7 +341,7 @@ class Lexer
case 114: $value .= "\r"; break; case 114: $value .= "\r"; break;
case 116: $value .= "\t"; break; case 116: $value .= "\t"; break;
case 117: case 117:
$hex = mb_substr($body, $position + 1, 4); $hex = mb_substr($body, $position + 1, 4, 'UTF-8');
if (!preg_match('/[0-9a-fA-F]{4}/', $hex)) { if (!preg_match('/[0-9a-fA-F]{4}/', $hex)) {
throw new SyntaxError($this->source, $position, 'Invalid character escape sequence: \\u' . $hex); throw new SyntaxError($this->source, $position, 'Invalid character escape sequence: \\u' . $hex);
} }
@ -285,7 +363,7 @@ class Lexer
} }
$value .= mb_substr($body, $chunkStart, $position - $chunkStart, 'UTF-8'); $value .= mb_substr($body, $chunkStart, $position - $chunkStart, 'UTF-8');
return new Token(Token::STRING, $start, $position + 1, $value); return new Token(Token::STRING, $start, $position + 1, $line, $col, $prev, $value);
} }
private function assertValidStringCharacterCode($code, $position) private function assertValidStringCharacterCode($code, $position)
@ -305,43 +383,73 @@ class Lexer
* or commented character, then returns the position of that character for * or commented character, then returns the position of that character for
* lexing. * lexing.
* *
* @param $body
* @param $startPosition * @param $startPosition
* @return int * @return int
*/ */
private function positionAfterWhitespace($body, $startPosition) private function positionAfterWhitespace($startPosition)
{ {
$bodyLength = mb_strlen($body, 'UTF-8'); $body = $this->source->body;
$bodyLength = $this->source->length;
$position = $startPosition; $position = $startPosition;
while ($position < $bodyLength) { while ($position < $bodyLength) {
$code = Utils::charCodeAt($body, $position); $code = Utils::charCodeAt($body, $position);
// Skip whitespace // Skip whitespace
if ( // tab | space | comma | BOM
$code === 0xFEFF || // BOM if ($code === 9 || $code === 32 || $code === 44 || $code === 0xFEFF) {
$code === 0x0009 || // tab $position++;
$code === 0x0020 || // space } else if ($code === 10) { // new line
$code === 0x000A || // new line $position++;
$code === 0x000D || // carriage return $this->line++;
$code === 0x002C $this->lineStart = $position;
) { } else if ($code === 13) { // carriage return
++$position; if (Utils::charCodeAt($body, $position + 1) === 10) {
// Skip comments $position += 2;
} else if ($code === 35) { // # } else {
++$position; $position ++;
while (
$position < $bodyLength &&
($code = Utils::charCodeAt($body, $position)) &&
// SourceCharacter but not LineTerminator
($code > 0x001F || $code === 0x0009) && $code !== 0x000A && $code !== 0x000D
) {
++$position;
} }
$this->line++;
$this->lineStart = $position;
} else { } else {
break; break;
} }
} }
return $position; return $position;
} }
/**
 * Reads a comment token from the source file.
 *
 * #[\u0009\u0020-\uFFFF]*
 *
 * The token spans from the leading "#" up to (but not including) the first
 * character that ends the comment; its value is the text after the "#".
 *
 * @param int $start Character offset of the leading "#"
 * @param int $line Line number stored on the created token
 * @param int $col Column number stored on the created token
 * @param Token $prev Token the created token links back to
 * @return Token
 */
private function readComment($start, $line, $col, Token $prev)
{
    $body = $this->source->body;
    $position = $start;

    // Step past the "#" and keep going while charCodeAt() yields a code that
    // is a SourceCharacter but not a LineTerminator; a null code stops the scan.
    do {
        $code = Utils::charCodeAt($body, ++$position);
    } while (
        $code !== null &&
        ($code > 0x001F || $code === 0x0009)
    );

    // The comment text sits strictly between the "#" at $start and $position,
    // so it is ($position - $start - 1) characters long. Using a longer length
    // would pull the line terminator and the following character into the value.
    $value = mb_substr($body, $start + 1, $position - $start - 1, 'UTF-8');

    return new Token(
        Token::COMMENT,
        $start,
        $position,
        $line,
        $col,
        $prev,
        $value
    );
}
} }

View File

@ -1,33 +1,45 @@
<?php <?php
namespace GraphQL\Language; namespace GraphQL\Language;
// language/lexer.js /**
* Represents a range of characters represented by a lexical token
* within a Source.
*/
class Token class Token
{ {
const EOF = 1; // Each kind of token.
const BANG = 2; const SOF = '<SOF>';
const DOLLAR = 3; const EOF = '<EOF>';
const PAREN_L = 4; const BANG = '!';
const PAREN_R = 5; const DOLLAR = '$';
const SPREAD = 6; const PAREN_L = '(';
const COLON = 7; const PAREN_R = ')';
const EQUALS = 8; const SPREAD = '...';
const AT = 9; const COLON = ':';
const BRACKET_L = 10; const EQUALS = '=';
const BRACKET_R = 11; const AT = '@';
const BRACE_L = 12; const BRACKET_L = '[';
const PIPE = 13; const BRACKET_R = ']';
const BRACE_R = 14; const BRACE_L = '{';
const NAME = 15; const PIPE = '|';
const INT = 17; const BRACE_R = '}';
const FLOAT = 18; const NAME = 'Name';
const STRING = 19; const INT = 'Int';
const FLOAT = 'Float';
const STRING = 'String';
const COMMENT = 'Comment';
/**
* @param $kind
* @return mixed
*/
public static function getKindDescription($kind) public static function getKindDescription($kind)
{ {
$description = array(); trigger_error('Deprecated as of 16.10.2016 ($kind itself contains description string now)', E_USER_DEPRECATED);
$description[self::EOF] = 'EOF';
$description = [];
$description[self::SOF] = '<SOF>';
$description[self::EOF] = '<EOF>';
$description[self::BANG] = '!'; $description[self::BANG] = '!';
$description[self::DOLLAR] = '$'; $description[self::DOLLAR] = '$';
$description[self::PAREN_L] = '('; $description[self::PAREN_L] = '(';
@ -45,35 +57,84 @@ class Token
$description[self::INT] = 'Int'; $description[self::INT] = 'Int';
$description[self::FLOAT] = 'Float'; $description[self::FLOAT] = 'Float';
$description[self::STRING] = 'String'; $description[self::STRING] = 'String';
$description[self::COMMENT] = 'Comment';
return $description[$kind]; return $description[$kind];
} }
/** /**
* @var int * The kind of Token (see one of constants above).
*
* @var string
*/ */
public $kind; public $kind;
/** /**
* The character offset at which this Node begins.
*
* @var int * @var int
*/ */
public $start; public $start;
/** /**
* The character offset at which this Node ends.
*
* @var int * @var int
*/ */
public $end; public $end;
/**
* The 1-indexed line number on which this Token appears.
*
* @var int
*/
public $line;
/**
* The 1-indexed column number at which this Token begins.
*
* @var int
*/
public $column;
/** /**
* @var string|null * @var string|null
*/ */
public $value; public $value;
public function __construct($kind, $start, $end, $value = null) /**
* Tokens exist as nodes in a double-linked-list amongst all tokens
* including ignored tokens. <SOF> is always the first node and <EOF>
* the last.
*
* @var Token
*/
public $prev;
/**
* @var Token
*/
public $next;
/**
* Token constructor.
* @param $kind
* @param $start
* @param $end
* @param $line
* @param $column
* @param Token $previous
* @param null $value
*/
public function __construct($kind, $start, $end, $line, $column, Token $previous = null, $value = null)
{ {
$this->kind = $kind; $this->kind = $kind;
$this->start = (int) $start; $this->start = (int) $start;
$this->end = (int) $end; $this->end = (int) $end;
$this->line = (int) $line;
$this->column = (int) $column;
$this->prev = $previous;
$this->next = null;
$this->value = $value; $this->value = $value;
} }
@ -82,6 +143,19 @@ class Token
*/ */
public function getDescription() public function getDescription()
{ {
return self::getKindDescription($this->kind) . ($this->value ? ' "' . $this->value . '"' : ''); return $this->kind . ($this->value ? ' "' . $this->value . '"' : '');
}
/**
 * Exports the kind, value, line and column of this token as an
 * associative array (in that key order).
 *
 * @return array
 */
public function toArray()
{
    $exported = [];

    // Copy each exported property under a key of the same name.
    foreach (['kind', 'value', 'line', 'column'] as $field) {
        $exported[$field] = $this->{$field};
    }

    return $exported;
}
} }

View File

@ -16,7 +16,7 @@ class LexerTest extends \PHPUnit_Framework_TestCase
{ {
try { try {
$char = Utils::chr(0x0007); $char = Utils::chr(0x0007);
$this->lexErr($char); $this->lexOne($char);
$this->fail('Expected exception not thrown'); $this->fail('Expected exception not thrown');
} catch (SyntaxError $error) { } catch (SyntaxError $error) {
$msg = mb_substr($error->getMessage(),0, 53); $msg = mb_substr($error->getMessage(),0, 53);
@ -33,13 +33,36 @@ class LexerTest extends \PHPUnit_Framework_TestCase
public function testAcceptsBomHeader() public function testAcceptsBomHeader()
{ {
$bom = Utils::chr(0xFEFF); $bom = Utils::chr(0xFEFF);
$this->assertEquals(new Token(Token::NAME, 2, 5, 'foo'), $this->lexOne($bom . ' foo')); $expected = [
'kind' => Token::NAME,
'start' => 2,
'end' => 5,
'value' => 'foo'
];
$this->assertArraySubset($expected, (array) $this->lexOne($bom . ' foo'));
} }
/** /**
* @it skips whitespace * @it records line and column
*/ */
public function testSkipsWhitespaces() public function testRecordsLineAndColumn()
{
$expected = [
'kind' => Token::NAME,
'start' => 8,
'end' => 11,
'line' => 4,
'column' => 3,
'value' => 'foo'
];
$this->assertArraySubset($expected, (array) $this->lexOne("\n \r\n \r foo\n"));
}
/**
* @it skips whitespace and comments
*/
public function testSkipsWhitespacesAndComments()
{ {
$example1 = ' $example1 = '
@ -47,17 +70,36 @@ class LexerTest extends \PHPUnit_Framework_TestCase
'; ';
$this->assertEquals(new Token(Token::NAME, 6, 9, 'foo'), $this->lexOne($example1)); $expected = [
'kind' => Token::NAME,
'start' => 6,
'end' => 9,
'value' => 'foo'
];
$this->assertArraySubset($expected, (array) $this->lexOne($example1));
$example2 = ' $example2 = '
#comment #comment
foo#comment foo#comment
'; ';
$this->assertEquals(new Token(Token::NAME, 18, 21, 'foo'), $this->lexOne($example2)); $expected = [
'kind' => Token::NAME,
'start' => 18,
'end' => 21,
'value' => 'foo'
];
$this->assertArraySubset($expected, (array) $this->lexOne($example2));
$expected = [
'kind' => Token::NAME,
'start' => 3,
'end' => 6,
'value' => 'foo'
];
$example3 = ',,,foo,,,'; $example3 = ',,,foo,,,';
$this->assertEquals(new Token(Token::NAME, 3, 6, 'foo'), $this->lexOne($example3)); $this->assertArraySubset($expected, (array) $this->lexOne($example3));
} }
/** /**
@ -72,7 +114,7 @@ class LexerTest extends \PHPUnit_Framework_TestCase
"; ";
try { try {
$this->lexErr($example); $this->lexOne($example);
$this->fail('Expected exception not thrown'); $this->fail('Expected exception not thrown');
} catch (SyntaxError $e) { } catch (SyntaxError $e) {
$this->assertEquals( $this->assertEquals(
@ -92,17 +134,63 @@ class LexerTest extends \PHPUnit_Framework_TestCase
*/ */
public function testLexesStrings() public function testLexesStrings()
{ {
$this->assertEquals(new Token(Token::STRING, 0, 8, 'simple'), $this->lexOne('"simple"')); $this->assertArraySubset([
$this->assertEquals(new Token(Token::STRING, 0, 15, ' white space '), $this->lexOne('" white space "')); 'kind' => Token::STRING,
$this->assertEquals(new Token(Token::STRING, 0, 10, 'quote "'), $this->lexOne('"quote \\""')); 'start' => 0,
$this->assertEquals(new Token(Token::STRING, 0, 25, 'escaped \n\r\b\t\f'), $this->lexOne('"escaped \\\\n\\\\r\\\\b\\\\t\\\\f"')); 'end' => 8,
$this->assertEquals(new Token(Token::STRING, 0, 16, 'slashes \\ \/'), $this->lexOne('"slashes \\\\ \\\\/"')); 'value' => 'simple'
], (array) $this->lexOne('"simple"'));
$this->assertEquals(new Token(Token::STRING, 0, 13, 'unicode яуц'), $this->lexOne('"unicode яуц"'));
$this->assertArraySubset([
'kind' => Token::STRING,
'start' => 0,
'end' => 15,
'value' => ' white space '
], (array) $this->lexOne('" white space "'));
$this->assertArraySubset([
'kind' => Token::STRING,
'start' => 0,
'end' => 10,
'value' => 'quote "'
], (array) $this->lexOne('"quote \\""'));
$this->assertArraySubset([
'kind' => Token::STRING,
'start' => 0,
'end' => 25,
'value' => 'escaped \n\r\b\t\f'
], (array) $this->lexOne('"escaped \\\\n\\\\r\\\\b\\\\t\\\\f"'));
$this->assertArraySubset([
'kind' => Token::STRING,
'start' => 0,
'end' => 16,
'value' => 'slashes \\ \/'
], (array) $this->lexOne('"slashes \\\\ \\\\/"'));
$this->assertArraySubset([
'kind' => Token::STRING,
'start' => 0,
'end' => 13,
'value' => 'unicode яуц'
], (array) $this->lexOne('"unicode яуц"'));
$unicode = json_decode('"\u1234\u5678\u90AB\uCDEF"'); $unicode = json_decode('"\u1234\u5678\u90AB\uCDEF"');
$this->assertEquals(new Token(Token::STRING, 0, 34, 'unicode ' . $unicode), $this->lexOne('"unicode \u1234\u5678\u90AB\uCDEF"')); $this->assertArraySubset([
$this->assertEquals(new Token(Token::STRING, 0, 26, $unicode), $this->lexOne('"\u1234\u5678\u90AB\uCDEF"')); 'kind' => Token::STRING,
'start' => 0,
'end' => 34,
'value' => 'unicode ' . $unicode
], (array) $this->lexOne('"unicode \u1234\u5678\u90AB\uCDEF"'));
$this->assertArraySubset([
'kind' => Token::STRING,
'start' => 0,
'end' => 26,
'value' => $unicode
], (array) $this->lexOne('"\u1234\u5678\u90AB\uCDEF"'));
} }
/** /**
@ -112,7 +200,7 @@ class LexerTest extends \PHPUnit_Framework_TestCase
{ {
$run = function($num, $str, $expectedMessage) { $run = function($num, $str, $expectedMessage) {
try { try {
$this->lexErr($str); $this->lexOne($str);
$this->fail('Expected exception not thrown in example: ' . $num); $this->fail('Expected exception not thrown in example: ' . $num);
} catch (SyntaxError $e) { } catch (SyntaxError $e) {
$this->assertEquals($expectedMessage, $e->getMessage(), "Test case $num failed"); $this->assertEquals($expectedMessage, $e->getMessage(), "Test case $num failed");
@ -139,69 +227,69 @@ class LexerTest extends \PHPUnit_Framework_TestCase
*/ */
public function testLexesNumbers() public function testLexesNumbers()
{ {
$this->assertEquals( $this->assertArraySubset(
new Token(Token::INT, 0, 1, '4'), ['kind' => Token::INT, 'start' => 0, 'end' => 1, 'value' => '4'],
$this->lexOne('4') (array) $this->lexOne('4')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::FLOAT, 0, 5, '4.123'), ['kind' => Token::FLOAT, 'start' => 0, 'end' => 5, 'value' => '4.123'],
$this->lexOne('4.123') (array) $this->lexOne('4.123')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::INT, 0, 2, '-4'), ['kind' => Token::INT, 'start' => 0, 'end' => 2, 'value' => '-4'],
$this->lexOne('-4') (array) $this->lexOne('-4')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::INT, 0, 1, '9'), ['kind' => Token::INT, 'start' => 0, 'end' => 1, 'value' => '9'],
$this->lexOne('9') (array) $this->lexOne('9')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::INT, 0, 1, '0'), ['kind' => Token::INT, 'start' => 0, 'end' => 1, 'value' => '0'],
$this->lexOne('0') (array) $this->lexOne('0')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::FLOAT, 0, 6, '-4.123'), ['kind' => Token::FLOAT, 'start' => 0, 'end' => 6, 'value' => '-4.123'],
$this->lexOne('-4.123') (array) $this->lexOne('-4.123')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::FLOAT, 0, 5, '0.123'), ['kind' => Token::FLOAT, 'start' => 0, 'end' => 5, 'value' => '0.123'],
$this->lexOne('0.123') (array) $this->lexOne('0.123')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::FLOAT, 0, 5, '123e4'), ['kind' => Token::FLOAT, 'start' => 0, 'end' => 5, 'value' => '123e4'],
$this->lexOne('123e4') (array) $this->lexOne('123e4')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::FLOAT, 0, 5, '123E4'), ['kind' => Token::FLOAT, 'start' => 0, 'end' => 5, 'value' => '123E4'],
$this->lexOne('123E4') (array) $this->lexOne('123E4')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::FLOAT, 0, 6, '123e-4'), ['kind' => Token::FLOAT, 'start' => 0, 'end' => 6, 'value' => '123e-4'],
$this->lexOne('123e-4') (array) $this->lexOne('123e-4')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::FLOAT, 0, 6, '123e+4'), ['kind' => Token::FLOAT, 'start' => 0, 'end' => 6, 'value' => '123e+4'],
$this->lexOne('123e+4') (array) $this->lexOne('123e+4')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::FLOAT, 0, 8, '-1.123e4'), ['kind' => Token::FLOAT, 'start' => 0, 'end' => 8, 'value' => '-1.123e4'],
$this->lexOne('-1.123e4') (array) $this->lexOne('-1.123e4')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::FLOAT, 0, 8, '-1.123E4'), ['kind' => Token::FLOAT, 'start' => 0, 'end' => 8, 'value' => '-1.123E4'],
$this->lexOne('-1.123E4') (array) $this->lexOne('-1.123E4')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::FLOAT, 0, 9, '-1.123e-4'), ['kind' => Token::FLOAT, 'start' => 0, 'end' => 9, 'value' => '-1.123e-4'],
$this->lexOne('-1.123e-4') (array) $this->lexOne('-1.123e-4')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::FLOAT, 0, 9, '-1.123e+4'), ['kind' => Token::FLOAT, 'start' => 0, 'end' => 9, 'value' => '-1.123e+4'],
$this->lexOne('-1.123e+4') (array) $this->lexOne('-1.123e+4')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::FLOAT, 0, 11, '-1.123e4567'), ['kind' => Token::FLOAT, 'start' => 0, 'end' => 11, 'value' => '-1.123e4567'],
$this->lexOne('-1.123e4567') (array) $this->lexOne('-1.123e4567')
); );
} }
@ -234,57 +322,57 @@ class LexerTest extends \PHPUnit_Framework_TestCase
*/ */
public function testLexesPunctuation() public function testLexesPunctuation()
{ {
$this->assertEquals( $this->assertArraySubset(
new Token(Token::BANG, 0, 1, null), ['kind' => Token::BANG, 'start' => 0, 'end' => 1, 'value' => null],
$this->lexOne('!') (array) $this->lexOne('!')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::DOLLAR, 0, 1, null), ['kind' => Token::DOLLAR, 'start' => 0, 'end' => 1, 'value' => null],
$this->lexOne('$') (array) $this->lexOne('$')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::PAREN_L, 0, 1, null), ['kind' => Token::PAREN_L, 'start' => 0, 'end' => 1, 'value' => null],
$this->lexOne('(') (array) $this->lexOne('(')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::PAREN_R, 0, 1, null), ['kind' => Token::PAREN_R, 'start' => 0, 'end' => 1, 'value' => null],
$this->lexOne(')') (array) $this->lexOne(')')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::SPREAD, 0, 3, null), ['kind' => Token::SPREAD, 'start' => 0, 'end' => 3, 'value' => null],
$this->lexOne('...') (array) $this->lexOne('...')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::COLON, 0, 1, null), ['kind' => Token::COLON, 'start' => 0, 'end' => 1, 'value' => null],
$this->lexOne(':') (array) $this->lexOne(':')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::EQUALS, 0, 1, null), ['kind' => Token::EQUALS, 'start' => 0, 'end' => 1, 'value' => null],
$this->lexOne('=') (array) $this->lexOne('=')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::AT, 0, 1, null), ['kind' => Token::AT, 'start' => 0, 'end' => 1, 'value' => null],
$this->lexOne('@') (array) $this->lexOne('@')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::BRACKET_L, 0, 1, null), ['kind' => Token::BRACKET_L, 'start' => 0, 'end' => 1, 'value' => null],
$this->lexOne('[') (array) $this->lexOne('[')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::BRACKET_R, 0, 1, null), ['kind' => Token::BRACKET_R, 'start' => 0, 'end' => 1, 'value' => null],
$this->lexOne(']') (array) $this->lexOne(']')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::BRACE_L, 0, 1, null), ['kind' => Token::BRACE_L, 'start' => 0, 'end' => 1, 'value' => null],
$this->lexOne('{') (array) $this->lexOne('{')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::PIPE, 0, 1, null), ['kind' => Token::PIPE, 'start' => 0, 'end' => 1, 'value' => null],
$this->lexOne('|') (array) $this->lexOne('|')
); );
$this->assertEquals( $this->assertArraySubset(
new Token(Token::BRACE_R, 0, 1, null), ['kind' => Token::BRACE_R, 'start' => 0, 'end' => 1, 'value' => null],
$this->lexOne('}') (array) $this->lexOne('}')
); );
} }
@ -318,16 +406,58 @@ class LexerTest extends \PHPUnit_Framework_TestCase
{ {
$q = 'a-b'; $q = 'a-b';
$lexer = new Lexer(new Source($q)); $lexer = new Lexer(new Source($q));
$this->assertEquals(new Token(Token::NAME, 0, 1, 'a'), $lexer->nextToken()); $this->assertArraySubset(['kind' => Token::NAME, 'start' => 0, 'end' => 1, 'value' => 'a'], (array) $lexer->advance());
try { try {
$lexer->nextToken(); $lexer->advance();
$this->fail('Expected exception not thrown'); $this->fail('Expected exception not thrown');
} catch (SyntaxError $err) { } catch (SyntaxError $err) {
$this->assertEquals('Syntax Error GraphQL (1:3) Invalid number, expected digit but got: "b"'."\n\n1: a-b\n ^\n", $err->getMessage()); $this->assertEquals('Syntax Error GraphQL (1:3) Invalid number, expected digit but got: "b"'."\n\n1: a-b\n ^\n", $err->getMessage());
} }
} }
/**
 * @it produces double linked list of tokens, including comments
 */
public function testDoubleLinkedList()
{
    $lexer = new Lexer(new Source('{
#comment
field
}'));

    $startToken = $lexer->token;
    do {
        $endToken = $lexer->advance();
        // Lexer advances over ignored comment tokens to make writing parsers
        // easier, but will include them in the linked list result.
        $this->assertNotEquals(Token::COMMENT, $endToken->kind);
    } while ($endToken->kind !== Token::EOF);

    // Strict null checks: assertEquals(null, ...) compares loosely and would
    // also pass for 0, '' or false.
    $this->assertNull($startToken->prev);
    $this->assertNull($endToken->next);

    // Walk the list forwards from the first token, collecting every node.
    $tokens = [];
    for ($tok = $startToken; $tok !== null; $tok = $tok->next) {
        if (!empty($tokens)) {
            // Tokens are double-linked, prev should point to last seen token.
            $this->assertSame($tokens[count($tokens) - 1], $tok->prev);
        }
        $tokens[] = $tok;
    }

    // Literal kind strings are kept here on purpose: they pin the values
    // that the Token constants expose.
    $this->assertEquals([
        '<SOF>',
        '{',
        'Comment',
        'Name',
        '}',
        '<EOF>'
    ], Utils::map($tokens, function ($tok) {
        return $tok->kind;
    }));
}
/** /**
* @param string $body * @param string $body
* @return Token * @return Token
@ -335,16 +465,6 @@ class LexerTest extends \PHPUnit_Framework_TestCase
private function lexOne($body) private function lexOne($body)
{ {
$lexer = new Lexer(new Source($body)); $lexer = new Lexer(new Source($body));
return $lexer->nextToken(); return $lexer->advance();
}
/**
* @param $body
* @return Token
*/
private function lexErr($body)
{
$lexer = new Lexer(new Source($body));
return $lexer->nextToken();
} }
} }