diff --git a/src/Language/Lexer.php b/src/Language/Lexer.php index a8b1e91..04485dd 100644 --- a/src/Language/Lexer.php +++ b/src/Language/Lexer.php @@ -46,12 +46,18 @@ class Lexer $bodyLength = $this->source->length; $position = $this->positionAfterWhitespace($body, $fromPosition); - $code = Utils::charCodeAt($body, $position); if ($position >= $bodyLength) { return new Token(Token::EOF, $position, $position); } + $code = Utils::charCodeAt($body, $position); + + // SourceCharacter + if ($code < 0x0020 && $code !== 0x0009 && $code !== 0x000A && $code !== 0x000D) { + throw new SyntaxError($this->source, $position, 'Invalid character ' . Utils::printCharCode($code)); + } + switch ($code) { // ! case 33: return new Token(Token::BANG, $position, $position + 1); @@ -99,7 +105,7 @@ class Lexer return $this->readName($position); // - case 45: - // 0-9 + // 0-9 case 48: case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56: case 57: return $this->readNumber($position, $code); @@ -107,7 +113,7 @@ class Lexer case 34: return $this->readString($position); } - throw new SyntaxError($this->source, $position, 'Unexpected character "' . Utils::chr($code). '"'); + throw new SyntaxError($this->source, $position, 'Unexpected character ' . Utils::printCharCode($code)); } /** @@ -163,25 +169,21 @@ class Lexer if ($code === 48) { // 0 $code = Utils::charCodeAt($body, ++$position); - } else if ($code >= 49 && $code <= 57) { // 1 - 9 - do { - $code = Utils::charCodeAt($body, ++$position); - } while ($code >= 48 && $code <= 57); // 0 - 9 + + if ($code >= 48 && $code <= 57) { + throw new SyntaxError($this->source, $position, "Invalid number, unexpected digit after 0: " . Utils::printCharCode($code)); + } } else { - throw new SyntaxError($this->source, $position, 'Invalid number'); + $position = $this->readDigits($position, $code); + $code = Utils::charCodeAt($body, $position); } if ($code === 46) { // . $isFloat = true; $code = Utils::charCodeAt($body, ++$position); - if ($code >= 48 && $code <= 57) { // 0 - 9 - do { - $code = Utils::charCodeAt($body, ++$position); - } while ($code >= 48 && $code <= 57); // 0 - 9 - } else { - throw new SyntaxError($this->source, $position, 'Invalid number'); - } + $position = $this->readDigits($position, $code); + $code = Utils::charCodeAt($body, $position); } if ($code === 69 || $code === 101) { // E e @@ -191,13 +193,7 @@ class Lexer if ($code === 43 || $code === 45) { // + - $code = Utils::charCodeAt($body, ++$position); } - if ($code >= 48 && $code <= 57) { // 0 - 9 - do { - $code = Utils::charCodeAt($body, ++$position); - } while ($code >= 48 && $code <= 57); // 0 - 9 - } else { - throw new SyntaxError($this->source, $position, 'Invalid number'); - } + $position = $this->readDigits($position, $code); } return new Token( $isFloat ? Token::FLOAT : Token::INT, @@ -207,6 +203,32 @@ class Lexer ); } + /** + * Returns the new position in the source after reading digits. + */ + private function readDigits($start, $firstCode) + { + $body = $this->source->body; + $position = $start; + $code = $firstCode; + if ($code >= 48 && $code <= 57) { // 0 - 9 + do { + $code = Utils::charCodeAt($body, ++$position); + } while ($code >= 48 && $code <= 57); // 0 - 9 + + return $position; + } + if ($position > $this->source->length - 1) { + $code = null; + } + throw new SyntaxError($this->source, $position, "Invalid number, expected digit but got: " . Utils::printCharCode($code)); + } + + /** + * @param $start + * @return Token + * @throws SyntaxError + */ private function readString($start) { $body = $this->source->body; @@ -220,9 +242,13 @@ class Lexer while ( $position < $bodyLength && ($code = Utils::charCodeAt($body, $position)) && - $code !== 34 && - $code !== 10 && $code !== 13 && $code !== 0x2028 && $code !== 0x2029 + // not LineTerminator + $code !== 0x000A && $code !== 0x000D && + // not Quote (") + $code !== 34 ) { + $this->assertValidStringCharacterCode($code, $position); + ++$position; if ($code === 92) { // \ $value .= mb_substr($body, $chunkStart, $position - 1 - $chunkStart, 'UTF-8'); @@ -239,13 +265,15 @@ class Lexer case 117: $hex = mb_substr($body, $position + 1, 4); if (!preg_match('/[0-9a-fA-F]{4}/', $hex)) { - throw new SyntaxError($this->source, $position, 'Bad character escape sequence'); + throw new SyntaxError($this->source, $position, 'Invalid character escape sequence: \\u' . $hex); } - $value .= Utils::chr(hexdec($hex)); + $code = hexdec($hex); + $this->assertValidStringCharacterCode($code, $position - 1); + $value .= Utils::chr($code); $position += 4; break; default: - throw new SyntaxError($this->source, $position, 'Bad character escape sequence'); + throw new SyntaxError($this->source, $position, 'Invalid character escape sequence: \\' . Utils::chr($code)); } ++$position; $chunkStart = $position; @@ -260,6 +288,18 @@ class Lexer return new Token(Token::STRING, $start, $position + 1, $value); } + private function assertValidStringCharacterCode($code, $position) + { + // SourceCharacter + if ($code < 0x0020 && $code !== 0x0009) { + throw new SyntaxError( + $this->source, + $position, + "Invalid character within String: " . Utils::printCharCode($code) + ); + } + } + /** * Reads from body starting at startPosition until it finds a non-whitespace * or commented character, then returns the position of that character for @@ -279,12 +319,12 @@ class Lexer // Skip whitespace if ( - $code === 32 || // space - $code === 44 || // comma - $code === 160 || // '\xa0' - $code === 0x2028 || // line separator - $code === 0x2029 || // paragraph separator - $code > 8 && $code < 14 // whitespace + $code === 0xFEFF || // BOM + $code === 0x0009 || // tab + $code === 0x0020 || // space + $code === 0x000A || // new line + $code === 0x000D || // carriage return + $code === 0x002C ) { ++$position; // Skip comments @@ -293,7 +333,8 @@ class Lexer while ( $position < $bodyLength && ($code = Utils::charCodeAt($body, $position)) && - $code !== 10 && $code !== 13 && $code !== 0x2028 && $code !== 0x2029 + // SourceCharacter but not LineTerminator + ($code > 0x001F || $code === 0x0009) && $code !== 0x000A && $code !== 0x000D ) { ++$position; } diff --git a/src/Utils.php b/src/Utils.php index 4dee896..d1d99a8 100644 --- a/src/Utils.php +++ b/src/Utils.php @@ -247,4 +247,20 @@ class Utils $char = mb_substr($string, $position, 1, 'UTF-8'); return self::ord($char); } + + /** + * @param $code + * @return string + */ + public static function printCharCode($code) + { + if (null === $code) { + return ''; + } + return $code < 0x007F + // Trust JSON for ASCII. + ? json_encode(Utils::chr($code)) + // Otherwise print the escaped form. + : '"\\u' . dechex($code) . '"'; + } } diff --git a/tests/Language/LexerTest.php b/tests/Language/LexerTest.php index 65ae845..74f917f 100644 --- a/tests/Language/LexerTest.php +++ b/tests/Language/LexerTest.php @@ -5,9 +5,40 @@ use GraphQL\Language\Lexer; use GraphQL\Language\Source; use GraphQL\Language\Token; use GraphQL\SyntaxError; +use GraphQL\Utils; class LexerTest extends \PHPUnit_Framework_TestCase { + /** + * @it disallows uncommon control characters + */ + public function testDissallowsUncommonControlCharacters() + { + try { + $char = Utils::chr(0x0007); + $this->lexErr($char); + $this->fail('Expected exception not thrown'); + } catch (SyntaxError $error) { + $msg = mb_substr($error->getMessage(),0, 53); + $this->assertEquals( + 'Syntax Error GraphQL (1:1) Invalid character "\u0007"', + $msg + ); + } + } + + /** + * @it accepts BOM header + */ + public function testAcceptsBomHeader() + { + $bom = Utils::chr(0xFEFF); + $this->assertEquals(new Token(Token::NAME, 2, 5, 'foo'), $this->lexOne($bom . ' foo')); + } + + /** + * @it skips whitespace + */ public function testSkipsWhitespaces() { $example1 = ' @@ -29,6 +60,9 @@ class LexerTest extends \PHPUnit_Framework_TestCase $this->assertEquals(new Token(Token::NAME, 3, 6, 'foo'), $this->lexOne($example3)); } + /** + * @it errors respect whitespace + */ public function testErrorsRespectWhitespace() { $example = " @@ -38,7 +72,7 @@ class LexerTest extends \PHPUnit_Framework_TestCase "; try { - $this->lexOne($example); + $this->lexErr($example); $this->fail('Expected exception not thrown'); } catch (SyntaxError $e) { $this->assertEquals( @@ -53,6 +87,9 @@ class LexerTest extends \PHPUnit_Framework_TestCase } } + /** + * @it lexes strings + */ public function testLexesStrings() { $this->assertEquals(new Token(Token::STRING, 0, 8, 'simple'), $this->lexOne('"simple"')); @@ -65,58 +102,43 @@ class LexerTest extends \PHPUnit_Framework_TestCase $unicode = json_decode('"\u1234\u5678\u90AB\uCDEF"'); $this->assertEquals(new Token(Token::STRING, 0, 34, 'unicode ' . $unicode), $this->lexOne('"unicode \u1234\u5678\u90AB\uCDEF"')); + $this->assertEquals(new Token(Token::STRING, 0, 26, $unicode), $this->lexOne('"\u1234\u5678\u90AB\uCDEF"')); } + /** + * @it lex reports useful string errors + */ public function testReportsUsefulErrors() { $run = function($num, $str, $expectedMessage) { try { - $this->lexOne($str); + $this->lexErr($str); $this->fail('Expected exception not thrown in example: ' . $num); } catch (SyntaxError $e) { $this->assertEquals($expectedMessage, $e->getMessage(), "Test case $num failed"); } }; - $run(1, '"no end quote', "Syntax Error GraphQL (1:14) Unterminated string\n\n1: \"no end quote\n ^\n"); - $run(2, '"multi'."\n".'line"', "Syntax Error GraphQL (1:7) Unterminated string\n\n1: \"multi\n ^\n2: line\"\n"); - $run(3, '"multi'."\r".'line"', "Syntax Error GraphQL (1:7) Unterminated string\n\n1: \"multi\n ^\n2: line\"\n"); - $run(4, '"multi' . json_decode('"\u2028"') . 'line"', "Syntax Error GraphQL (1:7) Unterminated string\n\n1: \"multi\n ^\n2: line\"\n"); - $run(5, '"multi' . json_decode('"\u2029"') . 'line"', "Syntax Error GraphQL (1:7) Unterminated string\n\n1: \"multi\n ^\n2: line\"\n"); - $run(6, '"bad \\z esc"', "Syntax Error GraphQL (1:7) Bad character escape sequence\n\n1: \"bad \\z esc\"\n ^\n"); - $run(7, '"bad \\x esc"', "Syntax Error GraphQL (1:7) Bad character escape sequence\n\n1: \"bad \\x esc\"\n ^\n"); - $run(8, '"bad \\u1 esc"', "Syntax Error GraphQL (1:7) Bad character escape sequence\n\n1: \"bad \\u1 esc\"\n ^\n"); - $run(9, '"bad \\u0XX1 esc"', "Syntax Error GraphQL (1:7) Bad character escape sequence\n\n1: \"bad \\u0XX1 esc\"\n ^\n"); - $run(10, '"bad \\uXXXX esc"', "Syntax Error GraphQL (1:7) Bad character escape sequence\n\n1: \"bad \\uXXXX esc\"\n ^\n"); - $run(11, '"bad \\uFXXX esc"', "Syntax Error GraphQL (1:7) Bad character escape sequence\n\n1: \"bad \\uFXXX esc\"\n ^\n"); - $run(12, '"bad \\uXXXF esc"', "Syntax Error GraphQL (1:7) Bad character escape sequence\n\n1: \"bad \\uXXXF esc\"\n ^\n"); + $run(1, '"', "Syntax Error GraphQL (1:2) Unterminated string\n\n1: \"\n ^\n"); + $run(2, '"no end quote', "Syntax Error GraphQL (1:14) Unterminated string\n\n1: \"no end quote\n ^\n"); + $run(3, '"contains unescaped \u0007 control char"', "Syntax Error GraphQL (1:21) Invalid character within String: \"\\u0007\"\n\n1: \"contains unescaped \\u0007 control char\"\n ^\n"); + $run(4, '"null-byte is not \u0000 end of file"', 'Syntax Error GraphQL (1:19) Invalid character within String: "\\u0000"'."\n\n1: \"null-byte is not \\u0000 end of file\"\n ^\n"); + $run(5, '"multi'."\n".'line"', "Syntax Error GraphQL (1:7) Unterminated string\n\n1: \"multi\n ^\n2: line\"\n"); + $run(6, '"multi'."\r".'line"', "Syntax Error GraphQL (1:7) Unterminated string\n\n1: \"multi\n ^\n2: line\"\n"); + $run(7, '"bad \\z esc"', "Syntax Error GraphQL (1:7) Invalid character escape sequence: \\z\n\n1: \"bad \\z esc\"\n ^\n"); + $run(8, '"bad \\x esc"', "Syntax Error GraphQL (1:7) Invalid character escape sequence: \\x\n\n1: \"bad \\x esc\"\n ^\n"); + $run(9, '"bad \\u1 esc"', "Syntax Error GraphQL (1:7) Invalid character escape sequence: \\u1 es\n\n1: \"bad \\u1 esc\"\n ^\n"); + $run(10, '"bad \\u0XX1 esc"', "Syntax Error GraphQL (1:7) Invalid character escape sequence: \\u0XX1\n\n1: \"bad \\u0XX1 esc\"\n ^\n"); + $run(11, '"bad \\uXXXX esc"', "Syntax Error GraphQL (1:7) Invalid character escape sequence: \\uXXXX\n\n1: \"bad \\uXXXX esc\"\n ^\n"); + $run(12, '"bad \\uFXXX esc"', "Syntax Error GraphQL (1:7) Invalid character escape sequence: \\uFXXX\n\n1: \"bad \\uFXXX esc\"\n ^\n"); + $run(13, '"bad \\uXXXF esc"', "Syntax Error GraphQL (1:7) Invalid character escape sequence: \\uXXXF\n\n1: \"bad \\uXXXF esc\"\n ^\n"); } + /** + * @it lexes numbers + */ public function testLexesNumbers() { - // lexes numbers -/* - $this->assertEquals( - new Token(Token::STRING, 0, 8, 'simple'), - $this->lexOne('"simple"') - ); - $this->assertEquals( - new Token(Token::STRING, 0, 15, ' white space '), - $this->lexOne('" white space "') - ); - $this->assertEquals( - new Token(Token::STRING, 0, 20, 'escaped \n\r\b\t\f'), - $this->lexOne('"escaped \\n\\r\\b\\t\\f"') - ); - $this->assertEquals( - new Token(Token::STRING, 0, 15, 'slashes \\ \/'), - $this->lexOne('"slashes \\\\ \\/"') - ); - $this->assertEquals( - new Token(Token::STRING, 0, 34, 'unicode ' . json_decode('"\u1234\u5678\u90AB\uCDEF"')), - $this->lexOne('"unicode \\u1234\\u5678\\u90AB\\uCDEF"') - );*/ - $this->assertEquals( new Token(Token::INT, 0, 1, '4'), $this->lexOne('4') @@ -137,10 +159,6 @@ class LexerTest extends \PHPUnit_Framework_TestCase new Token(Token::INT, 0, 1, '0'), $this->lexOne('0') ); - $this->assertEquals( - new Token(Token::INT, 0, 1, '0'), - $this->lexOne('00') - ); $this->assertEquals( new Token(Token::FLOAT, 0, 6, '-4.123'), $this->lexOne('-4.123') @@ -187,6 +205,9 @@ class LexerTest extends \PHPUnit_Framework_TestCase ); } + /** + * @it lex reports useful number errors + */ public function testReportsUsefulNumberErrors() { $run = function($num, $str, $expectedMessage) { @@ -198,15 +219,19 @@ class LexerTest extends \PHPUnit_Framework_TestCase } }; + $run(0, '00', "Syntax Error GraphQL (1:2) Invalid number, unexpected digit after 0: \"0\"\n\n1: 00\n ^\n"); $run(1, '+1', "Syntax Error GraphQL (1:1) Unexpected character \"+\"\n\n1: +1\n ^\n"); - $run(2, '1.', "Syntax Error GraphQL (1:3) Invalid number\n\n1: 1.\n ^\n"); + $run(2, '1.', "Syntax Error GraphQL (1:3) Invalid number, expected digit but got: \n\n1: 1.\n ^\n"); $run(3, '.123', "Syntax Error GraphQL (1:1) Unexpected character \".\"\n\n1: .123\n ^\n"); - $run(4, '1.A', "Syntax Error GraphQL (1:3) Invalid number\n\n1: 1.A\n ^\n"); - $run(5, '-A', "Syntax Error GraphQL (1:2) Invalid number\n\n1: -A\n ^\n"); - $run(6, '1.0e', "Syntax Error GraphQL (1:5) Invalid number\n\n1: 1.0e\n ^\n"); - $run(7, '1.0eA', "Syntax Error GraphQL (1:5) Invalid number\n\n1: 1.0eA\n ^\n"); + $run(4, '1.A', "Syntax Error GraphQL (1:3) Invalid number, expected digit but got: \"A\"\n\n1: 1.A\n ^\n"); + $run(5, '-A', "Syntax Error GraphQL (1:2) Invalid number, expected digit but got: \"A\"\n\n1: -A\n ^\n"); + $run(6, '1.0e', "Syntax Error GraphQL (1:5) Invalid number, expected digit but got: \n\n1: 1.0e\n ^\n"); + $run(7, '1.0eA', "Syntax Error GraphQL (1:5) Invalid number, expected digit but got: \"A\"\n\n1: 1.0eA\n ^\n"); } + /** + * @it lexes punctuation + */ public function testLexesPunctuation() { $this->assertEquals( @@ -253,16 +278,19 @@ class LexerTest extends \PHPUnit_Framework_TestCase new Token(Token::BRACE_L, 0, 1, null), $this->lexOne('{') ); - $this->assertEquals( - new Token(Token::BRACE_R, 0, 1, null), - $this->lexOne('}') - ); $this->assertEquals( new Token(Token::PIPE, 0, 1, null), $this->lexOne('|') ); + $this->assertEquals( + new Token(Token::BRACE_R, 0, 1, null), + $this->lexOne('}') + ); } + /** + * @it lex reports useful unknown character error + */ public function testReportsUsefulUnknownCharErrors() { $run = function($num, $str, $expectedMessage) { @@ -277,7 +305,27 @@ class LexerTest extends \PHPUnit_Framework_TestCase $run(2, '?', "Syntax Error GraphQL (1:1) Unexpected character \"?\"\n\n1: ?\n ^\n"); $unicode = json_decode('"\u203B"'); - $run(3, $unicode, "Syntax Error GraphQL (1:1) Unexpected character \"$unicode\"\n\n1: $unicode\n ^\n"); + $run(3, $unicode, "Syntax Error GraphQL (1:1) Unexpected character \"\\u203b\"\n\n1: $unicode\n ^\n"); + + $unicode = json_decode('"\u200b"'); + $run(4, $unicode, "Syntax Error GraphQL (1:1) Unexpected character \"\\u200b\"\n\n1: $unicode\n ^\n"); + } + + /** + * @it lex reports useful information for dashes in names + */ + public function testReportsUsefulDashesInfo() + { + $q = 'a-b'; + $lexer = new Lexer(new Source($q)); + $this->assertEquals(new Token(Token::NAME, 0, 1, 'a'), $lexer->nextToken()); + + try { + $lexer->nextToken(); + $this->fail('Expected exception not thrown'); + } catch (SyntaxError $err) { + $this->assertEquals('Syntax Error GraphQL (1:3) Invalid number, expected digit but got: "b"'."\n\n1: a-b\n ^\n", $err->getMessage()); + } } /** @@ -289,4 +337,14 @@ class LexerTest extends \PHPUnit_Framework_TestCase $lexer = new Lexer(new Source($body)); return $lexer->nextToken(); } + + /** + * @param $body + * @return Token + */ + private function lexErr($body) + { + $lexer = new Lexer(new Source($body)); + return $lexer->nextToken(); + } }