prevPosition = 0;
        $this->source = $source;
    }

    /**
     * Returns the next token and records where it ended.
     *
     * When no reset position is supplied, lexing resumes from the end of the
     * token produced by the previous call.
     *
     * @param int|null $resetPosition position to lex from, or null to continue
     * @return Token
     */
    public function nextToken($resetPosition = null)
    {
        $from = $resetPosition;
        if ($from === null) {
            $from = $this->prevPosition;
        }

        $next = $this->readToken($from);
        $this->prevPosition = $next->end;

        return $next;
    }

    /**
     * Lexes a single token starting at or after the given position.
     *
     * Leading whitespace and comments are skipped first. Single-character
     * punctuators are resolved through a lookup table; names, numbers and
     * strings are delegated to their dedicated readers.
     *
     * @param int $fromPosition
     * @return Token
     * @throws SyntaxError
     */
    private function readToken($fromPosition)
    {
        $text = $this->source->body;
        $limit = $this->source->length;

        $pos = $this->positionAfterWhitespace($text, $fromPosition);
        if ($pos >= $limit) {
            return new Token(Token::EOF, $pos, $pos);
        }

        $char = Utils::charCodeAt($text, $pos);

        // SourceCharacter: tab, new line and carriage return are the only
        // characters below 0x0020 that are tolerated here.
        $isAllowedControl = $char === 0x0009 || $char === 0x000A || $char === 0x000D;
        if ($char < 0x0020 && !$isAllowedControl) {
            throw new SyntaxError($this->source, $pos, 'Invalid character ' . Utils::printCharCode($char));
        }

        // Punctuators that are exactly one character wide.
        $punctuators = [
            33  => Token::BANG,      // !
            36  => Token::DOLLAR,    // $
            40  => Token::PAREN_L,   // (
            41  => Token::PAREN_R,   // )
            58  => Token::COLON,     // :
            61  => Token::EQUALS,    // =
            64  => Token::AT,        // @
            91  => Token::BRACKET_L, // [
            93  => Token::BRACKET_R, // ]
            123 => Token::BRACE_L,   // {
            124 => Token::PIPE,      // |
            125 => Token::BRACE_R,   // }
        ];
        if (array_key_exists($char, $punctuators)) {
            return new Token($punctuators[$char], $pos, $pos + 1);
        }

        // "..." is the only token that starts with a dot; a lone dot falls
        // through to the "Unexpected character" error below.
        if ($char === 46) {
            $isSpread = Utils::charCodeAt($text, $pos + 1) === 46
                && Utils::charCodeAt($text, $pos + 2) === 46;
            if ($isSpread) {
                return new Token(Token::SPREAD, $pos, $pos + 3);
            }
        }

        // A-Z, _ or a-z starts a name.
        $startsName = $char === 95
            || ($char >= 65 && $char <= 90)
            || ($char >= 97 && $char <= 122);
        if ($startsName) {
            return $this->readName($pos);
        }

        // - or 0-9 starts a number.
        if ($char === 45 || ($char >= 48 && $char <= 57)) {
            return $this->readNumber($pos, $char);
        }

        // " starts a string.
        if ($char === 34) {
            return $this->readString($pos);
        }

        throw new SyntaxError($this->source, $pos, 'Unexpected character ' . Utils::printCharCode($char));
    }

    /**
     * Reads an alphanumeric + underscore name from the source.
*
     * [_A-Za-z][_0-9A-Za-z]*
     *
     * The character at $position is taken to be a valid name start; scanning
     * begins with the character after it.
     *
     * @param int $position
     * @return Token
     */
    private function readName($position)
    {
        $text = $this->source->body;
        $limit = $this->source->length;

        for ($end = $position + 1; $end !== $limit; ++$end) {
            $char = Utils::charCodeAt($text, $end);
            if (!$char) {
                break;
            }

            $isNameChar = $char === 95                 // _
                || ($char >= 48 && $char <= 57)        // 0-9
                || ($char >= 65 && $char <= 90)        // A-Z
                || ($char >= 97 && $char <= 122);      // a-z
            if (!$isNameChar) {
                break;
            }
        }

        $name = mb_substr($text, $position, $end - $position, 'UTF-8');

        return new Token(Token::NAME, $position, $end, $name);
    }

    /**
     * Reads a number token from the source. The token is a float when a
     * fractional part or an exponent is present, and an int otherwise.
     *
     * Int:   -?(0|[1-9][0-9]*)
     * Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
     *
     * @param int $start     position of the first character of the number
     * @param int $firstCode char code found at $start
     * @return Token
     * @throws SyntaxError
     */
    private function readNumber($start, $firstCode)
    {
        $text = $this->source->body;
        $cursor = $start;
        $char = $firstCode;

        // Optional leading minus sign.
        if ($char === 45) {
            $char = Utils::charCodeAt($text, ++$cursor);
        }

        // Integer part: either a lone zero or a run of digits.
        if ($char !== 48) {
            $cursor = $this->readDigits($cursor, $char);
            $char = Utils::charCodeAt($text, $cursor);
        } else {
            $char = Utils::charCodeAt($text, ++$cursor);
            if ($char >= 48 && $char <= 57) {
                throw new SyntaxError($this->source, $cursor, "Invalid number, unexpected digit after 0: " . Utils::printCharCode($char));
            }
        }

        // Fractional part introduced by a dot.
        $hasFraction = $char === 46;
        if ($hasFraction) {
            ++$cursor;
            $cursor = $this->readDigits($cursor, Utils::charCodeAt($text, $cursor));
            $char = Utils::charCodeAt($text, $cursor);
        }

        // Exponent introduced by E or e, with an optional sign.
        $hasExponent = $char === 69 || $char === 101;
        if ($hasExponent) {
            $char = Utils::charCodeAt($text, ++$cursor);
            if ($char === 43 || $char === 45) {
                $char = Utils::charCodeAt($text, ++$cursor);
            }
            $cursor = $this->readDigits($cursor, $char);
        }

        $kind = ($hasFraction || $hasExponent) ? Token::FLOAT : Token::INT;
        $lexeme = mb_substr($text, $start, $cursor - $start, 'UTF-8');

        return new Token($kind, $start, $cursor, $lexeme);
    }

    /**
     * Returns the new position in the source after reading digits.
*
     * At least one digit (0-9) is required at $start.
     *
     * @param int      $start     position of the first expected digit
     * @param int|null $firstCode char code found at $start
     * @return int position of the first character after the run of digits
     * @throws SyntaxError when $firstCode is not a digit
     */
    private function readDigits($start, $firstCode)
    {
        $body = $this->source->body;
        $position = $start;
        $code = $firstCode;

        if ($code >= 48 && $code <= 57) { // 0 - 9
            do {
                $code = Utils::charCodeAt($body, ++$position);
            } while ($code >= 48 && $code <= 57); // 0 - 9

            return $position;
        }

        // Past the end of the body there is no character to report.
        // NOTE(review): presumably printCharCode(null) renders an end-of-input
        // marker - confirm against Utils.
        if ($position > $this->source->length - 1) {
            $code = null;
        }

        throw new SyntaxError($this->source, $position, "Invalid number, expected digit but got: " . Utils::printCharCode($code));
    }

    /**
     * Reads a double-quoted string token, decoding backslash escapes.
     *
     * Supported escapes: \" \/ \\ \b \f \n \r \t and \uXXXX (four hex digits).
     * The string must be closed by a quote before a line terminator or the
     * end of the source.
     *
     * @param int $start position of the opening quote
     * @return Token STRING token whose value is the decoded string content
     * @throws SyntaxError on an invalid character, an invalid escape sequence
     *                     or an unterminated string
     */
    private function readString($start)
    {
        $body = $this->source->body;
        $bodyLength = $this->source->length;
        $position = $start + 1;
        $chunkStart = $position;
        $value = '';

        while (
            $position < $bodyLength &&
            ($code = Utils::charCodeAt($body, $position)) &&
            // not LineTerminator
            $code !== 0x000A && $code !== 0x000D &&
            // not Quote (")
            $code !== 34
        ) {
            $this->assertValidStringCharacterCode($code, $position);

            ++$position;
            if ($code === 92) { // \
                // Flush the literal text collected before the backslash.
                $value .= mb_substr($body, $chunkStart, $position - 1 - $chunkStart, 'UTF-8');
                $code = Utils::charCodeAt($body, $position);
                switch ($code) {
                    case 34:
                        $value .= '"';
                        break;
                    case 47:
                        $value .= '/';
                        break;
                    case 92:
                        $value .= '\\';
                        break;
                    case 98:
                        $value .= chr(8); // \b (backspace)
                        break;
                    case 102:
                        $value .= "\f";
                        break;
                    case 110:
                        $value .= "\n";
                        break;
                    case 114:
                        $value .= "\r";
                        break;
                    case 116:
                        $value .= "\t";
                        break;
                    case 117:
                        // Pass the encoding explicitly, like every other
                        // mb_substr call in this class, so character offsets
                        // do not depend on mb_internal_encoding().
                        $hex = mb_substr($body, $position + 1, 4, 'UTF-8');
                        if (!preg_match('/^[0-9a-fA-F]{4}\z/', $hex)) {
                            throw new SyntaxError($this->source, $position, 'Invalid character escape sequence: \\u' . $hex);
                        }
                        $code = hexdec($hex);
                        $this->assertValidStringCharacterCode($code, $position - 1);
                        $value .= Utils::chr($code);
                        $position += 4;
                        break;
                    default:
                        throw new SyntaxError($this->source, $position, 'Invalid character escape sequence: \\' . Utils::chr($code));
                }
                ++$position;
                $chunkStart = $position;
            }
        }

        // Re-read the character at the stop position instead of trusting
        // $code: when the input ends right after an escape such as \" or
        // \u0022, $code still holds 34 from the escape and an unterminated
        // string would otherwise be accepted.
        if ($position >= $bodyLength || Utils::charCodeAt($body, $position) !== 34) {
            throw new SyntaxError($this->source, $position, 'Unterminated string');
        }

        $value .= mb_substr($body, $chunkStart, $position - $chunkStart, 'UTF-8');

        return new Token(Token::STRING, $start, $position + 1, $value);
    }

    /**
     * Rejects characters that may not appear inside a string: anything
     * below 0x0020 other than tab (0x0009).
     *
     * @param int $code     char code to validate
     * @param int $position position reported in the error
     * @throws SyntaxError
     */
    private function assertValidStringCharacterCode($code, $position)
    {
        // SourceCharacter
        if ($code < 0x0020 && $code !== 0x0009) {
            throw new SyntaxError(
                $this->source,
                $position,
                "Invalid character within String: " . Utils::printCharCode($code)
            );
        }
    }

    /**
     * Reads from body starting at startPosition until it finds a non-whitespace
     * or commented character, then returns the position of that character for
     * lexing.
     *
     * BOM, tab, space, new line, carriage return and comma are skipped, as are
     * comments running from "#" up to the next line terminator.
     *
     * @param string $body
     * @param int    $startPosition
     * @return int
     */
    private function positionAfterWhitespace($body, $startPosition)
    {
        // NOTE(review): this recomputes the length on every call while the
        // other readers use $this->source->length; presumably the two are
        // equal - confirm against Source before swapping it in.
        $bodyLength = mb_strlen($body, 'UTF-8');
        $position = $startPosition;

        while ($position < $bodyLength) {
            $code = Utils::charCodeAt($body, $position);

            // Skip whitespace
            if (
                $code === 0xFEFF || // BOM
                $code === 0x0009 || // tab
                $code === 0x0020 || // space
                $code === 0x000A || // new line
                $code === 0x000D || // carriage return
                $code === 0x002C    // comma
            ) {
                ++$position;
            // Skip comments
            } elseif ($code === 35) { // #
                ++$position;
                while (
                    $position < $bodyLength &&
                    ($code = Utils::charCodeAt($body, $position)) &&
                    // SourceCharacter but not LineTerminator
                    ($code > 0x001F || $code === 0x0009) && $code !== 0x000A && $code !== 0x000D
                ) {
                    ++$position;
                }
            } else {
                break;
            }
        }

        return $position;
    }
}