gccrs: Add Lexer for Rust front-end

The lexer is referred to as a ManagedTokenSource within the parser. This
lexer does not currently support Unicode, but serves as a starting point
to do so.

	gcc/rust/
	* lex/rust-codepoint.h: New.
	* lex/rust-lex.cc: New.
	* lex/rust-lex.h: New.
	* lex/rust-token.cc: New.
	* lex/rust-token.h: New.
	* rust-buffered-queue.h: New.

Co-authored-by: Philip Herron <philip.herron@embecosm.com>
Co-authored-by: Arthur Cohen <arthur.cohen@embecosm.com>
Co-authored-by: Mark Wielaard <mark@klomp.org>

Signed-off-by: Joel Phillips <simplytheother@gmail.com>
This commit is contained in:
Joel Phillips
2022-08-23 16:11:00 +01:00
committed by Arthur Cohen
parent 5b981e9c74
commit 18f6990f84
6 changed files with 3831 additions and 0 deletions

View File

@@ -0,0 +1,46 @@
// Copyright (C) 2020-2022 Free Software Foundation, Inc.
// This file is part of GCC.
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
#ifndef RUST_CODEPOINT_H
#define RUST_CODEPOINT_H
#include "rust-system.h"
namespace Rust {
// Thin value wrapper around a 32-bit code point. UINT32_MAX is reserved as
// the end-of-file marker.
struct Codepoint
{
  uint32_t value;

  // Default construction yields the zero code point.
  Codepoint () : value (0) {}

  // Wraps the given raw 32-bit value.
  Codepoint (uint32_t value) : value (value) {}

  // The end-of-file marker and the matching query.
  static Codepoint eof () { return Codepoint (UINT32_MAX); }
  bool is_eof () const { return UINT32_MAX == value; }

  // Returns a C++ string containing string value of codepoint
  // (declared here, defined out of line).
  std::string as_string ();

  // Code points compare by their stored value.
  bool operator== (Codepoint other) const { return other.value == value; }
  bool operator!= (Codepoint other) const { return value != other.value; }
};
} // namespace Rust
#endif

2728
gcc/rust/lex/rust-lex.cc Normal file

File diff suppressed because it is too large Load Diff

271
gcc/rust/lex/rust-lex.h Normal file
View File

@@ -0,0 +1,271 @@
// Copyright (C) 2020-2022 Free Software Foundation, Inc.
// This file is part of GCC.
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
#ifndef RUST_LEX_H
#define RUST_LEX_H
#include "rust-linemap.h"
#include "rust-buffered-queue.h"
#include "rust-token.h"
namespace Rust {
// Simple wrapper for FILE* that simplifies destruction: the wrapped stream is
// closed when the wrapper is destroyed or move-assigned over. stdin is never
// closed. The filename pointer is stored as given; it is not copied or freed.
struct RAIIFile
{
private:
  FILE *file;
  const char *filename;

  // Closes the wrapped stream, unless there is none or it is stdin.
  void close ()
  {
    if (file != nullptr && file != stdin)
      fclose (file);
  }

public:
  /**
   * Open FILENAME for reading. The name "-" selects stdin. If FILENAME is
   * null or fopen fails, the wrapper holds no stream and ok () is false.
   */
  RAIIFile (const char *filename) : file (nullptr), filename (filename)
  {
    // strcmp/fopen must not be handed a null pointer.
    if (filename == nullptr)
      return;

    if (strcmp (filename, "-") == 0)
      file = stdin;
    else
      file = fopen (filename, "r");
  }

  /**
   * Create a RAIIFile from an existing instance of FILE*
   */
  RAIIFile (FILE *raw, const char *filename = nullptr)
    : file (raw), filename (filename)
  {}

  // Copying is disabled: two wrappers would close the same stream.
  RAIIFile (const RAIIFile &other) = delete;
  RAIIFile &operator= (const RAIIFile &other) = delete;

  // have to specify setting file to nullptr, otherwise unintended fclose occurs
  RAIIFile (RAIIFile &&other) : file (other.file), filename (other.filename)
  {
    other.file = nullptr;
  }

  RAIIFile &operator= (RAIIFile &&other)
  {
    // Self-assignment must be a no-op; otherwise the stream would be closed
    // and the wrapper left empty.
    if (this != &other)
      {
        close ();
        file = other.file;
        filename = other.filename;
        other.file = nullptr;
      }
    return *this;
  }

  // Wrapper holding neither a stream nor a filename; ok () is false.
  static RAIIFile create_error () { return RAIIFile (nullptr, nullptr); }

  ~RAIIFile () { close (); }

  // Wrapped stream (may be null). Ownership stays with the wrapper.
  FILE *get_raw () { return file; }
  // Filename as passed at construction (may be null).
  const char *get_filename () { return filename; }

  // True when a stream is held.
  bool ok () const { return file != nullptr; }
};
// Lexer: reads characters from a file or an in-memory string through a
// buffered look-ahead queue and exposes the resulting tokens through a second
// buffered look-ahead queue.
class Lexer
{
private:
  // Request new Location for current column in line_table
  Location get_current_location ();

  // Skips the current input char.
  void skip_input ();
  // Advances current input char to n + 1 chars ahead of current position.
  void skip_input (int n);

  // Peeks the current char.
  int peek_input ();
  // Returns char n chars ahead of current position.
  int peek_input (int n);

  // Classifies keyword (i.e. gets id for keyword).
  TokenId classify_keyword (const std::string &str);

  // Builds a token from the input queue.
  TokenPtr build_token ();

  std::tuple<std::string, int, bool> parse_in_decimal ();
  std::pair<std::string, int> parse_in_exponent_part ();
  std::pair<PrimitiveCoreType, int> parse_in_type_suffix ();
  std::tuple<char, int, bool> parse_escape (char opening_char);
  std::tuple<Codepoint, int, bool> parse_utf8_escape (char opening_char);
  int parse_partial_string_continue ();
  std::pair<long, int> parse_partial_hex_escape ();
  std::pair<Codepoint, int> parse_partial_unicode_escape ();

  int get_input_codepoint_length ();
  int test_get_input_codepoint_n_length (int n_start_offset);
  Codepoint peek_codepoint_input ();
  Codepoint test_peek_codepoint_input (int n);
  void skip_codepoint_input ();
  void skip_broken_string_input (int current_char);

  TokenPtr parse_byte_char (Location loc);
  TokenPtr parse_byte_string (Location loc);
  TokenPtr parse_raw_byte_string (Location loc);
  TokenPtr parse_raw_identifier (Location loc);
  TokenPtr parse_string (Location loc);
  TokenPtr maybe_parse_raw_string (Location loc);
  TokenPtr parse_raw_string (Location loc, int initial_hash_count);
  TokenPtr parse_non_decimal_int_literals (Location loc);
  TokenPtr parse_decimal_int_or_float (Location loc);
  TokenPtr parse_char_or_lifetime (Location loc);
  TokenPtr parse_identifier_or_keyword (Location loc);

  template <typename IsDigitFunc>
  TokenPtr parse_non_decimal_int_literal (Location loc,
                                          IsDigitFunc is_digit_func,
                                          std::string existent_str, int base);

public:
  // Construct lexer with input file and filename provided
  Lexer (const char *filename, RAIIFile input, Linemap *linemap);

  // Lex the contents of a string instead of a file
  Lexer (const std::string &input);

  // dtor
  ~Lexer ();

  // don't allow copy semantics (for now, at least)
  Lexer (const Lexer &other) = delete;
  Lexer &operator= (const Lexer &other) = delete;

  // enable move semantics
  // NOTE(review): TokenSource keeps a raw Lexer* and input_queue keeps an
  // InputSource&; confirm that these stay valid after a defaulted move before
  // relying on moving a Lexer.
  Lexer (Lexer &&other) = default;
  Lexer &operator= (Lexer &&other) = default;

  // Returns token n tokens ahead of current position.
  const_TokenPtr peek_token (int n) { return token_queue.peek (n); }
  // Peeks the current token.
  const_TokenPtr peek_token () { return peek_token (0); }

  // Advances current token to n + 1 tokens ahead of current position.
  void skip_token (int n) { token_queue.skip (n); }
  // Skips the current token.
  void skip_token () { skip_token (0); }

  // Replaces the current token with a specified token.
  void replace_current_token (TokenPtr replacement);
  // FIXME: don't use anymore

  /* Splits the current token into two. Intended for use with nested generics
   * closes (i.e. T<U<X>> where >> is wrongly lexed as one token). Note that
   * this will only work with "simple" tokens like punctuation. */
  void split_current_token (TokenId new_left, TokenId new_right);

  Linemap *get_line_map () { return line_map; }

  // Name of the input file, or an empty string when the input has no
  // filename (RAIIFile may carry a null name; std::string must not be built
  // from a null pointer).
  std::string get_filename ()
  {
    const char *name = input.get_filename ();
    return name != nullptr ? std::string (name) : std::string ();
  }

private:
  void start_line (int current_line, int current_column);

  // File for use as input.
  RAIIFile input;
  // TODO is this actually required? could just have file storage in InputSource

  // Current line number.
  int current_line;
  // Current column number.
  int current_column;
  // Current character.
  int current_char;
  // Line map.
  Linemap *line_map;

  /* Max column number that can be quickly allocated - higher may require
   * allocating new linemap */
  static const int max_column_hint = 80;

  // Input source wrapper thing.
  class InputSource
  {
  public:
    virtual ~InputSource () {}

    // Returns the next char from the input stream, or EOF when exhausted.
    virtual int next () = 0;
  };

  class FileInputSource : public InputSource
  {
  private:
    // Input source file.
    FILE *input;

  public:
    // Create new input source from file.
    FileInputSource (FILE *input) : input (input) {}

    int next () override { return fgetc (input); }
  };

  class BufferInputSource : public InputSource
  {
  private:
    // Referenced, not copied: the string must outlive this source.
    const std::string &buffer;
    size_t offs;

  public:
    // Create new input source from a string, starting at the given offset.
    BufferInputSource (const std::string &b, size_t offset)
      : buffer (b), offs (offset)
    {}

    int next () override
    {
      if (offs >= buffer.size ())
        return EOF;

      // Match fgetc: hand the byte back as an unsigned char converted to int,
      // so bytes >= 0x80 are not negative and 0xFF is not mistaken for EOF.
      return static_cast<unsigned char> (buffer.at (offs++));
    }
  };

  // The input source for the lexer.
  // InputSource input_source;
  // Input file queue.
  std::unique_ptr<InputSource> raw_input_source;
  buffered_queue<int, InputSource &> input_queue;

  // Token source wrapper thing.
  struct TokenSource
  {
    // The lexer object that will use this TokenSource.
    Lexer *lexer;

    // Create a new TokenSource with given lexer.
    TokenSource (Lexer *parLexer) : lexer (parLexer) {}

    // Builds the next token by calling build_token () on the lexer.
    TokenPtr next () { return lexer->build_token (); }
  };

  // The token source for the lexer.
  // TokenSource token_source;
  // Token stream queue.
  buffered_queue<std::shared_ptr<Token>, TokenSource> token_queue;
};
} // namespace Rust
#endif

134
gcc/rust/lex/rust-token.cc Normal file
View File

@@ -0,0 +1,134 @@
// Copyright (C) 2020-2022 Free Software Foundation, Inc.
// This file is part of GCC.
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
#include "rust-token.h"
#include "rust-diagnostics.h"
namespace Rust {
// Returns the description string for a token id, i.e. the second argument of
// the token's RS_TOKEN / RS_TOKEN_KEYWORD entry in RS_TOKEN_LIST. Implemented
// with x-macros: RS_TOKEN_LIST is expanded into one `case` per token.
// An id with no entry in the list hits gcc_unreachable ().
const char *
get_token_description (TokenId id)
{
switch (id)
{
// Each list entry becomes `case <name>: return <descr>;`.
#define RS_TOKEN(name, descr) \
case name: \
return descr;
// Keywords are handled exactly like ordinary tokens here.
#define RS_TOKEN_KEYWORD(x, y) RS_TOKEN (x, y)
RS_TOKEN_LIST
#undef RS_TOKEN_KEYWORD
#undef RS_TOKEN
default:
gcc_unreachable ();
}
}
/* Returns the name of the enumerator for a token id as a string (the
 * stringified first argument of its RS_TOKEN / RS_TOKEN_KEYWORD entry), not
 * its description. Implemented with x-macros: RS_TOKEN_LIST is expanded into
 * one `case` per token. An id with no entry in the list hits
 * gcc_unreachable (). */
const char *
token_id_to_str (TokenId id)
{
switch (id)
{
// Each list entry becomes `case <name>: return "<name>";`.
#define RS_TOKEN(name, _) \
case name: \
return #name;
// Keywords are handled exactly like ordinary tokens here.
#define RS_TOKEN_KEYWORD(x, y) RS_TOKEN (x, y)
RS_TOKEN_LIST
#undef RS_TOKEN_KEYWORD
#undef RS_TOKEN
default:
gcc_unreachable ();
}
}
// Maps a primitive core type to its printable name. Anything without a
// dedicated label (including CORETYPE_UNKNOWN) yields "unknown".
const char *
get_type_hint_string (PrimitiveCoreType type)
{
  switch (type)
    {
    // Named primitives.
    case CORETYPE_BOOL: return "bool";
    case CORETYPE_CHAR: return "char";
    case CORETYPE_STR: return "str";

    // Pointer-sized integers. CORETYPE_INT and CORETYPE_UINT get no labels
    // of their own (they were commented out in the switch).
    case CORETYPE_ISIZE: return "isize";
    case CORETYPE_USIZE: return "usize";

    // Floating point.
    case CORETYPE_F32: return "f32";
    case CORETYPE_F64: return "f64";

    // Signed fixed-width integers.
    case CORETYPE_I8: return "i8";
    case CORETYPE_I16: return "i16";
    case CORETYPE_I32: return "i32";
    case CORETYPE_I64: return "i64";
    case CORETYPE_I128: return "i128";

    // Unsigned fixed-width integers.
    case CORETYPE_U8: return "u8";
    case CORETYPE_U16: return "u16";
    case CORETYPE_U32: return "u32";
    case CORETYPE_U64: return "u64";
    case CORETYPE_U128: return "u128";

    case CORETYPE_PURE_DECIMAL: return "pure_decimal";

    case CORETYPE_UNKNOWN:
    default:
      break;
    }

  return "unknown";
}
const char *
Token::get_type_hint_str () const
{
return get_type_hint_string (type_hint);
}
// Returns the text attached to this token. If the token has no text, an error
// is reported at the token's location and a reference to a shared empty
// string is returned instead.
const std::string &
Token::get_str () const
{
  // Common case: the token carries text.
  if (str != nullptr)
    return *str;

  // FIXME: attempt to return null again
  // HACK: allow referencing an empty string
  static const std::string empty;

  rust_error_at (get_locus (),
                 "attempted to get string for %<%s%>, which has no string. "
                 "returning empty string instead",
                 get_token_description ());
  return empty;
}
} // namespace Rust

448
gcc/rust/lex/rust-token.h Normal file
View File

@@ -0,0 +1,448 @@
// Copyright (C) 2020-2022 Free Software Foundation, Inc.
// This file is part of GCC.
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
#ifndef RUST_TOKEN_H
#define RUST_TOKEN_H
#include "rust-system.h"
#include "rust-linemap.h"
#include "rust-codepoint.h"
namespace Rust {
// "Primitive core types" in Rust - the different int and float types, as well
// as some others
// Stored in Token as the type hint of a token. Enumerator values follow
// declaration order, except for the two aliases at the end.
enum PrimitiveCoreType
{
// Value used by Token when no type hint was given.
CORETYPE_UNKNOWN,
// named primitives
CORETYPE_BOOL,
CORETYPE_CHAR,
CORETYPE_STR,
// okay technically int and uint are arch-dependent (pointer size)
CORETYPE_INT,
CORETYPE_UINT,
// numbered number primitives
CORETYPE_F32,
CORETYPE_F64,
CORETYPE_I8,
CORETYPE_I16,
CORETYPE_I32,
CORETYPE_I64,
CORETYPE_I128,
CORETYPE_U8,
CORETYPE_U16,
CORETYPE_U32,
CORETYPE_U64,
CORETYPE_U128,
// Pure decimals are used for tuple index.
// Also means there is no type hint.
CORETYPE_PURE_DECIMAL,
// arch-dependent pointer sizes; these share their values with
// CORETYPE_INT / CORETYPE_UINT, so a switch cannot have a case for both.
CORETYPE_ISIZE = CORETYPE_INT,
CORETYPE_USIZE = CORETYPE_UINT
};
// X-macro list of every token kind. To use it, define both entry macros,
// expand RS_TOKEN_LIST, then #undef them again:
// RS_TOKEN(name, description)
// RS_TOKEN_KEYWORD(name, identifier)
// The list starts with FIRST_TOKEN and ends with LAST_TOKEN as markers.
//
// Keep RS_TOKEN_KEYWORD sorted
/* note that abstract, async, become, box, do, final, macro, override, priv,
* try, typeof, unsized, virtual, and yield are unused */
#define RS_TOKEN_LIST \
RS_TOKEN (FIRST_TOKEN, "<first-token-marker>") \
RS_TOKEN (END_OF_FILE, "end of file") \
RS_TOKEN (EXCLAM, "!") \
RS_TOKEN (NOT_EQUAL, "!=") \
RS_TOKEN (PERCENT, "%") \
RS_TOKEN (PERCENT_EQ, "%=") \
RS_TOKEN (AMP, "&") \
RS_TOKEN (AMP_EQ, "&=") \
RS_TOKEN (LOGICAL_AND, "&&") \
RS_TOKEN (ASTERISK, "*") \
RS_TOKEN (ASTERISK_EQ, "*=") \
RS_TOKEN (PLUS, "+") \
RS_TOKEN (PLUS_EQ, "+=") \
RS_TOKEN (COMMA, ",") \
RS_TOKEN (MINUS, "-") \
RS_TOKEN (MINUS_EQ, "-=") \
RS_TOKEN (RETURN_TYPE, "->") \
RS_TOKEN (DOT, ".") \
RS_TOKEN (DOT_DOT, "..") \
RS_TOKEN (DOT_DOT_EQ, "..=") \
RS_TOKEN (ELLIPSIS, "...") \
RS_TOKEN (DIV, "/") \
RS_TOKEN (DIV_EQ, "/=") \
RS_TOKEN (COLON, ":") \
RS_TOKEN (SEMICOLON, ";") \
RS_TOKEN (LEFT_SHIFT, "<<") \
RS_TOKEN (LEFT_SHIFT_EQ, "<<=") \
RS_TOKEN (LEFT_ANGLE, "<") \
RS_TOKEN (LESS_OR_EQUAL, "<=") \
RS_TOKEN (EQUAL, "=") \
RS_TOKEN (EQUAL_EQUAL, "==") \
RS_TOKEN (MATCH_ARROW, "=>") \
RS_TOKEN (RIGHT_ANGLE, ">") \
RS_TOKEN (GREATER_OR_EQUAL, ">=") \
RS_TOKEN (RIGHT_SHIFT, ">>") \
RS_TOKEN (RIGHT_SHIFT_EQ, ">>=") \
RS_TOKEN (PATTERN_BIND, "@") \
RS_TOKEN (TILDE, "~") \
RS_TOKEN (BACKSLASH, "\\") \
RS_TOKEN (BACKTICK, "`") \
RS_TOKEN (CARET, "^") \
RS_TOKEN (CARET_EQ, "^=") \
RS_TOKEN (PIPE, "|") \
RS_TOKEN (PIPE_EQ, "|=") \
RS_TOKEN (OR, "||") \
RS_TOKEN (QUESTION_MARK, "?") \
RS_TOKEN (HASH, "#") \
/* from here on, dodgy and may not be correct. not operators and may be \
* symbols */ \
/* RS_TOKEN(SPACE, " ") probably too dodgy */ \
/* RS_TOKEN(NEWLINE, "\n")*/ \
RS_TOKEN (SCOPE_RESOLUTION, "::") /* dodgy */ \
RS_TOKEN (SINGLE_QUOTE, "'") /* should i differentiate from lifetime? */ \
RS_TOKEN (DOUBLE_QUOTE, "\"") \
RS_TOKEN (UNDERSCORE, \
"_") /* TODO: treat as reserved word like mrustc instead? */ \
RS_TOKEN (IDENTIFIER, "identifier") \
RS_TOKEN (INT_LITERAL, \
"integer literal") /* do different int and float types need \
different literal types? */ \
RS_TOKEN (FLOAT_LITERAL, "float literal") \
RS_TOKEN (STRING_LITERAL, "string literal") \
RS_TOKEN (CHAR_LITERAL, "character literal") \
RS_TOKEN (BYTE_STRING_LITERAL, "byte string literal") \
RS_TOKEN (BYTE_CHAR_LITERAL, "byte character literal") \
RS_TOKEN (LIFETIME, "lifetime") /* TODO: improve token type */ \
/* Have "interpolated" tokens (whatever that means)? identifier, path, type, \
* pattern, */ \
/* expression, statement, block, meta, item in mrustc (but not directly in \
* lexer). */ \
RS_TOKEN (LEFT_PAREN, "(") \
RS_TOKEN (RIGHT_PAREN, ")") \
RS_TOKEN (LEFT_CURLY, "{") \
RS_TOKEN (RIGHT_CURLY, "}") \
RS_TOKEN (LEFT_SQUARE, "[") \
RS_TOKEN (RIGHT_SQUARE, "]") \
/* Macros */ \
RS_TOKEN (DOLLAR_SIGN, "$") \
/* Doc Comments */ \
RS_TOKEN (INNER_DOC_COMMENT, "#![doc]") \
RS_TOKEN (OUTER_DOC_COMMENT, "#[doc]") \
/* have "weak" union and 'static keywords? */ \
RS_TOKEN_KEYWORD (ABSTRACT, "abstract") /* unused */ \
RS_TOKEN_KEYWORD (AS, "as") \
RS_TOKEN_KEYWORD (ASYNC, "async") /* unused */ \
RS_TOKEN_KEYWORD (BECOME, "become") /* unused */ \
RS_TOKEN_KEYWORD (BOX, "box") /* unused */ \
RS_TOKEN_KEYWORD (BREAK, "break") \
RS_TOKEN_KEYWORD (CONST, "const") \
RS_TOKEN_KEYWORD (CONTINUE, "continue") \
RS_TOKEN_KEYWORD (CRATE, "crate") \
/* FIXME: Do we need to add $crate (DOLLAR_CRATE) as a reserved kw? */ \
RS_TOKEN_KEYWORD (DO, "do") /* unused */ \
RS_TOKEN_KEYWORD (DYN, "dyn") \
RS_TOKEN_KEYWORD (ELSE, "else") \
RS_TOKEN_KEYWORD (ENUM_TOK, "enum") \
RS_TOKEN_KEYWORD (EXTERN_TOK, "extern") \
RS_TOKEN_KEYWORD (FALSE_LITERAL, "false") \
RS_TOKEN_KEYWORD (FINAL_TOK, "final") /* unused */ \
RS_TOKEN_KEYWORD (FN_TOK, "fn") \
RS_TOKEN_KEYWORD (FOR, "for") \
RS_TOKEN_KEYWORD (IF, "if") \
RS_TOKEN_KEYWORD (IMPL, "impl") \
RS_TOKEN_KEYWORD (IN, "in") \
RS_TOKEN_KEYWORD (LET, "let") \
RS_TOKEN_KEYWORD (LOOP, "loop") \
RS_TOKEN_KEYWORD (MACRO, "macro") /* unused */ \
RS_TOKEN_KEYWORD (MATCH_TOK, "match") \
RS_TOKEN_KEYWORD (MOD, "mod") \
RS_TOKEN_KEYWORD (MOVE, "move") \
RS_TOKEN_KEYWORD (MUT, "mut") \
RS_TOKEN_KEYWORD (OVERRIDE_TOK, "override") /* unused */ \
RS_TOKEN_KEYWORD (PRIV, "priv") /* unused */ \
RS_TOKEN_KEYWORD (PUB, "pub") \
RS_TOKEN_KEYWORD (REF, "ref") \
RS_TOKEN_KEYWORD (RETURN_TOK, "return") \
RS_TOKEN_KEYWORD (SELF_ALIAS, \
"Self") /* mrustc does not treat this as a reserved word*/ \
RS_TOKEN_KEYWORD (SELF, "self") \
RS_TOKEN_KEYWORD (STATIC_TOK, "static") \
RS_TOKEN_KEYWORD (STRUCT_TOK, "struct") \
RS_TOKEN_KEYWORD (SUPER, "super") \
RS_TOKEN_KEYWORD (TRAIT, "trait") \
RS_TOKEN_KEYWORD (TRUE_LITERAL, "true") \
RS_TOKEN_KEYWORD (TRY, "try") /* unused */ \
RS_TOKEN_KEYWORD (TYPE, "type") \
RS_TOKEN_KEYWORD (TYPEOF, "typeof") /* unused */ \
RS_TOKEN_KEYWORD (UNSAFE, "unsafe") \
RS_TOKEN_KEYWORD (UNSIZED, "unsized") /* unused */ \
RS_TOKEN_KEYWORD (USE, "use") \
RS_TOKEN_KEYWORD (VIRTUAL, "virtual") /* unused */ \
RS_TOKEN_KEYWORD (WHERE, "where") \
RS_TOKEN_KEYWORD (WHILE, "while") \
RS_TOKEN_KEYWORD (YIELD, "yield") /* unused */ \
RS_TOKEN (LAST_TOKEN, "<last-token-marker>")
// Contains all token types. Generated from RS_TOKEN_LIST via x-macros: every
// RS_TOKEN / RS_TOKEN_KEYWORD entry contributes one enumerator, named after
// the entry's first argument, in list order.
enum TokenId
{
// Only the name is used here; the second argument is ignored.
#define RS_TOKEN(name, _) name,
#define RS_TOKEN_KEYWORD(x, y) RS_TOKEN (x, y)
RS_TOKEN_LIST
#undef RS_TOKEN_KEYWORD
#undef RS_TOKEN
};
// Token is only forward-declared at this point so that the pointer aliases
// below can be named before the class is defined.
class Token;

// Shared-ownership pointer to a mutable Token.
using TokenPtr = std::shared_ptr<Token>;

// Shared-ownership pointer to a Token that cannot be modified through it.
using const_TokenPtr = std::shared_ptr<const Token>;
// Returns the description string of a token id (the second argument of its
// RS_TOKEN / RS_TOKEN_KEYWORD entry). Implemented with x-macros.
const char *
get_token_description (TokenId id);
/* Returns the enumerator name of a token id as a string (e.g. "IDENTIFIER"
 * for IDENTIFIER), as opposed to its description. Implemented with
 * x-macros. */
const char *
token_id_to_str (TokenId id);
// Get type hint description as a string.
const char *
get_type_hint_string (PrimitiveCoreType type);
// One token: its kind, its source location, an optional piece of text and an
// optional type hint. Constructors are private; instances are created through
// the static make* factories, which hand back a TokenPtr.
class Token
{
private:
  // Kind of this token.
  TokenId token_id;
  // Source location of this token.
  Location locus;
  // Text attached to the token; null when the token carries none.
  // TODO: maybe remove issues and just store std::string as value?
  std::unique_ptr<std::string> str;
  /* Type hint based on lexer data (e.g. a type suffix). Most tokens have
   * none. */
  PrimitiveCoreType type_hint;

  // Token without text and without a type hint.
  Token (TokenId token_id, Location location)
    : token_id (token_id), locus (location), str (nullptr),
      type_hint (CORETYPE_UNKNOWN)
  {}

  // Token with text and an explicit type hint.
  Token (TokenId token_id, Location location, std::string &&paramStr,
         PrimitiveCoreType parType)
    : token_id (token_id), locus (location),
      str (new std::string (std::move (paramStr))), type_hint (parType)
  {}

  // Token with text and no type hint.
  Token (TokenId token_id, Location location, std::string &&paramStr)
    : Token (token_id, location, std::move (paramStr), CORETYPE_UNKNOWN)
  {}

  // Token whose text is the single given char; no type hint.
  Token (TokenId token_id, Location location, char paramChar)
    : token_id (token_id), locus (location),
      str (new std::string (1, paramChar)), type_hint (CORETYPE_UNKNOWN)
  {}

  // Token whose text is the string form of a "codepoint"; no type hint.
  Token (TokenId token_id, Location location, Codepoint paramCodepoint)
    : token_id (token_id), locus (location),
      str (new std::string (paramCodepoint.as_string ())),
      type_hint (CORETYPE_UNKNOWN)
  {}

public:
  // Tokens cannot be default-constructed or copied, only moved.
  Token () = delete;
  Token (const Token &) = delete;
  Token &operator= (const Token &) = delete;
  Token (Token &&other) = default;
  Token &operator= (Token &&other) = default;

  ~Token () = default;

  /* TODO: make_shared (which saves a heap allocation) does not work with the
   * private constructor, hence the explicit `new` in every factory below. */

  // New token of the given kind, with null string.
  static TokenPtr make (TokenId token_id, Location locus)
  {
    return TokenPtr (new Token (token_id, locus));
  }

  // New IDENTIFIER token.
  static TokenPtr make_identifier (Location locus, std::string &&str)
  {
    return TokenPtr (new Token (IDENTIFIER, locus, std::move (str)));
  }

  // New INT_LITERAL token, optionally with a type hint.
  static TokenPtr make_int (Location locus, std::string &&str,
                            PrimitiveCoreType type_hint = CORETYPE_UNKNOWN)
  {
    return TokenPtr (
      new Token (INT_LITERAL, locus, std::move (str), type_hint));
  }

  // New FLOAT_LITERAL token, optionally with a type hint.
  static TokenPtr make_float (Location locus, std::string &&str,
                              PrimitiveCoreType type_hint = CORETYPE_UNKNOWN)
  {
    return TokenPtr (
      new Token (FLOAT_LITERAL, locus, std::move (str), type_hint));
  }

  // New STRING_LITERAL token; its type hint is always CORETYPE_STR.
  static TokenPtr make_string (Location locus, std::string &&str)
  {
    return TokenPtr (
      new Token (STRING_LITERAL, locus, std::move (str), CORETYPE_STR));
  }

  // New CHAR_LITERAL token.
  static TokenPtr make_char (Location locus, Codepoint char_lit)
  {
    return TokenPtr (new Token (CHAR_LITERAL, locus, char_lit));
  }

  // New BYTE_CHAR_LITERAL token.
  static TokenPtr make_byte_char (Location locus, char byte_char)
  {
    return TokenPtr (new Token (BYTE_CHAR_LITERAL, locus, byte_char));
  }

  // New BYTE_STRING_LITERAL token (fix).
  static TokenPtr make_byte_string (Location locus, std::string &&str)
  {
    return TokenPtr (new Token (BYTE_STRING_LITERAL, locus, std::move (str)));
  }

  // New INNER_DOC_COMMENT token.
  static TokenPtr make_inner_doc_comment (Location locus, std::string &&str)
  {
    return TokenPtr (new Token (INNER_DOC_COMMENT, locus, std::move (str)));
  }

  // New OUTER_DOC_COMMENT token.
  static TokenPtr make_outer_doc_comment (Location locus, std::string &&str)
  {
    return TokenPtr (new Token (OUTER_DOC_COMMENT, locus, std::move (str)));
  }

  // New LIFETIME token.
  static TokenPtr make_lifetime (Location locus, std::string &&str)
  {
    return TokenPtr (new Token (LIFETIME, locus, std::move (str)));
  }

  // Kind of the token.
  TokenId get_id () const { return token_id; }

  // Location of the token.
  Location get_locus () const { return locus; }

  // Text of the token. Defined out of line.
  // FIXME: put in header again when fix null problem
  const std::string &get_str () const;

  // Type hint of the token; a pure-decimal hint is reported as
  // CORETYPE_UNKNOWN.
  PrimitiveCoreType get_type_hint () const
  {
    if (type_hint == CORETYPE_PURE_DECIMAL)
      return CORETYPE_UNKNOWN;
    return type_hint;
  }

  // diagnostics (error reporting)
  const char *get_token_description () const
  {
    return Rust::get_token_description (token_id);
  }

  // debugging
  const char *token_id_to_str () const
  {
    return Rust::token_id_to_str (token_id);
  }

  // debugging
  const char *get_type_hint_str () const;

  /* True for literal tokens of any type (int, float, char, string, byte char,
   * byte string). */
  bool is_literal () const
  {
    return token_id == INT_LITERAL || token_id == FLOAT_LITERAL
           || token_id == CHAR_LITERAL || token_id == STRING_LITERAL
           || token_id == BYTE_CHAR_LITERAL
           || token_id == BYTE_STRING_LITERAL;
  }

  /* True when the token actually has a string (regardless of whether it
   * should or not). */
  bool has_str () const { return str != nullptr; }

  // True when the token should have a string: literals, identifiers and
  // lifetimes.
  bool should_have_str () const
  {
    return is_literal () || token_id == IDENTIFIER || token_id == LIFETIME;
  }

  // True when the token is a pure decimal int literal.
  bool is_pure_decimal () const { return type_hint == CORETYPE_PURE_DECIMAL; }
};
} // namespace Rust
#endif

View File

@@ -0,0 +1,204 @@
// Copyright (C) 2020-2022 Free Software Foundation, Inc.
// This file is part of GCC.
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
#ifndef RUST_BUFFERED_QUEUE_H
#define RUST_BUFFERED_QUEUE_H
#include "rust-system.h"
namespace Rust {
/* Buffered queue implementation. Items are of type T, queue source is of type
 * Source; new items are obtained by calling source.next (). The source is
 * stored as a member of type Source, so when Source is a reference type the
 * queue only refers to an object owned elsewhere.
 *
 * The live items occupy buffer[start, end); slots outside that range hold
 * default-constructed filler values. */
template <typename T, typename Source> class buffered_queue
{
public:
  // Construct empty queue from Source src.
  buffered_queue (Source src) : source (src), start (0), end (0), buffer () {}

  /* disable copying (since source is probably non-copyable)
   * TODO is this actually a good idea? If source is non-copyable, it would
   * just delete the copy constructor anyway.*/
  buffered_queue (const buffered_queue &other) = delete;
  buffered_queue &operator= (const buffered_queue &other) = delete;

  // enable moving
  buffered_queue (buffered_queue &&other) = default;
  buffered_queue &operator= (buffered_queue &&other) = default;

  /* Returns (a copy of) the item at position start + n, i.e. n items ahead,
   * pulling as many items from the source as needed to reach it. n must be
   * non-negative. */
  T peek (int n)
  {
    // n should not be behind
    rust_assert (n >= 0);

    int num_queued_items = end - start;
    int num_items_required = n + 1;

    // if required items go past end of queue, add them to queue
    if (num_items_required > num_queued_items)
      {
        int num_items_to_read = num_items_required - num_queued_items;

        /* if queue length + extra items is larger than buffer size, expand
         * buffer */
        if (end + num_items_to_read > (int) buffer.size ())
          {
            // New size is 1.5x (current size + items still to be read).
            int new_size = (buffer.size () + num_items_to_read);
            new_size += (new_size >> 1);

            /* Move the live range to the front of a freshly reserved vector.
             * Reserving up front avoids a reallocation when the filler is
             * appended below.
             * TODO: determine overhead of this approach vs copy. Should be
             * lower. */
            std::vector<T> new_queue;
            new_queue.reserve (new_size);
            new_queue.insert (new_queue.begin (),
                              std::make_move_iterator (buffer.begin ()
                                                       + start),
                              std::make_move_iterator (buffer.begin () + end));
            start = 0;
            end = num_queued_items;

            // fill up rest of vector with junk so that indexing can work
            new_queue.insert (new_queue.begin () + end,
                              new_size - new_queue.size (), T ());

            buffer = std::move (new_queue);

            // validate that buffer is large enough now
            rust_assert (end + num_items_to_read <= (int) buffer.size ());
          }

        /* fetch the missing items from the source and store them past the
         * original end */
        for (int i = 0; i < num_items_to_read; i++)
          buffer[end + i] = source.next ();

        // move end based on additional items added
        end += num_items_to_read;
      }

    rust_assert (0 <= start);
    rust_assert (start <= end);
    rust_assert (end <= (int) buffer.size ());
    rust_assert (start + n < end);

    // return value at start + n in buffer
    return buffer[start + n];
  }

  /* TODO: add faster peek current token to remove overhead of conditional
   * branches? */

  // Advances start by n + 1, i.e. drops the current item and the n after it.
  void skip (int n)
  {
    // Call peek to ensure requested n is actually in queue.
    peek (n);

    // Clear queue values from start to n (inclusive).
    for (int i = 0; i < (n + 1); i++)
      buffer[start + i] = T ();

    // Move start forward by n + 1.
    start += (n + 1);

    // Ensure start is not impossible somehow
    rust_assert (0 <= start);
    rust_assert (start <= end);

    // Compact buffer if empty
    if (start == end)
      start = end = 0;
  }

  /* Inserts element at front of the queue, so that it becomes the current
   * item. Really dirty hack with terrible performance, only use when really
   * needed. */
  void insert_at_front (T elem_to_insert)
  {
    /* The front of the queue is buffer[start], not buffer[0]: inserting at
     * buffer.begin () would place the element before the live range whenever
     * start > 0. */
    buffer.insert (buffer.begin () + start, std::move (elem_to_insert));

    /* Increase the end number since added element means all others have
     * shifted one along */
    end++;
  }

  // Insert at arbitrary position (attempt). index is relative to the current
  // item and must be non-negative.
  void insert (int index, T elem_to_insert)
  {
    // TODO: test as this may not work properly

    // n should not be behind
    rust_assert (index >= 0);

    // call peek to ensure that the items behind this (at least) are in queue
    if (index >= 1)
      peek (index - 1);
    else
      peek (index);

    buffer.insert (buffer.begin () + start + index,
                   std::move (elem_to_insert));
    end++;
  }

  // Replaces the current value in the buffer. Total HACK.
  void replace_current_value (T replacement)
  {
    // call peek to ensure value exists
    peek (0);

    buffer[start] = std::move (replacement);

    // don't move start or end
  }

private:
  // Source of tokens for queue.
  Source source;

  // Begin of range in buffer, inclusive.
  int start;
  // End of range in buffer, exclusive.
  int end;

  // Queue buffer.
  std::vector<T> buffer;
};
} // namespace Rust
#endif