use std::fs;
use toml::{Table, Value};

#[derive(PartialEq)]
pub enum TokenType {
    OPERAND,
    TERMINATOR,
    IDENTIFIER,
    KEYWORD,
}

// Tokenizer
// Tokenizer and underlying functions to turn code into tokens
pub struct Tokenizer {
    pub token_list: Vec<String>,
    // BUG:
    pub tokens: Vec<Token>,
    // Grammar options from toml file
    pub configuration: Table,
}

// Token
// This is a token with a token type.
pub struct Token {
    pub token: String,
    pub token_type: TokenType,
}

// Implementation of Tokenizer
// Functions associated with the tokenizer struct and module.
impl Tokenizer {
    // @name read_configuration_from_file
    // @return
    // @brief Try to read configuration from an external file
    // @param &mut self, configuration_filename: &str
    pub fn read_configuration_from_file(&mut self, configuration_filename: &str) {
        let configuration_string: String = fs::read_to_string(configuration_filename).expect(
            (String::from("Could not open configuration file at: ") + configuration_filename)
                .as_str(),
        );

        let configuration = gtoml::parse(configuration_string.as_str()).expect("TOML invalid!");
        self.configuration = Table::try_from(configuration).unwrap();

        // Check for token section in config, panic if not present
        if !self.configuration.contains_key("token") {
            panic!("Token section is not present!");
        }

        // Check for semantics section in config, panic if not present
        if !self.configuration.contains_key("semantics") {
            panic!("Section semantics is not present!");
        }
    }
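
    // For reference, a configuration file that passes the checks above could look
    // roughly like the sketch below. The section names ("token", "semantics") and
    // array keys ("separator", "operands", "terminator", "keywords") are the ones
    // read by eat() and identify_tokens(); the concrete values are illustrative
    // assumptions only, not something shipped with this file.
    //
    // [token]
    // separator = [" "]
    // operands = ["=", "+"]
    // terminator = [";"]
    //
    // [semantics]
    // keywords = ["let"]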

    // @name new
    // @return Tokenizer
    // @brief Create a new Tokenizer
    // @param
    pub fn new() -> Tokenizer {
        let empty_tokens: Vec<Token> = vec![];
        let empty_value: toml::map::Map<String, Value> = toml::map::Map::new();
        let empty_token_list: Vec<String> = vec![];
        Tokenizer {
            tokens: empty_tokens,
            token_list: empty_token_list,
            configuration: empty_value,
        }
    }

    // @name eat
    // @return
    // @brief Consumes a string and saves the tokens
    // @param line: &str
    pub fn eat(&mut self, line: &str) {
        // Get token vectors from configuration
        let token_table_value: &Value = self.configuration.get("token").unwrap();
        let token_table: Table = Table::try_from(token_table_value).unwrap();
        let mut tokens: Vec<String> = vec![line.to_string()];
        let mut new_tokens: Vec<String> = vec![];
        let mut token_buffer: String = String::from("");

        // Iterate over tokens in token table and split tokens.
        if token_table.contains_key("separator") {
            let separator: Vec<Value> = token_table
                .get_key_value("separator")
                .unwrap()
                .1
                .as_array()
                .unwrap()
                .clone();
            if separator.len() > 0 {
                for token in tokens.iter() {
                    let mut token_feed = token.clone();
                    while !token_feed.is_empty() {
                        let mut no_match: bool = true;
                        for sep in separator.iter() {
                            if token_feed.starts_with(sep.as_str().unwrap()) {
                                // Reset and add token
                                no_match = false;
                                if token_buffer.len() > 0 {
                                    new_tokens.push(token_buffer.clone());
                                    token_buffer = String::from("");
                                }
                                let new_feed: String =
                                    token_feed.split_off(sep.as_str().unwrap().len());
                                token_feed = new_feed;
                            }
                        }
                        if no_match {
                            let new_feed: String = token_feed.split_off(1);
                            token_buffer = token_buffer
                                + String::from(token_feed.chars().next().unwrap()).as_str();
                            token_feed = new_feed;
                        }
                    }
                    // empty token
                    new_tokens.push(token_buffer.clone());
                    token_buffer = String::from("");
                }
                // empty token
                new_tokens.push(token_buffer.clone());
                token_buffer = String::from("");
            }
        }
        tokens = new_tokens.clone();
        new_tokens = vec![];
        if token_table.contains_key("operands") {
            let operands: Vec<Value> = token_table
                .get_key_value("operands")
                .unwrap()
                .1
                .as_array()
                .unwrap()
                .clone();
            if operands.len() > 0 {
                for token in tokens.iter() {
                    let mut token_feed = token.clone();
                    while !token_feed.is_empty() {
                        let mut no_match: bool = true;
                        for op in operands.iter() {
                            if token_feed.starts_with(op.as_str().unwrap()) {
                                // Reset and add token
                                no_match = false;
                                if token_buffer.len() > 0 {
                                    new_tokens.push(token_buffer.clone());
                                }
                                token_buffer = String::from("");
                                new_tokens.push(op.as_str().unwrap().to_string());
                                let new_feed: String =
                                    token_feed.split_off(op.as_str().unwrap().len());
                                token_feed = new_feed;
                            }
                        }
                        if no_match {
                            let new_feed: String = token_feed.split_off(1);
                            token_buffer = token_buffer
                                + String::from(token_feed.chars().next().unwrap()).as_str();
                            token_feed = new_feed;
                        }
                    }
                    // empty token
                    new_tokens.push(token_buffer.clone());
                    token_buffer = String::from("");
                }
                // empty token
                new_tokens.push(token_buffer.clone());
                token_buffer = String::from("");
            }
        }
        tokens = new_tokens.clone();
        new_tokens = vec![];
        if token_table.contains_key("terminator") {
            let terminator: Vec<Value> = token_table
                .get_key_value("terminator")
                .unwrap()
                .1
                .as_array()
                .unwrap()
                .clone();
            if terminator.len() > 0 {
                for token in tokens.iter() {
                    let mut token_feed = token.clone();
                    while !token_feed.is_empty() {
                        let mut no_match: bool = true;
                        for term in terminator.iter() {
                            if token_feed.starts_with(term.as_str().unwrap()) {
                                // Reset and add token
                                no_match = false;
                                if token_buffer.len() > 0 {
                                    new_tokens.push(token_buffer.clone());
                                }
                                token_buffer = String::from("");
                                new_tokens.push(term.as_str().unwrap().to_string());
                                let new_feed: String =
                                    token_feed.split_off(term.as_str().unwrap().len());
                                token_feed = new_feed;
                            }
                        }
                        if no_match {
                            let new_feed: String = token_feed.split_off(1);
                            token_buffer = token_buffer
                                + String::from(token_feed.chars().next().unwrap()).as_str();
                            token_feed = new_feed;
                        }
                    }
                    // empty token as token ended
                    new_tokens.push(token_buffer.clone());
                    token_buffer = String::from("");
                }
                // empty token
                new_tokens.push(token_buffer.clone());
            }
        }
        self.token_list.append(&mut new_tokens);

        // Clean up token list
        let mut cleaned_token_list: Vec<String> = vec![];
        for token in self.token_list.iter() {
            if token.as_str() != "" {
                cleaned_token_list.push(token.to_string());
            }
        }
        self.token_list = cleaned_token_list;
    }
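
    // As an illustration of eat() above, assuming the sketch configuration shown
    // earlier: feeding the line "let x = 1;" first splits on the separator " "
    // (which is dropped), then on the operand "=" and the terminator ";" (which
    // are kept as their own tokens), so after clean-up token_list would hold
    // ["let", "x", "=", "1", ";"].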

    // @name identify_tokens
    // @return
    // @brief Go through all tokens in the token list and assign each one a TokenType.
    // @param &mut self
    pub fn identify_tokens(&mut self) {
        // Go through token list
        let mut token_identities: Vec<Token> = vec![];
        let mut found_token: bool;
        let token_section: Table =
            Table::try_from(self.configuration.get("token").unwrap()).unwrap();
        let semantics_section: Table =
            Table::try_from(self.configuration.get("semantics").unwrap()).unwrap();

        for token in self.token_list.iter() {
            found_token = false;

            if token.as_str() == "" {
                continue;
            }

            // Check if token is an operand
            if token_section.contains_key("operands") {
                let operands: Vec<Value> = token_section
                    .get_key_value("operands")
                    .unwrap()
                    .1
                    .as_array()
                    .unwrap()
                    .clone();
                for operand in operands.iter() {
                    if operand.as_str().unwrap() == token.as_str() {
                        token_identities.push(Token {
                            token: token.clone(),
                            token_type: TokenType::OPERAND,
                        });
                        found_token = true;
                    }
                }
            }

            if token_section.contains_key("terminator") && !found_token {
                let terminator: Vec<Value> = token_section
                    .get_key_value("terminator")
                    .unwrap()
                    .1
                    .as_array()
                    .unwrap()
                    .clone();
                for term in terminator.iter() {
                    if term.as_str().unwrap() == token.as_str() {
                        token_identities.push(Token {
                            token: token.clone(),
                            token_type: TokenType::TERMINATOR,
                        });
                        found_token = true;
                    }
                }
            }

            if semantics_section.contains_key("keywords") && !found_token {
                let keywords: Vec<Value> = semantics_section
                    .get_key_value("keywords")
                    .unwrap()
                    .1
                    .as_array()
                    .unwrap()
                    .clone();
                for keyword in keywords.iter() {
                    if keyword.as_str().unwrap() == token.as_str() {
                        token_identities.push(Token {
                            token: token.clone(),
                            token_type: TokenType::KEYWORD,
                        });
                        found_token = true;
                    }
                }
            }

            if !found_token {
                token_identities.push(Token {
                    token: token.clone(),
                    token_type: TokenType::IDENTIFIER,
                });
            }
        }
        self.tokens = token_identities;
    }
}
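
// A minimal usage sketch, written as a test so it stays self-contained. The inline
// grammar (separator/operand/terminator/keyword values) is an assumption for
// illustration; it bypasses read_configuration_from_file by parsing a TOML string
// directly into the configuration table.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn classifies_a_simple_line() {
        let mut tokenizer = Tokenizer::new();
        tokenizer.configuration = toml::from_str(
            r#"
            [token]
            separator = [" "]
            operands = ["="]
            terminator = [";"]

            [semantics]
            keywords = ["let"]
            "#,
        )
        .expect("inline TOML should parse");

        tokenizer.eat("let x = 1;");
        tokenizer.identify_tokens();

        // "let" is listed under semantics.keywords, so it should come back as a KEYWORD.
        assert!(tokenizer
            .tokens
            .iter()
            .any(|t| t.token == "let" && t.token_type == TokenType::KEYWORD));
        // ";" is listed under token.terminator.
        assert!(tokenizer
            .tokens
            .iter()
            .any(|t| t.token == ";" && t.token_type == TokenType::TERMINATOR));
    }
}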