Compare commits
7 commits: 4a5488dff7 ... main

| Author | SHA1 | Date |
| --- | --- | --- |
| | 2a846a5f53 | |
| | f67c79c65b | |
| | 42fa5affb5 | |
| | 015de5dc0a | |
| | ddba3423df | |
| | 9e4141fc96 | |
| | 0b6073b5bb | |
Cargo.toml

@@ -4,3 +4,6 @@ version = "0.1.0"
 edition = "2024"

 [dependencies]
+gtoml = "0.1.2"
+toml = "0.9.3"
+regex = "1.11.1"
example.mlc (new file, 3 lines)

@@ -0,0 +1,3 @@
variable:=-3; c := (a+b- 3) * 23 + variable; d := c - a;Natural : Number (n) := {n >= 0};faculty : Natural (n) -> Natural := if n = 0 then 1 else faculty (n-1) * n end;
String Natural (n) := {Character * n};hello_word -> String := "Hello World!";
first_letter -> Character := 'a';
language.toml (new file, 79 lines)

@@ -0,0 +1,79 @@
# Meta rules are separate rules with priority over all other rules.
# They can be compared to preprocessor directives, but are more powerful.

# Pattern matching in preprocessor style, is running at highest priority before anything else.
[meta.replacements]
comments = ["^--.*", ""]

# Interpolation with a shell, replaces the meta pattern by the interpolation result.
# Passing arguments is supported through groups and #<parameter number> in the shell command.
[meta.interpolation]
with = ["^#with ([\\w./]+)", "cat $1"]
date = ["#date_now", "date"]
user = ["#user", "user"]
test = ["#test", "cat ./mathlib.mlc"]

# Describes tokens to be replaced by identifiers and then later swapped back in after the tokenizer.
# All special tokens are treated as constants
[meta.token]
string_constant = "\".*?\""
char_constant = "'.'"

# Every key below is used as type in an enumerate to sort the tokens
# -> Replacement in order
# -> Every amount of other symbols is saved as some kind of value
# -> Those are using the default type "identifier"
[token]
separator = [" ", ",", "\n"]
operands = [":=", "->", "<=", ">=", "<", ">", "!", "+", "-", "/", "*", "(", ")", "[", "]", "{", "}", "=", "?", ":"]
terminator = [";"]

[semantics]
keywords = ["if", "then", "else", "end"]

[constants]
number = "(?:0b[01]+|0x[0-9a-fA-F]+|0[0-7]+|[1-9][0-9]*)"
character = "'.'"
logic = "(true|false)"

[types]
Number = "number"
Character = "character"
Type = ""
Array = "{character * number}"
Logic = "logic"

# List of rules
# Rules can be found in traces
# use better names than rule_1, rule_2, ...
# The compiler will run through all rules trying to match exactly one.
# Uses the following generic types:
# - OPERAND
# - IDENTIFIER
# - KEYWORD
# - TERMINATOR
# - OTHER (Use this type for ambiguous parts. Same as lazy .+ in regular expressions)
# Definition of custom types are possible, by creation of a rule with the same name.
# IMPORTANT: Rules are always top priority and can overwrite other types.
# Named placeholders: The character # is reserved for named placeholders. They are only valid inside a rule.
[syntax]
definition = "IDENTIFIER#1 -> IDENTIFIER#2 := OTHER#3 TERMINATOR"
definition_with_parameter = "IDENTIFIER#1 : parameter#2 -> IDENTIFIER#3 := OTHER#4 TERMINATOR"
recursion = "#basename OTHER := OTHER #basename OTHER TERMINATOR"
replace_predef = [ "IDENTIFIER#1 -> OTHER := OTHER#2 TERMINATOR OTHER IDENTIFIER#1", "#1 -> OTHER := #2 TERMINATOR OTHER (#2)" ]
replace_postdef = [ "IDENTIFIER#1 OTHER TERMINATOR IDENTIFIER#1 -> OTHER := OTHER#2 TERMINATOR", "#2 OTHER TERMINATOR #1 -> OTHER := #2 TERMINATOR" ]
unfold_parameter = [ ": OTHER IDENTIFIER#1 ( IDENTIFIER#2 OTHER#3 ) OTHER ->", ": OTHER #1 #2 #1 ( #3 ) OTHER ->" ]
unfold_parameter_remove_brackets = [ ": OTHER IDENTIFIER ( ) OTHER ->", ": OTHER OTHER ->" ]
parameter = ": OTHER ->"

# The following sections are used to build different output formats
# [interpreter] refers to the builtin interpreter using a minimal subset of C syntax
# The name of each section is only used to specify the actual output.
[clang]
definition = "#2 #1 () {return (#3);}"
Logic = "int"
Number = "long int"
Character = "char"
Type = "struct"

[interpreter]
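A minimal standalone sketch (not part of this change) of how a [meta.replacements] entry such as comments = ["^--.*", ""] behaves when handed to the regex crate: the first element is compiled as a pattern, every match is replaced by the second element. It mirrors the behaviour exercised by test_replacements in src/testcases.rs below; note that without a (?m) flag the ^ anchor only matches the start of the whole input.

use regex::Regex;

fn main() {
    // Rule taken from [meta.replacements]: (pattern, replacement)
    let (pattern, replacement) = ("^--.*", "");
    let re = Regex::new(pattern).unwrap();
    // A leading comment is stripped ...
    assert_eq!(re.replace_all("-- Comment to remove", replacement), "");
    // ... but a comment that does not start the input is left untouched.
    assert_eq!(
        re.replace_all("This -- comment is not removed.", replacement),
        "This -- comment is not removed."
    );
}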
mathlib.mlc (new file, 4 lines)

@@ -0,0 +1,4 @@
Sigma -> Array := {0, 1, 2};
N -> Array := {3};
P -> Array := {3 -> 012};
S -> Number := 3;
src/identification.rs (new file, 75 lines)

@@ -0,0 +1,75 @@
use crate::Token;
use regex::Regex;
use toml::{Table, Value};

// Identifier
// Each Identifier is analyzed to be worked with.
pub struct Identifier {
    identities: Vec<Identity>,
    pub tokens: Vec<Token>,
    type_configuration: Table,
    constant_configuration: Table,
}

// Identity
// The Identity of a identifier token.
struct Identity {
    class: IdentityClass,
    name: String,
    sub_type: String,
}

pub enum IdentityClass {
    TYPE,
    CONSTANT,
    DEFINITION,
}

impl Identifier {
    pub fn new(token: Vec<Token>) -> Identifier {
        let identities: Vec<Identity> = vec![];
        let new_config_type: Table = Table::new();
        let new_config_constant: Table = Table::new();
        Identifier {
            identities: identities,
            tokens: token,
            type_configuration: new_config_type,
            constant_configuration: new_config_constant,
        }
    }

    pub fn load_criteria_from_configuration(&mut self, complete_configuration: Table) {
        let type_configuration_wrapped: &Value = complete_configuration
            .get("types")
            .expect("Missing section types in configuration.");
        let constant_configuration_wrapped: &Value = complete_configuration
            .get("constants")
            .expect("Missing section constants in configuration.");
        let type_configuration: Table = Table::try_from(type_configuration_wrapped)
            .expect("Can't read type configuration from Value.");
        let constant_configuration: Table = Table::try_from(constant_configuration_wrapped)
            .expect("Can't read constant configuration from Value.");
        self.type_configuration = type_configuration;
        self.constant_configuration = constant_configuration;
    }

    pub fn identify_identifiers(&mut self) {
        let tokens: &Vec<Token> = &self.tokens;
        let constant_patterns: Table = self.constant_configuration.clone();
        let type_names: Table = self.type_configuration.clone();
        let mut identity_found = false;

        for token in tokens.iter() {
            if token.token_type == crate::TokenType::IDENTIFIER {
                for raw_pattern in constant_patterns.iter() {
                    let pattern: &str = raw_pattern.1.as_str().unwrap();
                    let expression: Regex = Regex::new(pattern).unwrap();
                    // Check for constant
                    if expression.is_match(token.token.as_str()) {
                        println!("Matching! Found {:?} {:?}.", raw_pattern.0, token.token);
                    }
                }
            }
        }
    }
}
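A rough standalone illustration (not part of this change) of the matching step inside identify_identifiers above: each IDENTIFIER token is tested against the regex patterns from the [constants] section, and a hit is currently only reported via println!. The pattern below is copied from language.toml; the token values are made up.

use regex::Regex;

fn main() {
    // "number" pattern from the [constants] section
    let number_pattern = "(?:0b[01]+|0x[0-9a-fA-F]+|0[0-7]+|[1-9][0-9]*)";
    let expression = Regex::new(number_pattern).unwrap();
    for token in ["125", "0x1F", "faculty"] {
        // Same check as identify_identifiers(): report identifiers that look like constants.
        if expression.is_match(token) {
            println!("Matching! Found \"number\" {:?}.", token);
        }
    }
}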
src/main.rs

@@ -1,16 +1,52 @@
-mod collector;
+mod preprocessor;
+mod syntax;
+mod testcases;
+mod tokenizer;

-use collector::Collector;
+use tokenizer::*;

 fn main() {
-    let mut _collector: Collector = Collector {
-        definitions: vec![(String::from(""), String::from(""))],
-        arguments: vec![(String::from(""), String::from(""))],
-    };
-
-    let test_string: String = String::from("(1 + 2) * 3");
+    // CL-Wrapper
+    let args: Vec<String> = std::env::args().collect();
+
+    // Adjust to following principle:
+    // micro [-t <target>] [-l <language.toml>] [<list of source files>]
+    // -t default: first found
+    // -l default: language.toml
+    //
+    // Either loads all source files or takes stdin input by piping code into the program
+    let mut raw_source_code: String = String::from("");
+    for i in 1..args.len() {
+        raw_source_code = raw_source_code
+            + std::fs::read_to_string(args[i].clone())
+                .expect("Source file not found!")
+                .as_str();
+    }

-    let echo_string: String = _collector.eval(test_string);
+    // Load language toml
+    let mut meta_rules: crate::preprocessor::MetaRules =
+        crate::preprocessor::MetaRules::new("./language.toml");
+    let mut tokenizer_configuration: Tokenizer = Tokenizer::new();
+    tokenizer_configuration.read_configuration_from_file("./language.toml");

-    println!("Result: {}", echo_string);
+    // Run preprocessor
+    let preprocessed_source_code: String = meta_rules.process(raw_source_code);
+
+    // Tokenizing
+    tokenizer_configuration.eat(preprocessed_source_code.as_str());
+    tokenizer_configuration.identify_tokens();
+    // Reintroducing meta_tokens
+    for meta_token in meta_rules.special_tokens.iter() {
+        // Go through all tokens
+        for i in 0..tokenizer_configuration.tokens.len() {
+            if meta_token.0 == tokenizer_configuration.tokens[i].token {
+                tokenizer_configuration.tokens[i] = meta_token.1.clone();
+                break;
+            }
+        }
+    }
+
+    // Syntax resolving
+
+    // Apply translation
 }
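The argument handling above is still the simple placeholder described in its comments: every command-line argument is read as a source file and concatenated in order, so an invocation along the lines of micro example.mlc mathlib.mlc (binary name taken from the comment, purely illustrative) feeds both files through the preprocessor and tokenizer; the -t and -l flags from the sketched interface are not parsed yet.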
src/preprocessor.rs (new file, 188 lines)

@@ -0,0 +1,188 @@
use crate::tokenizer::Token;
use regex::{Captures, Match, Regex};
use toml::{Table, Value};

// MetaRules
// Struct containing all meta rules.
pub struct MetaRules {
    replacement_rules: Vec<(String, (String, String))>,
    interpolation_rules: Vec<(String, (String, String))>,
    token_rules: Vec<(String, String)>,
    pub special_tokens: Vec<(String, Token)>,
}

// Implementation of MetaRules
// Trait implementation
impl MetaRules {
    // @name new
    // @return MetaRules
    // @brief Create a new rule struct by reading from a configuration file.
    // @param configuration_filename: &str
    pub fn new(configuration_filename: &str) -> MetaRules {
        let configuration_content: String = std::fs::read_to_string(configuration_filename)
            .expect("[ERROR] Could not open configuration file!");
        let mut replacements: Vec<(String, (String, String))> = vec![];
        let mut interpolation: Vec<(String, (String, String))> = vec![];
        let mut meta_token_rules: Vec<(String, String)> = vec![];
        let meta_tokens: Vec<(String, Token)> = vec![];
        let configuration = gtoml::parse(configuration_content.as_str())
            .expect("[ERROR] TOML invalid in preprocessor!");
        let configuration_unpacked: Table = Table::try_from(configuration).unwrap();
        let meta_configuration: Table = match configuration_unpacked.get("meta") {
            Some(config) => config.as_table().unwrap().clone(),
            None => Table::new(),
        };

        if !meta_configuration.is_empty() {
            if meta_configuration.contains_key("replacements") {
                println!("[INFO] Found replacement rules.");
                let replacement_rules: Table = meta_configuration
                    .get("replacements")
                    .unwrap()
                    .as_table()
                    .unwrap()
                    .clone();
                for key in replacement_rules.keys() {
                    let value: Vec<Value> = replacement_rules
                        .get(key)
                        .unwrap()
                        .as_array()
                        .unwrap()
                        .clone();
                    let name: String = key.clone();
                    let pattern: String = value[0].as_str().unwrap().to_owned();
                    let replacement: String = value[1].as_str().unwrap().to_owned();
                    replacements.push((name, (pattern, replacement)));
                }
            }

            if meta_configuration.contains_key("interpolation") {
                println!("[INFO] Found interpolation rules.");
                let interpolation_rules: Table = meta_configuration
                    .get("interpolation")
                    .unwrap()
                    .as_table()
                    .unwrap()
                    .clone();
                for key in interpolation_rules.keys() {
                    let value: Vec<Value> = interpolation_rules
                        .get(key)
                        .unwrap()
                        .as_array()
                        .unwrap()
                        .clone();
                    let name: String = key.clone();
                    let pattern: String = value[0].as_str().unwrap().to_owned();
                    let cmd: &str = value[1].as_str().unwrap();
                    interpolation.push((name, (pattern, String::from(cmd))));
                }
            }

            if meta_configuration.contains_key("token") {
                println!("[INFO] Found token rules.");
                let token_rules: Table = meta_configuration
                    .get("token")
                    .unwrap()
                    .as_table()
                    .unwrap()
                    .clone();
                for rule in token_rules.keys() {
                    let pattern: String =
                        token_rules.get(rule).unwrap().as_str().unwrap().to_owned();
                    meta_token_rules.push((rule.clone(), pattern));
                }
            }
        } else {
            println!("[WARNING] No meta configuration, skipping preprocessor.");
        }

        MetaRules {
            replacement_rules: replacements,
            interpolation_rules: interpolation,
            token_rules: meta_token_rules,
            special_tokens: meta_tokens,
        }
    }

    // @name process
    // @return String
    // @brief Run preprocessor on raw code.
    // @param rule_set: MetaRules, raw_code: String
    pub fn process(&mut self, raw_code: String) -> String {
        let mut processed_code: String = raw_code.clone();

        // replacement rules
        for rule in self.replacement_rules.iter() {
            println!("[INFO] Applying rule {}", rule.0);
            let base_pattern: Regex = Regex::new((rule.1 .0).as_str()).unwrap();
            processed_code = base_pattern
                .replace_all(processed_code.as_str(), rule.1 .1.as_str())
                .to_string();
        }

        // interpolation rules
        for rule in self.interpolation_rules.iter() {
            println!("[INFO] Applying rule {}", rule.0);
            let base_pattern: Regex = Regex::new((rule.1 .0).as_str()).unwrap();
            let processed_code_replacement = processed_code.clone();

            let captures: Option<Captures> =
                base_pattern.captures(processed_code_replacement.as_str());
            let directive: String;
            match captures {
                Some(n) => directive = n.get(0).map_or("", |m| m.as_str()).to_string(),
                None => continue,
            };
            let command: &str = &base_pattern.replace(directive.as_str(), rule.1 .1.as_str());
            let subprocess = std::process::Command::new("/bin/bash")
                .arg("-c")
                .arg(String::from("echo \"$(") + command + ")\"")
                .output()
                .expect((String::from("") + "Failed to run command " + command + "!").as_str());
            processed_code = base_pattern
                .replace(
                    processed_code.as_str(),
                    String::from_utf8(subprocess.stdout).unwrap(),
                )
                .to_string();
        }

        for token_style in self.token_rules.iter() {
            println!("[INFO] Searching meta tokens of style {}", token_style.0);

            // Search all occurrences
            let token_pattern: Regex =
                Regex::new(token_style.1.as_str()).expect("Could not assign pattern.");
            let match_list: Match;
            match_list = match token_pattern.find(processed_code.as_str()) {
                Some(n) => n,
                None => continue,
            };

            // Create id for each occurrence
            let meta_id: String = String::from("meta_token_")
                + match_list.start().to_string().as_str()
                + "__"
                + match_list.end().to_string().as_str();

            // Replace token by id
            let meta_value: String = match_list.as_str().to_string();
            let value_regex: Regex =
                Regex::new(meta_value.as_str()).expect("Could not create pattern.");
            processed_code = value_regex
                .replace(processed_code.as_str(), meta_id.as_str())
                .to_string();

            // Safe id and token
            self.special_tokens.push((
                meta_id,
                Token {
                    token: meta_value,
                    token_type: crate::TokenType::IDENTIFIER,
                },
            ));
        }

        return processed_code;
    }
}
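A concrete trace of the meta-token pass above, taken from test_meta_token in src/testcases.rs: for the input "sample" (in quotes, 8 bytes) the string_constant rule matches at byte offsets 0..8, process() substitutes the id meta_token_0__8 into the code, and the pair (meta_token_0__8, Token { token: "\"sample\"", token_type: IDENTIFIER }) is pushed onto special_tokens so that main() can swap the original text back in after tokenizing.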
src/structure.rs (new file, 31 lines)

@@ -0,0 +1,31 @@
// HeadStructure
// Top level of structure.
pub struct HeadStructure {
    token: TokenConfiguration,
    syntax: SyntaxConfiguration,
    semantics: SemanticsConfiguration,
    types: TypesConfiguration,
    hdl: HdlConfiguration,
    compiled: CompiledConfiguration,
    interpreter: InterpreterConfiguration,
}

pub struct TokenConfiguration {
    separator: Vec<String>,
    operands: Vec<String>,
    terminator: Vec<String>,
}

pub struct SyntaxConfiguration {
    keywords: Vec<String>,
}

pub struct SemanticsConfiguration {}

pub struct TypesConfiguration {}

pub struct HdlConfiguration {}

pub struct CompiledConfiguration {}

pub struct InterpreterConfiguration {}
src/syntax.rs (new file, 76 lines)

@@ -0,0 +1,76 @@
use toml::{Table, Value};

// SyntaxRule
// Implementation of a syntax rule that can be applied.
#[derive(Debug)]
pub struct SyntaxRule {
    pub name: String,
    pub left: String,
    pub right: String,
}

// Implementation of SyntaxRule
// Load and Resolve from outside
impl SyntaxRule {
    // @name new
    // @return SyntaxRule
    // @brief Create a new syntax rule / load rule set.
    // @param name_: String, left_: String, right_: String
    fn new(name_: String, left_: String, right_: String) -> SyntaxRule {
        SyntaxRule {
            name: String::new(),
            left: String::new(),
            right: String::new(),
        }
    }

    // @name load
    // @return Vec<SyntaxRule>
    // @brief Load configuration and retrieve transformation rules.
    // @param configuration_filename: &str
    pub fn load(configuration_filename: &str) -> Vec<SyntaxRule> {
        let mut rules: Vec<SyntaxRule> = vec![];
        let configuration_content: String = std::fs::read_to_string(configuration_filename)
            .expect("[ERROR] Could not open configuration file!");
        let configuration = gtoml::parse(configuration_content.as_str())
            .expect("[ERROR] TOML invalid in preprocessor!");
        let configuration_unpacked: Table = Table::try_from(configuration).unwrap();

        let syntax_definitions: Table = match configuration_unpacked.get("syntax") {
            Some(config) => config.as_table().unwrap().clone(),
            None => Table::new(),
        };

        for key in syntax_definitions.keys() {
            let rule: Value = syntax_definitions.get(key).unwrap().clone();
            if rule.is_array() {
                let rule_array = rule.as_array().unwrap();
                let left: String = rule_array[0].to_string();
                let right: String = rule_array[1].to_string();
                rules.push(SyntaxRule {
                    name: key.to_string(),
                    left: left,
                    right: right,
                });
            }
        }

        rules
    }

    // @name resolve
    // @return String
    // @brief Applies all rules until none of them can be applied again.
    // @param rules: Vec<SyntaxRule>, unsolved: String
    pub fn resolve(rules: Vec<SyntaxRule>, unsolved: String) -> String {
        String::new()
    }

    // @name transform
    // @return String
    // @brief Applies a rule.
    // @param &mut self, unformed: String
    fn transform(&mut self, unformed: String) -> String {
        String::new()
    }
}
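Reading the rule strings that load() pulls from the [syntax] section, a placeholder such as IDENTIFIER#1 names the token it matches so the right-hand side can reuse it. For example, the definition rule "IDENTIFIER#1 -> IDENTIFIER#2 := OTHER#3 TERMINATOR" would presumably bind #1 = first_letter, #2 = Character and #3 = 'a' on the token stream of first_letter -> Character := 'a'; from example.mlc; resolve() and transform() that would apply such a binding are still stubs returning empty strings.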
src/testcases.rs (new file, 138 lines)

@@ -0,0 +1,138 @@
#[cfg(test)]
mod tests {

    // preprocessor
    #[test]
    fn test_replacements() {
        let mut ruleset: crate::preprocessor::MetaRules =
            crate::preprocessor::MetaRules::new("./testspecs.toml");
        let sut: String = ruleset.process(String::from("-- Comment to remove"));
        let verify: String = String::from("");
        let case_comment_at_end: String =
            ruleset.process(String::from("This -- comment is not removed."));
        let case_comment_at_end_verify: String = String::from("This -- comment is not removed.");
        assert_eq!(sut, verify);
        assert_eq!(case_comment_at_end, case_comment_at_end_verify);
    }

    #[test]
    fn test_interpolation() {
        let mut ruleset: crate::preprocessor::MetaRules =
            crate::preprocessor::MetaRules::new("./testspecs.toml");
        let run_with_interpolation_test: String = ruleset.process(String::from("#test"));
        let interpolation_verification: String = std::fs::read_to_string("./mathlib.mlc").unwrap();

        assert_eq!(run_with_interpolation_test, interpolation_verification);
    }

    #[test]
    fn test_meta_token() {
        let mut ruleset: crate::preprocessor::MetaRules =
            crate::preprocessor::MetaRules::new("./testspecs.toml");
        let meta_token_test_string: String = ruleset.process(String::from("\"sample\""));
        let meta_token_sample_string: String = String::from("\"sample\"");
        let meta_token_verify: Vec<crate::tokenizer::Token> = vec![crate::tokenizer::Token {
            token: meta_token_sample_string,
            token_type: crate::tokenizer::TokenType::IDENTIFIER,
        }];
        assert_eq!(meta_token_verify.len(), ruleset.special_tokens.len());
        assert_eq!(
            meta_token_verify[0].token,
            ruleset.special_tokens[0].1.token
        );
        assert_eq!(meta_token_test_string, "meta_token_0__8");
    }

    // Tokenizer
    #[test]
    fn test_eat() {
        let mut sample: crate::tokenizer::Tokenizer = crate::tokenizer::Tokenizer::new();
        sample.read_configuration_from_file("./testspecs.toml");
        sample.eat("faculty : Natural n := if n = 0 then 1 else n * faculty (n - 1);");

        assert_eq!(
            sample.token_list,
            vec![
                "faculty", ":", "Natural", "n", ":=", "if", "n", "=", "0", "then", "1", "else",
                "n", "*", "faculty", "(", "n", "-", "1", ")", ";"
            ]
        )
    }

    #[test]
    fn test_identify_tokens() {
        let mut token_sample: crate::tokenizer::Tokenizer = crate::tokenizer::Tokenizer::new();
        token_sample.read_configuration_from_file("./testspecs.toml");
        token_sample.eat("id : -> 125;");
        token_sample.identify_tokens();

        let mut token_verify: crate::tokenizer::Tokenizer = crate::tokenizer::Tokenizer::new();
        token_verify.read_configuration_from_file("./testspecs.toml");
        token_verify.eat("id : -> 125;");

        token_verify.tokens = vec![
            crate::tokenizer::Token {
                token: String::from("id"),
                token_type: crate::tokenizer::TokenType::IDENTIFIER,
            },
            crate::tokenizer::Token {
                token: String::from(":"),
                token_type: crate::tokenizer::TokenType::OPERAND,
            },
            crate::tokenizer::Token {
                token: String::from("->"),
                token_type: crate::tokenizer::TokenType::OPERAND,
            },
            crate::tokenizer::Token {
                token: String::from("125"),
                token_type: crate::tokenizer::TokenType::IDENTIFIER,
            },
            crate::tokenizer::Token {
                token: String::from(";"),
                token_type: crate::tokenizer::TokenType::TERMINATOR,
            },
        ];

        assert_eq!(token_sample.configuration, token_verify.configuration);
        assert_eq!(token_sample.tokens.len(), token_verify.tokens.len());
        assert_eq!(token_sample.token_list.len(), token_verify.token_list.len());
    }

    // @name test_syntax_load
    // @return
    // @brief
    // @param
    #[test]
    fn test_syntax_load() {
        let test: Vec<crate::syntax::SyntaxRule> =
            crate::syntax::SyntaxRule::load("./testspecs.toml");
        let verify: Vec<crate::syntax::SyntaxRule> = vec![
            crate::syntax::SyntaxRule {
                name: String::from("replace_predef"),
                left: String::from(
                    "IDENTIFIER#1 -> OTHER := OTHER#2 TERMINATOR OTHER IDENTIFIER#1",
                ),
                right: String::from("#1 -> OTHER := #2 TERMINATOR OTHER (#2)"),
            },
            crate::syntax::SyntaxRule {
                name: String::from("replace_postdef"),
                left: String::from(
                    "IDENTIFIER#1 OTHER TERMINATOR IDENTIFIER#1 -> OTHER := OTHER#2 TERMINATOR",
                ),
                right: String::from("#2 OTHER TERMINATOR #1 -> OTHER := #2 TERMINATOR"),
            },
            crate::syntax::SyntaxRule {
                name: String::from("unfold_parameter"),
                left: String::from(": OTHER IDENTIFIER#1 ( IDENTIFIER#2 OTHER#3 ) OTHER ->"),
                right: String::from(": OTHER #1 #2 #1 ( #3 ) OTHER ->"),
            },
            crate::syntax::SyntaxRule {
                name: String::from("unfold_parameter_remove_brackets"),
                left: String::from(": OTHER IDENTIFIER ( ) OTHER ->"),
                right: String::from(": OTHER OTHER ->"),
            },
        ];

        assert_eq!(test.len(), verify.len());
    }
}
src/tokenizer.rs (new file, 330 lines)

@@ -0,0 +1,330 @@
use std::fs;
use toml::{Table, Value};

#[derive(PartialEq, Debug)]
pub enum TokenType {
    OPERAND,
    TERMINATOR,
    IDENTIFIER,
    KEYWORD,
}

// Tokenizer
// Tokenizer and underlying functions to turn code into tokens
pub struct Tokenizer {
    pub token_list: Vec<String>,
    // BUG:
    pub tokens: Vec<Token>,
    // Grammar options from toml file
    pub configuration: Table,
}

// Token
// This is a token with a token type.
#[derive(Debug)]
pub struct Token {
    pub token: String,
    pub token_type: TokenType,
}

impl Clone for Token {
    fn clone(&self) -> Token {
        let token_type: TokenType = match self.token_type {
            TokenType::OPERAND => TokenType::OPERAND,
            TokenType::KEYWORD => TokenType::KEYWORD,
            TokenType::TERMINATOR => TokenType::TERMINATOR,
            TokenType::IDENTIFIER => TokenType::IDENTIFIER,
        };
        Token {
            token: self.token.clone(),
            token_type: token_type,
        }
    }
}

// Implementation of Tokenizer
// Functions associated with the tokenizer struct and module.
impl Tokenizer {
    // @name read_configuration_from_file
    // @return
    // @brief Try to read configuration from an external file
    // @param &mut self, configuration_filename: &str
    pub fn read_configuration_from_file(&mut self, configuration_filename: &str) {
        let configuration_string: String = fs::read_to_string(configuration_filename).expect(
            (String::from("Could not open configuration file at: ") + configuration_filename)
                .as_str(),
        );

        let configuration = gtoml::parse(configuration_string.as_str()).expect("TOML invalid!");
        self.configuration = Table::try_from(configuration).unwrap();

        // Check for token section in config, panic if not present
        if !self.configuration.contains_key("token") {
            panic!("Token section is not present!");
        }

        // Check for semantics section in config, panic if not present
        if !self.configuration.contains_key("semantics") {
            panic!("Section semantics is not present!");
        }
    }

    // @name new
    // @return Tokenizer
    // @brief Create a new Tokenizer
    // @param
    pub fn new() -> Tokenizer {
        let empty_tokens: Vec<Token> = vec![];
        let empty_value: toml::map::Map<String, Value> = toml::map::Map::new();
        let empty_token_list: Vec<String> = vec![];
        Tokenizer {
            tokens: empty_tokens,
            token_list: empty_token_list,
            configuration: empty_value,
        }
    }

    // @name eat
    // @return
    // @brief Consumes a string and safes the tokens
    // @param line: &str
    pub fn eat(&mut self, line: &str) {
        // Get token vectors from configuration
        let token_table_value: &Value = self.configuration.get("token").unwrap();
        let token_table: Table = Table::try_from(token_table_value).unwrap();
        let mut tokens: Vec<String> = vec![line.to_string()];
        let mut new_tokens: Vec<String> = vec![];
        let mut token_buffer: String = String::from("");

        // Iterate over tokens in token table and split tokens.
        if token_table.contains_key("separator") {
            let separator: Vec<Value> = token_table
                .get_key_value("separator")
                .unwrap()
                .1
                .as_array()
                .unwrap()
                .clone();
            if separator.len() > 0 {
                for token in tokens.iter() {
                    let mut token_feed = token.clone();
                    while !token_feed.is_empty() {
                        let mut no_match: bool = true;
                        for sep in separator.iter() {
                            if token_feed.starts_with(sep.as_str().unwrap()) {
                                // Reset and add token
                                no_match = false;
                                if token_buffer.len() > 0 {
                                    new_tokens.push(token_buffer.clone());
                                    token_buffer = String::from("");
                                }
                                let new_feed: String =
                                    token_feed.split_off(sep.as_str().unwrap().len());
                                token_feed = new_feed;
                            }
                        }
                        if no_match {
                            let new_feed: String = token_feed.split_off(1);
                            token_buffer = token_buffer
                                + String::from(token_feed.chars().next().unwrap()).as_str();
                            token_feed = new_feed;
                        }
                    }
                    // empty token
                    new_tokens.push(token_buffer.clone());
                    token_buffer = String::from("");
                }
                // empty token
                new_tokens.push(token_buffer.clone());
                token_buffer = String::from("");
            }
        }
        tokens = new_tokens.clone();
        new_tokens = vec![];
        if token_table.contains_key("operands") {
            let operands: Vec<Value> = token_table
                .get_key_value("operands")
                .unwrap()
                .1
                .as_array()
                .unwrap()
                .clone();
            if operands.len() > 0 {
                for token in tokens.iter() {
                    let mut token_feed = token.clone();
                    while !token_feed.is_empty() {
                        let mut no_match: bool = true;
                        for op in operands.iter() {
                            if token_feed.starts_with(op.as_str().unwrap()) {
                                // Reset and add token
                                no_match = false;
                                if token_buffer.len() > 0 {
                                    new_tokens.push(token_buffer.clone());
                                }
                                token_buffer = String::from("");
                                new_tokens.push(op.as_str().unwrap().to_string());
                                let new_feed: String =
                                    token_feed.split_off(op.as_str().unwrap().len());
                                token_feed = new_feed;
                            }
                        }
                        if no_match {
                            let new_feed: String = token_feed.split_off(1);
                            token_buffer = token_buffer
                                + String::from(token_feed.chars().next().unwrap()).as_str();
                            token_feed = new_feed;
                        }
                    }
                    // empty token
                    new_tokens.push(token_buffer.clone());
                    token_buffer = String::from("");
                }
                // empty token
                new_tokens.push(token_buffer.clone());
                token_buffer = String::from("");
            }
        }
        tokens = new_tokens.clone();
        new_tokens = vec![];
        if token_table.contains_key("terminator") {
            let terminator: Vec<Value> = token_table
                .get_key_value("terminator")
                .unwrap()
                .1
                .as_array()
                .unwrap()
                .clone();
            if terminator.len() > 0 {
                for token in tokens.iter() {
                    let mut token_feed = token.clone();
                    while !token_feed.is_empty() {
                        let mut no_match: bool = true;
                        for term in terminator.iter() {
                            if token_feed.starts_with(term.as_str().unwrap()) {
                                // Reset and add token
                                no_match = false;
                                if token_buffer.len() > 0 {
                                    new_tokens.push(token_buffer.clone());
                                }
                                token_buffer = String::from("");
                                new_tokens.push(term.as_str().unwrap().to_string());
                                let new_feed: String =
                                    token_feed.split_off(term.as_str().unwrap().len());
                                token_feed = new_feed;
                            }
                        }
                        if no_match {
                            let new_feed: String = token_feed.split_off(1);
                            token_buffer = token_buffer
                                + String::from(token_feed.chars().next().unwrap()).as_str();
                            token_feed = new_feed;
                        }
                    }
                    // empty token as token ended
                    new_tokens.push(token_buffer.clone());
                    token_buffer = String::from("");
                }
                // empty token
                new_tokens.push(token_buffer.clone());
            }
        }
        self.token_list.append(&mut new_tokens);

        // Clean up token list
        let mut cleaned_token_list: Vec<String> = vec![];
        for token in self.token_list.iter() {
            if token.as_str() != "" {
                cleaned_token_list.push(token.to_string());
            }
        }
        self.token_list = cleaned_token_list;
    }

    // @name identify_tokens
    // @return
    // @brief Go through all tokens and try to find them.
    // @param &mut self
    pub fn identify_tokens(&mut self) {
        // Go through token list
        let mut token_identities: Vec<Token> = vec![];
        let mut found_token: bool;
        let token_section: Table =
            Table::try_from(self.configuration.get("token").unwrap()).unwrap();
        let semantics_section: Table =
            Table::try_from(self.configuration.get("semantics").unwrap()).unwrap();

        for token in self.token_list.iter() {
            found_token = false;

            if token.as_str() == "" {
                continue;
            }

            // Check if token is an operand
            if token_section.contains_key("operands") {
                let operands: Vec<Value> = token_section
                    .get_key_value("operands")
                    .unwrap()
                    .1
                    .as_array()
                    .unwrap()
                    .clone();
                for operand in operands.iter() {
                    if operand.as_str().unwrap() == token.as_str() {
                        token_identities.push(Token {
                            token: token.clone(),
                            token_type: TokenType::OPERAND,
                        });
                        found_token = true;
                    }
                }
            }

            if token_section.contains_key("terminator") && !found_token {
                let terminator: Vec<Value> = token_section
                    .get_key_value("terminator")
                    .unwrap()
                    .1
                    .as_array()
                    .unwrap()
                    .clone();
                for term in terminator.iter() {
                    if term.as_str().unwrap() == token.as_str() {
                        token_identities.push(Token {
                            token: token.clone(),
                            token_type: TokenType::TERMINATOR,
                        });
                        found_token = true;
                    }
                }
            }

            if semantics_section.contains_key("keywords") && !found_token {
                let keywords: Vec<Value> = semantics_section
                    .get_key_value("keywords")
                    .unwrap()
                    .1
                    .as_array()
                    .unwrap()
                    .clone();
                for keyword in keywords.iter() {
                    if keyword.as_str().unwrap() == token.as_str() {
                        token_identities.push(Token {
                            token: token.clone(),
                            token_type: TokenType::KEYWORD,
                        });
                        found_token = true;
                    }
                }
            }

            if !found_token {
                token_identities.push(Token {
                    token: token.clone(),
                    token_type: TokenType::IDENTIFIER,
                });
            }
        }
        self.tokens = token_identities;
    }
}
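A short worked example of the two passes above, matching test_identify_tokens in src/testcases.rs: with the [token] section from testspecs.toml, eat("id : -> 125;") first splits on the separator " ", then splits out the operands ":" and "->" and the terminator ";", leaving the token list ["id", ":", "->", "125", ";"]. identify_tokens() then classifies ":" and "->" as OPERAND, ";" as TERMINATOR, and both "id" and "125" as IDENTIFIER; constants such as 125 are only recognised later, by the identification stage in src/identification.rs.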
testspecs.toml (new file, 79 lines)

@@ -0,0 +1,79 @@
# Meta rules are separate rules with priority over all other rules.
# They can be compared to preprocessor directives, but are more powerful.

# Pattern matching in preprocessor style, is running at highest priority before anything else.
[meta.replacements]
comments = ["^--.*", ""]

# Interpolation with a shell, replaces the meta pattern by the interpolation result.
# Passing arguments is supported through groups and #<parameter number> in the shell command.
[meta.interpolation]
with = ["^#with ([\\w./]+)", "cat $1"]
date = ["#date_now", "date"]
user = ["#user", "user"]
test = ["#test", "cat ./mathlib.mlc"]

# Describes tokens to be replaced by identifiers and then later swapped back in after the tokenizer.
# All special tokens are treated as constants
[meta.token]
string_constant = "\".*?\""
char_constant = "'.'"

# Every key below is used as type in an enumerate to sort the tokens
# -> Replacement in order
# -> Every amount of other symbols is saved as some kind of value
# -> Those are using the default type "identifier"
[token]
separator = [" ", ",", "\n"]
operands = [":=", "->", "<=", ">=", "<", ">", "!", "+", "-", "/", "*", "(", ")", "[", "]", "{", "}", "=", "?", ":"]
terminator = [";"]

[semantics]
keywords = ["if", "then", "else", "end"]

[constants]
number = "(?:0b[01]+|0x[0-9a-fA-F]+|0[0-7]+|[1-9][0-9]*)"
character = "'.'"
logic = "(true|false)"

[types]
Number = "number"
Character = "character"
Type = ""
Array = "{character * number}"
Logic = "logic"

# List of rules
# Rules can be found in traces
# use better names than rule_1, rule_2, ...
# The compiler will run through all rules trying to match exactly one.
# Uses the following generic types:
# - OPERAND
# - IDENTIFIER
# - KEYWORD
# - TERMINATOR
# - OTHER (Use this type for ambiguous parts. Same as lazy .+ in regular expressions)
# Definition of custom types are possible, by creation of a rule with the same name.
# IMPORTANT: Rules are always top priority and can overwrite other types.
# Named placeholders: The character # is reserved for named placeholders. They are only valid inside a rule.
[syntax]
definition = "IDENTIFIER#1 -> IDENTIFIER#2 := OTHER#3 TERMINATOR"
definition_with_parameter = "IDENTIFIER#1 : parameter#2 -> IDENTIFIER#3 := OTHER#4 TERMINATOR"
recursion = "#basename OTHER := OTHER #basename OTHER TERMINATOR"
replace_predef = [ "IDENTIFIER#1 -> OTHER := OTHER#2 TERMINATOR OTHER IDENTIFIER#1", "#1 -> OTHER := #2 TERMINATOR OTHER (#2)" ]
replace_postdef = [ "IDENTIFIER#1 OTHER TERMINATOR IDENTIFIER#1 -> OTHER := OTHER#2 TERMINATOR", "#2 OTHER TERMINATOR #1 -> OTHER := #2 TERMINATOR" ]
unfold_parameter = [ ": OTHER IDENTIFIER#1 ( IDENTIFIER#2 OTHER#3 ) OTHER ->", ": OTHER #1 #2 #1 ( #3 ) OTHER ->" ]
unfold_parameter_remove_brackets = [ ": OTHER IDENTIFIER ( ) OTHER ->", ": OTHER OTHER ->" ]
parameter = ": OTHER ->"

# The following sections are used to build different output formats
# [interpreter] refers to the builtin interpreter using a minimal subset of C syntax
# The name of each section is only used to specify the actual output.
[clang]
definition = "#2 #1 () {return (#3);}"
Logic = "int"
Number = "long int"
Character = "char"
Type = "struct"

[interpreter]