implement meta token replacement

parent ddba3423df
commit 015de5dc0a
@@ -1,3 +1,4 @@
+#with mathlib.mlc
 variable:=-3; c := (a+b- 3) * 23 + variable; d := c - a;Natural : Number (n) := {n >= 0};faculty : Natural (n) -> Natural := if n = 0 then 1 else faculty (n-1) * n end;
 String Natural (n) := {Character * n};hello_word -> String := "Hello World!";
 first_letter -> Character := 'a';
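Note: the added `#with mathlib.mlc` line is a preprocessor directive rather than MLC syntax. The `with` rule in language.toml (next hunk) matches it and splices in the output of `cat mathlib.mlc`, so the tokenizer never sees the directive itself.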
@@ -11,6 +11,7 @@ comments = ["^--.*", ""]
 with = ["^#with ([\\w./]+)", "cat $1"]
 date = ["#date_now", "date"]
 user = ["#user", "user"]
+test = ["#test", "cat ./mathlib.mlc"]

 # Describes tokens to be replaced by identifiers and then later swapped back in after the tokenizer.
 # All special tokens are treated as constants
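Note: each rule above pairs a regex with a shell command; `$1` in the command is substituted with the first capture group before the command is run through `/bin/bash`. A minimal standalone sketch of how the `with` rule's pattern yields its command (the helper name is illustrative, not part of the codebase; assumes the `regex` crate the project already uses):

    use regex::Regex;

    // Turn "#with mathlib.mlc" into the shell command "cat mathlib.mlc".
    fn build_command(directive: &str) -> Option<String> {
        let rule = Regex::new(r"^#with ([\w./]+)").unwrap();
        rule.captures(directive)
            .map(|_| rule.replace(directive, "cat $1").to_string())
    }

    fn main() {
        assert_eq!(
            build_command("#with mathlib.mlc"),
            Some(String::from("cat mathlib.mlc"))
        );
    }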
@@ -28,7 +29,7 @@ operands = [":=", "->", "<=", ">=", "<", ">", "!", "+", "-", "/", "*", "(", ")",
 terminator = [";"]

 [semantics]
-keywords = ["if", "then", "else", "end", "with"]
+keywords = ["if", "then", "else", "end"]

 [constants]
 number = "(?:0b[01]+|0x[0-9a-fA-F]+|0[0-7]+|[1-9][0-9]*)"
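Note: `with` drops out of the keyword list because `#with` inclusion is now resolved entirely in the preprocessor (see the `with` rule above), so the semantic stage no longer needs to treat it as a keyword.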
@@ -0,0 +1,4 @@
+Sigma -> Array := {0, 1, 2};
+N -> Array := {3};
+P -> Array := {3 -> 012};
+S -> Number := 3;
@@ -6,15 +6,24 @@ mod tokenizer;
 use tokenizer::*;

 fn main() {
+    // Preprocessor
     let sample_code: String = std::fs::read_to_string("example.mlc").unwrap();
     let mut example_tokenizer: Tokenizer = Tokenizer::new();
     let mut meta_rules: crate::preprocessor::MetaRules =
         crate::preprocessor::MetaRules::new("./language.toml");
     let processed_sample_code: String = meta_rules.process(sample_code.to_owned());

+    // Tokenizing
     example_tokenizer.read_configuration_from_file("./language.toml");
     example_tokenizer.eat(processed_sample_code.as_str());
     example_tokenizer.identify_tokens();
+    // Insert meta tokens into token list
+    let mut token_index: usize = 0;
+    for meta_token in meta_rules.special_tokens.iter() {
+        println!("Token: {:?}", meta_token.0);
+    }
+
+    // Semantic analysis
     let mut example_identifier: identification::Identifier =
         identification::Identifier::new(example_tokenizer.tokens);
     example_identifier.load_criteria_from_configuration(example_tokenizer.configuration);
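Note: the new loop only prints the recorded ids so far, and `token_index` is declared but not yet used. A sketch of what the eventual swap-back could look like, assuming the tokenizer's `token_list: Vec<String>` holds the raw token strings (as the tests below suggest); this is not part of the commit:

    // Replace each placeholder id in the token list with its original token text.
    for (id, token) in meta_rules.special_tokens.iter() {
        for slot in example_tokenizer.token_list.iter_mut() {
            if *slot == *id {
                *slot = token.token.clone();
            }
        }
    }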
@@ -1,5 +1,5 @@
 use crate::tokenizer::Token;
-use regex::{Captures, Regex};
+use regex::{Captures, Match, Regex};
 use toml::{Table, Value};

 // MetaRules
@@ -8,7 +8,7 @@ pub struct MetaRules {
     replacement_rules: Vec<(String, (String, String))>,
     interpolation_rules: Vec<(String, (String, String))>,
     token_rules: Vec<(String, String)>,
-    special_tokens: Vec<Token>,
+    pub special_tokens: Vec<(String, Token)>,
 }

 // Implementation of MetaRules
@@ -24,7 +24,7 @@ impl MetaRules {
         let mut replacements: Vec<(String, (String, String))> = vec![];
         let mut interpolation: Vec<(String, (String, String))> = vec![];
         let mut meta_token_rules: Vec<(String, String)> = vec![];
-        let meta_tokens: Vec<Token> = vec![];
+        let meta_tokens: Vec<(String, Token)> = vec![];
         let configuration = gtoml::parse(configuration_content.as_str())
             .expect("[ERROR] TOML invalid in preprocessor!");
         let configuration_unpacked: Table = Table::try_from(configuration).unwrap();
@@ -125,22 +125,15 @@ impl MetaRules {
             println!("[INFO] Applying rule {}", rule.0);
             let base_pattern: Regex = Regex::new((rule.1 .0).as_str()).unwrap();
             let processed_code_replacement = processed_code.clone();
-            let parameter = if &base_pattern
-                .captures(processed_code_replacement.as_str())
-                .unwrap()
-                .len()
-                > 0
-            {
-                &base_pattern
-                    .captures(processed_code_replacement.as_str())
-                    .unwrap()[0]
-            } else {
-                &base_pattern
-                    .captures(processed_code_replacement.as_str())
-                    .unwrap()
+            let captures: Option<Captures> =
+                base_pattern.captures(processed_code_replacement.as_str());
+            let directive: String;
+            match captures {
+                Some(n) => directive = n.get(0).map_or("", |m| m.as_str()).to_string(),
+                None => continue,
             };
-            let command: &str = &base_pattern.replace(parameter, rule.1 .1.as_str());
-            println!("{:?}", &command);
+            let command: &str = &base_pattern.replace(directive.as_str(), rule.1 .1.as_str());
             let subprocess = std::process::Command::new("/bin/bash")
                 .arg("-c")
                 .arg(String::from("echo \"$(") + command + ")\"")
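Note: matching on the `Option<Captures>` replaces two `.unwrap()` calls, so a rule that matches nothing now skips to the next iteration instead of panicking. The same logic in a slightly more compact shape, for comparison (a sketch, not the committed code):

    let directive: String = match base_pattern.captures(processed_code_replacement.as_str()) {
        Some(c) => c.get(0).map_or("", |m| m.as_str()).to_string(),
        None => continue,
    };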
@@ -154,6 +147,43 @@ impl MetaRules {
                 .to_string();
         }

+        for token_style in self.token_rules.iter() {
+            println!("[INFO] Searching meta tokens of style {}", token_style.0);
+
+            // Search all occurrences
+            let token_pattern: Regex =
+                Regex::new(token_style.1.as_str()).expect("Could not assign pattern.");
+            let match_list: Match;
+            match_list = match token_pattern.find(processed_code.as_str()) {
+                Some(n) => n,
+                None => continue,
+            };
+
+            // Create id for each occurrence
+            let meta_id: String = String::from("meta_token_")
+                + match_list.start().to_string().as_str()
+                + "__"
+                + match_list.end().to_string().as_str();
+
+            // Replace token by id
+            let meta_value: String = match_list.as_str().to_string();
+            let value_regex: Regex =
+                Regex::new(meta_value.as_str()).expect("Could not create pattern.");
+            processed_code = value_regex
+                .replace(processed_code.as_str(), meta_id.as_str())
+                .to_string();
+            println!("Replace {} with {}.", meta_value, meta_id);
+
+            // Save id and token
+            self.special_tokens.push((
+                meta_id,
+                Token {
+                    token: meta_value,
+                    token_type: crate::TokenType::IDENTIFIER,
+                },
+            ));
+        }
+
         return processed_code;
     }
 }
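Note: two caveats in this loop. `find` returns only the first occurrence of each token style, so repeated literals would need a `find_iter` pass; and the matched text is fed back into `Regex::new` verbatim, so a literal containing metacharacters (e.g. "a*b") builds the wrong pattern. Escaping the value first would be safer; a one-line sketch using `regex::escape`:

    let value_regex: Regex =
        Regex::new(regex::escape(meta_value.as_str()).as_str()).expect("Could not create pattern.");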
@@ -1,9 +1,61 @@
 #[cfg(test)]
 mod tests {
-    use super::*;
+    // Preprocessor
+    #[test]
+    fn test_replacements() {
+        let mut ruleset: crate::preprocessor::MetaRules =
+            crate::preprocessor::MetaRules::new("./language.toml");
+        let sut: String = ruleset.process(String::from("-- Comment to remove"));
+        let verify: String = String::from("");
+        let case_comment_at_end: String =
+            ruleset.process(String::from("This -- comment is not removed."));
+        let case_comment_at_end_verify: String = String::from("This -- comment is not removed.");
+        assert_eq!(sut, verify);
+        assert_eq!(case_comment_at_end, case_comment_at_end_verify);
+    }

     #[test]
-    fn dummy_test() {
-        assert_eq!(2, 2);
+    fn test_interpolation() {
+        let mut ruleset: crate::preprocessor::MetaRules =
+            crate::preprocessor::MetaRules::new("./language.toml");
+        let run_with_interpolation_test: String = ruleset.process(String::from("#test"));
+        let interpolation_verification: String = std::fs::read_to_string("./mathlib.mlc").unwrap();
+
+        assert_eq!(run_with_interpolation_test, interpolation_verification);
+    }
+
+    #[test]
+    fn test_meta_token() {
+        let mut ruleset: crate::preprocessor::MetaRules =
+            crate::preprocessor::MetaRules::new("./language.toml");
+        let meta_token_test_string: String = ruleset.process(String::from("\"sample\""));
+        let meta_token_sample_string: String = String::from("\"sample\"");
+        let meta_token_verify: Vec<crate::tokenizer::Token> = vec![crate::tokenizer::Token {
+            token: meta_token_sample_string,
+            token_type: crate::tokenizer::TokenType::IDENTIFIER,
+        }];
+        assert_eq!(meta_token_verify.len(), ruleset.special_tokens.len());
+        assert_eq!(
+            meta_token_verify[0].token,
+            ruleset.special_tokens[0].1.token
+        );
+        assert_eq!(meta_token_test_string, "meta_token_0__8");
+    }
+
+    // Tokenizer
+    #[test]
+    fn test_eat() {
+        let mut sample: crate::tokenizer::Tokenizer = crate::tokenizer::Tokenizer::new();
+        sample.read_configuration_from_file("./language.toml");
+        sample.eat("faculty : Natural n := if n = 0 then 1 else n * faculty (n - 1);");
+
+        assert_eq!(
+            sample.token_list,
+            vec![
+                "faculty", ":", "Natural", "n", ":=", "if", "n", "=", "0", "then", "1", "else",
+                "n", "*", "faculty", "(", "n", "-", "1", ")", ";"
+            ]
+        )
+    }
 }
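Note on the expected id in `test_meta_token`: the string-literal rule matches the whole input `"sample"` including the quotes, which spans bytes 0 through 8, hence `meta_token_0__8`:

    // "\"sample\"" is 8 bytes long: two quotes plus the six letters of sample.
    assert_eq!("\"sample\"".len(), 8);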
@@ -212,6 +212,15 @@ impl Tokenizer {
             }
         }
         self.token_list.append(&mut new_tokens);
+
+        // Clean up token list
+        let mut cleaned_token_list: Vec<String> = vec![];
+        for token in self.token_list.iter() {
+            if token.as_str() != "" {
+                cleaned_token_list.push(token.to_string());
+            }
+        }
+        self.token_list = cleaned_token_list;
     }

     // @name identify_tokens
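Note: the cleanup pass filters empty strings out of the token list. `Vec::retain` would do the same in place without the second allocation (a behavior-preserving sketch):

    // Equivalent to the loop above: drop empty tokens in place.
    self.token_list.retain(|token| !token.is_empty());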