diff --git a/example.mlc b/example.mlc
index e0c3385..f1776a4 100644
--- a/example.mlc
+++ b/example.mlc
@@ -1,3 +1,4 @@
+#with mathlib.mlc
 variable:=-3; c := (a+b- 3) * 23 + variable; d := c - a;Natural : Number (n) := {n >= 0};faculty : Natural (n) -> Natural := if n = 0 then 1 else faculty (n-1) * n end;
 String Natural (n) := {Character * n};hello_word -> String := "Hello World!";
 first_letter -> Character := 'a';
diff --git a/language.toml b/language.toml
index 43c5085..559e2d5 100644
--- a/language.toml
+++ b/language.toml
@@ -11,6 +11,7 @@ comments = ["^--.*", ""]
 with = ["^#with ([\\w./]+)", "cat $1"]
 date = ["#date_now", "date"]
 user = ["#user", "user"]
+test = ["#test", "cat ./mathlib.mlc"]
 
 # Describes tokens to be replaced by identifiers and then later swapped back in after the tokenizer.
 # All special tokens are treated as constants
@@ -28,7 +29,7 @@ operands = [":=", "->", "<=", ">=", "<", ">", "!", "+", "-", "/", "*", "(", ")",
 terminator = [";"]
 
 [semantics]
-keywords = ["if", "then", "else", "end", "with"]
+keywords = ["if", "then", "else", "end"]
 
 [constants]
 number = "(?:0b[01]+|0x[0-9a-fA-F]+|0[0-7]+|[1-9][0-9]*)"
diff --git a/mathlib.mlc b/mathlib.mlc
new file mode 100644
index 0000000..0ff0e56
--- /dev/null
+++ b/mathlib.mlc
@@ -0,0 +1,4 @@
+Sigma -> Array := {0, 1, 2};
+N -> Array := {3};
+P -> Array := {3 -> 012};
+S -> Number := 3;
diff --git a/src/main.rs b/src/main.rs
index a9fa082..20c7894 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -6,15 +6,24 @@ mod tokenizer;
 use tokenizer::*;
 fn main() {
+    // Preprocessor
     let sample_code: String = std::fs::read_to_string("example.mlc").unwrap();
     let mut example_tokenizer: Tokenizer = Tokenizer::new();
     let mut meta_rules: crate::preprocessor::MetaRules =
         crate::preprocessor::MetaRules::new("./language.toml");
     let processed_sample_code: String = meta_rules.process(sample_code.to_owned());
+
+    // Tokenizing
     example_tokenizer.read_configuration_from_file("./language.toml");
     example_tokenizer.eat(processed_sample_code.as_str());
     example_tokenizer.identify_tokens();
 
+    // Insert meta tokens into token list
+    let mut token_index: usize = 0;
+    for meta_token in meta_rules.special_tokens.iter() {
+        println!("Token: {:?}", meta_token.0);
+    }
+
     // Semantic analysis
     let mut example_identifier: identification::Identifier =
         identification::Identifier::new(example_tokenizer.tokens);
     example_identifier.load_criteria_from_configuration(example_tokenizer.configuration);
diff --git a/src/preprocessor.rs b/src/preprocessor.rs
index e6644fc..e8cbdc6 100644
--- a/src/preprocessor.rs
+++ b/src/preprocessor.rs
@@ -1,5 +1,5 @@
 use crate::tokenizer::Token;
-use regex::{Captures, Regex};
+use regex::{Captures, Match, Regex};
 use toml::{Table, Value};
 
 // MetaRules
@@ -8,7 +8,7 @@ pub struct MetaRules {
     replacement_rules: Vec<(String, (String, String))>,
     interpolation_rules: Vec<(String, (String, String))>,
     token_rules: Vec<(String, String)>,
-    special_tokens: Vec<Token>,
+    pub special_tokens: Vec<(String, Token)>,
 }
 
 // Implementation of MetaRules
@@ -24,7 +24,7 @@ impl MetaRules {
         let mut replacements: Vec<(String, (String, String))> = vec![];
         let mut interpolation: Vec<(String, (String, String))> = vec![];
         let mut meta_token_rules: Vec<(String, String)> = vec![];
-        let meta_tokens: Vec<Token> = vec![];
+        let meta_tokens: Vec<(String, Token)> = vec![];
         let configuration = gtoml::parse(configuration_content.as_str())
             .expect("[ERROR] TOML invalid in preprocessor!");
         let configuration_unpacked: Table = Table::try_from(configuration).unwrap();
@@ -125,22 +125,15 @@ impl MetaRules {
             println!("[INFO] Applying rule {}", rule.0);
             let base_pattern: Regex = Regex::new((rule.1 .0).as_str()).unwrap();
             let processed_code_replacement = processed_code.clone();
-            let parameter = if &base_pattern
-                .captures(processed_code_replacement.as_str())
-                .unwrap()
-                .len()
-                > 0
-            {
-                &base_pattern
-                    .captures(processed_code_replacement.as_str())
-                    .unwrap()[0]
-            } else {
-                &base_pattern
-                    .captures(processed_code_replacement.as_str())
-                    .unwrap()
+
+            let captures: Option<Captures> =
+                base_pattern.captures(processed_code_replacement.as_str());
+            let directive: String;
+            match captures {
+                Some(n) => directive = n.get(0).map_or("", |m| m.as_str()).to_string(),
+                None => continue,
             };
-            let command: &str = &base_pattern.replace(parameter, rule.1 .1.as_str());
-            println!("{:?}", &command);
+            let command: &str = &base_pattern.replace(directive.as_str(), rule.1 .1.as_str());
             let subprocess = std::process::Command::new("/bin/bash")
                 .arg("-c")
                 .arg(String::from("echo \"$(") + command + ")\"")
@@ -154,6 +147,43 @@ impl MetaRules {
                 .to_string();
         }
 
+        for token_style in self.token_rules.iter() {
+            println!("[INFO] Searching meta tokens of style {}", token_style.0);
+
+            // Find the first occurrence of this token style
+            let token_pattern: Regex =
+                Regex::new(token_style.1.as_str()).expect("Could not assign pattern.");
+            let match_list: Match;
+            match_list = match token_pattern.find(processed_code.as_str()) {
+                Some(n) => n,
+                None => continue,
+            };
+
+            // Create an id for the occurrence
+            let meta_id: String = String::from("meta_token_")
+                + match_list.start().to_string().as_str()
+                + "__"
+                + match_list.end().to_string().as_str();
+
+            // Replace the token by its id
+            let meta_value: String = match_list.as_str().to_string();
+            let value_regex: Regex =
+                Regex::new(meta_value.as_str()).expect("Could not create pattern.");
+            processed_code = value_regex
+                .replace(processed_code.as_str(), meta_id.as_str())
+                .to_string();
+            println!("Replace {} with {}.", meta_value, meta_id);
+
+            // Save id and token
+            self.special_tokens.push((
+                meta_id,
+                Token {
+                    token: meta_value,
+                    token_type: crate::tokenizer::TokenType::IDENTIFIER,
+                },
+            ));
+        }
+
         return processed_code;
     }
 }
diff --git a/src/testcases.rs b/src/testcases.rs
index 092d231..0986760 100644
--- a/src/testcases.rs
+++ b/src/testcases.rs
@@ -1,9 +1,61 @@
 #[cfg(test)]
 mod tests {
-    use super::*;
+
+    // Preprocessor
+    #[test]
+    fn test_replacements() {
+        let mut ruleset: crate::preprocessor::MetaRules =
+            crate::preprocessor::MetaRules::new("./language.toml");
+        let sut: String = ruleset.process(String::from("-- Comment to remove"));
+        let verify: String = String::from("");
+        let case_comment_at_end: String =
+            ruleset.process(String::from("This -- comment is not removed."));
+        let case_comment_at_end_verify: String = String::from("This -- comment is not removed.");
+        assert_eq!(sut, verify);
+        assert_eq!(case_comment_at_end, case_comment_at_end_verify);
+    }
 
     #[test]
-    fn dummy_test() {
-        assert_eq!(2, 2);
+    fn test_interpolation() {
+        let mut ruleset: crate::preprocessor::MetaRules =
+            crate::preprocessor::MetaRules::new("./language.toml");
+        let run_with_interpolation_test: String = ruleset.process(String::from("#test"));
+        let interpolation_verification: String = std::fs::read_to_string("./mathlib.mlc").unwrap();
+
+        assert_eq!(run_with_interpolation_test, interpolation_verification);
+    }
+
+    #[test]
+    fn test_meta_token() {
+        let mut ruleset: crate::preprocessor::MetaRules =
+            crate::preprocessor::MetaRules::new("./language.toml");
+        let meta_token_test_string: String = ruleset.process(String::from("\"sample\""));
+        let meta_token_sample_string: String = String::from("\"sample\"");
+        let meta_token_verify: Vec<crate::tokenizer::Token> = vec![crate::tokenizer::Token {
+            token: meta_token_sample_string,
+            token_type: crate::tokenizer::TokenType::IDENTIFIER,
+        }];
+        assert_eq!(meta_token_verify.len(), ruleset.special_tokens.len());
+        assert_eq!(
+            meta_token_verify[0].token,
+            ruleset.special_tokens[0].1.token
+        );
+        assert_eq!(meta_token_test_string, "meta_token_0__8");
+    }
+
+    // Tokenizer
+    #[test]
+    fn test_eat() {
+        let mut sample: crate::tokenizer::Tokenizer = crate::tokenizer::Tokenizer::new();
+        sample.read_configuration_from_file("./language.toml");
+        sample.eat("faculty : Natural n := if n = 0 then 1 else n * faculty (n - 1);");
+
+        assert_eq!(
+            sample.token_list,
+            vec![
+                "faculty", ":", "Natural", "n", ":=", "if", "n", "=", "0", "then", "1", "else",
+                "n", "*", "faculty", "(", "n", "-", "1", ")", ";"
+            ]
+        )
+    }
 }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 598e8be..db28f33 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -212,6 +212,15 @@ impl Tokenizer {
             }
         }
         self.token_list.append(&mut new_tokens);
+
+        // Clean up token list
+        let mut cleaned_token_list: Vec<String> = vec![];
+        for token in self.token_list.iter() {
+            if token.as_str() != "" {
+                cleaned_token_list.push(token.to_string());
+            }
+        }
+        self.token_list = cleaned_token_list;
     }
 
     // @name identify_tokens
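Note: the "Insert meta tokens into token list" loop added to src/main.rs only prints the collected ids so far; swapping the ids back into the token stream is still open. A minimal, self-contained sketch of that step, assuming the (String, Token) pairs of MetaRules::special_tokens and the plain string token list from this patch; reinsert_meta_tokens and the pared-down Token struct are hypothetical stand-ins, not part of the commit:

// Hypothetical sketch, not part of this patch: map ids such as
// "meta_token_0__8" back to the original tokens after tokenizing.

#[derive(Debug, Clone, PartialEq)]
struct Token {
    token: String, // pared-down stand-in for tokenizer::Token
}

// `special_tokens` pairs each generated id with the token text it replaced,
// mirroring MetaRules::special_tokens from this patch.
fn reinsert_meta_tokens(token_list: &mut [String], special_tokens: &[(String, Token)]) {
    for (meta_id, original) in special_tokens.iter() {
        for slot in token_list.iter_mut() {
            if *slot == *meta_id {
                *slot = original.token.clone();
            }
        }
    }
}

fn main() {
    // The id that test_meta_token expects for the literal "sample".
    let special_tokens = vec![(
        String::from("meta_token_0__8"),
        Token { token: String::from("\"sample\"") },
    )];
    let mut token_list = vec![String::from("meta_token_0__8"), String::from(";")];
    reinsert_meta_tokens(&mut token_list, &special_tokens);
    assert_eq!(token_list, vec![String::from("\"sample\""), String::from(";")]);
}

With a step like this in place, the currently unused token_index counter in main.rs could drive an in-place splice instead of the println debug output.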