From f67c79c65bca9c1a29741dea7c0d273d9d2cd4e5 Mon Sep 17 00:00:00 2001
From: yannickreiss
Date: Mon, 25 Aug 2025 12:09:54 +0200
Subject: [PATCH] Add testbench

---
 src/testcases.rs |  8 ++---
 testspecs.toml   | 79 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 4 deletions(-)
 create mode 100644 testspecs.toml

diff --git a/src/testcases.rs b/src/testcases.rs
index 0986760..2f76581 100644
--- a/src/testcases.rs
+++ b/src/testcases.rs
@@ -5,7 +5,7 @@ mod tests {
     #[test]
     fn test_replacements() {
         let mut ruleset: crate::preprocessor::MetaRules =
-            crate::preprocessor::MetaRules::new("./language.toml");
+            crate::preprocessor::MetaRules::new("./testspecs.toml");
         let sut: String = ruleset.process(String::from("-- Comment to remove"));
         let verify: String = String::from("");
         let case_comment_at_end: String =
@@ -18,7 +18,7 @@ mod tests {
     #[test]
     fn test_interpolation() {
         let mut ruleset: crate::preprocessor::MetaRules =
-            crate::preprocessor::MetaRules::new("./language.toml");
+            crate::preprocessor::MetaRules::new("./testspecs.toml");
         let run_with_interpolation_test: String = ruleset.process(String::from("#test"));
         let interpolation_verification: String =
             std::fs::read_to_string("./mathlib.mlc").unwrap();
@@ -28,7 +28,7 @@ mod tests {
     #[test]
     fn test_meta_token() {
         let mut ruleset: crate::preprocessor::MetaRules =
-            crate::preprocessor::MetaRules::new("./language.toml");
+            crate::preprocessor::MetaRules::new("./testspecs.toml");
         let meta_token_test_string: String = ruleset.process(String::from("\"sample\""));
         let meta_token_sample_string: String = String::from("\"sample\"");
         let meta_token_verify: Vec<crate::tokenizer::Token> = vec![crate::tokenizer::Token {
@@ -47,7 +47,7 @@ mod tests {
     #[test]
     fn test_eat() {
         let mut sample: crate::tokenizer::Tokenizer = crate::tokenizer::Tokenizer::new();
-        sample.read_configuration_from_file("./language.toml");
+        sample.read_configuration_from_file("./testspecs.toml");
         sample.eat("faculty : Natural n := if n = 0 then 1 else n * faculty (n - 1);");
 
         assert_eq!(
diff --git a/testspecs.toml b/testspecs.toml
new file mode 100644
index 0000000..559e2d5
--- /dev/null
+++ b/testspecs.toml
@@ -0,0 +1,79 @@
+# Meta rules are separate rules with priority over all other rules.
+# They can be compared to preprocessor directives, but are more powerful.
+
+# Pattern matching in preprocessor style; runs at the highest priority, before anything else.
+[meta.replacements]
+comments = ["^--.*", ""]
+
+# Interpolation with a shell replaces the meta pattern with the interpolation result.
+# Passing arguments is supported through groups and # in the shell command.
+[meta.interpolation]
+with = ["^#with ([\\w./]+)", "cat $1"]
+date = ["#date_now", "date"]
+user = ["#user", "user"]
+test = ["#test", "cat ./mathlib.mlc"]
+
+# Describes tokens that are replaced by identifiers and then swapped back in after the tokenizer.
+# All special tokens are treated as constants.
+[meta.token]
+string_constant = "\".*?\""
+char_constant = "'.'"
+
+# Every key below is used as a type in an enum to sort the tokens.
+# -> Replacement happens in order
+# -> Any other run of symbols is saved as a value
+# -> Those use the default type "identifier"
+[token]
+separator = [" ", ",", "\n"]
+operands = [":=", "->", "<=", ">=", "<", ">", "!", "+", "-", "/", "*", "(", ")", "[", "]", "{", "}", "=", "?", ":"]
+terminator = [";"]
+
+[semantics]
+keywords = ["if", "then", "else", "end"]
+
+[constants]
+number = "(?:0b[01]+|0x[0-9a-fA-F]+|0[0-7]+|[1-9][0-9]*)"
+character = "'.'"
+logic = "(true|false)"
+
+[types]
+Number = "number"
+Character = "character"
+Type = ""
+Array = "{character * number}"
+Logic = "logic"
+
+# List of rules.
+# Rules show up in traces.
+# Use better names than rule_1, rule_2, ...
+# The compiler will run through all rules, trying to match exactly one.
+# Uses the following generic types:
+# - OPERAND
+# - IDENTIFIER
+# - KEYWORD
+# - TERMINATOR
+# - OTHER (use this type for ambiguous parts; same as a lazy .+ in regular expressions)
+# Custom types can be defined by creating a rule with the same name.
+# IMPORTANT: Rules always have top priority and can overwrite other types.
+# Named placeholders: the character # is reserved for named placeholders; they are only valid inside a rule.
+[syntax]
+definition = "IDENTIFIER#1 -> IDENTIFIER#2 := OTHER#3 TERMINATOR"
+definition_with_parameter = "IDENTIFIER#1 : parameter#2 -> IDENTIFIER#3 := OTHER#4 TERMINATOR"
+recursion = "#basename OTHER := OTHER #basename OTHER TERMINATOR"
+replace_predef = [ "IDENTIFIER#1 -> OTHER := OTHER#2 TERMINATOR OTHER IDENTIFIER#1", "#1 -> OTHER := #2 TERMINATOR OTHER (#2)" ]
+replace_postdef = [ "IDENTIFIER#1 OTHER TERMINATOR IDENTIFIER#1 -> OTHER := OTHER#2 TERMINATOR", "#2 OTHER TERMINATOR #1 -> OTHER := #2 TERMINATOR" ]
+unfold_parameter = [ ": OTHER IDENTIFIER#1 ( IDENTIFIER#2 OTHER#3 ) OTHER ->", ": OTHER #1 #2 #1 ( #3 ) OTHER ->" ]
+unfold_parameter_remove_brackets = [ ": OTHER IDENTIFIER ( ) OTHER ->", ": OTHER OTHER ->" ]
+parameter = ": OTHER ->"
+
+# The following sections are used to build different output formats.
+# [interpreter] refers to the built-in interpreter, which uses a minimal subset of C syntax.
+# The name of each section is only used to specify the actual output.
+[clang]
+definition = "#2 #1 () {return (#3);}"
+Logic = "int"
+Number = "long int"
+Character = "char"
+Type = "struct"
+
+[interpreter]
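
For orientation, a minimal sketch (not part of the patch) of how the [meta.replacements]
and [meta.interpolation] rules in testspecs.toml are driven through the preprocessor API
already used above; the test name sketch_testspecs_rules is illustrative, while the
constructor, the process() calls, and the expected values mirror test_replacements and
test_interpolation from src/testcases.rs:

    #[test]
    fn sketch_testspecs_rules() {
        // Load the meta rules from the new test specification (illustrative sketch).
        let mut ruleset: crate::preprocessor::MetaRules =
            crate::preprocessor::MetaRules::new("./testspecs.toml");

        // [meta.replacements]: input matching "^--.*" is replaced by "",
        // so a comment-only line is stripped to the empty string.
        let stripped: String = ruleset.process(String::from("-- Comment to remove"));
        assert_eq!(stripped, String::from(""));

        // [meta.interpolation]: "#test" runs `cat ./mathlib.mlc` and splices the
        // command output into the source text, as test_interpolation verifies.
        let expanded: String = ruleset.process(String::from("#test"));
        assert_eq!(expanded, std::fs::read_to_string("./mathlib.mlc").unwrap());
    }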