From f67c79c65bca9c1a29741dea7c0d273d9d2cd4e5 Mon Sep 17 00:00:00 2001
From: yannickreiss
Date: Mon, 25 Aug 2025 12:09:54 +0200
Subject: [PATCH] Add testbench

---
 src/testcases.rs |  8 ++---
 testspecs.toml   | 79 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 4 deletions(-)
 create mode 100644 testspecs.toml

diff --git a/src/testcases.rs b/src/testcases.rs
index 0986760..2f76581 100644
--- a/src/testcases.rs
+++ b/src/testcases.rs
@@ -5,7 +5,7 @@ mod tests {
     #[test]
     fn test_replacements() {
         let mut ruleset: crate::preprocessor::MetaRules =
-            crate::preprocessor::MetaRules::new("./language.toml");
+            crate::preprocessor::MetaRules::new("./testspecs.toml");
         let sut: String = ruleset.process(String::from("-- Comment to remove"));
         let verify: String = String::from("");
         let case_comment_at_end: String =
@@ -18,7 +18,7 @@ mod tests {
     #[test]
     fn test_interpolation() {
         let mut ruleset: crate::preprocessor::MetaRules =
-            crate::preprocessor::MetaRules::new("./language.toml");
+            crate::preprocessor::MetaRules::new("./testspecs.toml");
         let run_with_interpolation_test: String = ruleset.process(String::from("#test"));
         let interpolation_verification: String =
             std::fs::read_to_string("./mathlib.mlc").unwrap();
@@ -28,7 +28,7 @@ mod tests {
     #[test]
     fn test_meta_token() {
         let mut ruleset: crate::preprocessor::MetaRules =
-            crate::preprocessor::MetaRules::new("./language.toml");
+            crate::preprocessor::MetaRules::new("./testspecs.toml");
         let meta_token_test_string: String = ruleset.process(String::from("\"sample\""));
         let meta_token_sample_string: String = String::from("\"sample\"");
         let meta_token_verify: Vec<crate::tokenizer::Token> = vec![crate::tokenizer::Token {
@@ -47,7 +47,7 @@ mod tests {
     #[test]
     fn test_eat() {
         let mut sample: crate::tokenizer::Tokenizer = crate::tokenizer::Tokenizer::new();
-        sample.read_configuration_from_file("./language.toml");
+        sample.read_configuration_from_file("./testspecs.toml");
         sample.eat("faculty : Natural n := if n = 0 then 1 else n * faculty (n - 1);");
 
         assert_eq!(
diff --git a/testspecs.toml b/testspecs.toml
new file mode 100644
index 0000000..559e2d5
--- /dev/null
+++ b/testspecs.toml
@@ -0,0 +1,79 @@
+# Meta rules are separate rules with priority over all other rules.
+# They can be compared to preprocessor directives, but are more powerful.
+
+# Pattern matching in preprocessor style; runs at the highest priority, before anything else.
+[meta.replacements]
+comments = ["^--.*", ""]
+
+# Interpolation with a shell replaces the meta pattern with the interpolation result.
+# Passing arguments is supported through groups and # in the shell command.
+[meta.interpolation]
+with = ["^#with ([\\w./]+)", "cat $1"]
+date = ["#date_now", "date"]
+user = ["#user", "user"]
+test = ["#test", "cat ./mathlib.mlc"]
+
+# Describes tokens that are replaced by identifiers and then swapped back in after the tokenizer.
+# All special tokens are treated as constants.
+[meta.token]
+string_constant = "\".*?\""
+char_constant = "'.'"
+
+# Every key below is used as a type in an enum to sort the tokens.
+# -> Replacement happens in order
+# -> Any other run of symbols is saved as a value
+# -> Those use the default type "identifier"
+[token]
+separator = [" ", ",", "\n"]
+operands = [":=", "->", "<=", ">=", "<", ">", "!", "+", "-", "/", "*", "(", ")", "[", "]", "{", "}", "=", "?", ":"]
+terminator = [";"]
+
+[semantics]
+keywords = ["if", "then", "else", "end"]
+
+[constants]
+number = "(?:0b[01]+|0x[0-9a-fA-F]+|0[0-7]+|[1-9][0-9]*)"
+character = "'.'"
+logic = "(true|false)"
+
+[types]
+Number = "number"
+Character = "character"
+Type = ""
+Array = "{character * number}"
+Logic = "logic"
+
+# List of rules.
+# Rules show up in traces.
+# Use better names than rule_1, rule_2, ...
+# The compiler will run through all rules, trying to match exactly one.
+# Uses the following generic types:
+# - OPERAND
+# - IDENTIFIER
+# - KEYWORD
+# - TERMINATOR
+# - OTHER (use this type for ambiguous parts; same as a lazy .+ in regular expressions)
+# Custom types can be defined by creating a rule with the same name.
+# IMPORTANT: Rules always have top priority and can overwrite other types.
+# Named placeholders: the character # is reserved for named placeholders; they are only valid inside a rule.
+[syntax]
+definition = "IDENTIFIER#1 -> IDENTIFIER#2 := OTHER#3 TERMINATOR"
+definition_with_parameter = "IDENTIFIER#1 : parameter#2 -> IDENTIFIER#3 := OTHER#4 TERMINATOR"
+recursion = "#basename OTHER := OTHER #basename OTHER TERMINATOR"
+replace_predef = [ "IDENTIFIER#1 -> OTHER := OTHER#2 TERMINATOR OTHER IDENTIFIER#1", "#1 -> OTHER := #2 TERMINATOR OTHER (#2)" ]
+replace_postdef = [ "IDENTIFIER#1 OTHER TERMINATOR IDENTIFIER#1 -> OTHER := OTHER#2 TERMINATOR", "#2 OTHER TERMINATOR #1 -> OTHER := #2 TERMINATOR" ]
+unfold_parameter = [ ": OTHER IDENTIFIER#1 ( IDENTIFIER#2 OTHER#3 ) OTHER ->", ": OTHER #1 #2 #1 ( #3 ) OTHER ->" ]
+unfold_parameter_remove_brackets = [ ": OTHER IDENTIFIER ( ) OTHER ->", ": OTHER OTHER ->" ]
+parameter = ": OTHER ->"
+
+# The following sections are used to build different output formats.
+# [interpreter] refers to the built-in interpreter, which uses a minimal subset of C syntax.
+# The name of each section is only used to specify the actual output.
+[clang]
+definition = "#2 #1 () {return (#3);}"
+Logic = "int"
+Number = "long int"
+Character = "char"
+Type = "struct"
+
+[interpreter]
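
For orientation, a minimal sketch (not part of the patch) of how the [meta.replacements]
and [meta.interpolation] rules in testspecs.toml are driven through the preprocessor API
already used above; the test name sketch_testspecs_rules is illustrative, while the
constructor, the process() calls, and the expected values mirror test_replacements and
test_interpolation from src/testcases.rs:

    #[test]
    fn sketch_testspecs_rules() {
        // Load the meta rules from the new test specification (illustrative sketch).
        let mut ruleset: crate::preprocessor::MetaRules =
            crate::preprocessor::MetaRules::new("./testspecs.toml");

        // [meta.replacements]: input matching "^--.*" is replaced by "",
        // so a comment-only line is stripped to the empty string.
        let stripped: String = ruleset.process(String::from("-- Comment to remove"));
        assert_eq!(stripped, String::from(""));

        // [meta.interpolation]: "#test" runs `cat ./mathlib.mlc` and splices the
        // command output into the source text, as test_interpolation verifies.
        let expanded: String = ruleset.process(String::from("#test"));
        assert_eq!(expanded, std::fs::read_to_string("./mathlib.mlc").unwrap());
    }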