From 42fa5affb53724f124ac1cec919c11efc2afbb72 Mon Sep 17 00:00:00 2001 From: yannickreiss Date: Mon, 25 Aug 2025 07:12:22 +0200 Subject: [PATCH] Reintroducing meta tokens --- example.mlc | 3 --- src/main.rs | 49 ++++++++++++++++++++++++++++++++++++++++++--- src/preprocessor.rs | 1 - src/tokenizer.rs | 19 +++++++++++++++++- 4 files changed, 64 insertions(+), 8 deletions(-) diff --git a/example.mlc b/example.mlc index f1776a4..d461ffa 100644 --- a/example.mlc +++ b/example.mlc @@ -1,6 +1,3 @@ -#with mathlib.mlc variable:=-3; c := (a+b- 3) * 23 + variable; d := c - a;Natural : Number (n) := {n >= 0};faculty : Natural (n) -> Natural := if n = 0 then 1 else faculty (n-1) * n end; String Natural (n) := {Character * n};hello_word -> String := "Hello World!"; first_letter -> Character := 'a'; -wrong -> Logic := false;date -> String := "#date_now"; -user -> String := "#user" diff --git a/src/main.rs b/src/main.rs index 20c7894..9e3d314 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,8 +6,51 @@ mod tokenizer; use tokenizer::*; fn main() { - // Preprocessor - let sample_code: String = std::fs::read_to_string("example.mlc").unwrap(); + // CL-Wrapper + let args: Vec = std::env::args().collect(); + + // Adjust to following principle: + // micro [-t ] [-l ] [] + // -t default: first found + // -l default: language.toml + // + // Either loads all source files or takes stdin input by piping code into the program + let mut raw_source_code: String = String::from(""); + for i in 1..args.len() { + raw_source_code = raw_source_code + + std::fs::read_to_string(args[i].clone()) + .expect("Source file not found!") + .as_str(); + } + + // Load language toml + let mut meta_rules: crate::preprocessor::MetaRules = + crate::preprocessor::MetaRules::new("./language.toml"); + let mut tokenizer_configuration: Tokenizer = Tokenizer::new(); + tokenizer_configuration.read_configuration_from_file("./language.toml"); + + // Run preprocessor + let preprocessed_source_code: String = meta_rules.process(raw_source_code); + + // Tokenizing + tokenizer_configuration.eat(preprocessed_source_code.as_str()); + tokenizer_configuration.identify_tokens(); + // Reintroducing meta_tokens + for meta_token in meta_rules.special_tokens.iter() { + // Go through all tokens + for i in 0..tokenizer_configuration.tokens.len() { + if meta_token.0 == tokenizer_configuration.tokens[i].token { + tokenizer_configuration.tokens[i] = meta_token.1.clone(); + break; + } + } + } + + // Syntax resolving + + // Apply translation + + /* let sample_code: String = std::fs::read_to_string("example.mlc").unwrap(); let mut example_tokenizer: Tokenizer = Tokenizer::new(); let mut meta_rules: crate::preprocessor::MetaRules = crate::preprocessor::MetaRules::new("./language.toml"); @@ -31,5 +74,5 @@ fn main() { for token in example_identifier.tokens.iter() { print!("{}", token.token); - } + } */ } diff --git a/src/preprocessor.rs b/src/preprocessor.rs index e8cbdc6..6c538e3 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -172,7 +172,6 @@ impl MetaRules { processed_code = value_regex .replace(processed_code.as_str(), meta_id.as_str()) .to_string(); - println!("Replace {} with {}.", meta_value, meta_id); // Safe id and token self.special_tokens.push(( diff --git a/src/tokenizer.rs b/src/tokenizer.rs index db28f33..5f7076b 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,7 +1,7 @@ use std::fs; use toml::{Table, Value}; -#[derive(PartialEq)] +#[derive(PartialEq, Debug)] pub enum TokenType { OPERAND, TERMINATOR, @@ -21,10 +21,27 @@ pub struct Tokenizer { // Token // This is a token with a token type. +#[derive(Debug)] pub struct Token { pub token: String, pub token_type: TokenType, } + +impl Clone for Token { + fn clone(&self) -> Token { + let token_type: TokenType = match self.token_type { + TokenType::OPERAND => TokenType::OPERAND, + TokenType::KEYWORD => TokenType::KEYWORD, + TokenType::TERMINATOR => TokenType::TERMINATOR, + TokenType::IDENTIFIER => TokenType::IDENTIFIER, + }; + Token { + token: self.token.clone(), + token_type: token_type, + } + } +} + // Implementation of Tokenizer // Functions associated with the tokenizer struct and module. impl Tokenizer {