use std::fs;
use toml::{Table, Value};

#[derive(PartialEq)]
pub enum TokenType {
    OPERAND,
    TERMINATOR,
    IDENTIFIER,
    KEYWORD,
}

// Tokenizer
// Tokenizer and underlying functions to turn code into tokens
pub struct Tokenizer {
    pub token_list: Vec<String>,
    // BUG:
    pub tokens: Vec<Token>,
    // Grammar options from toml file
    pub configuration: Table,
}

// Token
// This is a token with a token type.
pub struct Token {
    pub token: String,
    pub token_type: TokenType,
}

// Implementation of Tokenizer
// Functions associated with the tokenizer struct and module.
impl Tokenizer {
    // @name read_configuration_from_file
    // @return
    // @brief Try to read configuration from an external file
    // @param &mut self, configuration_filename: &str
    pub fn read_configuration_from_file(&mut self, configuration_filename: &str) {
        let configuration_string: String = fs::read_to_string(configuration_filename).expect(
            (String::from("Could not open configuration file at: ") + configuration_filename)
                .as_str(),
        );
        // Parse the configuration and convert it into a toml::Table
        let configuration = gtoml::parse(configuration_string.as_str()).expect("TOML invalid!");
        self.configuration = Table::try_from(configuration).unwrap();

        // Check for token section in config, panic if not present
        if !self.configuration.contains_key("token") {
            panic!("Token section is not present!");
        }
        // Check for semantics section in config, panic if not present
        if !self.configuration.contains_key("semantics") {
            panic!("Section semantics is not present!");
        }
    }

    // @name new
    // @return Tokenizer
    // @brief Create a new Tokenizer
    // @param
    pub fn new() -> Tokenizer {
        let empty_tokens: Vec<Token> = vec![];
        let empty_value: toml::map::Map<String, Value> = toml::map::Map::new();
        let empty_token_list: Vec<String> = vec![];
        Tokenizer {
            tokens: empty_tokens,
            token_list: empty_token_list,
            configuration: empty_value,
        }
    }

    // @name eat
    // @return
    // @brief Consumes a string and saves the tokens
    // @param line: &str
    pub fn eat(&mut self, line: &str) {
        // Get token vectors from configuration
        let token_table_value: &Value = self.configuration.get("token").unwrap();
        let token_table: Table = Table::try_from(token_table_value).unwrap();
        let mut tokens: Vec<String> = vec![line.to_string()];
        let mut new_tokens: Vec<String> = vec![];
        let mut token_buffer: String = String::from("");

        // Iterate over tokens in token table and split tokens.
        if token_table.contains_key("separator") {
            let separator: Vec<Value> = token_table
                .get_key_value("separator")
                .unwrap()
                .1
                .as_array()
                .unwrap()
                .clone();
            if separator.len() > 0 {
                for token in tokens.iter() {
                    let mut token_feed = token.clone();
                    while !token_feed.is_empty() {
                        let mut no_match: bool = true;
                        for sep in separator.iter() {
                            if token_feed.starts_with(sep.as_str().unwrap()) {
                                // Reset and add token
                                no_match = false;
                                if token_buffer.len() > 0 {
                                    new_tokens.push(token_buffer.clone());
                                    token_buffer = String::from("");
                                }
                                let new_feed: String =
                                    token_feed.split_off(sep.as_str().unwrap().len());
                                token_feed = new_feed;
                            }
                        }
                        if no_match {
                            let new_feed: String = token_feed.split_off(1);
                            token_buffer = token_buffer
                                + String::from(token_feed.chars().next().unwrap()).as_str();
                            token_feed = new_feed;
                        }
                    }
                    // empty token
                    new_tokens.push(token_buffer.clone());
                    token_buffer = String::from("");
                }
                // empty token
                new_tokens.push(token_buffer.clone());
                token_buffer = String::from("");
            }
        }
        tokens = new_tokens.clone();
        new_tokens = vec![];

        if token_table.contains_key("operands") {
            let operands: Vec<Value> = token_table
                .get_key_value("operands")
                .unwrap()
                .1
                .as_array()
                .unwrap()
                .clone();
            if operands.len() > 0 {
                for token in tokens.iter() {
                    let mut token_feed = token.clone();
                    while !token_feed.is_empty() {
                        let mut no_match: bool = true;
                        for op in operands.iter() {
                            if token_feed.starts_with(op.as_str().unwrap()) {
                                // Reset and add token
                                no_match = false;
                                if token_buffer.len() > 0 {
                                    new_tokens.push(token_buffer.clone());
                                }
                                token_buffer = String::from("");
                                new_tokens.push(op.as_str().unwrap().to_string());
                                let new_feed: String =
                                    token_feed.split_off(op.as_str().unwrap().len());
                                token_feed = new_feed;
                            }
                        }
                        if no_match {
                            let new_feed: String = token_feed.split_off(1);
                            token_buffer = token_buffer
                                + String::from(token_feed.chars().next().unwrap()).as_str();
                            token_feed = new_feed;
                        }
                    }
                    // empty token
                    new_tokens.push(token_buffer.clone());
                    token_buffer = String::from("");
                }
                // empty token
                new_tokens.push(token_buffer.clone());
                token_buffer = String::from("");
            }
        }
        tokens = new_tokens.clone();
        new_tokens = vec![];

        if token_table.contains_key("terminator") {
            let terminator: Vec<Value> = token_table
                .get_key_value("terminator")
                .unwrap()
                .1
                .as_array()
                .unwrap()
                .clone();
            if terminator.len() > 0 {
                for token in tokens.iter() {
                    let mut token_feed = token.clone();
                    while !token_feed.is_empty() {
                        let mut no_match: bool = true;
                        for term in terminator.iter() {
                            if token_feed.starts_with(term.as_str().unwrap()) {
                                // Reset and add token
                                no_match = false;
                                if token_buffer.len() > 0 {
                                    new_tokens.push(token_buffer.clone());
                                }
                                token_buffer = String::from("");
                                new_tokens.push(term.as_str().unwrap().to_string());
                                let new_feed: String =
                                    token_feed.split_off(term.as_str().unwrap().len());
                                token_feed = new_feed;
                            }
                        }
                        if no_match {
                            let new_feed: String = token_feed.split_off(1);
                            token_buffer = token_buffer
                                + String::from(token_feed.chars().next().unwrap()).as_str();
                            token_feed = new_feed;
                        }
                    }
                    // empty token as token ended
                    new_tokens.push(token_buffer.clone());
                    token_buffer = String::from("");
                }
                // empty token
                new_tokens.push(token_buffer.clone());
            }
        }
        self.token_list.append(&mut new_tokens);

        // Clean up token list
        let mut cleaned_token_list: Vec<String> = vec![];
        for token in self.token_list.iter() {
            if token.as_str() != "" {
                cleaned_token_list.push(token.to_string());
            }
        }
        self.token_list = cleaned_token_list;
    }

    // @name identify_tokens
    // @return
    // @brief Go through all tokens and try to identify them.
    // @param &mut self
    pub fn identify_tokens(&mut self) {
        // Go through token list
        let mut token_identities: Vec<Token> = vec![];
        let mut found_token: bool;
        let token_section: Table =
            Table::try_from(self.configuration.get("token").unwrap()).unwrap();
        let semantics_section: Table =
            Table::try_from(self.configuration.get("semantics").unwrap()).unwrap();
        for token in self.token_list.iter() {
            found_token = false;
            if token.as_str() == "" {
                continue;
            }
            // Check if token is an operand
            if token_section.contains_key("operands") {
                let operands: Vec<Value> = token_section
                    .get_key_value("operands")
                    .unwrap()
                    .1
                    .as_array()
                    .unwrap()
                    .clone();
                for operand in operands.iter() {
                    if operand.as_str().unwrap() == token.as_str() {
                        token_identities.push(Token {
                            token: token.clone(),
                            token_type: TokenType::OPERAND,
                        });
                        found_token = true;
                    }
                }
            }
            // Check if token is a terminator
            if token_section.contains_key("terminator") && !found_token {
                let terminator: Vec<Value> = token_section
                    .get_key_value("terminator")
                    .unwrap()
                    .1
                    .as_array()
                    .unwrap()
                    .clone();
                for term in terminator.iter() {
                    if term.as_str().unwrap() == token.as_str() {
                        token_identities.push(Token {
                            token: token.clone(),
                            token_type: TokenType::TERMINATOR,
                        });
                        found_token = true;
                    }
                }
            }
            // Check if token is a keyword
            if semantics_section.contains_key("keywords") && !found_token {
                let keywords: Vec<Value> = semantics_section
                    .get_key_value("keywords")
                    .unwrap()
                    .1
                    .as_array()
                    .unwrap()
                    .clone();
                for keyword in keywords.iter() {
                    if keyword.as_str().unwrap() == token.as_str() {
                        token_identities.push(Token {
                            token: token.clone(),
                            token_type: TokenType::KEYWORD,
                        });
                        found_token = true;
                    }
                }
            }
            // Anything that matched nothing above is treated as an identifier
            if !found_token {
                token_identities.push(Token {
                    token: token.clone(),
                    token_type: TokenType::IDENTIFIER,
                });
            }
        }
        self.tokens = token_identities;
    }
}
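
// A minimal usage sketch, assuming a hypothetical grammar description in the TOML
// layout this module expects ([token] with separator/operands/terminator arrays and
// [semantics] with a keywords array). The grammar strings below are illustrative,
// not part of the module; the configuration is injected directly through the public
// field instead of read_configuration_from_file, so no file access is needed.
#[cfg(test)]
mod tests {
    use super::*;

    // Hypothetical grammar: spaces separate tokens, "+" and "=" are operands,
    // ";" terminates a statement, and "let" is the only keyword.
    const EXAMPLE_GRAMMAR: &str = r#"
        [token]
        separator = [" "]
        operands = ["+", "="]
        terminator = [";"]

        [semantics]
        keywords = ["let"]
    "#;

    #[test]
    fn eat_and_identify_simple_line() {
        let mut tokenizer = Tokenizer::new();
        // Parse the example grammar with the toml crate and inject it directly.
        tokenizer.configuration = toml::from_str(EXAMPLE_GRAMMAR).unwrap();

        // Split a single source line into raw tokens.
        tokenizer.eat("let x = 1 + 2;");
        assert_eq!(
            tokenizer.token_list,
            vec!["let", "x", "=", "1", "+", "2", ";"]
        );

        // Classify the raw tokens against the grammar.
        tokenizer.identify_tokens();
        // TokenType only derives PartialEq (not Debug), so plain assert! is used.
        assert!(tokenizer.tokens[0].token_type == TokenType::KEYWORD);
        assert!(tokenizer.tokens[1].token_type == TokenType::IDENTIFIER);
        assert!(tokenizer.tokens[2].token_type == TokenType::OPERAND);
        assert!(tokenizer.tokens[6].token_type == TokenType::TERMINATOR);
    }
}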