# 2 engines: NFA/DFA RegEx-Art # FA -> Finite Automation class Regex: """ Can only support a-z """ def __init__(self): self.fsm = [] def _char_to_int(self, char): """ Returns 0 for a, 1 for b, 2 for c, ... 25 for z """ return ord(char) - 97 def create_state(self, default_value=0): return [default_value]*26 def compile(self, regex_string): self.fsm = [] # add state 0 as default failure state self.fsm.append(self.create_state()) _iter = iter(regex_string) for char in _iter: next_valid_state = len(self.fsm) + 1 if char == ".": state = self.create_state(next_valid_state) self.fsm.append(state) elif char == "[": state = self.create_state() while True: try: valid_char = next(_iter) if valid_char == "]": self.fsm.append(state) break else: state[self._char_to_int(valid_char)] = next_valid_state except StopIteration: print("Invalid Regex: No ] found") self.fsm = [] return else: state = self.create_state() state[self._char_to_int(char)] = next_valid_state self.fsm.append(state) def match(self, text): current_state = 1 for input_char in text: current_state = self.fsm[current_state][self._char_to_int(input_char)] if current_state == 0: return False return current_state == len(self.fsm) def __str__(self): n = len(self.fsm) result = " " + " ".join(str(x) for x in range(n)) + "\n" result += "----" + "--" * n + "\n" for number, row in enumerate(zip(*self.fsm), start=97): result += f"{chr(number)} | " + " ".join(str(x) for x in row) + "\n" return result __repr__ = __str__ r = Regex() r.compile("abc") print(r)