# example.py
#
# Example of a tokenizer

import re
from collections import namedtuple

# One named group per token class.  The group name is what ``m.lastgroup``
# reports for a match, so it doubles as the token type.
NAME = r'(?P<NAME>[a-zA-Z_][a-zA-Z_0-9]*)'
NUM = r'(?P<NUM>\d+)'
PLUS = r'(?P<PLUS>\+)'
TIMES = r'(?P<TIMES>\*)'
EQ = r'(?P<EQ>=)'
WS = r'(?P<WS>\s+)'

# Alternatives are tried left to right, so the order of this list decides
# which token class wins when more than one could match at a position.
master_pat = re.compile('|'.join([NAME, NUM, PLUS, TIMES, EQ, WS]))

Token = namedtuple('Token', ['type', 'value'])


def generate_tokens(pat, text):
    """Yield a ``Token(type, value)`` for each successive match of *pat*.

    Args:
        pat: A compiled regular expression whose alternatives are named
            groups; the name of the group that matched becomes the
            token's ``type``.
        text: The string to tokenize.

    Yields:
        Token: ``type`` is ``m.lastgroup`` and ``value`` is the matched text.

    Note:
        Scanning stops silently at the first position where *pat* does not
        match, so any remaining text is not reported.
    """
    # NOTE: ``Pattern.scanner`` is not part of the documented ``re`` API;
    # it returns an object whose ``match()`` resumes where the previous
    # match ended.
    scanner = pat.scanner(text)
    # Two-argument iter(): call scanner.match() repeatedly until it
    # returns None (no match at the current position).
    for m in iter(scanner.match, None):
        yield Token(m.lastgroup, m.group())


for tok in generate_tokens(master_pat, 'foo = 42'):
    print(tok)