source: proto/RE/Haskell/mre_compiler.hs @ 3599

Last change on this file since 3599 was 3599, checked in by cameron, 5 years ago

Haskell prototype interpreter and regular expression simulator

File size: 2.9 KB
Line 
1-- Module MRE_Compiler (Compile minimal regular expressions (MREs) to Pablo).
2-- This module is a prototype to illustrate regular expression compilation
3-- in a very simple, but fully working form.
4
5module MRE_Compiler (MRE, PabloE, PabloS, compile) where
6
7-- MRE is the type for minimal regular expressions of 4 forms,
8-- individual characters, and concatenations, alternations and
9-- Kleene star.   Here we restrict star repetition to individual
10-- characters for the first prototype.
11
12data MRE = Ch(Char) | Cat(MRE, MRE) | Alt(MRE, MRE) | Star(Char) 
13   deriving Show   -- deriving Show just means Haskell can print MREs
14
15-- PabloE is the type of expressions in (minimal) Pablo. 
16-- And, Or, Not, Advance (advance by 1) and MatchStar are
17-- as expected.  CharClass gives the character class bitstream
18-- for a character.   Marker is used to define and enumerate
19-- marker variables.   The available variables are thus (Marker 0),
20-- (Marker 1), (Marker 2) ...
21
22data PabloE = Marker(Int) | And(PabloE, PabloE) | Or(PabloE, PabloE) | Not(PabloE)
23              | CharClass(Char) | Advance(PabloE) | MatchStar(PabloE, PabloE) 
24   deriving Show
25
26-- PabloS is the type of statements in (minimal) Pablo.  Just
27-- assigment and a return statement for now.
28
29data PabloS = SetMarker(Int, PabloE) | ReturnMarker(Int)     
30   deriving Show
31
32-- Now the compiler.   We compile an MRE to a list of Pablo statements
33-- to match the MRE.   To keep track of marker variables we use
34-- a helper function that takes the "current" marker number as input
35-- and returns a 2-tuple consisting of the final marker number and
36-- the list of compiled statements.
37
38compile :: MRE -> [PabloS] 
39
40compile(e) = stmts ++ [ReturnMarker m]
41  where (m, stmts) = mre2pablo_helper(e, 0)
42
43mre2pablo_helper :: (MRE, Int) -> (Int, [PabloS])
44
45-- To match a character class, use bitwise and of the marker and
46-- character class, then advance 1.  The output marker variable
47-- is the next one in sequence.
48
49mre2pablo_helper(Ch(c), m) = 
50   (m + 1, [SetMarker (m + 1, Advance(And(Marker(m), CharClass(c))))])
51
52-- A concatenation of two regular expressions is compiled by first
53-- compiling the first expression with the input marker number, and
54-- then compiling the second expression with the input marker set to
55-- be the output of the first.
56
57mre2pablo_helper(Cat(e1, e2), m) = (m2, s1 ++ s2)
58  where 
59   (m1, s1) = mre2pablo_helper(e1, m)
60   (m2, s2) = mre2pablo_helper(e2, m1)
61
62-- An alternation of two regular expressions needs a copy of the
63-- input to be saved first.   
64
65mre2pablo_helper(Alt(e1, e2), m) = (m2 + 1, save ++ s1 ++ s2 ++ final) 
66  where 
67   (m1, s1) = mre2pablo_helper(e1, m) 
68   (m2, s2) = mre2pablo_helper(e2, m1 + 1)
69   save = [SetMarker(m1 + 1, Marker(m))]
70   final = [SetMarker(m2 + 1, Or(Marker(m1), Marker(m2)))]
71
72-- For a repeated character class, just use MatchStar.
73
74mre2pablo_helper(Star(c), m) = 
75   (m + 1, [SetMarker(m + 1, MatchStar(Marker(m), CharClass(c)))]) 
76
Note: See TracBrowser for help on using the repository browser.