source: proto/RE/demo/grep_demo.py @ 3428

Last change on this file since 3428 was 3428, checked in by ksherdy, 6 years ago

Fixed line counting.

File size: 6.4 KB
Line 
1#
2# grep -        A very simple example to demonstrate grep with parallel bit streams.
3#
4#                                       This prototypes demonstrates a compilable prototype with
5#                                 minimal reliance on complementary hand-written template code.
6#                                       In addition, this examples draws attention to missing features in the Pablo
7#                                       language which prevent the elimination of hand-written templates.
8#
9# Ken Herdy
10# July 29, 2013
11#
12# grep program flow
13#
14# StreamInput' -> Transpose' -> ClassifyBytes -> Match -> MatchLines -> FilterMatchLines -> StreamOutput'
15#                                                                 
16# grep -c program flow
17#
18# ... MatchLines -> CountLines -> StreamOutput''
19#
20# grep | wc -l program flow
21#
22# ... StreamOutput' -> Transpose' -> MatchLF -> PopCount -> StreamOutPut''
23#   
24# ' marks denote missing functionality which prevent the direct compilation of Pablo code to C/C++ code
25#
26# Observations:
27#
28# 1) Both sequential and bit parallel computations are performed.
29#   
30#    Examples include:
31#       
32#          a) sequential iteration over the matches and line feed streams in the 'Match' routine
33#          b) sequential and/or parallel output of marked lines in the 'CompressLines' / 'WriteLines' routine   
34#
35# 2) Creation of spans are problematic at the start of an input data stream.
36#
37#    A solution approach to this problem is to:
38#               
39#    a) formalize the meaning of the expressions of the language as expressions
40#    operating on sequences (streams) of 2^k bit data items both preceded and succeeded by an unbounded number of
41#    2^k with zero value items.
42#
43#    i.e. leverage denotational semenatics to define what parallel bit stream programs do.
44#
45#    b) define the 'line starts' stream as line_starts = ~Adv~(0) | Advance(lines_end_marks)
46#
47#    This technique can then be generalized to create spans in the case ends but not starts are marked.
48#               
49# 3) Options such as -c represent separate branches in the underlying stream graph.
50#
51# 4) Support of pipes and filter type applications requires subgraph composition but only a single transposition step.
52#    As in SPL a composite program structure could support UNIX-style pipe and filters archicture for
53#    parallel bit stream apps, e.g. composite { graph { /* program flow */ } }
54#
55import sys
56import pablo
57import optparse
58import grep_demo_config
59
60class Basis(): 
61        bit_0 = 0
62        bit_1 = 0
63        bit_2 = 0
64        bit_3 = 0
65        bit_4 = 0
66        bit_5 = 0
67        bit_6 = 0
68        bit_7 = 0
69
70class Lex():
71        a  = 0
72        p  = 0
73        l  = 0
74        e  = 0
75        LF = 0
76       
77class Matches():
78        all_matches = 0
79
80class Lines():
81        all_lines = 0
82
83class Output():
84        lines = 0
85
86def ClassifyBytes(basis_bits, lex): 
87        temp1 = (basis_bits.bit_1 &~ basis_bits.bit_0)
88        temp2 = (basis_bits.bit_2 &~ basis_bits.bit_3)
89        temp3 = (temp1 & temp2)
90        temp4 = (basis_bits.bit_4 | basis_bits.bit_5)
91        temp5 = (basis_bits.bit_7 &~ basis_bits.bit_6)
92        temp6 = (temp5 &~ temp4)
93        lex.a = (temp3 & temp6)
94        temp7 = (basis_bits.bit_2 & basis_bits.bit_3)
95        temp8 = (temp1 & temp7)
96        temp9 = (basis_bits.bit_6 | basis_bits.bit_7)
97        temp10 = (temp4 | temp9)
98        lex.p = (temp8 &~ temp10)
99        temp11 = (basis_bits.bit_4 & basis_bits.bit_5)
100        temp12 = (temp11 &~ temp9)
101        lex.l = (temp3 & temp12)
102        temp13 = (basis_bits.bit_5 &~ basis_bits.bit_4)
103        temp14 = (temp13 & temp5)
104        lex.e = (temp3 & temp14)
105        temp15 = (basis_bits.bit_0 | basis_bits.bit_1)
106        temp16 = (basis_bits.bit_2 | basis_bits.bit_3)
107        temp17 = (temp15 | temp16)
108        temp18 = (basis_bits.bit_4 &~ basis_bits.bit_5)
109        temp19 = (basis_bits.bit_6 &~ basis_bits.bit_7)
110        temp20 = (temp18 & temp19)
111        lex.LF = (temp20 &~ temp17)
112       
113def Match(lex, matches):
114        m0 = lex.a
115        m1 = pablo.Advance(m0) & lex.p
116        m2 = pablo.Advance(m1) & lex.p
117        m3 = pablo.Advance(m2) & lex.l
118        m4 = pablo.Advance(m3) & lex.e
119        matches.all_matches = m4
120
121# not compilable - requires pablo.First()
122def MatchLines1(lex, matches, lines):
123        global lgth
124
125        last_start = pablo.First()
126        LF_or_match = lex.LF | matches.all_matches
127        cursor = pablo.ScanToFirst(LF_or_match)
128
129        while(pablo.inFile(cursor)):
130                if(cursor & matches.all_matches):
131                        next_end = pablo.AdvanceThenScanTo(cursor, lex.LF)
132                        lines.all_lines |= pablo.InclusiveSpan(last_start, next_end) | next_end
133                        cursor = next_end
134                if(cursor & lex.LF):
135                        last_start = pablo.Advance(cursor)
136                cursor = pablo.AdvanceThenScanTo(cursor, LF_or_match)
137
138# compilable
139def MatchLines2(lex, matches, lines):
140        global lgth
141
142        last_start = 0
143        last_start = pablo.Advance(~last_start) ^ ~last_start
144        LF_or_match = lex.LF | matches.all_matches
145        cursor = pablo.ScanToFirst(LF_or_match)
146
147        while(pablo.inFile(cursor)):
148                if(cursor & matches.all_matches):
149                        next_end = pablo.AdvanceThenScanTo(cursor, lex.LF)
150                        lines.all_lines |= pablo.InclusiveSpan(last_start, next_end) | next_end
151                        cursor = next_end
152                if(cursor & lex.LF):
153                        last_start = pablo.Advance(cursor)
154                cursor = pablo.AdvanceThenScanTo(cursor, LF_or_match)
155                               
156def FilterMatchLines(data, output):
157        output.lines = pablo.filter_bytes(data, ~lines.all_lines)
158       
159basis_bits      = Basis()
160lex           = Lex()
161matches     = Matches()
162lines       = Lines()
163output      = Output()
164
165if __name__ == "__main__":
166
167        option_parser = grep_demo_config.get_option_parser() 
168        options, args = option_parser.parse_args(sys.argv[1:])
169
170        if len(args) != 1:
171                option_parser.print_usage()
172                sys.exit()
173        else:
174                input_file = args[0]
175
176  # ReadStreamInput(data)
177        global data
178        data = pablo.readfile(input_file)
179 
180        global lgth
181        lgth = len(data)
182
183  # Transpose(data, basis)
184        pablo.EOF_mask = pablo.transpose_streams(data, basis_bits)
185        ClassifyBytes(basis_bits, lex)
186        Match(lex, matches)
187
188        MatchLines1(lex, matches, lines)
189        FilterMatchLines(data, output)
190
191  # WriteStreamOutput(Output)
192        if(options.count):
193                print str(pablo.PopCount(pablo.MatchStar(matches.all_matches, ~lex.LF) & lex.LF))
194        else:
195          lgth = len(data)
196          print "Raw Input:\n" + data
197          print "---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------"
198          print "data.replace('LF','$')   " + data.replace("\n","$")
199          print "lex.a                    " + pablo.bitstream2string(lex.a, lgth)
200          print "lex.p                    " + pablo.bitstream2string(lex.p, lgth)
201          print "lex.l                    " + pablo.bitstream2string(lex.l, lgth)
202          print "lex.e                    " + pablo.bitstream2string(lex.e, lgth)
203          print "lex.LF                   " + pablo.bitstream2string(lex.LF, lgth)
204          print "matches.all_matches      " + pablo.bitstream2string(matches.all_matches, lgth)
205          print "lines.all_lines          " + pablo.bitstream2string(lines.all_lines, lgth)
206          print "---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------"
207          print "Raw Output:" + output.lines
208               
209       
210
211
Note: See TracBrowser for help on using the repository browser.