source: proto/RE/demo/grep_demo.py @ 3739

Last change on this file since 3739 was 3739, checked in by ksherdy, 4 years ago

Added support for segment-at-a-time processing. Match strings at follows position.

File size: 7.6 KB
Line 
1#
2# grep - A very simple example to demonstrate grep with parallel bit streams.
3#
4#        This prototypes demonstrates a compilable prototype with
5#        minimal reliance on complementary hand-written template code.
6#        In addition, this examples draws attention to missing features in the Pablo
7#        language which prevent the elimination of hand-written templates.
8#
9#  LF    -> trailing line feeds as part of a matched line
10#  NO LF -> GNU grep - trailing line feeds are not part of a matched line
11#
12#
13# Ken Herdy
14# July 29, 2013
15#
16# grep program flow
17#
18# StreamInput' -> Transpose' -> ClassifyBytes -> Match -> MatchLines -> FilterMatchLines -> StreamOutput'
19#                                                                 
20# grep -c program flow
21#
22# ... MatchLines -> CountLines -> StreamOutput''
23#
24# grep | wc -l program flow
25#
26# ... StreamOutput' -> Transpose' -> MatchLF -> PopCount -> StreamOutPut''
27#   
28# ' marks denote missing functionality which prevent the direct compilation of Pablo code to C/C++ code
29#
30# Observations:
31#
32# 1) Both sequential and bit parall_matchedel computations are performed.
33#   
34#    Examples include:
35#       
36#          a) sequential iteration over the matches and line feed streams in the 'Match' routine
37#          b) sequential and/or parall_matchedel output of marked lines in the 'CompressLines' / 'WriteLines' routine   
38#
39# 2) Creation of spans are problematic at the start of an input data stream.
40#
41#    A solution approach to this problem is to:
42#               
43#    a) formalize the meaning of the expressions of the language as expressions
44#    operating on sequences (streams) of 2^k bit data items both preceded and succeeded by an unbounded number of
45#    2^k with zero value items.
46#
47#    i.e. leverage denotational semenatics to define what parall_matchedel bit stream programs do.
48#
49#    b) define the 'line starts' stream as line_starts = ~Adv~(0) | Advance(lines_end_marks)
50#
51#    This technique can then be generalized to create spans in the case ends but not starts are marked.
52#               
53# 3) Options such as -c represent separate branches in the underlying stream graph.
54#
55# 4) Support of pipes and filter type applications requires subgraph composition but only a single transposition step.
56#    As in SPL a composite program structure could support UNIX-style pipe and filters archicture for
57#    parall_matchedel bit stream apps, e.g. composite { graph { /* program flow */ } }
58#
59import sys
60import pablo
61import optparse
62import grep_demo_config
63
64class Basis(): 
65        bit_0 = 0
66        bit_1 = 0
67        bit_2 = 0
68        bit_3 = 0
69        bit_4 = 0
70        bit_5 = 0
71        bit_6 = 0
72        bit_7 = 0
73
74class Lex():
75        a  = 0
76        p  = 0
77        l  = 0
78        e  = 0
79        LF = 0
80       
81class Output():
82        matches                 = 0
83        lines                   = 0
84        line_starts             = 0
85        line_ends               = 0
86        byte_data               = 0
87
88def ClassifyBytes(basis_bits, lex): 
89        temp1 = (basis_bits.bit_1 &~ basis_bits.bit_0)
90        temp2 = (basis_bits.bit_2 &~ basis_bits.bit_3)
91        temp3 = (temp1 & temp2)
92        temp4 = (basis_bits.bit_4 | basis_bits.bit_5)
93        temp5 = (basis_bits.bit_7 &~ basis_bits.bit_6)
94        temp6 = (temp5 &~ temp4)
95        lex.a = (temp3 & temp6)
96        temp7 = (basis_bits.bit_2 & basis_bits.bit_3)
97        temp8 = (temp1 & temp7)
98        temp9 = (basis_bits.bit_6 | basis_bits.bit_7)
99        temp10 = (temp4 | temp9)
100        lex.p = (temp8 &~ temp10)
101        temp11 = (basis_bits.bit_4 & basis_bits.bit_5)
102        temp12 = (temp11 &~ temp9)
103        lex.l = (temp3 & temp12)
104        temp13 = (basis_bits.bit_5 &~ basis_bits.bit_4)
105        temp14 = (temp13 & temp5)
106        lex.e = (temp3 & temp14)
107        temp15 = (basis_bits.bit_0 | basis_bits.bit_1)
108        temp16 = (basis_bits.bit_2 | basis_bits.bit_3)
109        temp17 = (temp15 | temp16)
110        temp18 = (basis_bits.bit_4 &~ basis_bits.bit_5)
111        temp19 = (basis_bits.bit_6 &~ basis_bits.bit_7)
112        temp20 = (temp18 & temp19)
113        lex.LF = (temp20 &~ temp17)
114       
115def Match(lex, output):
116#   Mark '_follows'
117        cursor = pablo.Advance(lex.a)
118        cursor = pablo.Advance(cursor & lex.p)
119        cursor = pablo.Advance(cursor & lex.p)
120        cursor = pablo.Advance(cursor & lex.l)
121        cursor = pablo.Advance(cursor & lex.e)
122        output.matches = cursor
123
124#   Mark '_ends'
125#       m0 = lex.a
126#       m1 = pablo.Advance(m0) & lex.p
127#       m2 = pablo.Advance(m1) & lex.p
128#       m3 = pablo.Advance(m2) & lex.l
129#       m4 = pablo.Advance(m3) & lex.e
130#       output.matches = m4
131
132def MatchLines(lex, output):
133
134        all_line_starts   = pablo.ScanToFirst(~lex.LF) | (pablo.Advance(lex.LF) &~ lex.LF)
135        output.all_starts = all_line_starts
136        all_line_ends     = lex.LF
137
138        last_line_start   = pablo.ScanToFirst(all_line_starts) 
139        cursor            = last_line_start
140
141        while(pablo.inFile(cursor)):
142               
143                if(cursor & all_line_starts):
144                        last_line_start = cursor
145                       
146                if(cursor & output.matches):
147                        cursor = pablo.ScanTo(cursor, lex.LF)
148                        output.lines |= pablo.InclusiveSpan(last_line_start, cursor) # LF | match
149               
150                cursor = pablo.AdvanceThenScanTo(cursor, all_line_starts | output.matches)
151
152        output.line_starts = output.lines & all_line_starts
153        output.line_ends   = pablo.ScanTo(output.line_starts, output.lines & all_line_ends)
154
155###
156###  KH: Fails to compile. Root cause not yet found.
157###
158def MatchLines2(lex, output):
159
160        all_line_starts   = pablo.ScanToFirst(~lex.LF) | (pablo.Advance(lex.LF) &~ lex.LF)
161        output.all_starts = all_line_starts
162        all_line_ends     = lex.LF
163
164        last_line_start   = pablo.ScanToFirst(all_line_starts) 
165        cursor            = last_line_start
166
167        while(pablo.inFile(cursor)):
168               
169                if(cursor & all_line_starts):
170                        last_line_start = cursor
171                       
172                if(cursor & output.matches):
173                        output.line_starts |= last_line_start                   
174                        cursor = pablo.ScanTo(cursor, lex.LF)
175                        output.line_ends |= cursor
176                        output.lines |= pablo.InclusiveSpan(last_line_start, cursor) # LF | match
177               
178                cursor = pablo.AdvanceThenScanTo(cursor, all_line_starts | output.matches)
179               
180def FilterMatchLines(data, output):
181        output.byte_data = pablo.filter_bytes(data, ~output.lines)
182        # output.byte_data = pablo.filter_bytes(data, ~output.lines)                                                            # LF
183
184def CountLines(lex, output):
185        ### return pablo.PopCount(pablo.MatchStar(output.lines, ~lex.LF) & lex.LF)  # LF
186        return pablo.PopCount(output.lines & lex.LF)                                                                                                                            # LF
187        # return pablo.PopCount(pablo.Advance(output.lines) & lex.LF)                                           # no LF
188
189basis_bits      = Basis()
190lex                 = Lex()
191output          = Output()
192
193if __name__ == "__main__":
194
195        option_parser = grep_demo_config.get_option_parser() 
196        options, args = option_parser.parse_args(sys.argv[1:])
197
198        if len(args) != 1:
199                option_parser.print_usage()
200                sys.exit()
201        else:
202                input_file = args[0]
203
204  # ReadStreamInput(data)
205        global data
206        data = pablo.readfile(input_file)
207 
208        global lgth
209        lgth = len(data)
210
211  # Transpose(data, basis)
212        pablo.EOF_mask = pablo.transpose_streams(data, basis_bits)
213        ClassifyBytes(basis_bits, lex)
214        Match(lex, output)
215        MatchLines(lex, output)
216
217  # WriteStreamOutput(Output)
218        if(options.count):
219          print str(CountLines(lex, output))
220
221        elif(options.invertMatch):
222            output.lines = ~output.lines                                                                        # LF
223            # output.lines = (~output.lines) &~ lex.LF          # no LF
224            FilterMatchLines(data, output)
225            print output.byte_data
226        else:
227            FilterMatchLines(data, output)
228            print output.byte_data
229
230        if(options.debug):
231                print ""
232                print "data.replace('LF','$')   " + data.replace("\n","$")
233                print "lex.a                    " + pablo.bitstream2string(lex.a, lgth)
234                print "lex.p                    " + pablo.bitstream2string(lex.p, lgth)
235                print "lex.l                    " + pablo.bitstream2string(lex.l, lgth)
236                print "lex.e                    " + pablo.bitstream2string(lex.e, lgth)
237                print "lex.LF                   " + pablo.bitstream2string(lex.LF, lgth)
238                print "output.matches           " + pablo.bitstream2string(output.matches, lgth)
239                print "output.line_starts       " + pablo.bitstream2string(output.line_starts, lgth)
240                print "output.line_ends         " + pablo.bitstream2string(output.line_ends, lgth)
241                print "output.lines             " + pablo.bitstream2string(output.lines, lgth)
242                print ""
243
244        #lgth = len(data)
245
246
247               
248       
249
250
Note: See TracBrowser for help on using the repository browser.