source: proto/RE/demo/grep.py @ 3739

Last change on this file since 3739 was 3739, checked in by ksherdy, 5 years ago

Added support for segment-at-a-time processing. Match strings at follows position.

File size: 5.2 KB
Line 
1#
2# grep - A very simple example to demonstrate grep with parallel bit streams.
3#
4#        This prototypes demonstrates a compilable prototype with
5#        minimal reliance on complementary hand-written template code.
6#        In addition, this examples draws attention to missing features in the Pablo
7#        language which prevent the elimination of hand-written templates.
8#
9# Ken Herdy
10# July 29, 2013
11#
12# grep program flow
13#
14# StreamInput' -> Transpose' -> ClassifyBytes -> Match -> MatchLines -> FilterMatchLines -> StreamOutput'
15#                                                                 
16# grep -c program flow
17#
18# ... MatchLines -> CountLines -> StreamOutput''
19#
20# grep | wc -l program flow
21#
22# ... StreamOutput' -> Transpose' -> MatchLF -> PopCount -> StreamOutPut''
23#   
24# ' marks denote missing functionality which prevent the direct compilation of Pablo code to C/C++ code
25#
26# Observations:
27#
28# 1) Both sequential and bit parall_matchedel computations are performed.
29#   
30#    Examples include:
31#       
32#          a) sequential iteration over the matches and line feed streams in the 'Match' routine
33#          b) sequential and/or parall_matchedel output of marked lines in the 'CompressLines' / 'WriteLines' routine   
34#
35# 2) Creation of spans are problematic at the start of an input data stream.
36#
37#    A solution approach to this problem is to:
38#               
39#    a) formalize the meaning of the expressions of the language as expressions
40#    operating on sequences (streams) of 2^k bit data items both preceded and succeeded by an unbounded number of
41#    2^k with zero value items.
42#
43#    i.e. leverage denotational semenatics to define what parall_matchedel bit stream programs do.
44#
45#    b) define the 'line starts' stream as line_starts = ~Adv~(0) | Advance(lines_end_marks)
46#
47#    This technique can then be generalized to create spans in the case ends but not starts are marked.
48#               
49# 3) Options such as -c represent separate branches in the underlying stream graph.
50#
51# 4) Support of pipes and filter type applications requires subgraph composition but only a single transposition step.
52#    As in SPL a composite program structure could support UNIX-style pipe and filters archicture for
53#    parall_matchedel bit stream apps, e.g. composite { graph { /* program flow */ } }
54#
55import sys
56import pablo
57import optparse
58import grep_demo_config
59
60class Basis_bits():
61        bit_0 = 0
62        bit_1 = 0
63        bit_2 = 0
64        bit_3 = 0
65        bit_4 = 0
66        bit_5 = 0
67        bit_6 = 0
68        bit_7 = 0
69
70class Lex():
71        a  = 0
72        p  = 0
73        l  = 0
74        e  = 0
75        LF = 0
76
77class Output():
78        matches                 = 0
79        lines                   = 0
80        line_starts             = 0
81        line_ends               = 0
82        all_starts              = 0
83       
84def ClassifyBytes(basis_bits, lex): 
85        temp1 = (basis_bits.bit_1 &~ basis_bits.bit_0)
86        temp2 = (basis_bits.bit_2 &~ basis_bits.bit_3)
87        temp3 = (temp1 & temp2)
88        temp4 = (basis_bits.bit_4 | basis_bits.bit_5)
89        temp5 = (basis_bits.bit_7 &~ basis_bits.bit_6)
90        temp6 = (temp5 &~ temp4)
91        lex.a = (temp3 & temp6)
92        temp7 = (basis_bits.bit_2 & basis_bits.bit_3)
93        temp8 = (temp1 & temp7)
94        temp9 = (basis_bits.bit_6 | basis_bits.bit_7)
95        temp10 = (temp4 | temp9)
96        lex.p = (temp8 &~ temp10)
97        temp11 = (basis_bits.bit_4 & basis_bits.bit_5)
98        temp12 = (temp11 &~ temp9)
99        lex.l = (temp3 & temp12)
100        temp13 = (basis_bits.bit_5 &~ basis_bits.bit_4)
101        temp14 = (temp13 & temp5)
102        lex.e = (temp3 & temp14)
103        temp15 = (basis_bits.bit_0 | basis_bits.bit_1)
104        temp16 = (basis_bits.bit_2 | basis_bits.bit_3)
105        temp17 = (temp15 | temp16)
106        temp18 = (basis_bits.bit_4 &~ basis_bits.bit_5)
107        temp19 = (basis_bits.bit_6 &~ basis_bits.bit_7)
108        temp20 = (temp18 & temp19)
109        lex.LF = (temp20 &~ temp17)
110       
111def Match(lex, output):
112    # Mark _follows.
113        cursor = pablo.Advance(lex.a)
114        cursor = pablo.Advance(cursor & lex.p)
115        cursor = pablo.Advance(cursor & lex.p)
116        cursor = pablo.Advance(cursor & lex.l)
117        cursor = pablo.Advance(cursor & lex.e)
118        output.matches = cursor
119
120def MatchLines(lex, output):
121       
122        all_line_starts   = pablo.ScanToFirst(~lex.LF) | (pablo.Advance(lex.LF) &~ lex.LF)
123        output.all_starts = all_line_starts
124        all_line_ends     = lex.LF
125
126        last_line_start   = pablo.ScanToFirst(all_line_starts) 
127        cursor            = last_line_start
128
129        while(pablo.inFile(cursor)):
130               
131                if(cursor & all_line_starts):
132                        last_line_start = cursor
133                       
134                if(cursor & output.matches):
135                        cursor = pablo.ScanTo(cursor, lex.LF)
136                        output.lines |= pablo.InclusiveSpan(last_line_start, cursor) # LF | match
137               
138                cursor = pablo.AdvanceThenScanTo(cursor, all_line_starts | output.matches)
139
140        output.line_starts = output.lines & all_line_starts
141        output.line_ends   = pablo.ScanTo(output.line_starts, output.lines & all_line_ends)
142
143
144
145###
146### <-- Demo only for now.
147###
148### def FilterMatchLines(data, output):
149###     output.byte_data = pablo.filter_bytes(data, ~(pablo.Advance(output.lines) | output.lines))
150        # output.byte_data = pablo.filter_bytes(data, ~output.lines) # a line has a LF
151
152
153### def CountLines(lex, output):
154        # return pablo.PopCount(pablo.MatchStar(output.lines, ~lex.LF) & lex.LF)
155###     return pablo.PopCount(pablo.Advance(output.lines) & lex.LF)
156        # return pablo.PopCount(output.lines & lex.LF) # a line has a LF
157
158# write out template considers end positions
159
160def Main(basis_bits, lex, output):
161        Transpose(byte_data, basis_bits)
162        ClassifyBytes(basis_bits, lex)
163        Match(lex, output)
164        MatchLines(lex, output)
165
166    ### FilterMatchLines(data, output)
167    ### WriteStreamOutput(Output)
168
169        ### if(options.count):
170        ### CountLines(lex, output))
171               
Note: See TracBrowser for help on using the repository browser.