Changeset 3415 for proto


Ignore:
Timestamp:
Jul 31, 2013, 3:34:31 PM (6 years ago)
Author:
ksherdy
Message:

Progress towards a compilable grep demo.

Location:
proto/RE/demo
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • proto/RE/demo/fruitlist.txt

    r3414 r3415  
    1 apple#apples#pineapple#apple-#apple-fruit#fruit-apple#
     1apple#apples#pine#apple-#apple-fruit#fruit-apple#
  • proto/RE/demo/grep.py

    r3414 r3415  
    44# grep Program flow.
    55#
    6 # <STDIN> -> Transpose -> ClassifyBytes -> Match -> Matchlines -> STDOUT
     6# StreamInput (STDIN) -> Transpose -> ClassifyBytes -> Match -> MatchLines -> (FilterMatchLines) -> StreamOutput (STDOUT)
    77#                                                                     
     8# Notes/Observations:
     9#
     10# 1) Both sequential and parallel work is performed.
     11#   
     12#       For example,
     13#       
     14#       a) sequential iteration over the matches and line feed streams in the 'Match' routine
     15#       b) sequential and/or parallel output of marked lines in the 'CompressLines' / 'WriteLines' routine     
     16#
     17# 2) Creation of spans is problematic at the start of streams.
     18#
     19#       Potential solutions:
     20#
     21#       a) insert a start of span marker in the source byte stream
     22#       b) push the problem onto the developer
     23#       c) add code to handle this case
     24#
    825#
    926# Ken Herdy
     
    3148       
    3249class Matches():
    33         m = 0
     50        all_matches = 0
     51
     52class Lines():
     53        all_lines = 0
     54
     55class Output():
     56        lines = 0
    3457
    3558def ClassifyBytes(basis, lex): 
     
    6487        m3 = pablo.Advance(m2) & lex.l
    6588        m4 = pablo.Advance(m3) & lex.e
    66         matches.m = m4
     89        matches.all_matches = m4
    6790
    68 def
     91def MatchLines(lex, matches, lines):
     92        last_start = 0 # last_start = pablo.First(), last_start = pablo.Mark(0) ?
     93        LF_or_match = lex.LF | matches.all_matches
     94        cursor = pablo.ScanToFirst(LF_or_match)
    6995
    70 basis = Basis()
    71 lex = Lex()
     96        while(pablo.inFile(cursor)):
     97                if(cursor & matches.all_matches):
     98                        next_end = pablo.AdvanceThenScanTo(cursor, lex.LF)
     99                        #next_end = pablo.Advance(cursor)
     100                        #next_end = pablo.ScanTo(cursor, lex.LF)
     101                        lines.all_lines |= pablo.ExclusiveSpan(last_start, next_end) | next_end # e but not s hmmm?
     102                        cursor = next_end
     103                if(cursor & lex.LF):
     104                        last_start = cursor
     105                cursor = pablo.AdvanceThenScanTo(cursor, LF_or_match)
     106                #cursor = pablo.Advance(cursor)
     107                #cursor = pablo.ScanTo(cursor, LF_or_match)
     108               
     109def FilterMatchLines(u8data, output):
     110        output.lines = pablo.filter_bytes(u8data, ~lines.all_lines)
     111       
     112
     113
     114basis   = Basis()
     115lex     = Lex()
    72116matches = Matches()
     117lines   = Lines()
     118output  = Output()
    73119
    74120if __name__ == "__main__":
     
    79125                ClassifyBytes(basis, lex)
    80126                Match(lex, matches)
     127                MatchLines(lex, matches, lines)
     128                FilterMatchLines(u8data, output)
     129
    81130                lgth = len(u8data)
    82                 print "source data: " + u8data
    83                 print "lex.a        " + pablo.bitstream2string(lex.a, lgth)
    84                 print "lex.p        " + pablo.bitstream2string(lex.p, lgth)
    85                 print "lex.l        " + pablo.bitstream2string(lex.l, lgth)
    86                 print "lex.e        " + pablo.bitstream2string(lex.e, lgth)
    87                 print "lex.LF       " + pablo.bitstream2string(lex.LF, lgth)
    88                 print "matche.m     " + pablo.bitstream2string(matches.m, lgth)
     131                print "stream input data:       " + u8data
     132                print "lex.a                    " + pablo.bitstream2string(lex.a, lgth)
     133                print "lex.p                    " + pablo.bitstream2string(lex.p, lgth)
     134                print "lex.l                    " + pablo.bitstream2string(lex.l, lgth)
     135                print "lex.e                    " + pablo.bitstream2string(lex.e, lgth)
     136                print "lex.LF                   " + pablo.bitstream2string(lex.LF, lgth)
     137                print "matches.all_matches      " + pablo.bitstream2string(matches.all_matches, lgth)
     138                print "lines.all_lines          " + pablo.bitstream2string(lines.all_lines, lgth)
     139                print "stream output data       " + output.lines
    89140               
    90141        else:
  • proto/RE/demo/pablo.py

    r3414 r3415  
    165165
    166166def SpanUpTo(starts, ends):
    167         return (ends - starts)
     167        if(starts == 0):
     168                return (ends - 1)
     169        else:
     170                return (ends - starts)
    168171
    169172def InclusiveSpan(starts, ends):
    170         return (ends - starts) | ends
     173        if(starts == 0):
     174                return (ends - 1)
     175        else:
     176                return (ends - starts) | ends
    171177
    172178def ExclusiveSpan(starts, ends):
    173         return (ends - starts) &~ starts
     179        if(starts == 0):
     180                return (ends - 1) | 1
     181        else:
     182                return (ends - starts) &~ starts
    174183
    175184
Note: See TracChangeset for help on using the changeset viewer.