Ignore:
Timestamp:
Aug 2, 2013, 1:39:42 PM (6 years ago)
Author:
ksherdy
Message:

Grep demo clean up / notes.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/RE/demo/grep_demo.py

    r3419 r3422  
    11#
    2 # grep - A very simple example to match lines containing the fixed string 'apple'.
     2# grep -        A very simple example to demonstrate grep with parallel bit streams.
     3#
     4#                                       This prototypes demonstrates a compilable prototype with
     5#                                 minimal reliance on complementary hand-written template code.
     6#                                       In addition, this examples draws attention to missing features in the Pablo
     7#                                       language which prevent the elimination of hand-written templates.
    38#
    49# Ken Herdy
    510# July 29, 2013
    611#
    7 #
    812# grep program flow
    913#
    10 # StreamInput' -> PrependLF' -> Transpose' -> ClassifyBytes -> Match -> MatchLines -> FilterMatchLines -> StreamOutput'
     14# StreamInput' -> Transpose' -> ClassifyBytes -> Match -> MatchLines -> FilterMatchLines -> StreamOutput'
    1115#                                                                 
    1216# grep -c program flow
     
    1822# ... StreamOutput' -> Transpose' -> MatchLF -> PopCount -> StreamOutPut''
    1923#   
    20 # Notes/Observations:
    21 #
    22 # 1) Both sequential and parallel work is performed.
     24# ' marks denote missing functionality which prevent the direct compilation of Pablo code to C/C++ code
     25#
     26# Observations:
     27#
     28# 1) Both sequential and bit parallel computations are performed.
    2329#   
    24 #       For example,
     30#    Examples include:
    2531#       
    26 #       a) sequential iteration over the matches and line feed streams in the 'Match' routine
    27 #       b) sequential and/or parallel output of marked lines in the 'CompressLines' / 'WriteLines' routine     
     32#          a) sequential iteration over the matches and line feed streams in the 'Match' routine
     33#          b) sequential and/or parallel output of marked lines in the 'CompressLines' / 'WriteLines' routine   
    2834#
    29 # 2) Creation of spans are problematic at the start of streams.
    30 #
    31 #       Potential solutions:
    32 #
    33 #       a) prototype code handles with modification to source input stream, ie. srli<1> input stream | pablo<8>.SetValue(0,LF)
    34 #
    35 #    pablo.SetValue<8>(integer index, integer value)
    36 #
    37 #       b) prototype code handles with modification to pablo.SpanUpTo, pablo.InclusiveSpan, pablo.ExclusiveSpan
    38 #
    39 #       c) prototype code handles with 'artificial 1 bit set at 0th byte index'
    40 #    then OR in a 1-bit at 0th position 'if while' versus 'while'
    41 #
     35# 2) Creation of spans are problematic at the start of an input data stream.
     36#
     37#    A solution approach to this problem is to:
     38#               
     39#    a) formalize the meaning of the expressions of the language as expressions
     40#    operating on sequences (streams) of 2^k bit data items both preceded and succeeded by an unbounded number of
     41#    2^k with zero value items.
     42#
     43#    i.e. leverage denotational semenatics to define what parallel bit stream programs do.
     44#
     45#    b) define the 'line starts' stream as line_starts = ~Adv~(0) | Advance(lines_end_marks)
     46#
     47#    This technique can then be generalized to create spans in the case ends but not starts are marked.
     48#               
    4249# 3) Options such as -c represent separate branches in the underlying stream graph.
    4350#
     
    112119        matches.all_matches = m4
    113120
    114 def MatchLines(lex, matches, lines):
     121# not compilable - requires pablo.First()
     122def MatchLines1(lex, matches, lines):
    115123        global lgth
    116124
    117         last_start = 0 # last_start = pablo.First(), last_start = pablo.Mark(0) ?
     125        last_start = pablo.First()
    118126        LF_or_match = lex.LF | matches.all_matches
    119127        cursor = pablo.ScanToFirst(LF_or_match)
     
    122130                if(cursor & matches.all_matches):
    123131                        next_end = pablo.AdvanceThenScanTo(cursor, lex.LF)
    124                         #next_end = pablo.Advance(cursor)
    125                         #next_end = pablo.ScanTo(cursor, lex.LF)
    126                         lines.all_lines |= pablo.ExclusiveSpan(last_start, next_end) | next_end
     132                        lines.all_lines |= pablo.InclusiveSpan(last_start, next_end) | next_end
    127133                        cursor = next_end
    128134                if(cursor & lex.LF):
    129                         last_start = cursor
     135                        last_start = pablo.Advance(cursor)
    130136                cursor = pablo.AdvanceThenScanTo(cursor, LF_or_match)
    131                
    132                 #print "cursor                    " + pablo.bitstream2string(cursor, lgth)
    133                 #cursor = pablo.Advance(cursor)
    134                 #cursor = pablo.ScanTo(cursor, LF_or_match)
    135                
     137
     138# compilable
     139def MatchLines2(lex, matches, lines):
     140        global lgth
     141
     142        last_start = 0
     143        last_start = pablo.Advance(~last_start) ^ ~last_start
     144        LF_or_match = lex.LF | matches.all_matches
     145        cursor = pablo.ScanToFirst(LF_or_match)
     146
     147        while(pablo.inFile(cursor)):
     148                if(cursor & matches.all_matches):
     149                        next_end = pablo.AdvanceThenScanTo(cursor, lex.LF)
     150                        lines.all_lines |= pablo.InclusiveSpan(last_start, next_end) | next_end
     151                        cursor = next_end
     152                if(cursor & lex.LF):
     153                        last_start = pablo.Advance(cursor)
     154                cursor = pablo.AdvanceThenScanTo(cursor, LF_or_match)
     155                               
    136156def FilterMatchLines(data, output):
    137157        output.lines = pablo.filter_bytes(data, ~lines.all_lines)
     
    147167        option_parser = grep_demo_config.get_option_parser()
    148168        options, args = option_parser.parse_args(sys.argv[1:])
    149 
    150         print len(args)
    151169
    152170        if len(args) != 1:
     
    163181        lgth = len(data)
    164182
    165   # PrependLF(data)
    166 
    167183  # Transpose(data, basis)
    168184        pablo.EOF_mask = pablo.transpose_streams(data, basis_bits)
    169185        ClassifyBytes(basis_bits, lex)
    170186        Match(lex, matches)
    171         MatchLines(lex, matches, lines)
     187        MatchLines1(lex, matches, lines)
    172188        FilterMatchLines(data, output)
    173189
     
    177193    # WriteStreamOutput(Output)
    178194          lgth = len(data)
    179           print "\ninput data:     \n\n" + data
     195          print "Raw Input:\n" + data
     196          print "---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------"
     197          print "data.replace('LF','$')   " + data.replace("\n","$")
    180198          print "lex.a                    " + pablo.bitstream2string(lex.a, lgth)
    181199          print "lex.p                    " + pablo.bitstream2string(lex.p, lgth)
     
    185203          print "matches.all_matches      " + pablo.bitstream2string(matches.all_matches, lgth)
    186204          print "lines.all_lines          " + pablo.bitstream2string(lines.all_lines, lgth)
    187           print "\noutput data:    \n\n" + output.lines
     205          print "---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------"
     206          print "Raw Output:" + output.lines
    188207               
    189208       
Note: See TracChangeset for help on using the changeset viewer.