Changeset 3741


Ignore:
Timestamp:
Mar 26, 2014, 5:55:08 PM (3 years ago)
Author:
ksherdy
Message:

Ironed out segment-at-a-time processing model. Updated test cases.

Location:
proto/RE/demo
Files:
5 added
5 edited
7 moved

Legend:

Unmodified
Added
Removed
  • proto/RE/demo/Makefile

    r3740 r3741  
    77
    88PABLO_DEMO=grep_demo.py
    9 INFILE=test/fruitlist7.dat
     9INFILE=test/valid/fruitlist0.dat
    1010
    1111PABLO_DEBUG=-d
  • proto/RE/demo/grep.py

    r3739 r3741  
    11#
    2 # grep - A very simple example to demonstrate grep with parallel bit streams.
     2# grep - A very simple example to demonstrate 'fixed-string' grep with parallel bit streams.
    33#
    44#        This prototypes demonstrates a compilable prototype with
     
    2222# ... StreamOutput' -> Transpose' -> MatchLF -> PopCount -> StreamOutPut''
    2323#   
    24 # ' marks denote missing functionality which prevent the direct compilation of Pablo code to C/C++ code
     24# ' marks denote missing functionality which prevents
     25# the direct compilation of Pablo code to C/C++ code
    2526#
    2627# Observations:
    2728#
    28 # 1) Both sequential and bit parall_matchedel computations are performed.
     29# 1) Both sequential and bit parallel computations are performed.
    2930#   
    3031#    Examples include:
     
    3839#               
    3940#    a) formalize the meaning of the expressions of the language as expressions
    40 #    operating on sequences (streams) of 2^k bit data items both preceded and succeeded by an unbounded number of
    41 #    2^k with zero value items.
     41#    operating on sequences (streams) of 2^k bit data items both preceded and
     42#    succeeded by an unbounded number of 2^k with zero value items.
    4243#
    43 #    i.e. leverage denotational semenatics to define what parall_matchedel bit stream programs do.
    44 #
    45 #    b) define the 'line starts' stream as line_starts = ~Adv~(0) | Advance(lines_end_marks)
    46 #
    47 #    This technique can then be generalized to create spans in the case ends but not starts are marked.
     44#    b) define the 'line starts' stream as line_starts = ~Adv(0) ^ ~0
    4845#               
    4946# 3) Options such as -c represent separate branches in the underlying stream graph.
    5047#
    51 # 4) Support of pipes and filter type applications requires subgraph composition but only a single transposition step.
    52 #    As in SPL a composite program structure could support UNIX-style pipe and filters archicture for
    53 #    parall_matchedel bit stream apps, e.g. composite { graph { /* program flow */ } }
     48# 4) Support of pipes and filter type applications requires subgraph
     49#    composition but only a single transposition step.
     50#    As in SPL a composite program structure could support
     51#    UNIX-style pipe and filters archicture for
     52#    parall_matchedel bit stream apps,
     53#    e.g. composite { graph { /* program flow */ } }
    5454#
    5555import sys
     
    7676
    7777class Output():
    78         matches                 = 0
     78        match_follows   = 0
    7979        lines                   = 0
    8080        line_starts             = 0
    8181        line_ends               = 0
    82         all_starts              = 0
    8382       
    8483def ClassifyBytes(basis_bits, lex): 
     
    116115        cursor = pablo.Advance(cursor & lex.l)
    117116        cursor = pablo.Advance(cursor & lex.e)
    118         output.matches = cursor
     117        output.match_follows = cursor
    119118
    120119def MatchLines(lex, output):
    121120       
    122121        all_line_starts   = pablo.ScanToFirst(~lex.LF) | (pablo.Advance(lex.LF) &~ lex.LF)
    123         output.all_starts = all_line_starts
    124122        all_line_ends     = lex.LF
    125123
     
    132130                        last_line_start = cursor
    133131                       
    134                 if(cursor & output.matches):
     132                if(cursor & output.match_follows):
    135133                        cursor = pablo.ScanTo(cursor, lex.LF)
    136134                        output.lines |= pablo.InclusiveSpan(last_line_start, cursor) # LF | match
    137135               
    138                 cursor = pablo.AdvanceThenScanTo(cursor, all_line_starts | output.matches)
     136                cursor = pablo.AdvanceThenScanTo(cursor, all_line_starts | output.match_follows)
    139137
    140138        output.line_starts = output.lines & all_line_starts
    141139        output.line_ends   = pablo.ScanTo(output.line_starts, output.lines & all_line_ends)
    142 
    143 
    144140
    145141###
  • proto/RE/demo/grep_demo.py

    r3739 r3741  
    11#
    2 # grep - A very simple example to demonstrate grep with parallel bit streams.
    3 #
     2# grep - A very simple example to demonstrate 'fixed-string' grep with parallel bit streams.
     3#'
    44#        This prototypes demonstrates a compilable prototype with
    55#        minimal reliance on complementary hand-written template code.
     
    77#        language which prevent the elimination of hand-written templates.
    88#
    9 #  LF    -> trailing line feeds as part of a matched line
    10 #  NO LF -> GNU grep - trailing line feeds are not part of a matched line
    11 #
    12 #
    13 # Ken Herdy
    14 # July 29, 2013
    15 #
    16 # grep program flow
    17 #
    18 # StreamInput' -> Transpose' -> ClassifyBytes -> Match -> MatchLines -> FilterMatchLines -> StreamOutput'
    19 #                                                                 
    20 # grep -c program flow
    21 #
    22 # ... MatchLines -> CountLines -> StreamOutput''
    23 #
    24 # grep | wc -l program flow
    25 #
    26 # ... StreamOutput' -> Transpose' -> MatchLF -> PopCount -> StreamOutPut''
    27 #   
    28 # ' marks denote missing functionality which prevent the direct compilation of Pablo code to C/C++ code
    29 #
    30 # Observations:
    31 #
    32 # 1) Both sequential and bit parall_matchedel computations are performed.
    33 #   
    34 #    Examples include:
    35 #       
    36 #          a) sequential iteration over the matches and line feed streams in the 'Match' routine
    37 #          b) sequential and/or parall_matchedel output of marked lines in the 'CompressLines' / 'WriteLines' routine   
    38 #
    39 # 2) Creation of spans are problematic at the start of an input data stream.
    40 #
    41 #    A solution approach to this problem is to:
    42 #               
    43 #    a) formalize the meaning of the expressions of the language as expressions
    44 #    operating on sequences (streams) of 2^k bit data items both preceded and succeeded by an unbounded number of
    45 #    2^k with zero value items.
    46 #
    47 #    i.e. leverage denotational semenatics to define what parall_matchedel bit stream programs do.
    48 #
    49 #    b) define the 'line starts' stream as line_starts = ~Adv~(0) | Advance(lines_end_marks)
    50 #
    51 #    This technique can then be generalized to create spans in the case ends but not starts are marked.
    52 #               
    53 # 3) Options such as -c represent separate branches in the underlying stream graph.
    54 #
    55 # 4) Support of pipes and filter type applications requires subgraph composition but only a single transposition step.
    56 #    As in SPL a composite program structure could support UNIX-style pipe and filters archicture for
    57 #    parall_matchedel bit stream apps, e.g. composite { graph { /* program flow */ } }
    58 #
     9
    5910import sys
    6011import pablo
     
    8031       
    8132class Output():
    82         matches                 = 0
     33        match_follows   = 0
    8334        lines                   = 0
    8435        line_starts             = 0
    8536        line_ends               = 0
    86         byte_data               = 0
    8737
    8838def ClassifyBytes(basis_bits, lex): 
     
    12070        cursor = pablo.Advance(cursor & lex.l)
    12171        cursor = pablo.Advance(cursor & lex.e)
    122         output.matches = cursor
     72        output.match_follows = cursor
    12373
    12474#   Mark '_ends'
     
    13181
    13282def MatchLines(lex, output):
    133 
     83       
    13484        all_line_starts   = pablo.ScanToFirst(~lex.LF) | (pablo.Advance(lex.LF) &~ lex.LF)
    135         output.all_starts = all_line_starts
    13685        all_line_ends     = lex.LF
    13786
     
    14493                        last_line_start = cursor
    14594                       
    146                 if(cursor & output.matches):
     95                if(cursor & output.match_follows):
    14796                        cursor = pablo.ScanTo(cursor, lex.LF)
    148                         output.lines |= pablo.InclusiveSpan(last_line_start, cursor) # LF | match
     97                        output.lines |= pablo.InclusiveSpan(last_line_start, cursor)
    14998               
    150                 cursor = pablo.AdvanceThenScanTo(cursor, all_line_starts | output.matches)
     99                cursor = pablo.AdvanceThenScanTo(cursor, all_line_starts | output.match_follows)
    151100
    152101        output.line_starts = output.lines & all_line_starts
    153102        output.line_ends   = pablo.ScanTo(output.line_starts, output.lines & all_line_ends)
    154103
    155 ###
    156 ###  KH: Fails to compile. Root cause not yet found.
    157 ###
     104# Progressively marks line_starts and line_ends.
    158105def MatchLines2(lex, output):
    159106
    160107        all_line_starts   = pablo.ScanToFirst(~lex.LF) | (pablo.Advance(lex.LF) &~ lex.LF)
    161         output.all_starts = all_line_starts
    162108        all_line_ends     = lex.LF
    163109
     
    170116                        last_line_start = cursor
    171117                       
    172                 if(cursor & output.matches):
     118                if(cursor & output.match_follows):
    173119                        output.line_starts |= last_line_start                   
    174120                        cursor = pablo.ScanTo(cursor, lex.LF)
     
    176122                        output.lines |= pablo.InclusiveSpan(last_line_start, cursor) # LF | match
    177123               
    178                 cursor = pablo.AdvanceThenScanTo(cursor, all_line_starts | output.matches)
     124                cursor = pablo.AdvanceThenScanTo(cursor, all_line_starts | output.match_follows)
    179125               
    180126def FilterMatchLines(data, output):
     
    213159        ClassifyBytes(basis_bits, lex)
    214160        Match(lex, output)
    215         MatchLines(lex, output)
     161        #MatchLines(lex, output)
     162        MatchLines2(lex, output)
    216163
    217164  # WriteStreamOutput(Output)
     
    236183                print "lex.e                    " + pablo.bitstream2string(lex.e, lgth)
    237184                print "lex.LF                   " + pablo.bitstream2string(lex.LF, lgth)
    238                 print "output.matches           " + pablo.bitstream2string(output.matches, lgth)
     185                print "output.match_follows     " + pablo.bitstream2string(output.match_follows, lgth)
    239186                print "output.line_starts       " + pablo.bitstream2string(output.line_starts, lgth)
    240187                print "output.line_ends         " + pablo.bitstream2string(output.line_ends, lgth)
  • proto/RE/demo/grep_template_segment.cpp

    r3740 r3741  
    1616 * b. Support multiple segments a.k.a. buffer-at-a-time.
    1717 * c. Compile in s2k. <--
    18  * d. s2k iterators for (follow, leader) in (follow_stream, leader_stream) do { }
     18 * d. s2k iterators for (start, follow) in (starts_stream, follows_stream) do { }
    1919 *
     20 * Segment-at-a-time Processing Issues:
     21 *
     22 * 1. Start-of-stream or equivalently start-of-segment clear()...
     23 *    The ScanTo and EOF_mask processing ensure a 'fence post' at the
     24 *    end of each segment as well as at the end of file.
     25 *
    2026 * 'grep.py' and 'grep_template.cpp' are tightly coupled on a number of variables:
    2127 *
     
    2430 * 3. Sequential iterators (scanners) expect 'output.matches'.
    2531 * 4. Sequential iterators (scanners) expect 'lex.LF'.
     32 *
    2633 **/
     34
     35#define DEBUG 0
    2736
    2837// Platform independent runtime support libraries and C++ libraries.
     
    108117// Segment-at-a-time parameters.
    109118int bytes_read              = 0;
    110 int bytes_avail             = 0;
     119int bytes_avail             = 0;  // wrt current segment
    111120int bytes_remaining         = 0;
    112121
     
    114123int copy_back_offset        = 0;
    115124
    116 int block_index             = 0; // wrt current segment
    117 int block_base              = 0; // ?
     125int block_index             = 0;
     126int block_base              = 0;
    118127
    119128//int segment_index           = 0; // segment index wrt current buffer  // unused
     
    122131int stream_base             = 0;
    123132
    124 int match_offset            = 0;
    125 int line_start_offset       = 0;  // ?
    126 int line_end_offset         = -1; // ?
    127 
    128 int line_starts_final_offset     = 0;
    129 int line_ends_final_offset       = 0;
     133int match_offset            = 0; // 0-based
     134int line_start_offset       = 0; 
     135int line_end_offset         = 0;
     136
     137int line_final_start_offset = 0;
     138int line_final_end_offset   = 0;
    130139
    131140int main(int argc, char * argv[]) {
     
    196205
    197206  if(print_version_option) {
    198       fprintf(outfile, "grep static fixed-string parallel bit stream string.: March 2014\n");
     207      fprintf(outfile, "grep static fixed-string parallel bit streams.: March 2014\n");
    199208  }
    200209
     
    204213  char * byte_data = buffer;
    205214
    206   // Marker stream iterator.
    207   BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> match_scanner;
     215  // Scanners
     216  BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> matches_scanner;
    208217  BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> line_starts_scanner;
    209218  BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> line_ends_scanner;
     
    216225    bytes_remaining = bytes_avail;
    217226   
    218    
    219    
    220     //if(feof(infile)) { KH: No! OPC /*bytes_remaining--;*/ }
     227//    if(feof(infile))
     228//        && (0 == bytes_remaining)) {
     229//        if(infile) { fclose(infile); infile=NULL;}
     230//        if(outfile) { fclose(outfile); outfile=NULL;}
     231//        break;
     232//    }
     233   
    221234    if(ferror(infile)) { perror( "io error" ); exit(1); }
    222235
     
    230243       
    231244      if(only_matching) { 
    232         match_scanner.init();
     245        matches_scanner.init();
    233246      }
    234247     
     
    248261
    249262        if(only_matching) { 
    250           match_scanner.load_block(output.matches, block_index);
     263          matches_scanner.load_block(output.match_follows, block_index);
    251264        }
    252265
     
    258271   
    259272      if(only_matching) {
    260         while(match_scanner.has_next()) {
    261           match_offset = match_scanner.scan_to_next() - pattern_size;
     273        while(matches_scanner.has_next()) {
     274          match_offset = matches_scanner.scan_to_next() - pattern_size;
    262275          if(byte_offset) {
    263276              int match_stream_offset = stream_base + match_offset;
     
    272285        }
    273286       
    274         copy_back_size      = pattern_size;           
     287        copy_back_size      = pattern_size + 1;           
    275288        copy_back_offset    = bytes_avail - copy_back_size;           
    276289      }
     
    281294         
    282295        //if(has_line_start) {
    283             line_starts_final_offset = line_starts_scanner.get_final_pos();
     296            line_final_start_offset = line_starts_scanner.get_final_pos();
    284297        //}
    285298        //if(has_line_end) {
    286             line_ends_final_offset = line_ends_scanner.get_final_pos();
     299            line_final_end_offset = line_ends_scanner.get_final_pos();
    287300        //}
    288301           
     
    301314        }
    302315       
    303         copy_back_offset   = (line_starts_final_offset > line_ends_final_offset) ? line_starts_final_offset : (line_ends_final_offset + 1) ;
     316        copy_back_offset   = (line_final_start_offset > line_final_end_offset) ? line_final_start_offset : (line_final_end_offset + 1) ;
    304317        copy_back_size     = bytes_avail - copy_back_offset;   
    305318
     319        assert(("copy_back_offset", (copy_back_offset >= 0)));
     320        assert(("copy_back_offset", (copy_back_offset <= bytes_avail)));           
     321        assert(("copy_back_size", (copy_back_size >= 0)));
     322        assert(("copy_back_size", (copy_back_size < SEGMENT_SIZE)));
     323       
    306324      }
    307325     
     
    316334       
    317335        if(only_matching) {
    318           match_scanner.init();
     336          matches_scanner.init();
    319337        }
    320338       
     
    332350   
    333351          if(only_matching) {
    334             match_scanner.load_block(output.matches, block_index);
     352            matches_scanner.load_block(output.match_follows, block_index);
    335353          }       
    336354         
     
    351369       // Compiler 'do_final_block()' calls.
    352370       @final_block_stmts
    353    
    354        //print_register<BitBlock>("EOF_mask", EOF_mask);
    355371       
    356372       if(only_matching) {
    357           match_scanner.load_block(output.matches & EOF_mask, block_index);
     373          matches_scanner.load_block(output.match_follows & EOF_mask, block_index);
    358374       }
    359375         
     
    364380
    365381       if(only_matching) {
    366           while(match_scanner.has_next()) {
    367             match_offset = match_scanner.scan_to_next() - pattern_size;
     382          while(matches_scanner.has_next()) {
     383            match_offset = matches_scanner.scan_to_next() - pattern_size;
    368384            if(byte_offset) {
    369385                int match_stream_offset = stream_base + match_offset;
     
    373389            // KH: Lookahead.
    374390            fwrite(&buffer[match_offset], 1, pattern_size, outfile);
    375             //fprintf(outfile, "%s\n", fixed_pattern);
    376391            fprintf(outfile, "\n");
    377392          }
    378393         
    379           copy_back_size      = pattern_size;           
     394          copy_back_size      = pattern_size + 1;           
    380395          copy_back_offset    = bytes_avail - copy_back_size;             
    381396        }
     
    383398        if(!only_matching) {
    384399           
    385             //assert(("Line length exceeds segment size.", line_ends_scanner.has_next() && line_starts_scanner.has_next())); 
     400            assert(("Line length exceeds segment size.", line_ends_scanner.has_next() && line_starts_scanner.has_next())); 
    386401                       
    387402            //if(has_line_start) {
    388                 line_starts_final_offset = line_starts_scanner.get_final_pos();
     403                line_final_start_offset = line_starts_scanner.get_final_pos();
    389404            //}
    390405            //if(has_line_end) {
    391                 line_ends_final_offset = line_ends_scanner.get_final_pos();
     406                line_final_end_offset = line_ends_scanner.get_final_pos();
    392407            //}
    393408               
     
    406421            }
    407422
    408             copy_back_offset   = (line_starts_final_offset > line_ends_final_offset) ? line_starts_final_offset : (line_ends_final_offset + 1) ;
     423            copy_back_offset   = (line_final_start_offset > line_final_end_offset) ? line_final_start_offset : (line_final_end_offset + 1) ;
    409424            copy_back_size     = bytes_avail - copy_back_offset;
     425           
     426            assert(("copy_back_offset", (copy_back_offset >= 0)));
     427            assert(("copy_back_offset", (copy_back_offset <= bytes_avail)));           
     428            assert(("copy_back_size", (copy_back_size >= 0)));
     429            assert(("copy_back_size", (copy_back_size < SEGMENT_SIZE)));
     430           
    410431        }       
    411432    }
    412433   
    413     if(0) {
    414         cout << "bytes_avail: " << bytes_avail << endl;
    415         cout << "line_starts_final_offset: " << line_starts_final_offset << endl;
    416         cout << "line_ends_final_offset: " << line_ends_final_offset << endl;
    417         cout << "offset: " << copy_back_offset << ", " << "size: " << copy_back_size << endl;
     434    if(DEBUG) {
     435        printf("bytes_avail: %d\n", bytes_avail);
     436        printf("bytes_remaining: %d\n", bytes_remaining);
     437        printf("copy_back_offset: %d\n", copy_back_offset);
     438        printf("copy_back_size: %d\n", copy_back_size);       
     439        printf("final_line_starts_offset: %d\n", line_final_start_offset);
     440        printf("final_line_ends_offset: %d\n", line_final_end_offset);
    418441    }
    419442   
    420     assert(("copy_back_offset", (copy_back_offset >= 0)));
    421     assert(("copy_back_size", (copy_back_offset < SEGMENT_SIZE)));
    422     assert(("copy_back_size", (copy_back_size >= 0)));
    423     assert(("copy_back_size", (copy_back_size < SEGMENT_SIZE)));
    424    
    425443    memmove(&buffer[0], &buffer[copy_back_offset], copy_back_size);
    426444   
    427     // pablo.ScanToFirst() must clear carry-in at the start of each segment.
     445    // pablo.ScanToFirst() must clear carry-in at the start of each segment
    428446    classifyBytes.clear();
    429447    match.clear();
     
    432450    stream_base += bytes_avail;
    433451    stream_base -= copy_back_size;
    434   }
    435 
    436   if(infile) { fclose(infile); }
    437   if(outfile) { fclose(outfile); }
     452
     453  }
     454
     455  if(infile) { fclose(infile); infile=NULL;}
     456  if(outfile) { fclose(outfile); outfile=NULL;}
    438457 
    439458  return 0;
  • proto/RE/demo/src/Makefile

    r3740 r3741  
    22FIXED_PATTERN=apple
    33OUTFILE=grep
    4 TESTDIR=../test
    5 TESTFILE0=../test/fruitlist0.dat
    6 TESTFILE1=../test/fruitlist1.dat
    7 TESTFILE2=../test/fruitlist2.dat
    8 TESTFILE3=../test/fruitlist3.dat
    9 TESTFILE4=../test/fruitlist4.dat
     4TESTDIR=../test/valid
     5TESTFILE=../test/valid/fruitlist0.dat
    106
    117CC= g++ $(CFLAGS)
     
    3329
    3430test: $(OUTFILE)
    35         for file in ../test/*.dat ; do \
     31        for file in $(TESTDIR)/*.dat ; do \
    3632                # echo $$file ; \
    3733        ./$(OUTFILE) $$file > 0 ; \
     
    4137
    4238test_b: $(OUTFILE)
    43         for file in ../test/*.dat ; do \
     39        for file in $(TESTDIR)/*.dat ; do \
    4440                # echo $$file ; \
    4541        ./$(OUTFILE) $$file -b > 0 ; \
     
    4945
    5046test_o: $(OUTFILE)
    51         for file in ../test/*.dat ; do \
     47        for file in $(TESTDIR)/*.dat ; do \
    5248                # echo $$file ; \
    5349        ./$(OUTFILE) $$file -o > 0 ; \
     
    5854   
    5955test_b_o: $(OUTFILE)
    60         for file in ../test/*.dat ; do \
     56        for file in $(TESTDIR)/*.dat ; do \
    6157                # echo $$file ; \
    6258        ./$(OUTFILE) $$file -o -b > 0 ; \
Note: See TracChangeset for help on using the changeset viewer.