Ignore:
Timestamp:
Nov 6, 2016, 8:37:11 PM (3 years ago)
Author:
nmedfort
Message:

Initial work on adding types to PabloAST and mutable Var objects.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r5191 r5202  
    6262    //  input: 8 basis bit streams
    6363    //  output: 16 u8-indexed streams, + delmask stream + error stream
    64     PabloFunction * function = PabloFunction::Create("u8u16", 8, 18);
     64    PabloFunction * function = PabloFunction::Create("u8u16"); // , 1, 18
    6565    cc::CC_Compiler ccc(*function);
    6666   
    67     PabloBuilder pBuilder(ccc.getBuilder().getPabloBlock(), ccc.getBuilder());
    68     const std::vector<Var *> u8_bits = ccc.getBasisBits();
     67    PabloBuilder & main = ccc.getBuilder();
     68    const auto u8_bits = ccc.getBasisBits();
     69
     70    PabloAST * zeroes = main.createZeroes();
     71
    6972    // Outputs
    70     Assign * u16_hi[8];
    71     Assign * u16_lo[8];
    72     Assign * delmask;
    73     Assign * error_mask;
    74    
    75     // The logic for processing non-ASCII bytes is to be embedded within an if-hierarchy.
     73    Var * u16_hi[8];
     74    for (int i = 0; i < 8; ++i) {
     75        u16_hi[i] = main.createVar("u16_hi" + std::to_string(i), zeroes);
     76    }
     77    Var * u16_lo[8];
     78    for (int i = 0; i < 8; ++i) {
     79        u16_lo[i] = main.createVar("u16_lo" + std::to_string(i), zeroes);
     80    }
     81    Var * delmask = main.createVar("delmask", zeroes);
     82    Var * error_mask = main.createVar("error_mask", zeroes);
     83
     84
     85
     86    // The logic for processing non-ASCII bytes will be embedded within an if-hierarchy.
    7687    PabloAST * nonASCII = ccc.compileCC(re::makeCC(0x80, 0xFF));
    7788   
    7889    // Builder for the if statement handling all non-ASCII logic
    79     PabloBuilder nAb = PabloBuilder::Create(pBuilder);
     90    PabloBuilder nAb = PabloBuilder::Create(main);
    8091    // Bits 3 through 7 of a 2-byte prefix are data bits, needed to
    8192    // produce the UTF-16 code unit data ...,
     
    97108    PabloAST * bit6a2 = p34b.createAdvance(bit6a1, 1);
    98109    PabloAST * bit7a2 = p34b.createAdvance(bit7a1, 1);
     110
     111
     112    Var * const u8scope32 = nAb.createVar("u8scope32", zeroes);
     113    Var * const u8scope33 = nAb.createVar("u8scope33", zeroes);
     114    Var * const u8scope44 = nAb.createVar("u8scope44", zeroes);
     115
    99116    //
    100117    // Logic for 4-byte UTF-8 sequences
     
    110127    PabloAST * F4_err = p4b.createAnd(p4b.createAdvance(F4, 1), ccc.compileCC(re::makeCC(0x90, 0xBF), p4b));
    111128    PabloAST * F5_FF = ccc.compileCC(re::makeCC(0xF5, 0xFF), p4b);
    112     Assign * FX_err = p4b.createAssign("FX_err", p4b.createOr(F5_FF, p4b.createOr(F0_err, F4_err)));
     129
     130    Var * FX_err = p34b.createVar("FX_err", zeroes);
     131    p4b.createAssign(FX_err, p4b.createOr(F5_FF, p4b.createOr(F0_err, F4_err)));
    113132    //
    114133    // 4-byte prefixes have a scope that extends over the next 3 bytes.
    115     Assign * u8scope42 = p4b.createAssign("u8scope42", p4b.createAdvance(pfx4, 1));
    116     Assign * u8scope43 = p4b.createAssign("u8scope43", p4b.createAdvance(u8scope42, 1));
    117     Assign * u8scope44 = p4b.createAssign("u8scope44", p4b.createAdvance(u8scope43, 1));
     134
     135    Var * u8scope42 = p34b.createVar("u8scope42", zeroes);
     136    Var * u8scope43 = p34b.createVar("u8scope43", zeroes);
     137
     138    p4b.createAssign(u8scope42, p4b.createAdvance(pfx4, 1));
     139    p4b.createAssign(u8scope43, p4b.createAdvance(u8scope42, 1));
     140    p4b.createAssign(u8scope44, p4b.createAdvance(u8scope43, 1));
    118141    //
    119142   
     
    121144    //  we must calculate the value abcde - 1 to produce the bit values
    122145    //  for u16_hi6, hi7, lo0, lo1 at the scope43 position.
    123     Assign * s43_lo1 = p4b.createAssign("scope43_lo1", p4b.createAnd(u8scope43, p4b.createNot(bit3a1)));           // e - 1
    124     Assign * s43_lo0 = p4b.createAssign("scope43_lo0", p4b.createAnd(u8scope43, p4b.createXor(bit2a1, s43_lo1)));  // d - borrow
     146    Var * s43_lo0 = nAb.createVar("scope43_lo0", zeroes);
     147    Var * s43_lo1 = nAb.createVar("scope43_lo1", zeroes);
     148    Var * s43_hi6 = nAb.createVar("scope43_hi6", zeroes);
     149    Var * s43_hi7 = nAb.createVar("scope43_hi7", zeroes);
     150
     151    Var * s43_lo2 = main.createVar("scope43_lo2", zeroes);
     152    Var * s43_lo3 = main.createVar("scope43_lo3", zeroes);
     153    Var * s43_lo4 = main.createVar("scope43_lo4", zeroes);
     154    Var * s43_lo5 = main.createVar("scope43_lo5", zeroes);
     155    Var * s43_lo6 = main.createVar("scope43_lo6", zeroes);
     156    Var * s43_lo7 = main.createVar("scope43_lo7", zeroes);
     157
     158    p4b.createAssign(s43_lo1, p4b.createAnd(u8scope43, p4b.createNot(bit3a1)));           // e - 1
     159    p4b.createAssign(s43_lo0, p4b.createAnd(u8scope43, p4b.createXor(bit2a1, s43_lo1)));  // d - borrow
    125160    PabloAST * brw1 = p4b.createAnd(s43_lo1, p4b.createNot(bit2a1));
    126     Assign * s43_hi7 = p4b.createAssign("scope43_hi7", p4b.createAnd(u8scope43, p4b.createXor(bit7a2, brw1)));     // c - borrow
     161    p4b.createAssign(s43_hi7, p4b.createAnd(u8scope43, p4b.createXor(bit7a2, brw1)));     // c - borrow
    127162    PabloAST * brw2 = p4b.createAnd(brw1, p4b.createNot(bit7a2));
    128     Assign * s43_hi6 = p4b.createAssign("scope43_hi6", p4b.createAnd(u8scope43, p4b.createXor(bit6a2, brw2)));     // b - borrow
    129     //
    130     Assign * s43_lo2 = p4b.createAssign("scope43_lo2", p4b.createAnd(u8scope43, bit4a1));
    131     Assign * s43_lo3 = p4b.createAssign("scope43_lo3", p4b.createAnd(u8scope43, bit5a1));
    132     Assign * s43_lo4 = p4b.createAssign("scope43_lo4", p4b.createAnd(u8scope43, bit6a1));
    133     Assign * s43_lo5 = p4b.createAssign("scope43_lo5", p4b.createAnd(u8scope43, bit7a1));
    134     Assign * s43_lo6 = p4b.createAssign("scope43_lo6", p4b.createAnd(u8scope43, u8_bits[2]));
    135     Assign * s43_lo7 = p4b.createAssign("scope43_lo7", p4b.createAnd(u8scope43, u8_bits[3]));
    136     //
    137     //
    138     p34b.createIf(pfx4,
    139                   {FX_err, u8scope42, u8scope43, u8scope44, s43_hi6, s43_hi7,
    140                    s43_lo0, s43_lo1, s43_lo2, s43_lo3, s43_lo4, s43_lo5, s43_lo6, s43_lo7},
    141                    p4b);
     163    p4b.createAssign(s43_hi6, p4b.createAnd(u8scope43, p4b.createXor(bit6a2, brw2)));     // b - borrow
     164    //
     165    p4b.createAssign(s43_lo2, p4b.createAnd(u8scope43, bit4a1));
     166    p4b.createAssign(s43_lo3, p4b.createAnd(u8scope43, bit5a1));
     167    p4b.createAssign(s43_lo4, p4b.createAnd(u8scope43, bit6a1));
     168    p4b.createAssign(s43_lo5, p4b.createAnd(u8scope43, bit7a1));
     169    p4b.createAssign(s43_lo6, p4b.createAnd(u8scope43, u8_bits[2]));
     170    p4b.createAssign(s43_lo7, p4b.createAnd(u8scope43, u8_bits[3]));
     171    //
     172    //
     173    p34b.createIf(pfx4, p4b);
    142174    //
    143175    // Combined logic for 3 and 4 byte sequences
    144176    //
    145177    PabloAST * pfx3 = ccc.compileCC(re::makeCC(0xE0, 0xEF), p34b);
    146     Assign * u8scope32 = p34b.createAssign("u8scope32", p34b.createAdvance(pfx3, 1));
    147     Assign * u8scope33 = p34b.createAssign("u8scope33", p34b.createAdvance(u8scope32, 1));
     178
     179    p34b.createAssign(u8scope32, p34b.createAdvance(pfx3, 1));
     180    p34b.createAssign(u8scope33, p34b.createAdvance(u8scope32, 1));
    148181
    149182    // Illegal 3-byte sequences
     
    152185    PabloAST * E0_err = p34b.createAnd(p34b.createAdvance(E0, 1), ccc.compileCC(re::makeCC(0x80, 0x9F), p34b));
    153186    PabloAST * ED_err = p34b.createAnd(p34b.createAdvance(ED, 1), ccc.compileCC(re::makeCC(0xA0, 0xBF), p34b));
    154     Assign * EX_FX_err = p34b.createAssign("EX_FX_err", p34b.createOr(p34b.createOr(E0_err, ED_err), FX_err));
     187    Var * EX_FX_err = nAb.createVar("EX_FX_err", zeroes);
     188
     189    p34b.createAssign(EX_FX_err, p34b.createOr(p34b.createOr(E0_err, ED_err), FX_err));
    155190    // Two surrogate UTF-16 units are computed at the 3rd and 4th positions of 4-byte sequences.
    156191    PabloAST * surrogate = p34b.createOr(u8scope43, u8scope44);
    157192   
    158     Assign * p34del = p34b.createAssign("p34del", p34b.createOr(u8scope32, u8scope42));
     193    Var * p34del = nAb.createVar("p34del", zeroes);
     194    p34b.createAssign(p34del, p34b.createOr(u8scope32, u8scope42));
    159195
    160196
    161197    // The high 5 bits of the UTF-16 code unit are only nonzero for 3 and 4-byte
    162198    // UTF-8 sequences.
    163     u16_hi[0] = p34b.createAssign("u16_hi0", p34b.createOr(p34b.createAnd(u8scope33, bit4a2), surrogate));
    164     u16_hi[1] = p34b.createAssign("u16_hi1", p34b.createOr(p34b.createAnd(u8scope33, bit5a2), surrogate));
    165     u16_hi[2] = p34b.createAssign("u16_hi2", p34b.createAnd(u8scope33, bit6a2));
    166     u16_hi[3] = p34b.createAssign("u16_hi3", p34b.createOr(p34b.createAnd(u8scope33, bit7a2), surrogate));
    167     u16_hi[4] = p34b.createAssign("u16_hi4", p34b.createOr(p34b.createAnd(u8scope33, bit2a1), surrogate));
    168    
    169     //
    170     nAb.createIf(pfx34,
    171                  {u8scope33, EX_FX_err, p34del,
    172                   u16_hi[0], u16_hi[1], u16_hi[2], u16_hi[3], u16_hi[4], u8scope44, s43_hi6, s43_hi7,
    173                   s43_lo0, s43_lo1, s43_lo2, s43_lo3, s43_lo4, s43_lo5, s43_lo6, s43_lo7},
    174                  p34b);
     199    p34b.createAssign(u16_hi[0], p34b.createOr(p34b.createAnd(u8scope33, bit4a2), surrogate));
     200    p34b.createAssign(u16_hi[1], p34b.createOr(p34b.createAnd(u8scope33, bit5a2), surrogate));
     201    p34b.createAssign(u16_hi[2], p34b.createAnd(u8scope33, bit6a2));
     202    p34b.createAssign(u16_hi[3], p34b.createOr(p34b.createAnd(u8scope33, bit7a2), surrogate));
     203    p34b.createAssign(u16_hi[4], p34b.createOr(p34b.createAnd(u8scope33, bit2a1), surrogate));
     204   
     205    //
     206    nAb.createIf(pfx34, p34b);
    175207    //
    176208    // Combined logic for 2, 3 and 4 byte sequences
    177209    //
     210
     211    Var * u8lastscope = main.createVar("u8lastscope", zeroes);
     212
    178213    PabloAST * pfx2 = ccc.compileCC(re::makeCC(0xC0, 0xDF), nAb);
    179214    PabloAST * u8scope22 = nAb.createAdvance(pfx2, 1);
    180     Assign * u8lastscope = nAb.createAssign("u8lastscope", nAb.createOr(u8scope22, nAb.createOr(u8scope33, u8scope44)));
     215    nAb.createAssign(u8lastscope, nAb.createOr(u8scope22, nAb.createOr(u8scope33, u8scope44)));
    181216    PabloAST * u8anyscope = nAb.createOr(u8lastscope, p34del);
    182217
    183218    PabloAST * C0_C1_err = ccc.compileCC(re::makeCC(0xC0, 0xC1), nAb);
    184219    PabloAST * scope_suffix_mismatch = nAb.createXor(u8anyscope, ccc.compileCC(re::makeCC(0x80, 0xBF), nAb));
    185     error_mask = nAb.createAssign("errormask", nAb.createOr(scope_suffix_mismatch, nAb.createOr(C0_C1_err, EX_FX_err)));
    186     delmask = nAb.createAssign("delmask", nAb.createOr(p34del, ccc.compileCC(re::makeCC(0xC0, 0xFF), nAb)));
     220    nAb.createAssign(error_mask, nAb.createOr(scope_suffix_mismatch, nAb.createOr(C0_C1_err, EX_FX_err)));
     221    nAb.createAssign(delmask, nAb.createOr(p34del, ccc.compileCC(re::makeCC(0xC0, 0xFF), nAb)));
    187222   
    188223    // The low 3 bits of the high byte of the UTF-16 code unit as well as the high bit of the
    189224    // low byte are only nonzero for 2, 3 and 4 byte sequences.
    190     u16_hi[5] = nAb.createAssign("u16_hi5", nAb.createOr(nAb.createAnd(u8lastscope, bit3a1), u8scope44));
    191     u16_hi[6] = nAb.createAssign("u16_hi6", nAb.createOr(nAb.createAnd(u8lastscope, bit4a1), s43_hi6));
    192     u16_hi[7] = nAb.createAssign("u16_hi7", nAb.createOr(nAb.createAnd(u8lastscope, bit5a1), s43_hi7));
    193     u16_lo[0] = nAb.createAssign("u16_lo0", nAb.createOr(nAb.createAnd(u8lastscope, bit6a1), s43_lo0));
    194     Assign * p234_lo1 = nAb.createAssign("p234_lo1", nAb.createOr(nAb.createAnd(u8lastscope, bit7a1), s43_lo1));
    195 
    196     pBuilder.createIf(nonASCII,
    197                       {error_mask, delmask, u8lastscope,
    198                        u16_hi[0], u16_hi[1], u16_hi[2], u16_hi[3], u16_hi[4], u16_hi[5], u16_hi[6], u16_hi[7],
    199                        u16_lo[0], p234_lo1, s43_lo2, s43_lo3, s43_lo4, s43_lo5, s43_lo6, s43_lo7},
    200                       nAb);
     225    nAb.createAssign(u16_hi[5], nAb.createOr(nAb.createAnd(u8lastscope, bit3a1), u8scope44));
     226    nAb.createAssign(u16_hi[6], nAb.createOr(nAb.createAnd(u8lastscope, bit4a1), s43_hi6));
     227    nAb.createAssign(u16_hi[7], nAb.createOr(nAb.createAnd(u8lastscope, bit5a1), s43_hi7));
     228    nAb.createAssign(u16_lo[0], nAb.createOr(nAb.createAnd(u8lastscope, bit6a1), s43_lo0));
     229
     230    Var * p234_lo1 = main.createVar("p234_lo1", zeroes);
     231
     232    nAb.createAssign(p234_lo1, nAb.createOr(nAb.createAnd(u8lastscope, bit7a1), s43_lo1));
     233
     234    main.createIf(nonASCII, nAb);
    201235    //
    202236    //
    203237    PabloAST * ASCII = ccc.compileCC(re::makeCC(0x0, 0x7F));
    204     PabloAST * last_byte = pBuilder.createOr(ASCII, u8lastscope);
    205     u16_lo[1] = pBuilder.createAssign("u16_lo1", pBuilder.createOr(pBuilder.createAnd(ASCII, u8_bits[1]), p234_lo1));
    206     u16_lo[2] = pBuilder.createAssign("u16_lo2", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[2]), s43_lo2));
    207     u16_lo[3] = pBuilder.createAssign("u16_lo3", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[3]), s43_lo3));
    208     u16_lo[4] = pBuilder.createAssign("u16_lo4", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[4]), s43_lo4));
    209     u16_lo[5] = pBuilder.createAssign("u16_lo5", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[5]), s43_lo5));
    210     u16_lo[6] = pBuilder.createAssign("u16_lo6", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[6]), s43_lo6));
    211     u16_lo[7] = pBuilder.createAssign("u16_lo7", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[7]), s43_lo7));
    212    
     238    PabloAST * last_byte = main.createOr(ASCII, u8lastscope);
     239    main.createAssign(u16_lo[1], main.createOr(main.createAnd(ASCII, u8_bits[1]), p234_lo1));
     240    main.createAssign(u16_lo[2], main.createOr(main.createAnd(last_byte, u8_bits[2]), s43_lo2));
     241    main.createAssign(u16_lo[3], main.createOr(main.createAnd(last_byte, u8_bits[3]), s43_lo3));
     242    main.createAssign(u16_lo[4], main.createOr(main.createAnd(last_byte, u8_bits[4]), s43_lo4));
     243    main.createAssign(u16_lo[5], main.createOr(main.createAnd(last_byte, u8_bits[5]), s43_lo5));
     244    main.createAssign(u16_lo[6], main.createOr(main.createAnd(last_byte, u8_bits[6]), s43_lo6));
     245    main.createAssign(u16_lo[7], main.createOr(main.createAnd(last_byte, u8_bits[7]), s43_lo7));
     246   
     247    Var * output = function->addResult("output", getStreamTy(1, 18));
    213248    for (unsigned i = 0; i < 8; i++) {
    214         function->setResult(i, pBuilder.createAssign("u16_hi" + std::to_string(i), u16_hi[i]));
    215         function->setResult(i+8, pBuilder.createAssign("u16_lo" + std::to_string(i), u16_lo[i]));
    216     }
    217     function->setResult(16, pBuilder.createAssign("delbits", delmask));
    218     function->setResult(17, pBuilder.createAssign("errors", error_mask));
     249        main.createAssign(main.createExtract(output, i), u16_hi[i]);
     250    }
     251    for (unsigned i = 0; i < 8; i++) {
     252        main.createAssign(main.createExtract(output, i + 8), u16_lo[i]);
     253    }
     254    main.createAssign(main.createExtract(output, 16), delmask);
     255    main.createAssign(main.createExtract(output, 17), error_mask);
    219256
    220257    return function;
     
    233270    const unsigned bufferSegments = codegen::BufferSegments;
    234271   
    235     ExternalFileBuffer ByteStream(iBuilder, StreamSetType(1, i8));
    236     //SingleBlockBuffer BasisBits(iBuilder, StreamSetType(8, i1));
    237     CircularBuffer BasisBits(iBuilder, StreamSetType(8, i1), segmentSize * bufferSegments);
    238 
    239     //SingleBlockBuffer U8u16Bits(iBuilder, StreamSetType(18, i1));
    240     CircularBuffer U8u16Bits(iBuilder, StreamSetType(18, i1), segmentSize * bufferSegments);
    241 
    242     //SingleBlockBuffer U16Bits(iBuilder, StreamSetType(16, i1));
    243     CircularBuffer U16Bits(iBuilder, StreamSetType(16, i1), segmentSize * bufferSegments);
    244    
    245     //SingleBlockBuffer DeletionCounts(iBuilder, StreamSetType(1, i1));
    246     CircularBuffer DeletionCounts(iBuilder, StreamSetType(1, i1), segmentSize * bufferSegments );
    247    
     272    ExternalFileBuffer ByteStream(iBuilder, StreamSetType(iBuilder,1, 8));
     273
     274    CircularBuffer BasisBits(iBuilder, StreamSetType(iBuilder,8, 1), segmentSize * bufferSegments);
     275
     276    CircularBuffer U8u16Bits(iBuilder, StreamSetType(iBuilder, 18, 1), segmentSize * bufferSegments);
     277
     278    CircularBuffer U16Bits(iBuilder, StreamSetType(iBuilder,16, 1), segmentSize * bufferSegments);
     279   
     280    CircularBuffer DeletionCounts(iBuilder, StreamSetType(iBuilder, 1, 1), segmentSize * bufferSegments);
     281
    248282    // Different choices for the output buffer depending on chosen option.
    249     ExternalFileBuffer U16external(iBuilder, StreamSetType(1, i16));
    250     LinearCopybackBuffer U16out(iBuilder, StreamSetType(1, i16), segmentSize * bufferSegments + 2);
     283    ExternalFileBuffer U16external(iBuilder, StreamSetType(iBuilder,1, 16));
     284    LinearCopybackBuffer U16out(iBuilder, StreamSetType(iBuilder,1, 16), segmentSize * bufferSegments + 2);
    251285
    252286    s2pKernel  s2pk(iBuilder);
     
    254288
    255289    pablo_function_passes(function);
    256     pablo::PabloKernel  u8u16k(iBuilder, "u8u16", function, {});
     290    pablo::PabloKernel u8u16k(iBuilder, "u8u16", function);
    257291    u8u16k.generateKernel({&BasisBits}, {&U8u16Bits});
    258292   
    259     deletionKernel delK(iBuilder, iBuilder->getBitBlockWidth()/16, 16);
     293    DeletionKernel delK(iBuilder, iBuilder->getBitBlockWidth()/16, 16);
    260294    delK.generateKernel({&U8u16Bits}, {&U16Bits, &DeletionCounts});
    261295   
Note: See TracChangeset for help on using the changeset viewer.