Changeset 6246


Ignore:
Timestamp:
Dec 20, 2018, 7:55:35 AM (5 weeks ago)
Author:
cameron
Message:

Use little-endian bit numbering for u8u16

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r6228 r6246  
    9393    Var * error_mask = main.createVar("error_mask", zeroes);
    9494
    95     cc::Parabix_CC_Compiler ccc(getEntryScope(), u8_bits, cc::BitNumbering::BigEndian);
     95    cc::Parabix_CC_Compiler ccc(getEntryScope(), u8_bits);
    9696
    9797    // The logic for processing non-ASCII bytes will be embedded within an if-hierarchy.
     
    100100    // Builder for the if statement handling all non-ASCII logic
    101101    auto nAb = main.createScope();
    102     // Bits 3 through 7 of a 2-byte prefix are data bits, needed to
     102    // Bits LE4 through LE0 of a 2-byte prefix are data bits, needed to
    103103    // produce the UTF-16 code unit data ...,
     104    PabloAST * bit4a1 = nAb.createAdvance(u8_bits[4], 1);
    104105    PabloAST * bit3a1 = nAb.createAdvance(u8_bits[3], 1);
    105     PabloAST * bit4a1 = nAb.createAdvance(u8_bits[4], 1);
    106     PabloAST * bit5a1 = nAb.createAdvance(u8_bits[5], 1);
    107     PabloAST * bit6a1 = nAb.createAdvance(u8_bits[6], 1);
    108     PabloAST * bit7a1 = nAb.createAdvance(u8_bits[7], 1);
     106    PabloAST * bit2a1 = nAb.createAdvance(u8_bits[2], 1);
     107    PabloAST * bit1a1 = nAb.createAdvance(u8_bits[1], 1);
     108    PabloAST * bit0a1 = nAb.createAdvance(u8_bits[0], 1);
    109109
    110110    // Entry condition for 3 or 4 byte sequences: we have a prefix byte in the range 0xE0-0xFF.
     
    112112    // Builder for the if statement handling all logic for 3- and 4-byte sequences.
    113113    auto p34b = nAb.createScope();
    114     // Bits 4 through 7 of a 3-byte prefix are data bits.  They must be moved
     114    // Bits LE3 through LE0 of a 3-byte prefix are data bits.  They must be moved
    115115    // to the final position of the 3-byte sequence.
    116     PabloAST * bit2a1 = p34b.createAdvance(u8_bits[2], 1);
    117     PabloAST * bit4a2 = p34b.createAdvance(bit4a1, 1);
    118     PabloAST * bit5a2 = p34b.createAdvance(bit5a1, 1);
    119     PabloAST * bit6a2 = p34b.createAdvance(bit6a1, 1);
    120     PabloAST * bit7a2 = p34b.createAdvance(bit7a1, 1);
     116    PabloAST * bit5a1 = p34b.createAdvance(u8_bits[5], 1);
     117    PabloAST * bit3a2 = p34b.createAdvance(bit3a1, 1);
     118    PabloAST * bit2a2 = p34b.createAdvance(bit2a1, 1);
     119    PabloAST * bit1a2 = p34b.createAdvance(bit1a1, 1);
     120    PabloAST * bit0a2 = p34b.createAdvance(bit0a1, 1);
    121121
    122122    Var * const u8scope32 = nAb.createVar("u8scope32", zeroes);
     
    153153    //  From the 4-byte sequence 11110abc 10defghi 10jklmno 10pqrstu,
    154154    //  we must calculate the value abcde - 1 to produce the bit values
    155     //  for u16_hi6, hi7, lo0, lo1 at the scope43 position.
    156     Var * s43_lo0 = nAb.createVar("scope43_lo0", zeroes);
    157     Var * s43_lo1 = nAb.createVar("scope43_lo1", zeroes);
    158     Var * s43_hi6 = nAb.createVar("scope43_hi6", zeroes);
    159     Var * s43_hi7 = nAb.createVar("scope43_hi7", zeroes);
    160 
     155    //  for u16_hi1, hi0, lo7, lo6 at the scope43 position.
     156    Var * s43_lo7 = nAb.createVar("scope43_lo7", zeroes);
     157    Var * s43_lo6 = nAb.createVar("scope43_lo6", zeroes);
     158    Var * s43_hi1 = nAb.createVar("scope43_hi1", zeroes);
     159    Var * s43_hi0 = nAb.createVar("scope43_hi0", zeroes);
     160
     161    Var * s43_lo5 = main.createVar("scope43_lo5", zeroes);
     162    Var * s43_lo4 = main.createVar("scope43_lo4", zeroes);
     163    Var * s43_lo3 = main.createVar("scope43_lo3", zeroes);
    161164    Var * s43_lo2 = main.createVar("scope43_lo2", zeroes);
    162     Var * s43_lo3 = main.createVar("scope43_lo3", zeroes);
    163     Var * s43_lo4 = main.createVar("scope43_lo4", zeroes);
    164     Var * s43_lo5 = main.createVar("scope43_lo5", zeroes);
    165     Var * s43_lo6 = main.createVar("scope43_lo6", zeroes);
    166     Var * s43_lo7 = main.createVar("scope43_lo7", zeroes);
    167 
    168     p4b.createAssign(s43_lo1, p4b.createAnd(u8scope43, p4b.createNot(bit3a1)));           // e - 1
    169     p4b.createAssign(s43_lo0, p4b.createAnd(u8scope43, p4b.createXor(bit2a1, s43_lo1)));  // d - borrow
    170     PabloAST * brw1 = p4b.createAnd(s43_lo1, p4b.createNot(bit2a1));
    171     p4b.createAssign(s43_hi7, p4b.createAnd(u8scope43, p4b.createXor(bit7a2, brw1)));     // c - borrow
    172     PabloAST * brw2 = p4b.createAnd(brw1, p4b.createNot(bit7a2));
    173     p4b.createAssign(s43_hi6, p4b.createAnd(u8scope43, p4b.createXor(bit6a2, brw2)));     // b - borrow
    174     //
    175     p4b.createAssign(s43_lo2, p4b.createAnd(u8scope43, bit4a1));
    176     p4b.createAssign(s43_lo3, p4b.createAnd(u8scope43, bit5a1));
    177     p4b.createAssign(s43_lo4, p4b.createAnd(u8scope43, bit6a1));
    178     p4b.createAssign(s43_lo5, p4b.createAnd(u8scope43, bit7a1));
    179     p4b.createAssign(s43_lo6, p4b.createAnd(u8scope43, u8_bits[2]));
    180     p4b.createAssign(s43_lo7, p4b.createAnd(u8scope43, u8_bits[3]));
     165    Var * s43_lo1 = main.createVar("scope43_lo1", zeroes);
     166    Var * s43_lo0 = main.createVar("scope43_lo0", zeroes);
     167
     168    p4b.createAssign(s43_lo6, p4b.createAnd(u8scope43, p4b.createNot(bit4a1)));           // e - 1
     169    p4b.createAssign(s43_lo7, p4b.createAnd(u8scope43, p4b.createXor(bit5a1, s43_lo6)));  // d - borrow
     170    PabloAST * brw1 = p4b.createAnd(s43_lo6, p4b.createNot(bit5a1));
     171    p4b.createAssign(s43_hi0, p4b.createAnd(u8scope43, p4b.createXor(bit0a2, brw1)));     // c - borrow
     172    PabloAST * brw2 = p4b.createAnd(brw1, p4b.createNot(bit0a2));
     173    p4b.createAssign(s43_hi1, p4b.createAnd(u8scope43, p4b.createXor(bit1a2, brw2)));     // b - borrow
     174    //
     175    p4b.createAssign(s43_lo5, p4b.createAnd(u8scope43, bit3a1));
     176    p4b.createAssign(s43_lo4, p4b.createAnd(u8scope43, bit2a1));
     177    p4b.createAssign(s43_lo3, p4b.createAnd(u8scope43, bit1a1));
     178    p4b.createAssign(s43_lo2, p4b.createAnd(u8scope43, bit0a1));
     179    p4b.createAssign(s43_lo1, p4b.createAnd(u8scope43, u8_bits[5]));
     180    p4b.createAssign(s43_lo0, p4b.createAnd(u8scope43, u8_bits[4]));
    181181    //
    182182    //
     
    207207    // The high 5 bits of the UTF-16 code unit are only nonzero for 3 and 4-byte
    208208    // UTF-8 sequences.
    209     p34b.createAssign(u16_hi[0], p34b.createOr(p34b.createAnd(u8scope33, bit4a2), surrogate));
    210     p34b.createAssign(u16_hi[1], p34b.createOr(p34b.createAnd(u8scope33, bit5a2), surrogate));
    211     p34b.createAssign(u16_hi[2], p34b.createAnd(u8scope33, bit6a2));
    212     p34b.createAssign(u16_hi[3], p34b.createOr(p34b.createAnd(u8scope33, bit7a2), surrogate));
    213     p34b.createAssign(u16_hi[4], p34b.createOr(p34b.createAnd(u8scope33, bit2a1), surrogate));
     209    p34b.createAssign(u16_hi[7], p34b.createOr(p34b.createAnd(u8scope33, bit3a2), surrogate));
     210    p34b.createAssign(u16_hi[6], p34b.createOr(p34b.createAnd(u8scope33, bit2a2), surrogate));
     211    p34b.createAssign(u16_hi[5], p34b.createAnd(u8scope33, bit1a2));
     212    p34b.createAssign(u16_hi[4], p34b.createOr(p34b.createAnd(u8scope33, bit0a2), surrogate));
     213    p34b.createAssign(u16_hi[3], p34b.createOr(p34b.createAnd(u8scope33, bit5a1), surrogate));
    214214
    215215    //
     
    233233    // The low 3 bits of the high byte of the UTF-16 code unit as well as the high bit of the
    234234    // low byte are only nonzero for 2, 3 and 4 byte sequences.
    235     nAb.createAssign(u16_hi[5], nAb.createOr(nAb.createAnd(u8lastscope, bit3a1), u8scope44));
    236     nAb.createAssign(u16_hi[6], nAb.createOr(nAb.createAnd(u8lastscope, bit4a1), s43_hi6));
    237     nAb.createAssign(u16_hi[7], nAb.createOr(nAb.createAnd(u8lastscope, bit5a1), s43_hi7));
    238     nAb.createAssign(u16_lo[0], nAb.createOr(nAb.createAnd(u8lastscope, bit6a1), s43_lo0));
    239 
    240     Var * p234_lo1 = main.createVar("p234_lo1", zeroes);
    241 
    242     nAb.createAssign(p234_lo1, nAb.createOr(nAb.createAnd(u8lastscope, bit7a1), s43_lo1));
     235    nAb.createAssign(u16_hi[2], nAb.createOr(nAb.createAnd(u8lastscope, bit4a1), u8scope44));
     236    nAb.createAssign(u16_hi[1], nAb.createOr(nAb.createAnd(u8lastscope, bit3a1), s43_hi1));
     237    nAb.createAssign(u16_hi[0], nAb.createOr(nAb.createAnd(u8lastscope, bit2a1), s43_hi0));
     238    nAb.createAssign(u16_lo[7], nAb.createOr(nAb.createAnd(u8lastscope, bit1a1), s43_lo7));
     239
     240    Var * p234_lo6 = main.createVar("p234_lo6", zeroes);
     241
     242    nAb.createAssign(p234_lo6, nAb.createOr(nAb.createAnd(u8lastscope, bit0a1), s43_lo6));
    243243
    244244    main.createIf(nonASCII, nAb);
     
    247247    PabloAST * ASCII = ccc.compileCC(re::makeByte(0x0, 0x7F));
    248248    PabloAST * last_byte = main.createOr(ASCII, u8lastscope);
    249     main.createAssign(u16_lo[1], main.createOr(main.createAnd(ASCII, u8_bits[1]), p234_lo1));
     249    main.createAssign(u16_lo[6], main.createOr(main.createAnd(ASCII, u8_bits[6]), p234_lo6));
     250    main.createAssign(u16_lo[5], main.createOr(main.createAnd(last_byte, u8_bits[5]), s43_lo5));
     251    main.createAssign(u16_lo[4], main.createOr(main.createAnd(last_byte, u8_bits[4]), s43_lo4));
     252    main.createAssign(u16_lo[3], main.createOr(main.createAnd(last_byte, u8_bits[3]), s43_lo3));
    250253    main.createAssign(u16_lo[2], main.createOr(main.createAnd(last_byte, u8_bits[2]), s43_lo2));
    251     main.createAssign(u16_lo[3], main.createOr(main.createAnd(last_byte, u8_bits[3]), s43_lo3));
    252     main.createAssign(u16_lo[4], main.createOr(main.createAnd(last_byte, u8_bits[4]), s43_lo4));
    253     main.createAssign(u16_lo[5], main.createOr(main.createAnd(last_byte, u8_bits[5]), s43_lo5));
    254     main.createAssign(u16_lo[6], main.createOr(main.createAnd(last_byte, u8_bits[6]), s43_lo6));
    255     main.createAssign(u16_lo[7], main.createOr(main.createAnd(last_byte, u8_bits[7]), s43_lo7));
     254    main.createAssign(u16_lo[1], main.createOr(main.createAnd(last_byte, u8_bits[1]), s43_lo1));
     255    main.createAssign(u16_lo[0], main.createOr(main.createAnd(last_byte, u8_bits[0]), s43_lo0));
    256256
    257257    Var * output = getOutputStreamVar("u16bit");
    258258    for (unsigned i = 0; i < 8; i++) {
    259         main.createAssign(main.createExtract(output, i), u16_hi[i]);
     259        main.createAssign(main.createExtract(output, i + 8), u16_hi[i]);
    260260    }
    261261    for (unsigned i = 0; i < 8; i++) {
    262         main.createAssign(main.createExtract(output, i + 8), u16_lo[i]);
     262        main.createAssign(main.createExtract(output, i), u16_lo[i]);
    263263    }
    264264    PabloAST * selectors = main.createInFile(main.createNot(delmask));
     
    279279    // Transposed bits from s2p
    280280    StreamSet * BasisBits = P->CreateStreamSet(8);
    281     P->CreateKernelCall<S2PKernel>(ByteStream, BasisBits, cc::BitNumbering::BigEndian);
     281    P->CreateKernelCall<S2PKernel>(ByteStream, BasisBits);
    282282
    283283    // Calculate UTF-16 data bits through bitwise logic on u8-indexed streams.
     
    301301        // Produce unswizzled UTF-16 bit streams
    302302        P->CreateKernelCall<SwizzleGenerator>(u16Swizzles, std::vector<StreamSet *>{u16bits});
    303         P->CreateKernelCall<P2S16Kernel>(u16bits, u16bytes, cc::BitNumbering::BigEndian);
     303        P->CreateKernelCall<P2S16Kernel>(u16bits, u16bytes);
    304304    } else {
    305305        P->CreateKernelCall<FieldCompressKernel>(b->getBitBlockWidth()/16, u8bits, selectors, u16bits);
    306         P->CreateKernelCall<P2S16KernelWithCompressedOutput>(u16bits, selectors, u16bytes, cc::BitNumbering::BigEndian);
     306        P->CreateKernelCall<P2S16KernelWithCompressedOutput>(u16bits, selectors, u16bytes);
    307307    }
    308308
Note: See TracChangeset for help on using the changeset viewer.