Ignore:
Timestamp:
Jun 14, 2018, 10:41:45 AM (13 months ago)
Author:
cameron
Message:

Little-endian/big-endian bit numbering options; defaults to little-endian

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r6069 r6089  
    3333    p1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, t0, shift), t1);
    3434}
    35 //#define LITTLE_ENDIAN_BIT_NUMBERING
    36 
    37 void s2p(const std::unique_ptr<KernelBuilder> & iBuilder, Value * input[], Value * output[]) {
    38     Value * bit00224466[4];
    39     Value * bit11335577[4];
     35
     36void s2p(const std::unique_ptr<KernelBuilder> & iBuilder, Value * input[], Value * output[], cc::BitNumbering basisNumbering) {
     37    // Little-endian bit number is used for variables.
     38    Value * bit66442200[4];
     39    Value * bit77553311[4];
    4040
    4141    for (unsigned i = 0; i < 4; i++) {
    4242        Value * s0 = input[2 * i];
    4343        Value * s1 = input[2 * i + 1];
    44         s2p_step(iBuilder, s0, s1, iBuilder->simd_himask(2), 1, bit00224466[i], bit11335577[i]);
    45     }
    46     Value * bit00004444[2];
    47     Value * bit22226666[2];
    48     Value * bit11115555[2];
    49     Value * bit33337777[2];
     44        s2p_step(iBuilder, s0, s1, iBuilder->simd_himask(2), 1, bit77553311[i], bit66442200[i]);
     45    }
     46    Value * bit44440000[2];
     47    Value * bit66662222[2];
     48    Value * bit55551111[2];
     49    Value * bit77773333[2];
    5050    for (unsigned j = 0; j<2; j++) {
    51         s2p_step(iBuilder, bit00224466[2*j], bit00224466[2*j+1],
    52                  iBuilder->simd_himask(4), 2, bit00004444[j], bit22226666[j]);
    53         s2p_step(iBuilder, bit11335577[2*j], bit11335577[2*j+1],
    54                  iBuilder->simd_himask(4), 2, bit11115555[j], bit33337777[j]);
    55     }
    56 #ifndef LITTLE_ENDIAN_BIT_NUMBERING
    57     s2p_step(iBuilder, bit00004444[0], bit00004444[1], iBuilder->simd_himask(8), 4, output[0], output[4]);
    58     s2p_step(iBuilder, bit11115555[0], bit11115555[1], iBuilder->simd_himask(8), 4, output[1], output[5]);
    59     s2p_step(iBuilder, bit22226666[0], bit22226666[1], iBuilder->simd_himask(8), 4, output[2], output[6]);
    60     s2p_step(iBuilder, bit33337777[0], bit33337777[1], iBuilder->simd_himask(8), 4, output[3], output[7]);
    61 #else
    62     s2p_step(iBuilder, bit00004444[0], bit00004444[1], iBuilder->simd_himask(8), 4, output[7], output[3]);
    63     s2p_step(iBuilder, bit11115555[0], bit11115555[1], iBuilder->simd_himask(8), 4, output[6], output[2]);
    64     s2p_step(iBuilder, bit22226666[0], bit22226666[1], iBuilder->simd_himask(8), 4, output[5], output[1]);
    65     s2p_step(iBuilder, bit33337777[0], bit33337777[1], iBuilder->simd_himask(8), 4, output[4], output[0]);
    66 #endif   
     51        s2p_step(iBuilder, bit66442200[2*j], bit66442200[2*j+1],
     52                 iBuilder->simd_himask(4), 2, bit66662222[j], bit44440000[j]);
     53        s2p_step(iBuilder, bit77553311[2*j], bit77553311[2*j+1],
     54                 iBuilder->simd_himask(4), 2, bit77773333[j], bit55551111[j]);
     55    }
     56    if (basisNumbering == cc::BitNumbering::LittleEndian) {
     57        s2p_step(iBuilder, bit44440000[0], bit44440000[1], iBuilder->simd_himask(8), 4, output[4], output[0]);
     58        s2p_step(iBuilder, bit55551111[0], bit55551111[1], iBuilder->simd_himask(8), 4, output[5], output[1]);
     59        s2p_step(iBuilder, bit66662222[0], bit66662222[1], iBuilder->simd_himask(8), 4, output[6], output[2]);
     60        s2p_step(iBuilder, bit77773333[0], bit77773333[1], iBuilder->simd_himask(8), 4, output[7], output[3]);
     61    }
     62    else {
     63        s2p_step(iBuilder, bit44440000[0], bit44440000[1], iBuilder->simd_himask(8), 4, output[3], output[7]);
     64        s2p_step(iBuilder, bit55551111[0], bit55551111[1], iBuilder->simd_himask(8), 4, output[2], output[6]);
     65        s2p_step(iBuilder, bit66662222[0], bit66662222[1], iBuilder->simd_himask(8), 4, output[1], output[5]);
     66        s2p_step(iBuilder, bit77773333[0], bit77773333[1], iBuilder->simd_himask(8), 4, output[0], output[4]);
     67    }
    6768}
    6869
    6970/* Alternative transposition model, but small field width packs are problematic. */
    7071#if 0
    71 void s2p_ideal(const std::unique_ptr<KernelBuilder> & iBuilder, Value * input[], Value * output[]) {
     72void s2p_ideal(const std::unique_ptr<KernelBuilder> & iBuilder, Value * input[], Value * output[], cc::BitNumbering basisNumbering) {
    7273    Value * hi_nybble[4];
    7374    Value * lo_nybble[4];
     
    7879        lo_nybble[i] = iBuilder->hsimd_packl(8, s0, s1);
    7980    }
    80     Value * pair01[2];
    81     Value * pair23[2];
    82     Value * pair45[2];
    83     Value * pair67[2];
     81    Value * pair76[2];
     82    Value * pair54[2];
     83    Value * pair32[2];
     84    Value * pair10[2];
    8485    for (unsigned i = 0; i<2; i++) {
    85         pair01[i] = iBuilder->hsimd_packh(4, hi_nybble[2*i], hi_nybble[2*i+1]);
    86         pair23[i] = iBuilder->hsimd_packl(4, hi_nybble[2*i], hi_nybble[2*i+1]);
    87         pair45[i] = iBuilder->hsimd_packh(4, lo_nybble[2*i], lo_nybble[2*i+1]);
    88         pair67[i] = iBuilder->hsimd_packl(4, lo_nybble[2*i], lo_nybble[2*i+1]);
    89     }
    90     output[0] = iBuilder->hsimd_packh(2, pair01[0], pair01[1]);
    91     output[1] = iBuilder->hsimd_packl(2, pair01[0], pair01[1]);
    92     output[2] = iBuilder->hsimd_packh(2, pair23[0], pair23[1]);
    93     output[3] = iBuilder->hsimd_packl(2, pair23[0], pair23[1]);
    94     output[4] = iBuilder->hsimd_packh(2, pair45[0], pair45[1]);
    95     output[5] = iBuilder->hsimd_packl(2, pair45[0], pair45[1]);
    96     output[6] = iBuilder->hsimd_packh(2, pair67[0], pair67[1]);
    97     output[7] = iBuilder->hsimd_packl(2, pair67[0], pair67[1]);
     86        pair76[i] = iBuilder->hsimd_packh(4, hi_nybble[2*i], hi_nybble[2*i+1]);
     87        pair54[i] = iBuilder->hsimd_packl(4, hi_nybble[2*i], hi_nybble[2*i+1]);
     88        pair32[i] = iBuilder->hsimd_packh(4, lo_nybble[2*i], lo_nybble[2*i+1]);
     89        pair10[i] = iBuilder->hsimd_packl(4, lo_nybble[2*i], lo_nybble[2*i+1]);
     90    }
     91    if (basisNumbering == cc::BitNumbering::LittleEndian) {
     92        output[7] = iBuilder->hsimd_packh(2, pair76[0], pair76[1]);
     93        output[6] = iBuilder->hsimd_packl(2, pair76[0], pair76[1]);
     94        output[5] = iBuilder->hsimd_packh(2, pair54[0], pair54[1]);
     95        output[4] = iBuilder->hsimd_packl(2, pair54[0], pair54[1]);
     96        output[3] = iBuilder->hsimd_packh(2, pair32[0], pair32[1]);
     97        output[2] = iBuilder->hsimd_packl(2, pair32[0], pair32[1]);
     98        output[1] = iBuilder->hsimd_packh(2, pair10[0], pair10[1]);
     99        output[0] = iBuilder->hsimd_packl(2, pair10[0], pair10[1]);
     100    } else {
     101        output[0] = iBuilder->hsimd_packh(2, pair76[0], pair76[1]);
     102        output[1] = iBuilder->hsimd_packl(2, pair76[0], pair76[1]);
     103        output[2] = iBuilder->hsimd_packh(2, pair54[0], pair54[1]);
     104        output[3] = iBuilder->hsimd_packl(2, pair54[0], pair54[1]);
     105        output[4] = iBuilder->hsimd_packh(2, pair32[0], pair32[1]);
     106        output[5] = iBuilder->hsimd_packl(2, pair32[0], pair32[1]);
     107        output[6] = iBuilder->hsimd_packh(2, pair10[0], pair10[1]);
     108        output[7] = iBuilder->hsimd_packl(2, pair10[0], pair10[1]);
     109    }
    98110}
    99111#endif
    100112   
    101 #if 0
    102 void generateS2P_16Kernel(const std::unique_ptr<KernelBuilder> & iBuilder, Kernel * kBuilder) {
    103     kBuilder->addInputStream(16, "unit_pack");
    104     for(unsigned i = 0; i < 16; i++) {
    105             kBuilder->addOutputStream(1);
    106     }
    107     Value * ptr = kBuilder->getInputStream(0);
    108 
    109     Value * lo[8];
    110     Value * hi[8];
    111     for (unsigned i = 0; i < 8; i++) {
    112         Value * s0 = iBuilder->CreateBlockAlignedLoad(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i)});
    113         Value * s1 = iBuilder->CreateBlockAlignedLoad(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i + 1)});
    114         lo[i] = iBuilder->hsimd_packl(16, s0, s1);
    115         hi[i] = iBuilder->hsimd_packh(16, s0, s1);
    116     }
    117 
    118     Value * output[16];
    119     s2p(iBuilder, lo, output);
    120     s2p(iBuilder, hi, output + 8);
    121     for (unsigned j = 0; j < 16; j++) {
    122         iBuilder->CreateBlockAlignedStore(output[j], kBuilder->getOutputStream(j));
    123     }
    124 }   
    125 #endif
    126 
    127113void S2PKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & kb, Value * const numOfBlocks) {
    128114    BasicBlock * entry = kb->GetInsertBlock();
     
    148134    }
    149135    Value * basisbits[8];
    150     s2p(kb, bytepack, basisbits);
     136    s2p(kb, bytepack, basisbits, mBasisSetNumbering);
    151137    for (unsigned i = 0; i < 8; ++i) {
    152138        kb->storeOutputStreamBlock("basisBits", kb->getInt32(i), blockOffsetPhi, basisbits[i]);
     
    158144    kb->SetInsertPoint(s2pDone);
    159145}
    160 S2PKernel::S2PKernel(const std::unique_ptr<KernelBuilder> & b, bool aligned, std::string prefix)
    161 : MultiBlockKernel(aligned ? prefix + "s2p" : prefix + "s2p_unaligned",
     146
     147S2PKernel::S2PKernel(const std::unique_ptr<KernelBuilder> & b, cc::BitNumbering numbering, bool aligned, std::string prefix)
     148    : MultiBlockKernel(aligned ? prefix + "s2p" + cc::numberingSuffix(numbering): prefix + "s2p_unaligned" + cc::numberingSuffix(numbering),
    162149    {Binding{b->getStreamSetTy(1, 8), "byteStream", FixedRate(), Principal()}},
    163150    {Binding{b->getStreamSetTy(8, 1), "basisBits"}}, {}, {}, {}),
     151  mBasisSetNumbering(numbering),
    164152  mAligned(aligned) {
    165153    if (!aligned) {
     
    168156}
    169157   
    170 S2P_21Kernel::S2P_21Kernel(const std::unique_ptr<KernelBuilder> & b)
    171 : MultiBlockKernel("s2p_21",
     158S2P_21Kernel::S2P_21Kernel(const std::unique_ptr<KernelBuilder> & b, cc::BitNumbering numbering)
     159: MultiBlockKernel("s2p_21" + cc::numberingSuffix(numbering),
    172160                   {Binding{b->getStreamSetTy(1, 32), "codeUnitStream", FixedRate(), Principal()}},
    173                    {Binding{b->getStreamSetTy(21, 1), "basisBits"}}, {}, {}, {}) {
    174 }
    175 
     161                   {Binding{b->getStreamSetTy(21, 1), "basisBits"}}, {}, {}, {}),
     162    mBasisSetNumbering(numbering) {
     163}
    176164
    177165void S2P_21Kernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & kb, Value * const numOfBlocks) {
     
    209197    }
    210198    Value * basisbits[24];
    211     s2p(kb, u32byte0, basisbits);
    212     s2p(kb, u32byte1, &basisbits[8]);
    213     s2p(kb, u32byte2, &basisbits[16]);
     199    s2p(kb, u32byte0, basisbits, cc::BitNumbering::LittleEndian);
     200    s2p(kb, u32byte1, &basisbits[8], cc::BitNumbering::LittleEndian);
     201    s2p(kb, u32byte2, &basisbits[16], cc::BitNumbering::LittleEndian);
    214202    for (unsigned i = 0; i < 21; ++i) {
    215 #ifndef LITTLE_ENDIAN_BIT_NUMBERING
    216         const unsigned idx = (i/3) * 3 + 7 - (i & 7);
    217         kb->storeOutputStreamBlock("basisBits", kb->getInt32(idx), blockOffsetPhi, basisbits[i]);
    218 #else
    219         kb->storeOutputStreamBlock("basisBits", kb->getInt32(i), blockOffsetPhi, basisbits[i]);
    220 #endif
     203        const unsigned bitIdx = mBasisSetNumbering == cc::BitNumbering::LittleEndian ? i : 21 - i;
     204        kb->storeOutputStreamBlock("basisBits", kb->getInt32(i), blockOffsetPhi, basisbits[bitIdx]);
    221205    }
    222206    Value * nextBlk = kb->CreateAdd(blockOffsetPhi, kb->getSize(1));
     
    245229    }
    246230    for (unsigned bit = 0; bit < mCodeUnitWidth; bit++) {
    247 #ifndef LITTLE_ENDIAN_BIT_NUMBERING
    248         pb->createAssign(pb->createExtract(getOutputStreamVar("basisBits"), pb->getInteger(bit)), streamSet[steps][mCodeUnitWidth-1-bit]);
    249 #else
    250         pb->createAssign(pb->createExtract(getOutputStreamVar("basisBits"), pb->getInteger(bit)), streamSet[steps][bit]);
    251 #endif
    252     }
    253 }
    254 
    255 S2P_PabloKernel::S2P_PabloKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned codeUnitWidth)
    256 : PabloKernel(b, "s2p_pablo" + std::to_string(codeUnitWidth),
     231        const unsigned bitIndex = mBasisSetNumbering == cc::BitNumbering::LittleEndian ? bit : mCodeUnitWidth-1-bit;
     232        pb->createAssign(pb->createExtract(getOutputStreamVar("basisBits"), pb->getInteger(bitIndex)), streamSet[steps][bit]);
     233    }
     234}
     235
     236S2P_PabloKernel::S2P_PabloKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned codeUnitWidth, cc::BitNumbering numbering)
     237: PabloKernel(b, "s2p_pablo" + std::to_string(codeUnitWidth) + cc::numberingSuffix(numbering),
    257238    {Binding{b->getStreamSetTy(1, codeUnitWidth), "codeUnitStream"}},
    258239    {Binding{b->getStreamSetTy(codeUnitWidth, 1), "basisBits"}}),
    259   mCodeUnitWidth(codeUnitWidth) {
    260 }
    261 
    262 
    263 S2PByPextKernel::S2PByPextKernel(const std::unique_ptr<kernel::KernelBuilder> &b, std::string prefix)
    264         : BlockOrientedKernel(prefix + "s2pByPext",
     240  mCodeUnitWidth(codeUnitWidth),
     241  mBasisSetNumbering(numbering) {
     242}
     243
     244
     245S2PByPextKernel::S2PByPextKernel(const std::unique_ptr<kernel::KernelBuilder> &b, cc::BitNumbering numbering, std::string prefix)
     246        : BlockOrientedKernel(prefix + "s2pByPext" + cc::numberingSuffix(numbering),
    265247                           {Binding{b->getStreamSetTy(1, 8), "byteStream", FixedRate(), Principal()}},
    266                            {Binding{b->getStreamSetTy(8, 1), "basisBits"}}, {}, {}, {})
    267 {
    268 
     248                           {Binding{b->getStreamSetTy(8, 1), "basisBits"}}, {}, {}, {}),
     249    mBasisSetNumbering(numbering) {
    269250}
    270251
     
    283264        for (size_t iDataIndex = 0; iDataIndex < 8; iDataIndex++) {
    284265            Value* inputData = b->CreateLoad(b->CreateGEP(inputBasePtr, b->getSize(iDataIndex + iBlockIndex * 8)));
    285             for (int iStreamIndex = 0; iStreamIndex < 8; iStreamIndex++) {
    286                 Value* targetMask = b->getInt64(base_mask << iStreamIndex);
     266            for (unsigned iStreamIndex = 0; iStreamIndex < 8; iStreamIndex++) {
     267                const unsigned bitIndex = mBasisSetNumbering == cc::BitNumbering::LittleEndian ? iStreamIndex : 7 - iStreamIndex;
     268                Value* targetMask = b->getInt64(base_mask << bitIndex);
    287269                Value * const outputValue = b->CreateCall(pext, {inputData, targetMask});
    288270                tempValues[iStreamIndex] = b->CreateOr(tempValues[iStreamIndex], b->CreateShl(outputValue, b->getInt64(iDataIndex * 8)));
     
    290272        }
    291273        for (int iStreamIndex = 0; iStreamIndex < 8; iStreamIndex++) {
    292             b->CreateStore(tempValues[iStreamIndex], b->CreateGEP(outputPtrs[7 - iStreamIndex], b->getSize(iBlockIndex)));
    293         }
    294     }
    295 }
    296 }
     274            b->CreateStore(tempValues[iStreamIndex], b->CreateGEP(outputPtrs[iStreamIndex], b->getSize(iBlockIndex)));
     275        }
     276    }
     277}
     278}
Note: See TracChangeset for help on using the changeset viewer.