Ignore:
Timestamp:
Dec 3, 2017, 12:40:40 PM (20 months ago)
Author:
nmedfort
Message:

Bug fixes and simplified MultiBlockKernel logic

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4_index_decoder.cpp

    r5706 r5755  
    1818
    1919#define printRTDebugMsg(MSG) \
    20     if (DEBUG_RT_PRINT) iBuilder->CallPrintMsgToStderr(MSG)
     20    if (DEBUG_RT_PRINT) b->CallPrintMsgToStderr(MSG)
    2121
    2222#define printRTDebugInt(NAME, X) \
    23     if (DEBUG_RT_PRINT) iBuilder->CallPrintIntToStderr(NAME, X)
     23    if (DEBUG_RT_PRINT) b->CallPrintIntToStderr(NAME, X)
    2424
    2525#define printGlobalPos() \
    26     printRTDebugInt("GlobalPos", iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset)))
     26    printRTDebugInt("GlobalPos", b->CreateAdd(blockStartPos, b->CreateLoad(sOffset)))
    2727
    2828namespace {
    2929
    30 Value * generateBitswap(const std::unique_ptr<KernelBuilder> & iBuilder, Value * v) {
    31     Value * bswapFunc = Intrinsic::getDeclaration(iBuilder->getModule(),
     30Value * generateBitswap(const std::unique_ptr<KernelBuilder> & b, Value * v) {
     31    Value * bswapFunc = Intrinsic::getDeclaration(b->getModule(),
    3232            Intrinsic::bswap, v->getType());
    33     return iBuilder->CreateCall(bswapFunc, {v});
    34 }
    35 
    36 Value * selectMin(const std::unique_ptr<KernelBuilder> & iBuilder, Value * a, Value * b) {
    37     return iBuilder->CreateSelect(iBuilder->CreateICmpULT(a, b), a, b);
    38 }
    39 
    40 Value * createStackVar(const std::unique_ptr<KernelBuilder> & iBuilder, Type * type, StringRef name, Value * initializer = nullptr) {
    41     Value * var = iBuilder->CreateAlloca(type, nullptr, name);
     33    return b->CreateCall(bswapFunc, {v});
     34}
     35
     36Value * createStackVar(const std::unique_ptr<KernelBuilder> & b, Type * type, StringRef name, Value * initializer = nullptr) {
     37    Value * var = b->CreateAlloca(type, nullptr, name);
    4238    if (initializer) {
    43         iBuilder->CreateStore(initializer, var);
     39        b->CreateStore(initializer, var);
    4440    } else {
    45         iBuilder->CreateStore(ConstantInt::get(type, 0), var);
     41        b->CreateStore(ConstantInt::get(type, 0), var);
    4642    }
    4743    return var;
    4844}
    4945
    50 void incStackVar(const std::unique_ptr<KernelBuilder> & iBuilder, Value * svar, Value * increment = nullptr) {
    51     Value * value = iBuilder->CreateLoad(svar);
     46void incStackVar(const std::unique_ptr<KernelBuilder> & b, Value * svar, Value * increment = nullptr) {
     47    Value * value = b->CreateLoad(svar);
    5248    if (increment) {
    53         value = iBuilder->CreateAdd(value, increment);
     49        value = b->CreateAdd(value, increment);
    5450    } else {
    55         value = iBuilder->CreateAdd(value, ConstantInt::get(value->getType(), 1));
     51        value = b->CreateAdd(value, ConstantInt::get(value->getType(), 1));
    5652    }
    57     iBuilder->CreateStore(value, svar);
    58 }
    59 
    60 Value * getOutputPtr(const std::unique_ptr<KernelBuilder> & iBuilder, Value * blockStartPtr, Value * offset) {
    61     return iBuilder->CreateGEP(
    62             iBuilder->CreatePointerCast(blockStartPtr, iBuilder->getInt32Ty()->getPointerTo()),
     53    b->CreateStore(value, svar);
     54}
     55
     56Value * getOutputPtr(const std::unique_ptr<KernelBuilder> & b, Value * blockStartPtr, Value * offset) {
     57    return b->CreateGEP(
     58            b->CreatePointerCast(blockStartPtr, b->getInt32Ty()->getPointerTo()),
    6359            offset
    6460            );
     
    7066 * Get the offset within the current word.
    7167 */
    72 Value * LZ4IndexDecoderKernel::getWordOffset(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    73     Value * offset = iBuilder->CreateLoad(sOffset);
     68Value * LZ4IndexDecoderKernel::getWordOffset(const std::unique_ptr<kernel::KernelBuilder> & b) {
     69    Value * offset = b->CreateLoad(sOffset);
    7470    IntegerType * type = cast<IntegerType>(offset->getType());
    7571    Constant * mask = ConstantInt::get(type, wordWidth - 1);
    76     return iBuilder->CreateAnd(offset, mask);
     72    return b->CreateAnd(offset, mask);
    7773}
    7874
     
    8076 * Get the offset of the start of the current word.
    8177 */
    82 Value * LZ4IndexDecoderKernel::getWordStartOffset(const std::unique_ptr<KernelBuilder> & iBuilder) {
    83     Value * offset = iBuilder->CreateLoad(sOffset);
     78Value * LZ4IndexDecoderKernel::getWordStartOffset(const std::unique_ptr<KernelBuilder> & b) {
     79    Value * offset = b->CreateLoad(sOffset);
    8480    IntegerType * type = cast<IntegerType>(offset->getType());
    8581    Constant * mask = ConstantExpr::getNeg(ConstantInt::get(type, wordWidth));
    86     return iBuilder->CreateAnd(offset, mask);
     82    return b->CreateAnd(offset, mask);
    8783}
    8884
     
    9187 * If offset is not provided, load the current byte by default.
    9288 */
    93 Value * LZ4IndexDecoderKernel::loadRawByte(const std::unique_ptr<KernelBuilder> & iBuilder, Value * offset) {
    94     Value * blockStartPtr = iBuilder->CreatePointerCast(
    95             iBuilder->getInputStreamBlockPtr("byteStream", iBuilder->getInt32(0)),
    96             iBuilder->getInt8PtrTy()
     89Value * LZ4IndexDecoderKernel::loadRawByte(const std::unique_ptr<KernelBuilder> & b, Value * offset) {
     90    Value * blockStartPtr = b->CreatePointerCast(
     91            b->getInputStreamBlockPtr("byteStream", b->getInt32(0)),
     92            b->getInt8PtrTy()
    9793            );
    9894    if (offset == nullptr) {
    99         offset = iBuilder->CreateLoad(sOffset);
     95        offset = b->CreateLoad(sOffset);
    10096    }
    101     Value * ptr = iBuilder->CreateGEP(blockStartPtr, offset);
    102     return iBuilder->CreateLoad(ptr);
     97    Value * ptr = b->CreateGEP(blockStartPtr, offset);
     98    return b->CreateLoad(ptr);
    10399}
    104100
     
    110106 * cleared  = ....111
    111107 */
    112 void LZ4IndexDecoderKernel::setExtenderUntilOffset(const std::unique_ptr<KernelBuilder> & iBuilder) {
     108void LZ4IndexDecoderKernel::setExtenderUntilOffset(const std::unique_ptr<KernelBuilder> & b) {
    113109    // Little-endian, offset counts from LSB
    114110    // extender = extender ^ ~((1 << offset) -1)
    115     Value * extender = iBuilder->CreateLoad(sExtender);
    116     Value * wordOffset = iBuilder->CreateZExt(
    117             getWordOffset(iBuilder),
    118             iBuilder->getSizeTy()
    119             );
    120     Value * one = iBuilder->getSize(1);
    121     Value * mask = iBuilder->CreateSub(
    122             iBuilder->CreateShl(one, wordOffset),
     111    Value * extender = b->CreateLoad(sExtender);
     112    Value * wordOffset = b->CreateZExt(
     113            getWordOffset(b),
     114            b->getSizeTy()
     115            );
     116    Value * one = b->getSize(1);
     117    Value * mask = b->CreateSub(
     118            b->CreateShl(one, wordOffset),
    123119            one);
    124     extender = iBuilder->CreateOr(extender, mask);
    125     iBuilder->CreateStore(extender, sExtender);
     120    extender = b->CreateOr(extender, mask);
     121    b->CreateStore(extender, sExtender);
    126122}
    127123
     
    131127 * Called when we potentially reach a new word.  Usually followed by setExtenderUntilOffset.
    132128 */
    133 void LZ4IndexDecoderKernel::loadCurrentExtender(const std::unique_ptr<KernelBuilder> & iBuilder) {
    134     Value * offset = iBuilder->CreateLoad(sOffset);
     129void LZ4IndexDecoderKernel::loadCurrentExtender(const std::unique_ptr<KernelBuilder> & b) {
     130    Value * offset = b->CreateLoad(sOffset);
    135131    IntegerType * type = cast<IntegerType>(offset->getType());
    136132    ConstantInt * shift = ConstantInt::get(type, std::log2(wordWidth));
    137     Value * shiftedOffset = iBuilder->CreateLShr(offset, shift);
    138     Value * extender = iBuilder->CreateExtractElement(extenders, shiftedOffset);
    139     iBuilder->CreateStore(extender, sExtender);
    140 }
    141 
    142 
    143 void LZ4IndexDecoderKernel::generateProduceOutput(const std::unique_ptr<KernelBuilder> &iBuilder) {
    144     Value * producedItem = iBuilder->getProducedItemCount("literalIndexes");
     133    Value * shiftedOffset = b->CreateLShr(offset, shift);
     134    Value * extender = b->CreateExtractElement(extenders, shiftedOffset);
     135    b->CreateStore(extender, sExtender);
     136}
     137
     138
     139void LZ4IndexDecoderKernel::generateProduceOutput(const std::unique_ptr<KernelBuilder> &b) {
     140    Value * producedItem = b->getProducedItemCount("literalIndexes");
    145141
    146142//#ifndef NDEBUG
    147 //    iBuilder->CallPrintInt("ProducedItem", producedItem);
     143//    b->CallPrintInt("ProducedItem", producedItem);
    148144//    // LiteralStart is adjusted to be relative to the block start, so that
    149145//    // the output can be compared against that of the reference implementation.
    150 //    Value * literalStart = iBuilder->CreateSub(iBuilder->getScalarField("LiteralStart"), iBuilder->getScalarField("LZ4BlockStart"));
    151 //    iBuilder->CallPrintInt("LiteralStart", literalStart);
    152 //    iBuilder->CallPrintInt("LiteralLength", iBuilder->getScalarField("LiteralLength"));
    153 //    iBuilder->CallPrintInt("MatchOffset", iBuilder->getScalarField("MatchOffset"));
    154 //    iBuilder->CallPrintInt("MatchLength", iBuilder->getScalarField("MatchLength"));
     146//    Value * literalStart = b->CreateSub(b->getScalarField("LiteralStart"), b->getScalarField("LZ4BlockStart"));
     147//    b->CallPrintInt("LiteralStart", literalStart);
     148//    b->CallPrintInt("LiteralLength", b->getScalarField("LiteralLength"));
     149//    b->CallPrintInt("MatchOffset", b->getScalarField("MatchOffset"));
     150//    b->CallPrintInt("MatchLength", b->getScalarField("MatchLength"));
    155151//#endif
    156152    printRTDebugMsg("--------------");
    157153
    158     Value * outputOffset = iBuilder->CreateAnd(
    159             iBuilder->CreateTrunc(producedItem, iBuilder->getInt32Ty()),
    160             iBuilder->getInt32(iBuilder->getBitBlockWidth() - 1)
    161             );  // producedItem % blockWidth (as blockWidth is always a power of 2)
    162     Value * literalStartPtr = getOutputPtr(iBuilder,
    163             iBuilder->getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(0)), outputOffset);
    164     Value * literalLengthPtr = getOutputPtr(iBuilder,
    165             iBuilder->getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(1)), outputOffset);
    166     Value * matchOffsetPtr = getOutputPtr(iBuilder,
    167             iBuilder->getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(0)), outputOffset);
    168     Value * matchLengthPtr = getOutputPtr(iBuilder,
    169             iBuilder->getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(1)), outputOffset);
    170     iBuilder->CreateStore(iBuilder->getScalarField("LiteralStart"), literalStartPtr);
    171     iBuilder->CreateStore(iBuilder->getScalarField("LiteralLength"), literalLengthPtr);
    172     iBuilder->CreateStore(iBuilder->getScalarField("MatchOffset"), matchOffsetPtr);
    173     iBuilder->CreateStore(iBuilder->getScalarField("MatchLength"), matchLengthPtr);
    174     iBuilder->setProducedItemCount("literalIndexes", iBuilder->CreateAdd(producedItem, iBuilder->getSize(1)));
     154    Value * outputOffset = b->CreateAnd(b->CreateTrunc(producedItem, b->getInt32Ty()), b->getInt32(b->getBitBlockWidth() - 1));  // producedItem % blockWidth (as blockWidth is always a power of 2)
     155    Value * baseLiteralStartPtr = b->getOutputStreamBlockPtr("literalIndexes", b->getInt32(0));
     156
     157    Value * literalStartPtr = getOutputPtr(b, baseLiteralStartPtr, outputOffset);
     158    Value * literalLengthPtr = getOutputPtr(b,
     159            b->getOutputStreamBlockPtr("literalIndexes", b->getInt32(1)), outputOffset);
     160    Value * matchOffsetPtr = getOutputPtr(b,
     161            b->getOutputStreamBlockPtr("matchIndexes", b->getInt32(0)), outputOffset);
     162    Value * matchLengthPtr = getOutputPtr(b,
     163            b->getOutputStreamBlockPtr("matchIndexes", b->getInt32(1)), outputOffset);
     164
     165    b->CreateStore(b->getScalarField("LiteralStart"), literalStartPtr);
     166    b->CreateStore(b->getScalarField("LiteralLength"), literalLengthPtr);
     167    b->CreateStore(b->getScalarField("MatchOffset"), matchOffsetPtr);
     168    b->CreateStore(b->getScalarField("MatchLength"), matchLengthPtr);
     169    b->setProducedItemCount("literalIndexes", b->CreateAdd(producedItem, b->getSize(1)));
    175170    // matchIndexes has a fixed ratio of 1:1 w.r.t. literalIndexes.
    176171}
    177172
    178173
    179 void LZ4IndexDecoderKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    180     BasicBlock * entry_block = iBuilder->GetInsertBlock();
    181     BasicBlock * exit_block = iBuilder->CreateBasicBlock("exit");
     174void LZ4IndexDecoderKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {
     175    BasicBlock * entry_block = b->GetInsertBlock();
     176    BasicBlock * exit_block = b->CreateBasicBlock("exit");
    182177
    183178    // %entry
    184     iBuilder->SetInsertPoint(entry_block);
     179    b->SetInsertPoint(entry_block);
    185180    printRTDebugMsg("entry");
    186181    // Global positions in the byte stream.
    187     Value * blockNo = iBuilder->getScalarField("BlockNo");
    188     blockStartPos = iBuilder->CreateMul(blockNo, iBuilder->getInt32(iBuilder->getBitBlockWidth()), "blockStartPos");
    189     extenders = iBuilder->CreateBitCast(
    190             iBuilder->loadInputStreamBlock("extenders", iBuilder->getInt32(0)),
    191             VectorType::get(iBuilder->getSizeTy(), iBuilder->getBitBlockWidth() / iBuilder->getSizeTy()->getBitWidth()),
     182    Value * blockNo = b->getScalarField("BlockNo");
     183    blockStartPos = b->CreateMul(blockNo, b->getInt32(b->getBitBlockWidth()), "blockStartPos");
     184    extenders = b->CreateBitCast(
     185            b->loadInputStreamBlock("extenders", b->getInt32(0)),
     186            VectorType::get(b->getSizeTy(), b->getBitBlockWidth() / b->getSizeTy()->getBitWidth()),
    192187            "extenders");
    193188    // Create a series of stack variables which will be promoted by mem2reg.
    194     sOffset = createStackVar(iBuilder, iBuilder->getInt32Ty(), "offset");
     189    sOffset = createStackVar(b, b->getInt32Ty(), "offset");
    195190    // tempLength has different meanings in different states.
    196     sTempLength = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempLength", iBuilder->getScalarField("TempLength"));
    197     sTempCount = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempCount", iBuilder->getScalarField("TempCount"));
    198     sState = createStackVar(iBuilder, iBuilder->getInt8Ty(), "state", iBuilder->getScalarField("State"));
    199     sExtender = createStackVar(iBuilder, iBuilder->getSizeTy(), "extender",
    200             iBuilder->CreateExtractElement(extenders, iBuilder->getInt32(0)));
    201 
    202     BasicBlock * skippingBytes = iBuilder->CreateBasicBlock("skipping_bytes");
    203     BasicBlock * dispatch = iBuilder->CreateBasicBlock("dispatch");
    204 
    205     iBuilder->CreateCondBr(
    206             iBuilder->CreateICmpUGT(iBuilder->getScalarField("BytesToSkip"), iBuilder->getInt32(0)),
     191    sTempLength = createStackVar(b, b->getInt32Ty(), "tempLength", b->getScalarField("TempLength"));
     192    sTempCount = createStackVar(b, b->getInt32Ty(), "tempCount", b->getScalarField("TempCount"));
     193    sState = createStackVar(b, b->getInt8Ty(), "state", b->getScalarField("State"));
     194    sExtender = createStackVar(b, b->getSizeTy(), "extender",
     195            b->CreateExtractElement(extenders, b->getInt32(0)));
     196
     197    BasicBlock * skippingBytes = b->CreateBasicBlock("skipping_bytes");
     198    BasicBlock * dispatch = b->CreateBasicBlock("dispatch");
     199
     200    b->CreateCondBr(
     201            b->CreateICmpUGT(b->getScalarField("BytesToSkip"), b->getInt32(0)),
    207202            skippingBytes, dispatch
    208203            );
    209204
    210205    // %skipping_bytes
    211     generateSkippingBytes(iBuilder, skippingBytes, exit_block);
     206    generateSkippingBytes(b, skippingBytes, exit_block);
    212207    // Insert point is at the end of skippingBytes.
    213     iBuilder->CreateBr(dispatch);
     208    b->CreateBr(dispatch);
    214209
    215210    // %dispatch
     
    217212
    218213    // %at_block_checksum
    219     BasicBlock * atBlockChecksum = iBuilder->CreateBasicBlock("at_block_checksum");
    220     generateAtBlockChecksum(iBuilder, atBlockChecksum, skippingBytes);
     214    BasicBlock * atBlockChecksum = b->CreateBasicBlock("at_block_checksum");
     215    generateAtBlockChecksum(b, atBlockChecksum, skippingBytes);
    221216 
    222217    // %at_block_size
    223     BasicBlock * atBlockSize = iBuilder->CreateBasicBlock("at_block_size");
    224     generateAtBlockSize(iBuilder, atBlockSize, skippingBytes, exit_block);
     218    BasicBlock * atBlockSize = b->CreateBasicBlock("at_block_size");
     219    generateAtBlockSize(b, atBlockSize, skippingBytes, exit_block);
    225220
    226221    // %at_token
    227     BasicBlock * atToken = iBuilder->CreateBasicBlock("at_token");
    228     generateAtToken(iBuilder, atToken, exit_block);
     222    BasicBlock * atToken = b->CreateBasicBlock("at_token");
     223    generateAtToken(b, atToken, exit_block);
    229224
    230225    // %extending_literal_length
    231     BasicBlock * extendingLiteralLen = iBuilder->CreateBasicBlock("extending_literal_length");
    232     generateExtendingLiteralLen(iBuilder, extendingLiteralLen, exit_block);
     226    BasicBlock * extendingLiteralLen = b->CreateBasicBlock("extending_literal_length");
     227    generateExtendingLiteralLen(b, extendingLiteralLen, exit_block);
    233228
    234229    // %at_literals
    235     BasicBlock * atLiterals = iBuilder->CreateBasicBlock("at_literals");
    236     generateAtLiterals(iBuilder, atLiterals);
    237     iBuilder->CreateBr(skippingBytes);
     230    BasicBlock * atLiterals = b->CreateBasicBlock("at_literals");
     231    generateAtLiterals(b, atLiterals);
     232    b->CreateBr(skippingBytes);
    238233
    239234    // %at_first_offset
     
    241236    // If the whole LZ4 block is done, process the (optional) checksum.
    242237    // Otherwise, go around to process the next sequence.
    243     BasicBlock * atOffset1 = iBuilder->CreateBasicBlock("at_first_offset");
    244     iBuilder->SetInsertPoint(atOffset1);
    245     Value * nowGlobalPos = iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset));
    246     BasicBlock * blockEnd_else = iBuilder->CreateBasicBlock("block_end_else");
     238    BasicBlock * atOffset1 = b->CreateBasicBlock("at_first_offset");
     239    b->SetInsertPoint(atOffset1);
     240    Value * nowGlobalPos = b->CreateAdd(blockStartPos, b->CreateLoad(sOffset));
     241    BasicBlock * blockEnd_else = b->CreateBasicBlock("block_end_else");
    247242    // Conditional branch inserted at the end of the last block.
    248     iBuilder->CreateUnlikelyCondBr(
    249             iBuilder->CreateICmpEQ(nowGlobalPos, iBuilder->getScalarField("LZ4BlockEnd")),
     243    b->CreateUnlikelyCondBr(
     244            b->CreateICmpEQ(nowGlobalPos, b->getScalarField("LZ4BlockEnd")),
    250245            atBlockChecksum, blockEnd_else
    251246            );
    252     generateAtFirstOffset(iBuilder, blockEnd_else, exit_block);
     247    generateAtFirstOffset(b, blockEnd_else, exit_block);
    253248
    254249    // %at_second_offset
    255     BasicBlock * atOffset2 = iBuilder->CreateBasicBlock("at_second_offset");
    256     generateAtSecondOffset(iBuilder, atOffset2, exit_block);
     250    BasicBlock * atOffset2 = b->CreateBasicBlock("at_second_offset");
     251    generateAtSecondOffset(b, atOffset2, exit_block);
    257252
    258253    // %extending_match_length
    259     BasicBlock * extendingMatchLen = iBuilder->CreateBasicBlock("extending_match_length");
    260     generateExtendingMatchLen(iBuilder, extendingMatchLen, exit_block);
    261     iBuilder->CreateBr(atToken);
     254    BasicBlock * extendingMatchLen = b->CreateBasicBlock("extending_match_length");
     255    generateExtendingMatchLen(b, extendingMatchLen, exit_block);
     256    b->CreateBr(atToken);
    262257
    263258    // Indirect branching.
    264     iBuilder->SetInsertPoint(dispatch);
     259    b->SetInsertPoint(dispatch);
    265260    printRTDebugMsg("dispatch");
    266261    // The order must comply with enum State.
     
    269264             BlockAddress::get(atOffset1), BlockAddress::get(atOffset2), BlockAddress::get(extendingMatchLen), BlockAddress::get(atBlockChecksum)}
    270265            );
    271     Value * target = iBuilder->CreateExtractElement(labels, iBuilder->CreateLoad(sState));
    272     IndirectBrInst * indirectBr = iBuilder->CreateIndirectBr(target);
     266    Value * target = b->CreateExtractElement(labels, b->CreateLoad(sState));
     267    IndirectBrInst * indirectBr = b->CreateIndirectBr(target);
    273268    indirectBr->addDestination(atBlockSize);
    274269    indirectBr->addDestination(atToken);
     
    281276
    282277    // %exit
    283     iBuilder->SetInsertPoint(exit_block);
     278    b->SetInsertPoint(exit_block);
    284279    printRTDebugMsg("exit");
    285     iBuilder->setScalarField("State", iBuilder->CreateLoad(sState));
    286     iBuilder->setScalarField("TempLength", iBuilder->CreateLoad(sTempLength));
    287     iBuilder->setScalarField("TempCount", iBuilder->CreateLoad(sTempCount));
    288     iBuilder->setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getInt32(1)));
     280    b->setScalarField("State", b->CreateLoad(sState));
     281    b->setScalarField("TempLength", b->CreateLoad(sTempLength));
     282    b->setScalarField("TempCount", b->CreateLoad(sTempCount));
     283    b->setScalarField("BlockNo", b->CreateAdd(blockNo, b->getInt32(1)));
    289284    // When the kernel builder uses indirectbr, doBlock is not a separate function.
    290285    // Hence, we branch to a new basic block and fall through instead of returning.
    291     BasicBlock * end_block = iBuilder->CreateBasicBlock("end_of_block");
    292     iBuilder->CreateBr(end_block);
    293     iBuilder->SetInsertPoint(end_block);
    294 }
    295 
    296 
    297 void LZ4IndexDecoderKernel::generateBoundaryDetection(const std::unique_ptr<KernelBuilder> & iBuilder, State state, BasicBlock * exit_block, bool updateExtenderWord) {
     286    BasicBlock * end_block = b->CreateBasicBlock("end_of_block");
     287    b->CreateBr(end_block);
     288    b->SetInsertPoint(end_block);
     289}
     290
     291
     292void LZ4IndexDecoderKernel::generateBoundaryDetection(const std::unique_ptr<KernelBuilder> & b, State state, BasicBlock * exit_block, bool updateExtenderWord) {
    298293    if (updateExtenderWord) {
    299         BasicBlock * wordBoundary_then = iBuilder->CreateBasicBlock("word_boundary_then-" + StateLabels.at(state));
    300         BasicBlock * blockBoundary_else = iBuilder->CreateBasicBlock("block_boundary_else-" + StateLabels.at(state));
    301         BasicBlock * wordBoundary_cont = iBuilder->CreateBasicBlock("word_boundary_cont-" + StateLabels.at(state));
    302         iBuilder->CreateUnlikelyCondBr(
    303                 iBuilder->CreateICmpEQ(getWordOffset(iBuilder), iBuilder->getInt32(0)),
     294        BasicBlock * wordBoundary_then = b->CreateBasicBlock("word_boundary_then-" + StateLabels.at(state));
     295        BasicBlock * blockBoundary_else = b->CreateBasicBlock("block_boundary_else-" + StateLabels.at(state));
     296        BasicBlock * wordBoundary_cont = b->CreateBasicBlock("word_boundary_cont-" + StateLabels.at(state));
     297        b->CreateUnlikelyCondBr(
     298                b->CreateICmpEQ(getWordOffset(b), b->getInt32(0)),
    304299                wordBoundary_then, wordBoundary_cont
    305300                );
    306301
    307         iBuilder->SetInsertPoint(wordBoundary_then);
    308         iBuilder->CreateUnlikelyCondBr(
    309                 iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth())),
     302        b->SetInsertPoint(wordBoundary_then);
     303        b->CreateUnlikelyCondBr(
     304                b->CreateICmpEQ(b->CreateLoad(sOffset), b->getInt32(b->getBitBlockWidth())),
    310305                exit_block, blockBoundary_else
    311306                );
    312307
    313308        // Reaching word boundary but not block boundary.  Update the extender word as requested.
    314         iBuilder->SetInsertPoint(blockBoundary_else);
    315         loadCurrentExtender(iBuilder);
    316         iBuilder->CreateBr(wordBoundary_cont);
     309        b->SetInsertPoint(blockBoundary_else);
     310        loadCurrentExtender(b);
     311        b->CreateBr(wordBoundary_cont);
    317312
    318313        // Leave the insert point at the end and return.
    319         iBuilder->SetInsertPoint(wordBoundary_cont);
     314        b->SetInsertPoint(wordBoundary_cont);
    320315    } else {
    321         BasicBlock * blockBoundary_cont = iBuilder->CreateBasicBlock("block_boundary_cont-" + StateLabels.at(state));
    322         iBuilder->CreateUnlikelyCondBr(
    323                 iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth())),
     316        BasicBlock * blockBoundary_cont = b->CreateBasicBlock("block_boundary_cont-" + StateLabels.at(state));
     317        b->CreateUnlikelyCondBr(
     318                b->CreateICmpEQ(b->CreateLoad(sOffset), b->getInt32(b->getBitBlockWidth())),
    324319                exit_block, blockBoundary_cont
    325320                );
    326321        // Leave the insert point at the end and return.
    327         iBuilder->SetInsertPoint(blockBoundary_cont);
     322        b->SetInsertPoint(blockBoundary_cont);
    328323    }
    329324}
    330325
    331326
    332 void LZ4IndexDecoderKernel::generateSkippingBytes(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    333     iBuilder->SetInsertPoint(bb);
     327void LZ4IndexDecoderKernel::generateSkippingBytes(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * bb, BasicBlock * exit_block) {
     328    b->SetInsertPoint(bb);
    334329    printRTDebugMsg("skipping bytes");
    335330
    336     Value * remainingBytesInBlock = iBuilder->CreateSub(
    337             iBuilder->getInt32(iBuilder->getBitBlockWidth()), iBuilder->CreateLoad(sOffset)
    338             );
    339     Value * remainingBytesToSkip = iBuilder->getScalarField("BytesToSkip");
    340     Value * advanceDist = selectMin(iBuilder, remainingBytesInBlock, remainingBytesToSkip);
    341     remainingBytesToSkip = iBuilder->CreateSub(remainingBytesToSkip, advanceDist);
    342     incStackVar(iBuilder, sOffset, advanceDist);
    343     iBuilder->setScalarField("BytesToSkip", remainingBytesToSkip);
    344 
    345     generateBoundaryDetection(iBuilder, State::SKIPPING_BYTES, exit_block);
     331    Value * remainingBytesInBlock = b->CreateSub(
     332            b->getInt32(b->getBitBlockWidth()), b->CreateLoad(sOffset)
     333            );
     334    Value * remainingBytesToSkip = b->getScalarField("BytesToSkip");
     335    Value * advanceDist = b->CreateUMin(remainingBytesInBlock, remainingBytesToSkip);
     336    remainingBytesToSkip = b->CreateSub(remainingBytesToSkip, advanceDist);
     337    incStackVar(b, sOffset, advanceDist);
     338    b->setScalarField("BytesToSkip", remainingBytesToSkip);
     339
     340    generateBoundaryDetection(b, State::SKIPPING_BYTES, exit_block);
    346341    // Falls through.
    347342}
    348343
    349344
    350 void LZ4IndexDecoderKernel::generateAtBlockSize(const std::unique_ptr<KernelBuilder> &iBuilder, BasicBlock * bb, BasicBlock * skippingBytes, BasicBlock * exit_block) {
    351     iBuilder->CreateBr(bb);
    352     iBuilder->SetInsertPoint(bb);
     345void LZ4IndexDecoderKernel::generateAtBlockSize(const std::unique_ptr<KernelBuilder> &b, BasicBlock * bb, BasicBlock * skippingBytes, BasicBlock * exit_block) {
     346    b->CreateBr(bb);
     347    b->SetInsertPoint(bb);
    353348    printRTDebugMsg("scanning block size");
    354349    printGlobalPos();
     
    360355
    361356    // A do-while loop.
    362     BasicBlock * loopBody = iBuilder->CreateBasicBlock("blocksize_loop_body");
    363     BasicBlock * loopExit = iBuilder->CreateBasicBlock("blocksize_loop_exit");
    364     iBuilder->CreateBr(loopBody);
    365 
    366     iBuilder->SetInsertPoint(loopBody);
    367     Value * byte = loadRawByte(iBuilder);
    368     Value * newTempLength = iBuilder->CreateAdd(
    369             iBuilder->CreateShl(iBuilder->CreateLoad(sTempLength), iBuilder->getInt32(8)),
    370             iBuilder->CreateZExt(byte, iBuilder->getInt32Ty())
    371             );
    372     iBuilder->CreateStore(newTempLength, sTempLength);
    373     incStackVar(iBuilder, sTempCount);
    374     incStackVar(iBuilder, sOffset);
     357    BasicBlock * loopBody = b->CreateBasicBlock("blocksize_loop_body");
     358    BasicBlock * loopExit = b->CreateBasicBlock("blocksize_loop_exit");
     359    b->CreateBr(loopBody);
     360
     361    b->SetInsertPoint(loopBody);
     362    Value * byte = loadRawByte(b);
     363    Value * newTempLength = b->CreateAdd(
     364            b->CreateShl(b->CreateLoad(sTempLength), b->getInt32(8)),
     365            b->CreateZExt(byte, b->getInt32Ty())
     366            );
     367    b->CreateStore(newTempLength, sTempLength);
     368    incStackVar(b, sTempCount);
     369    incStackVar(b, sOffset);
    375370    // Stop when we read all four bytes or reach the end of the block.
    376     iBuilder->CreateCondBr(
    377             iBuilder->CreateOr(
    378                 iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sTempCount), iBuilder->getInt32(4)),
    379                 iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth()))
     371    b->CreateCondBr(
     372            b->CreateOr(
     373                b->CreateICmpEQ(b->CreateLoad(sTempCount), b->getInt32(4)),
     374                b->CreateICmpEQ(b->CreateLoad(sOffset), b->getInt32(b->getBitBlockWidth()))
    380375                ),
    381376            loopExit, loopBody
    382377            );
    383378
    384     iBuilder->SetInsertPoint(loopExit);
    385     BasicBlock * blockSizeCompleted_then = iBuilder->CreateBasicBlock("blocksize_completed_then");
    386     BasicBlock * blockSizeCompleted_cont = iBuilder->CreateBasicBlock("blocksize_completed_cont");
    387     iBuilder->CreateLikelyCondBr(
    388             iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sTempCount), iBuilder->getInt32(4)),
     379    b->SetInsertPoint(loopExit);
     380    BasicBlock * blockSizeCompleted_then = b->CreateBasicBlock("blocksize_completed_then");
     381    BasicBlock * blockSizeCompleted_cont = b->CreateBasicBlock("blocksize_completed_cont");
     382    b->CreateLikelyCondBr(
     383            b->CreateICmpEQ(b->CreateLoad(sTempCount), b->getInt32(4)),
    389384            blockSizeCompleted_then, blockSizeCompleted_cont
    390385            );
    391386
    392387    // All four bytes of the block size are read in.
    393     iBuilder->SetInsertPoint(blockSizeCompleted_then);
     388    b->SetInsertPoint(blockSizeCompleted_then);
    394389    // Remember to swap the block size back to little-endian.
    395     Value * blockSize = generateBitswap(iBuilder, iBuilder->CreateLoad(sTempLength));
    396     Value * currentPos = iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset));
    397     iBuilder->setScalarField("LZ4BlockStart", currentPos);
    398     iBuilder->setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, blockSize));
     390    Value * blockSize = generateBitswap(b, b->CreateLoad(sTempLength));
     391    Value * currentPos = b->CreateAdd(blockStartPos, b->CreateLoad(sOffset));
     392    b->setScalarField("LZ4BlockStart", currentPos);
     393    b->setScalarField("LZ4BlockEnd", b->CreateAdd(currentPos, blockSize));
    399394    printRTDebugInt("blockSize", blockSize);
    400395
    401     BasicBlock * uncompressedBlock_then = iBuilder->CreateBasicBlock("uncompressed_block_then");
    402     BasicBlock * uncompressedBlock_else = iBuilder->CreateBasicBlock("uncompressed_block_cont");
    403     iBuilder->CreateUnlikelyCondBr(
    404             iBuilder->CreateTrunc(
    405                 iBuilder->CreateLShr(blockSize, iBuilder->getInt32(31)),
    406                 iBuilder->getInt1Ty()
     396    BasicBlock * uncompressedBlock_then = b->CreateBasicBlock("uncompressed_block_then");
     397    BasicBlock * uncompressedBlock_else = b->CreateBasicBlock("uncompressed_block_cont");
     398    b->CreateUnlikelyCondBr(
     399            b->CreateTrunc(
     400                b->CreateLShr(blockSize, b->getInt32(31)),
     401                b->getInt1Ty()
    407402                ),
    408403            uncompressedBlock_then,
     
    410405            );
    411406
    412     iBuilder->SetInsertPoint(uncompressedBlock_then);
    413     Value * realBlockSize = iBuilder->CreateXor(blockSize, iBuilder->getInt32(1L << 31));
    414     iBuilder->setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, realBlockSize));
    415     iBuilder->setScalarField("BytesToSkip", realBlockSize);
    416     iBuilder->setScalarField("LiteralStart", currentPos);
    417     iBuilder->setScalarField("LiteralLength", realBlockSize);
     407    b->SetInsertPoint(uncompressedBlock_then);
     408    Value * realBlockSize = b->CreateXor(blockSize, b->getInt32(1L << 31));
     409    b->setScalarField("LZ4BlockEnd", b->CreateAdd(currentPos, realBlockSize));
     410    b->setScalarField("BytesToSkip", realBlockSize);
     411    b->setScalarField("LiteralStart", currentPos);
     412    b->setScalarField("LiteralLength", realBlockSize);
    418413    // No need to set MatchLength/MatchOffset to 0, nor to produce output,
    419414    // because %atBlockChecksum will do so as the last sequence.
    420     iBuilder->CreateStore(iBuilder->getInt8(State::AT_BLOCK_CHECKSUM), sState);
    421     iBuilder->CreateBr(skippingBytes);
    422 
    423     iBuilder->SetInsertPoint(uncompressedBlock_else);
     415    b->CreateStore(b->getInt8(State::AT_BLOCK_CHECKSUM), sState);
     416    b->CreateBr(skippingBytes);
     417
     418    b->SetInsertPoint(uncompressedBlock_else);
    424419    // Reset these temporary values for later use.
    425     iBuilder->CreateStore(iBuilder->getInt32(0), sTempLength);
    426     iBuilder->CreateStore(iBuilder->getInt32(0), sTempCount);
    427     iBuilder->CreateStore(iBuilder->getInt8(State::AT_TOKEN), sState);
     420    b->CreateStore(b->getInt32(0), sTempLength);
     421    b->CreateStore(b->getInt32(0), sTempCount);
     422    b->CreateStore(b->getInt8(State::AT_TOKEN), sState);
    428423    // A block size of 0 is the end mark of the frame. Exit.
    429     iBuilder->CreateUnlikelyCondBr(
    430             iBuilder->CreateICmpEQ(blockSize, ConstantInt::getNullValue(blockSize->getType())),
     424    b->CreateUnlikelyCondBr(
     425            b->CreateICmpEQ(blockSize, ConstantInt::getNullValue(blockSize->getType())),
    431426            exit_block,
    432427            blockSizeCompleted_cont
     
    434429
    435430    // We could be at the boundary no matter the block size is completed or not.
    436     iBuilder->SetInsertPoint(blockSizeCompleted_cont);
    437     generateBoundaryDetection(iBuilder, State::AT_BLOCK_SIZE, exit_block);
     431    b->SetInsertPoint(blockSizeCompleted_cont);
     432    generateBoundaryDetection(b, State::AT_BLOCK_SIZE, exit_block);
    438433    // Falls through to %at_token.
    439434}
    440435
    441436
    442 void LZ4IndexDecoderKernel::generateAtToken(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    443     iBuilder->CreateBr(bb);
    444     iBuilder->SetInsertPoint(bb);
     437void LZ4IndexDecoderKernel::generateAtToken(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * bb, BasicBlock * exit_block) {
     438    b->CreateBr(bb);
     439    b->SetInsertPoint(bb);
    445440    printRTDebugMsg("reading token");
    446441
    447     Value * token = loadRawByte(iBuilder);
    448     Value * literalLen = iBuilder->CreateZExt(
    449         iBuilder->CreateLShr(token, iBuilder->getInt8(4)),
    450         iBuilder->getInt32Ty()
     442    Value * token = loadRawByte(b);
     443    Value * literalLen = b->CreateZExt(
     444        b->CreateLShr(token, b->getInt8(4)),
     445        b->getInt32Ty()
    451446        );
    452     Value * matchLen = iBuilder->CreateZExt(
    453         iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)),
    454         iBuilder->getInt32Ty()
     447    Value * matchLen = b->CreateZExt(
     448        b->CreateAnd(token, b->getInt8(0xf)),
     449        b->getInt32Ty()
    455450        );
    456     incStackVar(iBuilder, sOffset);
     451    incStackVar(b, sOffset);
    457452    // Prepare extender word for scanning.
    458     loadCurrentExtender(iBuilder);
    459     setExtenderUntilOffset(iBuilder);
     453    loadCurrentExtender(b);
     454    setExtenderUntilOffset(b);
    460455    // Store the (partial) match length to be extended later.
    461     iBuilder->setScalarField("MatchLength", matchLen);
     456    b->setScalarField("MatchLength", matchLen);
    462457    // Use tempLength to accumulate extended lengths (until at_literals).
    463     iBuilder->CreateStore(literalLen, sTempLength);
    464     iBuilder->CreateStore(iBuilder->getInt8(State::EXTENDING_LITERAL_LENGTH), sState);
    465 
    466     generateBoundaryDetection(iBuilder, State::AT_TOKEN, exit_block);
     458    b->CreateStore(literalLen, sTempLength);
     459    b->CreateStore(b->getInt8(State::EXTENDING_LITERAL_LENGTH), sState);
     460
     461    generateBoundaryDetection(b, State::AT_TOKEN, exit_block);
    467462    // Falls through to %extending_literal_length.
    468463}
    469464
    470465
    471 void LZ4IndexDecoderKernel::generateExtendingLiteralLen(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    472     iBuilder->CreateBr(bb);
    473     iBuilder->SetInsertPoint(bb);
     466void LZ4IndexDecoderKernel::generateExtendingLiteralLen(const std::unique_ptr<KernelBuilder> & b, BasicBlock * bb, BasicBlock * exit_block) {
     467    b->CreateBr(bb);
     468    b->SetInsertPoint(bb);
    474469    printRTDebugMsg("extending literal len");
    475470
    476     Value * wordOffset = getWordOffset(iBuilder);
    477     Value * blockOffset = getWordStartOffset(iBuilder);
    478     Value * literalLen = iBuilder->CreateLoad(sTempLength);
    479     Value * literalExtEnd = iBuilder->CreateTrunc(
    480                 iBuilder->CreateCountForwardZeroes(iBuilder->CreateNot(iBuilder->CreateLoad(sExtender))),
    481                 iBuilder->getInt32Ty());
     471    Value * wordOffset = getWordOffset(b);
     472    Value * blockOffset = getWordStartOffset(b);
     473    Value * literalLen = b->CreateLoad(sTempLength);
     474    Value * literalExtEnd = b->CreateTrunc(
     475                b->CreateCountForwardZeroes(b->CreateNot(b->CreateLoad(sExtender))),
     476                b->getInt32Ty());
    482477    printRTDebugInt("wordOffset", wordOffset);
    483478    printRTDebugInt("literalExtEnd", literalExtEnd);
    484479    // number of extender = literalExtEnd - wordOffset
    485     Value * numExtenders = iBuilder->CreateSub(literalExtEnd, wordOffset);
     480    Value * numExtenders = b->CreateSub(literalExtEnd, wordOffset);
    486481    Value * literalExtReachBoundary =
    487             iBuilder->CreateICmpEQ(literalExtEnd, iBuilder->getInt32(wordWidth));
     482            b->CreateICmpEQ(literalExtEnd, b->getInt32(wordWidth));
    488483    // There are literalExtEnd forward zeroes, we load bytes[literalExtEnd]
    489484    // which is the first non-extender.  If literalExtEnd == 64, we force the
    490485    // load index to be 0 to avoid out-of-bound access, and lastByte will be 0.
    491     Value * loadOffset = iBuilder->CreateSelect(literalExtReachBoundary,
     486    Value * loadOffset = b->CreateSelect(literalExtReachBoundary,
    492487            ConstantInt::getNullValue(literalExtEnd->getType()),
    493488            literalExtEnd);
    494     Value * lastByte = iBuilder->CreateSelect(literalExtReachBoundary,
    495             iBuilder->getInt8(0),
    496             loadRawByte(iBuilder, iBuilder->CreateAdd(blockOffset, loadOffset)));
    497     Value * literalLenExted = iBuilder->CreateICmpUGE(literalLen, iBuilder->getInt32(0xf));
    498     literalLen = iBuilder->CreateSelect(literalLenExted,
    499             iBuilder->CreateAdd(
     489    Value * lastByte = b->CreateSelect(literalExtReachBoundary,
     490            b->getInt8(0),
     491            loadRawByte(b, b->CreateAdd(blockOffset, loadOffset)));
     492    Value * literalLenExted = b->CreateICmpUGE(literalLen, b->getInt32(0xf));
     493    literalLen = b->CreateSelect(literalLenExted,
     494            b->CreateAdd(
    500495                literalLen,
    501                 iBuilder->CreateAdd(
    502                     iBuilder->CreateMul(numExtenders, iBuilder->getInt32(0xff)),
    503                     iBuilder->CreateZExt(lastByte, iBuilder->getInt32Ty())
     496                b->CreateAdd(
     497                    b->CreateMul(numExtenders, b->getInt32(0xff)),
     498                    b->CreateZExt(lastByte, b->getInt32Ty())
    504499                    )
    505500                ),      // literalLen + numExtenders * 255
    506501            literalLen);
    507     wordOffset = iBuilder->CreateSelect(literalLenExted,
     502    wordOffset = b->CreateSelect(literalLenExted,
    508503            literalExtEnd,
    509504            wordOffset);
    510505    // If lastByte is truly the last length byte, we need to advance the cursor by 1.
    511     wordOffset = iBuilder->CreateSelect(
    512             iBuilder->CreateAnd(literalLenExted, iBuilder->CreateNot(literalExtReachBoundary)),
    513             iBuilder->CreateAdd(wordOffset, iBuilder->getInt32(1)),
     506    wordOffset = b->CreateSelect(
     507            b->CreateAnd(literalLenExted, b->CreateNot(literalExtReachBoundary)),
     508            b->CreateAdd(wordOffset, b->getInt32(1)),
    514509            wordOffset
    515510            );
    516     iBuilder->CreateStore(literalLen, sTempLength);
    517     iBuilder->CreateStore(iBuilder->CreateAdd(blockOffset, wordOffset), sOffset);
    518     Value * unfinished = iBuilder->CreateAnd(literalExtReachBoundary, literalLenExted);
    519     Value * newState = iBuilder->CreateSelect(unfinished,
    520             iBuilder->getInt8(State::EXTENDING_LITERAL_LENGTH),
    521             iBuilder->getInt8(State::AT_LITERALS));
    522     iBuilder->CreateStore(newState, sState);
    523 
    524     generateBoundaryDetection(iBuilder, State::EXTENDING_LITERAL_LENGTH, exit_block, true);
    525     BasicBlock * cont_block = iBuilder->CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_LITERAL_LENGTH));
     511    b->CreateStore(literalLen, sTempLength);
     512    b->CreateStore(b->CreateAdd(blockOffset, wordOffset), sOffset);
     513    Value * unfinished = b->CreateAnd(literalExtReachBoundary, literalLenExted);
     514    Value * newState = b->CreateSelect(unfinished,
     515            b->getInt8(State::EXTENDING_LITERAL_LENGTH),
     516            b->getInt8(State::AT_LITERALS));
     517    b->CreateStore(newState, sState);
     518
     519    generateBoundaryDetection(b, State::EXTENDING_LITERAL_LENGTH, exit_block, true);
     520    BasicBlock * cont_block = b->CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_LITERAL_LENGTH));
    526521    // Insert point is still in wordBoundary block now.
    527522    // See if there are still more extenders.
    528     iBuilder->CreateUnlikelyCondBr(unfinished, bb, cont_block);
    529 
    530     iBuilder->SetInsertPoint(cont_block);
     523    b->CreateUnlikelyCondBr(unfinished, bb, cont_block);
     524
     525    b->SetInsertPoint(cont_block);
    531526    // Falls through to %at_literals.
    532527}
    533528
    534529
    535 void LZ4IndexDecoderKernel::generateAtLiterals(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb) {
    536     iBuilder->CreateBr(bb);
    537     iBuilder->SetInsertPoint(bb);
    538 
    539     iBuilder->setScalarField("LiteralStart", iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset)));
    540     iBuilder->setScalarField("LiteralLength", iBuilder->CreateLoad(sTempLength));
    541     iBuilder->setScalarField("BytesToSkip", iBuilder->CreateLoad(sTempLength));
    542     iBuilder->CreateStore(iBuilder->getInt8(State::AT_FIRST_OFFSET), sState);
     530void LZ4IndexDecoderKernel::generateAtLiterals(const std::unique_ptr<KernelBuilder> & b, BasicBlock * bb) {
     531    b->CreateBr(bb);
     532    b->SetInsertPoint(bb);
     533    b->setScalarField("LiteralStart", b->CreateAdd(blockStartPos, b->CreateLoad(sOffset)));
     534    b->setScalarField("LiteralLength", b->CreateLoad(sTempLength));
     535    b->setScalarField("BytesToSkip", b->CreateLoad(sTempLength));
     536    b->CreateStore(b->getInt8(State::AT_FIRST_OFFSET), sState);
    543537
    544538    // No boundary detection here as we do not advance the cursor.
     
    547541
    548542
    549 void LZ4IndexDecoderKernel::generateAtFirstOffset(const std::unique_ptr<KernelBuilder> &iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    550     iBuilder->SetInsertPoint(bb);
     543void LZ4IndexDecoderKernel::generateAtFirstOffset(const std::unique_ptr<KernelBuilder> &b, BasicBlock * bb, BasicBlock * exit_block) {
     544    b->SetInsertPoint(bb);
    551545    printRTDebugMsg("reading first offset");
    552546
    553     Value * byte = iBuilder->CreateZExt(loadRawByte(iBuilder), iBuilder->getInt32Ty());
     547    Value * byte = b->CreateZExt(loadRawByte(b), b->getInt32Ty());
    554548    // Use tempLength to store partial offset.
    555     iBuilder->CreateStore(byte, sTempLength);
    556     incStackVar(iBuilder, sOffset);
    557     iBuilder->CreateStore(iBuilder->getInt8(State::AT_SECOND_OFFSET), sState);
    558 
    559     generateBoundaryDetection(iBuilder, State::AT_FIRST_OFFSET, exit_block);
     549    b->CreateStore(byte, sTempLength);
     550    incStackVar(b, sOffset);
     551    b->CreateStore(b->getInt8(State::AT_SECOND_OFFSET), sState);
     552
     553    generateBoundaryDetection(b, State::AT_FIRST_OFFSET, exit_block);
    560554    // Falls through to %at_second_offset.
    561555}
    562556
    563557
    564 void LZ4IndexDecoderKernel::generateAtSecondOffset(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    565     iBuilder->CreateBr(bb);
    566     iBuilder->SetInsertPoint(bb);
     558void LZ4IndexDecoderKernel::generateAtSecondOffset(const std::unique_ptr<KernelBuilder> & b, BasicBlock * bb, BasicBlock * exit_block) {
     559    b->CreateBr(bb);
     560    b->SetInsertPoint(bb);
    567561    printRTDebugMsg("reading second offset");
    568562
    569     Value * byte1 = iBuilder->CreateLoad(sTempLength);
    570     Value * byte2 = iBuilder->CreateZExt(loadRawByte(iBuilder), iBuilder->getInt32Ty());
    571     Value * offset = iBuilder->CreateAdd(
    572             iBuilder->CreateShl(byte2, iBuilder->getInt32(8)),
     563    Value * byte1 = b->CreateLoad(sTempLength);
     564    Value * byte2 = b->CreateZExt(loadRawByte(b), b->getInt32Ty());
     565    Value * offset = b->CreateAdd(
     566            b->CreateShl(byte2, b->getInt32(8)),
    573567            byte1
    574568            );
    575     iBuilder->setScalarField("MatchOffset", offset);
    576     incStackVar(iBuilder, sOffset);
     569    b->setScalarField("MatchOffset", offset);
     570    incStackVar(b, sOffset);
    577571    // Prepare extender word and tempLength for extending.
    578     loadCurrentExtender(iBuilder);
    579     setExtenderUntilOffset(iBuilder);
    580     iBuilder->CreateStore(iBuilder->getScalarField("MatchLength"), sTempLength);
    581     iBuilder->CreateStore(iBuilder->getInt8(State::EXTENDING_MATCH_LENGTH), sState);
    582 
    583     generateBoundaryDetection(iBuilder, State::AT_SECOND_OFFSET, exit_block);
     572    loadCurrentExtender(b);
     573    setExtenderUntilOffset(b);
     574    b->CreateStore(b->getScalarField("MatchLength"), sTempLength);
     575    b->CreateStore(b->getInt8(State::EXTENDING_MATCH_LENGTH), sState);
     576
     577    generateBoundaryDetection(b, State::AT_SECOND_OFFSET, exit_block);
    584578    // Falls through to %extending_match_length.
    585579}
    586580
    587581
    588 void LZ4IndexDecoderKernel::generateExtendingMatchLen(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    589     iBuilder->CreateBr(bb);
    590     iBuilder->SetInsertPoint(bb);
     582void LZ4IndexDecoderKernel::generateExtendingMatchLen(const std::unique_ptr<KernelBuilder> & b, BasicBlock * bb, BasicBlock * exit_block) {
     583    b->CreateBr(bb);
     584    b->SetInsertPoint(bb);
    591585    printRTDebugMsg("extending match length");
    592586    printGlobalPos();
    593     printRTDebugInt("rawbyte", loadRawByte(iBuilder));
    594     printRTDebugInt("extword", iBuilder->CreateLoad(sExtender));
    595 
    596     Value * wordOffset = getWordOffset(iBuilder);
    597     Value * blockOffset = getWordStartOffset(iBuilder);
    598     Value * matchLen = iBuilder->CreateLoad(sTempLength);
    599     Value * matchExtEnd = iBuilder->CreateTrunc(
    600         iBuilder->CreateCountForwardZeroes(iBuilder->CreateNot(iBuilder->CreateLoad(sExtender))),
    601         iBuilder->getInt32Ty()
     587    printRTDebugInt("rawbyte", loadRawByte(b));
     588    printRTDebugInt("extword", b->CreateLoad(sExtender));
     589
     590    Value * wordOffset = getWordOffset(b);
     591    Value * blockOffset = getWordStartOffset(b);
     592    Value * matchLen = b->CreateLoad(sTempLength);
     593    Value * matchExtEnd = b->CreateTrunc(
     594        b->CreateCountForwardZeroes(b->CreateNot(b->CreateLoad(sExtender))),
     595        b->getInt32Ty()
    602596        );
    603597    printRTDebugInt("wordoffset", wordOffset);
    604598    printRTDebugInt("matchExtEnd", matchExtEnd);
    605599    // number of extender = matchExtEnd - wordOffset
    606     Value * numExtenders = iBuilder->CreateSub(matchExtEnd, wordOffset);
     600    Value * numExtenders = b->CreateSub(matchExtEnd, wordOffset);
    607601    Value * matchExtReachBoundary =
    608             iBuilder->CreateICmpEQ(matchExtEnd, iBuilder->getInt32(wordWidth));
     602            b->CreateICmpEQ(matchExtEnd, b->getInt32(wordWidth));
    609603    // There are matchExtEnd forward zeroes, we load bytes[matchExtEnd]
    610604    // which is the first non-extender.  If matchExtEnd == 64, we force the
    611605    // load index to be 0 to avoid out-of-bound access, and lastByte will be 0.
    612     Value * loadOffset = iBuilder->CreateSelect(matchExtReachBoundary,
     606    Value * loadOffset = b->CreateSelect(matchExtReachBoundary,
    613607            ConstantInt::getNullValue(matchExtEnd->getType()),
    614608            matchExtEnd);
    615     Value * lastByte = iBuilder->CreateSelect(matchExtReachBoundary,
    616             iBuilder->getInt8(0),
    617             loadRawByte(iBuilder, iBuilder->CreateAdd(blockOffset, loadOffset)));
    618     Value * matchLenExted = iBuilder->CreateICmpUGE(matchLen, iBuilder->getInt32(0xf));
    619     matchLen = iBuilder->CreateSelect(matchLenExted,
    620             iBuilder->CreateAdd(
     609    Value * lastByte = b->CreateSelect(matchExtReachBoundary,
     610            b->getInt8(0),
     611            loadRawByte(b, b->CreateAdd(blockOffset, loadOffset)));
     612    Value * matchLenExted = b->CreateICmpUGE(matchLen, b->getInt32(0xf));
     613    matchLen = b->CreateSelect(matchLenExted,
     614            b->CreateAdd(
    621615                matchLen,
    622                 iBuilder->CreateAdd(
    623                     iBuilder->CreateMul(numExtenders, iBuilder->getInt32(0xff)),
    624                     iBuilder->CreateZExt(lastByte, iBuilder->getInt32Ty())
     616                b->CreateAdd(
     617                    b->CreateMul(numExtenders, b->getInt32(0xff)),
     618                    b->CreateZExt(lastByte, b->getInt32Ty())
    625619                    )
    626620                ),      // matchLen + numExtenders * 255
    627621            matchLen);
    628     wordOffset = iBuilder->CreateSelect(matchLenExted,
     622    wordOffset = b->CreateSelect(matchLenExted,
    629623            matchExtEnd,
    630624            wordOffset);
    631625    // If lastByte is truly the last length byte, we need to advance the cursor by 1.
    632     wordOffset = iBuilder->CreateSelect(
    633             iBuilder->CreateAnd(matchLenExted, iBuilder->CreateNot(matchExtReachBoundary)),
    634             iBuilder->CreateAdd(wordOffset, iBuilder->getInt32(1)),
     626    wordOffset = b->CreateSelect(
     627            b->CreateAnd(matchLenExted, b->CreateNot(matchExtReachBoundary)),
     628            b->CreateAdd(wordOffset, b->getInt32(1)),
    635629            wordOffset
    636630            );
    637     iBuilder->CreateStore(matchLen, sTempLength);
    638     iBuilder->CreateStore(iBuilder->CreateAdd(blockOffset, wordOffset), sOffset);
    639 
    640     Value * unfinished = iBuilder->CreateAnd(matchExtReachBoundary, matchLenExted);
    641     BasicBlock * output_then = iBuilder->CreateBasicBlock("output_then");
    642     BasicBlock * output_cont = iBuilder->CreateBasicBlock("output_cont");
    643     iBuilder->CreateLikelyCondBr(
    644             iBuilder->CreateNot(unfinished),
     631    b->CreateStore(matchLen, sTempLength);
     632    b->CreateStore(b->CreateAdd(blockOffset, wordOffset), sOffset);
     633
     634    Value * unfinished = b->CreateAnd(matchExtReachBoundary, matchLenExted);
     635    BasicBlock * output_then = b->CreateBasicBlock("output_then");
     636    BasicBlock * output_cont = b->CreateBasicBlock("output_cont");
     637    b->CreateLikelyCondBr(
     638            b->CreateNot(unfinished),
    645639            output_then, output_cont
    646640            );
    647     iBuilder->SetInsertPoint(output_then);
    648     iBuilder->CreateStore(iBuilder->getInt8(State::AT_TOKEN), sState);
    649     matchLen = iBuilder->CreateAdd(matchLen, iBuilder->getInt32(4));    // Add the constant at the end.
    650     iBuilder->setScalarField("MatchLength", matchLen);
    651     generateProduceOutput(iBuilder);
    652     iBuilder->CreateBr(output_cont);
    653 
    654     iBuilder->SetInsertPoint(output_cont);
    655     generateBoundaryDetection(iBuilder, State::EXTENDING_MATCH_LENGTH, exit_block, true);
    656     BasicBlock * cont_block = iBuilder->CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_MATCH_LENGTH));
     641    b->SetInsertPoint(output_then);
     642    b->CreateStore(b->getInt8(State::AT_TOKEN), sState);
     643    matchLen = b->CreateAdd(matchLen, b->getInt32(4));    // Add the constant at the end.
     644    b->setScalarField("MatchLength", matchLen);
     645    generateProduceOutput(b);
     646    b->CreateBr(output_cont);
     647
     648    b->SetInsertPoint(output_cont);
     649    generateBoundaryDetection(b, State::EXTENDING_MATCH_LENGTH, exit_block, true);
     650    BasicBlock * cont_block = b->CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_MATCH_LENGTH));
    657651    // Insert point is still in wordBoundary block now.
    658652    // See if there are still more extenders.
    659     iBuilder->CreateUnlikelyCondBr(unfinished, bb, cont_block);
    660 
    661     iBuilder->SetInsertPoint(cont_block);
    662 }
    663 
    664 
    665 void LZ4IndexDecoderKernel::generateAtBlockChecksum(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * skippingBytes) {
     653    b->CreateUnlikelyCondBr(unfinished, bb, cont_block);
     654
     655    b->SetInsertPoint(cont_block);
     656}
     657
     658
     659void LZ4IndexDecoderKernel::generateAtBlockChecksum(const std::unique_ptr<KernelBuilder> & b, BasicBlock * bb, BasicBlock * skippingBytes) {
    666660    // No branch here as we have made a conditional branch outside.
    667     iBuilder->SetInsertPoint(bb);
     661    b->SetInsertPoint(bb);
    668662    printRTDebugMsg("processing block checksum");
    669663
    670664    // Produce the partial output (fill matchIndexes with 0).
    671     iBuilder->setScalarField("MatchOffset", iBuilder->getInt32(0));
    672     iBuilder->setScalarField("MatchLength", iBuilder->getInt32(0));
    673     generateProduceOutput(iBuilder);
    674 
    675     BasicBlock * hasChecksum_then = iBuilder->CreateBasicBlock("has_checksum_then");
    676     BasicBlock * hasChecksum_cont = iBuilder->CreateBasicBlock("has_checksum_cont");
    677 
    678     iBuilder->CreateStore(iBuilder->getInt8(State::AT_BLOCK_SIZE), sState);
    679     iBuilder->CreateCondBr(iBuilder->getScalarField("hasBlockChecksum"), hasChecksum_then, hasChecksum_cont);
    680 
    681     iBuilder->SetInsertPoint(hasChecksum_then);
    682     iBuilder->setScalarField("BytesToSkip", iBuilder->getInt32(4));
    683     iBuilder->CreateBr(skippingBytes);
     665    b->setScalarField("MatchOffset", b->getInt32(0));
     666    b->setScalarField("MatchLength", b->getInt32(0));
     667    generateProduceOutput(b);
     668
     669    BasicBlock * hasChecksum_then = b->CreateBasicBlock("has_checksum_then");
     670    BasicBlock * hasChecksum_cont = b->CreateBasicBlock("has_checksum_cont");
     671
     672    b->CreateStore(b->getInt8(State::AT_BLOCK_SIZE), sState);
     673    b->CreateCondBr(b->getScalarField("hasBlockChecksum"), hasChecksum_then, hasChecksum_cont);
     674
     675    b->SetInsertPoint(hasChecksum_then);
     676    b->setScalarField("BytesToSkip", b->getInt32(4));
     677    b->CreateBr(skippingBytes);
    684678    // Boundary detection will be done in skipping_bytes.
    685679
    686     iBuilder->SetInsertPoint(hasChecksum_cont);
     680    b->SetInsertPoint(hasChecksum_cont);
    687681    // No checksum, offset not advanced.  Falls through to the next block (block_size).
    688682}
    689683
    690 LZ4IndexDecoderKernel::LZ4IndexDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
     684LZ4IndexDecoderKernel::LZ4IndexDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & b)
    691685: BlockOrientedKernel("lz4IndexDecoder",
    692686    // Inputs
    693     {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"},
    694      Binding{iBuilder->getStreamSetTy(1, 1), "extenders"}},
     687    {Binding{b->getStreamSetTy(1, 8), "byteStream"},
     688     Binding{b->getStreamSetTy(1, 1), "extenders"}},
    695689    // Outputs: literal start, literal length, match offset, match length
    696     {Binding{iBuilder->getStreamSetTy(2, 32), "literalIndexes", UnknownRate()},
    697      Binding{iBuilder->getStreamSetTy(2, 32), "matchIndexes", RateEqualTo("literalIndexes")}},
     690    {Binding{b->getStreamSetTy(2, 32), "literalIndexes", UnknownRate()},
     691     Binding{b->getStreamSetTy(2, 32), "matchIndexes", RateEqualTo("literalIndexes")}},
    698692    // Arguments
    699     {Binding{iBuilder->getInt1Ty(), "hasBlockChecksum"}},
     693    {Binding{b->getInt1Ty(), "hasBlockChecksum"}},
    700694    {},
    701695    // Internal states:
    702     {Binding{iBuilder->getInt32Ty(), "BlockNo"},
    703      Binding{iBuilder->getInt8Ty(), "State"},
    704      Binding{iBuilder->getInt32Ty(), "LZ4BlockStart"},
    705      Binding{iBuilder->getInt32Ty(), "LZ4BlockEnd"},
    706      Binding{iBuilder->getInt32Ty(), "BytesToSkip"},
    707      Binding{iBuilder->getInt32Ty(), "TempLength"},
    708      Binding{iBuilder->getInt32Ty(), "TempCount"},
    709      Binding{iBuilder->getInt32Ty(), "LiteralStart"},
    710      Binding{iBuilder->getInt32Ty(), "LiteralLength"},
    711      Binding{iBuilder->getInt32Ty(), "MatchOffset"},
    712      Binding{iBuilder->getInt32Ty(), "MatchLength"}})
    713 , wordWidth{iBuilder->getSizeTy()->getBitWidth()} {
     696    {Binding{b->getInt32Ty(), "BlockNo"},
     697     Binding{b->getInt8Ty(), "State"},
     698     Binding{b->getInt32Ty(), "LZ4BlockStart"},
     699     Binding{b->getInt32Ty(), "LZ4BlockEnd"},
     700     Binding{b->getInt32Ty(), "BytesToSkip"},
     701     Binding{b->getInt32Ty(), "TempLength"},
     702     Binding{b->getInt32Ty(), "TempCount"},
     703     Binding{b->getInt32Ty(), "LiteralStart"},
     704     Binding{b->getInt32Ty(), "LiteralLength"},
     705     Binding{b->getInt32Ty(), "MatchOffset"},
     706     Binding{b->getInt32Ty(), "MatchLength"}})
     707, wordWidth{b->getSizeTy()->getBitWidth()} {
    714708    setNoTerminateAttribute(true);
    715709}
Note: See TracChangeset for help on using the changeset viewer.