Ignore:
Timestamp:
Jun 12, 2018, 4:09:27 AM (12 months ago)
Author:
xwa163
Message:
  1. Add command-line parameters -enable-gather and -enable-scatter to the LZ4 parallel grep
  2. Fix some bugs on AVX-512 machines
File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder.cpp

    r6077 r6081  
    4646}
    4747
    48 void LZ4BlockDecoderNewKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     48void LZ4BlockDecoderNewKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) {
    4949
    50     Constant* INT64_0 = iBuilder->getInt64(0);
     50    Constant* INT64_0 = b->getInt64(0);
    5151
    52     BasicBlock * entryBlock = iBuilder->GetInsertBlock();
     52    BasicBlock * entryBlock = b->GetInsertBlock();
    5353
    5454    // Skip Header
    55     Value* hasSkipHeader = iBuilder->getScalarField("hasSkipHeader");
    56     iBuilder->setScalarField("hasSkipHeader", iBuilder->getTrue());
    57     Value* skipLength = iBuilder->CreateSelect(hasSkipHeader, iBuilder->getSize(0), iBuilder->getScalarField("headerSize"));
    58     Value* previousOffset = iBuilder->getScalarField("previousOffset");
    59     previousOffset = iBuilder->CreateAdd(skipLength, previousOffset);
    60     Value* initBlockStart = iBuilder->getScalarField("pendingBlockStart");
    61     Value* initBlockEnd = iBuilder->getScalarField("pendingBlockEnd");
    62     Value* initIsCompressed = iBuilder->getScalarField("pendingIsCompressed");
    63     Value * availableItemCount = iBuilder->getAvailableItemCount("byteStream");
    64     BasicBlock * processCon = iBuilder->CreateBasicBlock("process_con");
    65     iBuilder->CreateBr(processCon);
     55    Value* hasSkipHeader = b->getScalarField("hasSkipHeader");
     56    b->setScalarField("hasSkipHeader", b->getTrue());
     57    Value* skipLength = b->CreateSelect(hasSkipHeader, b->getSize(0), b->getScalarField("headerSize"));
     58    Value* previousOffset = b->getScalarField("previousOffset");
     59    previousOffset = b->CreateAdd(skipLength, previousOffset);
     60    Value* initBlockStart = b->getScalarField("pendingBlockStart");
     61    Value* initBlockEnd = b->getScalarField("pendingBlockEnd");
     62    Value* initIsCompressed = b->getScalarField("pendingIsCompressed");
     63    Value * availableItemCount = b->getAvailableItemCount("byteStream");
     64    BasicBlock * processCon = b->CreateBasicBlock("process_con");
     65    b->CreateBr(processCon);
    6666
    67     iBuilder->SetInsertPoint(processCon);
     67    b->SetInsertPoint(processCon);
    6868
    69     PHINode* phiIsCompressed = iBuilder->CreatePHI(initIsCompressed->getType(), 3);
    70     PHINode* phiBlockStart = iBuilder->CreatePHI(initBlockStart->getType(), 3);
    71     PHINode* phiBlockEnd = iBuilder->CreatePHI(initBlockEnd->getType(), 3);
    72     PHINode* sOffset = iBuilder->CreatePHI(previousOffset->getType(), 3);
     69    PHINode* phiIsCompressed = b->CreatePHI(initIsCompressed->getType(), 3);
     70    PHINode* phiBlockStart = b->CreatePHI(initBlockStart->getType(), 3);
     71    PHINode* phiBlockEnd = b->CreatePHI(initBlockEnd->getType(), 3);
     72    PHINode* sOffset = b->CreatePHI(previousOffset->getType(), 3);
    7373
    7474    phiIsCompressed->addIncoming(initIsCompressed, entryBlock);
     
    7878
    7979    // Store Output
    80     BasicBlock* storeOutputBlock = iBuilder->CreateBasicBlock("storeOutputBlock");
    81     BasicBlock * block_decoder_con = iBuilder->CreateBasicBlock("block_decoder_con_block");
     80    BasicBlock* storeOutputBlock = b->CreateBasicBlock("storeOutputBlock");
     81    BasicBlock * block_decoder_con = b->CreateBasicBlock("block_decoder_con_block");
    8282
    83     iBuilder->CreateUnlikelyCondBr(
    84             iBuilder->CreateAnd(
    85                     iBuilder->CreateICmpULE(phiBlockEnd, availableItemCount),
    86                     iBuilder->CreateNot(iBuilder->CreateICmpEQ(phiBlockEnd, INT64_0))
     83    b->CreateUnlikelyCondBr(
     84            b->CreateAnd(
     85                    b->CreateICmpULE(phiBlockEnd, availableItemCount),
     86                    b->CreateNot(b->CreateICmpEQ(phiBlockEnd, INT64_0))
    8787            ),
    8888            storeOutputBlock,
     
    9090    );
    9191
    92     iBuilder->SetInsertPoint(storeOutputBlock);
     92    b->SetInsertPoint(storeOutputBlock);
    9393
    94     appendOutput(iBuilder, phiIsCompressed, phiBlockStart, phiBlockEnd);
     94    appendOutput(b, phiIsCompressed, phiBlockStart, phiBlockEnd);
    9595
    9696
    97     phiIsCompressed->addIncoming(iBuilder->getInt8(0), storeOutputBlock);
     97    phiIsCompressed->addIncoming(b->getInt8(0), storeOutputBlock);
    9898    phiBlockStart->addIncoming(INT64_0, storeOutputBlock);
    9999    phiBlockEnd->addIncoming(INT64_0, storeOutputBlock);
    100100    sOffset->addIncoming(sOffset, storeOutputBlock);
    101101
    102     iBuilder->CreateBr(processCon);
     102    b->CreateBr(processCon);
    103103
    104104
    105105    // block decoder entry
    106     iBuilder->SetInsertPoint(block_decoder_con);
     106    b->SetInsertPoint(block_decoder_con);
    107107
    108     BasicBlock * block_decoder_body = iBuilder->CreateBasicBlock("block_decoder_body_block");
    109     BasicBlock * block_decoder_exit = iBuilder->CreateBasicBlock("block_decoder_exit_block");
     108    BasicBlock * block_decoder_body = b->CreateBasicBlock("block_decoder_body_block");
     109    BasicBlock * block_decoder_exit = b->CreateBasicBlock("block_decoder_exit_block");
    110110
    111     Value * reachFinalBlock = iBuilder->getScalarField("reachFinalBlock");
    112     iBuilder->CreateCondBr(
    113         iBuilder->CreateAnd(
    114             iBuilder->CreateICmpULT(sOffset, availableItemCount),
    115             iBuilder->CreateNot(reachFinalBlock)
     111    Value * reachFinalBlock = b->getScalarField("reachFinalBlock");
     112    b->CreateCondBr(
     113        b->CreateAnd(
     114            b->CreateICmpULT(sOffset, availableItemCount),
     115            b->CreateNot(reachFinalBlock)
    116116        ),
    117117        block_decoder_body,
     
    119119
    120120    //block_decoder_body
    121     iBuilder->SetInsertPoint(block_decoder_body);
    122     Value* currentBlockSize = iBuilder->getSize(0);
     121    b->SetInsertPoint(block_decoder_body);
     122    Value* currentBlockSize = b->getSize(0);
    123123    for (size_t i = 0; i < 4; i++) {
    124         Value * offset = iBuilder->CreateAdd(sOffset, iBuilder->getSize(i));
    125         Value * rawOffset = iBuilder->CreateZExt(generateLoadInput(iBuilder, offset), iBuilder->getSizeTy());
    126         currentBlockSize = iBuilder->CreateOr(currentBlockSize, iBuilder->CreateShl(rawOffset, iBuilder->getSize(8 * i)));
     124        Value * offset = b->CreateAdd(sOffset, b->getSize(i));
     125        Value * rawOffset = b->CreateZExt(generateLoadInput(b, offset), b->getSizeTy());
     126        currentBlockSize = b->CreateOr(currentBlockSize, b->CreateShl(rawOffset, b->getSize(8 * i)));
    127127    }
    128128
    129     Value * realBlockSize = iBuilder->CreateAnd(currentBlockSize, 0x7fffffff);
     129    Value * realBlockSize = b->CreateAnd(currentBlockSize, 0x7fffffff);
    130130
    131     Value * isCompressed = iBuilder->CreateNot(currentBlockSize);
    132     isCompressed = iBuilder->CreateLShr(isCompressed, 31);
    133     isCompressed = iBuilder->CreateTrunc(isCompressed, iBuilder->getInt1Ty());
     131    Value * isCompressed = b->CreateNot(currentBlockSize);
     132    isCompressed = b->CreateLShr(isCompressed, 31);
     133    isCompressed = b->CreateTrunc(isCompressed, b->getInt1Ty());
    134134
    135     Value * isFinalBlock = iBuilder->CreateICmpEQ(realBlockSize, iBuilder->getSize(0));
    136     iBuilder->setScalarField("reachFinalBlock", isFinalBlock);
     135    Value * isFinalBlock = b->CreateICmpEQ(realBlockSize, b->getSize(0));
     136    b->setScalarField("reachFinalBlock", isFinalBlock);
    137137
    138     Value * blockStart = iBuilder->CreateAdd(sOffset, iBuilder->getSize(4));
    139     Value * blockEnd = iBuilder->CreateAdd(blockStart, realBlockSize);
     138    Value * blockStart = b->CreateAdd(sOffset, b->getSize(4));
     139    Value * blockEnd = b->CreateAdd(blockStart, realBlockSize);
    140140
    141141    Value * newOffset = sOffset;
    142     newOffset = iBuilder->CreateAdd(newOffset, iBuilder->getSize(4)); // Block Size
    143     newOffset = iBuilder->CreateAdd(newOffset, realBlockSize); // Block Content
    144     Value * const blockChecksumOffset = iBuilder->CreateSelect(iBuilder->getScalarField("hasBlockChecksum"), iBuilder->getSize(4), iBuilder->getSize(0));
    145     newOffset = iBuilder->CreateAdd(newOffset, blockChecksumOffset);
     142    newOffset = b->CreateAdd(newOffset, b->getSize(4)); // Block Size
     143    newOffset = b->CreateAdd(newOffset, realBlockSize); // Block Content
     144    Value * const blockChecksumOffset = b->CreateSelect(b->getScalarField("hasBlockChecksum"), b->getSize(4), b->getSize(0));
     145    newOffset = b->CreateAdd(newOffset, blockChecksumOffset);
    146146
    147147    sOffset->addIncoming(newOffset, block_decoder_body);
    148     phiIsCompressed->addIncoming(isCompressed, block_decoder_body);
     148    phiIsCompressed->addIncoming(b->CreateZExt(isCompressed, b->getInt8Ty()), block_decoder_body);
    149149    phiBlockStart->addIncoming(blockStart, block_decoder_body);
    150150    phiBlockEnd->addIncoming(blockEnd, block_decoder_body);
    151     iBuilder->CreateBr(processCon);
     151    b->CreateBr(processCon);
    152152
    153153    // block_decoder_exit_block
    154     iBuilder->SetInsertPoint(block_decoder_exit);
    155     iBuilder->setScalarField("pendingIsCompressed", phiIsCompressed);
    156     iBuilder->setScalarField("pendingBlockStart", phiBlockStart);
    157     iBuilder->setScalarField("pendingBlockEnd", phiBlockEnd);
    158     iBuilder->setScalarField("previousOffset", sOffset);
    159     iBuilder->setProcessedItemCount("byteStream", availableItemCount);
    160     iBuilder->setTerminationSignal(mIsFinal);
     154    b->SetInsertPoint(block_decoder_exit);
     155    b->setScalarField("pendingIsCompressed", phiIsCompressed);
     156    b->setScalarField("pendingBlockStart", phiBlockStart);
     157    b->setScalarField("pendingBlockEnd", phiBlockEnd);
     158    b->setScalarField("previousOffset", sOffset);
     159    b->setProcessedItemCount("byteStream", availableItemCount);
     160    b->setTerminationSignal(mIsFinal);
    161161}
    162162
Note: See TracChangeset for help on using the changeset viewer.