Changeset 5895 for icGREP/icgrep-devel


Ignore:
Timestamp:
Mar 8, 2018, 4:58:50 PM (13 months ago)
Author:
xwa163
Message:
  1. Fix match copy kernel in large file for new infrastructure
  2. Enable testing for full LZ4 decode pipeline
Location:
icGREP/icgrep-devel
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/QA/lz4d_ext_dep/main.py

    r5886 r5895  
    1010failure_count = 0
    1111
    12 # TODO Only test extract-only and extract-and-deposit-only now
    1312test_options = [
    1413    ('extract_only', '-extract-only', {'extract_only' : True}),
    1514    ('extract_and_deposit', '-extract-and-deposit-only', {'extract_and_deposit_only': True}),
    16     # ('normal', '', {}),
     15    ('normal', '', {}),
    1716]
    1817
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_match_copy_kernel.cpp

    r5864 r5895  
    55#include <kernels/kernel_builder.h>
    66#include <kernels/streamset.h>
     7#include <toolchain/toolchain.h>
     8
     9#define OUTPUT_BIT_STREAM_NAME "outputStream"
    710
    811using namespace llvm;
     
    1013using namespace std;
    1114
    12 void LZ4MatchCopyKernel::generateOutputCopy(const std::unique_ptr<KernelBuilder> &iBuilder) {
    13     BasicBlock *entryBlock = iBuilder->GetInsertBlock();
    14     Value *previousCopy = iBuilder->getScalarField("previousCopy");
    15     Value* previousProduced = iBuilder->getProducedItemCount("outputStream");
    16     Value* copyStart = iBuilder->CreateSelect(
    17             iBuilder->CreateICmpULT(previousCopy, previousProduced),
    18             previousProduced,
    19             previousCopy
    20     );
    21 
    22 
    23     Value * itemsToDo = mAvailableItemCount[0];
    24     Value *itemsAvailable = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("decompressedStream"), previousCopy);
    25 //    iBuilder->CallPrintInt("itemsAvailable", iBuilder->getAvailableItemCount("decompressedStream"));
    26 
     15void LZ4MatchCopyKernel::generateOutputCopy(const std::unique_ptr<KernelBuilder> &iBuilder, Value *outputBlocks) {
     16
     17    Value *SIZE_ZERO = iBuilder->getSize(0);
     18    Value *SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
     19
     20    Value *previousProcessed = iBuilder->getProcessedItemCount("decompressedStream");
     21
     22//    BasicBlock *entryBlock = iBuilder->GetInsertBlock();
     23    Value *inputBasePtr = iBuilder->getInputStreamBlockPtr("decompressedStream", SIZE_ZERO);
     24
     25    Value *outputBasePtr = iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO);
     26    Value *itemsToDo = mAvailableItemCount[0];
     27    Value *copySize = iBuilder->CreateUMin(
     28            itemsToDo,
     29            iBuilder->CreateMul(outputBlocks, SIZE_BIT_BLOCK_WIDTH)
     30    );
    2731//    iBuilder->CallPrintInt("itemsToDo", itemsToDo);
    28 
    29 
    30     size_t decompressedStreamBufferSize = this->getInputBufferSize(iBuilder, "decompressedStream");
    31     Value *bufferSize = iBuilder->getSize(decompressedStreamBufferSize);
    32 
    33     Value *inputBasePtr = iBuilder->getRawInputPointer("decompressedStream", iBuilder->getSize(0));
    34     Value *outputBasePtr = iBuilder->getRawOutputPointer("outputStream", iBuilder->getSize(0));
    35 //    iBuilder->CallPrintInt("copyStart", copyStart);
    36 
    37 
    38     Value *previousRound = iBuilder->CreateUDiv(copyStart, bufferSize);
    39     Value *previousOffset = iBuilder->CreateURem(copyStart, bufferSize);
    40 
    41     Value *curRound = iBuilder->CreateUDiv(itemsAvailable, bufferSize);
    42     Value *curOffset = iBuilder->CreateURem(itemsAvailable, bufferSize);
    43 
    44 
    45 //    iBuilder->CallPrintInt("previousRound", previousRound);
    46 //    iBuilder->CallPrintInt("previousOffset", previousOffset);
    47 //    iBuilder->CallPrintInt("curRound", curRound);
    48 //    iBuilder->CallPrintInt("curOffset", curOffset);
    49 
    50     Value *notReachEnd = iBuilder->CreateICmpEQ(previousRound, curRound);
    51     Value *copyEndOffset1 = iBuilder->CreateSelect(notReachEnd, curOffset, bufferSize);
    52     Value *copySize1 = iBuilder->CreateSub(copyEndOffset1, previousOffset);
    53 
     32//    iBuilder->CallPrintInt("itemsToDo1", mAvailableItemCount[1]);
     33//    iBuilder->CallPrintInt("itemsToDo2", mAvailableItemCount[2]);
     34//    iBuilder->CallPrintInt("itemsToDo3", mAvailableItemCount[3]);
     35//    iBuilder->CallPrintInt("copySize", copySize);
    5436
    5537    iBuilder->CreateMemCpy(
    56             iBuilder->CreateGEP(outputBasePtr, previousOffset),
    57             iBuilder->CreateGEP(inputBasePtr, previousOffset),
    58             copySize1,
    59             1 // Not align guaranteed
    60     );
    61 //    iBuilder->CallPrintInt("bbb", iBuilder->getSize(0));
    62     iBuilder->CreateMemCpy(
    63             iBuilder->CreateGEP(outputBasePtr, iBuilder->getSize(0)),
    64             iBuilder->CreateGEP(inputBasePtr, iBuilder->getSize(0)),
    65             iBuilder->CreateSelect(notReachEnd, iBuilder->getSize(0), curOffset),
    66             1 // Not align guaranteed
    67     );
    68 //    iBuilder->CallPrintInt("ccc", iBuilder->getSize(0));
    69 
    70     iBuilder->setProcessedItemCount("decompressedStream", itemsAvailable);
    71 //    iBuilder->setProducedItemCount("outputStream", itemsAvailable);
    72 //    iBuilder->CallPrintInt("producedItemCount", iBuilder->getProducedItemCount("outputStream"));
    73     iBuilder->setScalarField("previousCopy", itemsAvailable);
    74 
    75 }
    76 
    77 void LZ4MatchCopyKernel::generateMultiBlockLogic(const unique_ptr<KernelBuilder> &iBuilder, Value * const numOfStrides) {
     38            outputBasePtr,
     39            inputBasePtr,
     40            copySize,
     41            1 // Not align guaranteed in final block
     42    );
     43//    iBuilder->CallPrintInt("outputCpyPtr", outputBasePtr);
     44//    iBuilder->CallPrintInt("outputBlocks", outputBlocks);
     45    Value *newProcessed = iBuilder->CreateAdd(previousProcessed, copySize);
     46    iBuilder->setProcessedItemCount("decompressedStream", newProcessed);
     47    iBuilder->setProducedItemCount(OUTPUT_BIT_STREAM_NAME, newProcessed);
     48
     49}
     50
     51Value *LZ4MatchCopyKernel::getMaximumMatchCopyBlock(const unique_ptr<KernelBuilder> &iBuilder) {
     52    Value *SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
     53    Value *SIZE_ZERO = iBuilder->getSize(0);
     54    Value *SIZE_ONE = iBuilder->getSize(1);
     55    Value *m0EndInitOffset = iBuilder->CreateURem(iBuilder->getProcessedItemCount("m0End"), SIZE_BIT_BLOCK_WIDTH);
     56    Value *m0EndItemsToDo = mAvailableItemCount[2];
     57    Value *m0EndBasePtr = iBuilder->getInputStreamBlockPtr("m0End", SIZE_ZERO);
     58    m0EndBasePtr = iBuilder->CreatePointerCast(m0EndBasePtr, iBuilder->getInt64Ty()->getPointerTo());
     59    Value *lastM0 = iBuilder->CreateLoad(
     60            iBuilder->CreateGEP(
     61                    m0EndBasePtr,
     62                    iBuilder->CreateSub(
     63                            iBuilder->CreateAdd(m0EndInitOffset, m0EndItemsToDo),
     64                            SIZE_ONE
     65                    )
     66
     67            )
     68    );
     69    Value *lastDepositPosition = iBuilder->CreateAdd(lastM0, SIZE_ONE);
     70
     71    // TODO maybe we can not use mIsFinal here
     72    Value *currentMaxBlock = iBuilder->CreateSelect(
     73            this->mIsFinalBlock,
     74            iBuilder->CreateUDivCeil(lastDepositPosition, SIZE_BIT_BLOCK_WIDTH),
     75            iBuilder->CreateUDiv(lastDepositPosition, SIZE_BIT_BLOCK_WIDTH)
     76    );
     77
     78    // Produced Item Count will always be full bitblock except for final block
     79    Value *previousProducedBlocks = iBuilder->CreateUDiv(
     80            iBuilder->getProducedItemCount(OUTPUT_BIT_STREAM_NAME),
     81            SIZE_BIT_BLOCK_WIDTH
     82    );
     83
     84    // ceil-or-floor((m0 + 1) / BitBlockWidth) - producedItemCount / BitBlockWidth
     85    return iBuilder->CreateSub(currentMaxBlock, previousProducedBlocks);
     86}
     87
     88void LZ4MatchCopyKernel::generateMultiBlockLogic(const unique_ptr<KernelBuilder> &iBuilder, Value *const numOfStrides) {
     89    // Const
     90    Constant *SIZE_ZERO = iBuilder->getSize(0);
     91    Constant *SIZE_ONE = iBuilder->getSize(1);
     92    Constant *SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
     93
     94    Value *itemsToDo = mAvailableItemCount[0];
     95
     96    Value *isFinalBlock =
     97            iBuilder->CreateOr(
     98                    iBuilder->CreateICmpULT(itemsToDo, iBuilder->CreateMul(numOfStrides, SIZE_BIT_BLOCK_WIDTH)),
     99                    iBuilder->CreateICmpEQ(itemsToDo, iBuilder->getSize(0))
     100            );
     101
     102    this->mIsFinalBlock = isFinalBlock;
     103//    iBuilder->CallPrintInt("isFinalBlock", isFinalBlock);
     104    iBuilder->setTerminationSignal(isFinalBlock);
     105
     106
     107
     108
     109    Value *previousProducedItemCount = iBuilder->getProducedItemCount(OUTPUT_BIT_STREAM_NAME);
     110
     111
     112    // Space Calculation
     113    Value *outputBufferBlocks = iBuilder->getSize(
     114            this->getAnyStreamSetBuffer(OUTPUT_BIT_STREAM_NAME)->getBufferBlocks());
     115    // TODO need to take previous produced size into account
     116
     117
     118    Value *outputRawBeginPtr = iBuilder->CreatePointerCast(
     119            iBuilder->getRawOutputPointer(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO),
     120            iBuilder->getBitBlockType()->getPointerTo());
     121    Value *outputCurrentPtr = iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO);
     122    Value *producedOffset = iBuilder->CreatePtrDiff(outputCurrentPtr, outputRawBeginPtr);
     123    Value *remainSpace = iBuilder->CreateSub(outputBufferBlocks, producedOffset);
     124    Value *matchCopyWindowBlock = iBuilder->getSize(256 * 256 / codegen::BlockSize);
     125    Value *remainWindowBlock = iBuilder->CreateSelect(
     126            iBuilder->CreateICmpUGE(producedOffset, matchCopyWindowBlock),
     127            iBuilder->getSize(0),
     128            iBuilder->CreateSub(matchCopyWindowBlock, producedOffset)
     129    );
     130    Value *writableBlocks = iBuilder->CreateSub(remainSpace,
     131                                                remainWindowBlock); //TODO handle beginning, if producedItemCount / bitblockWidth < windowBlock, there is no need for the substraction here
     132//    iBuilder->CallPrintInt("remainSpace", remainSpace);
     133//    iBuilder->CallPrintInt("writableBlocks", writableBlocks);
     134    Value *outputBlocks = iBuilder->CreateUMin(writableBlocks, numOfStrides);
     135    // outputBlock === min(writableBlocks, numOfStrides, (matchOffsetPosition + matchLength - producedItemCount) / bitBlockWidth )
     136
     137    outputBlocks = iBuilder->CreateUMin(outputBlocks, this->getMaximumMatchCopyBlock(iBuilder));
     138
     139
    78140//    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    79141
    80     // Copy
    81 //    iBuilder->CallPrintInt("available", iBuilder->getAvailableItemCount("decompressedStream"));
    82     this->generateOutputCopy(iBuilder);
    83 
     142    // Output Copy
     143    this->generateOutputCopy(iBuilder, outputBlocks);
     144//    return;
     145
     146    Value *newProducedItemCount = iBuilder->getProducedItemCount(OUTPUT_BIT_STREAM_NAME);
    84147
    85148    BasicBlock *copyEndBlock = iBuilder->CreateBasicBlock("copyEnd");
    86149    iBuilder->CreateBr(copyEndBlock);
    87150    iBuilder->SetInsertPoint(copyEndBlock);
    88 //    return;
     151
     152    // TODO match Copy
    89153    BasicBlock *exitBlock = iBuilder->CreateBasicBlock("exit_block");
    90     Value *initProcessIndex = iBuilder->getScalarField("currentProcessIndex");
    91     Value *itemsAvailable = iBuilder->CreateAdd(
    92             iBuilder->getProcessedItemCount("m0Start"),
    93             iBuilder->getAvailableItemCount("m0Start")
    94     );
    95 
    96 
    97     BasicBlock *iterLoopCon = iBuilder->CreateBasicBlock("iter_loop_con");
    98     BasicBlock *iterLoopBody = iBuilder->CreateBasicBlock("iter_loop_body");
    99     BasicBlock *iterLoopExit = iBuilder->CreateBasicBlock("iter_loop_exit");
    100 
    101     iBuilder->CreateBr(iterLoopCon);
    102 
    103     // Con
    104     iBuilder->SetInsertPoint(iterLoopCon);
    105     PHINode *currentProcessIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    106     currentProcessIndex->addIncoming(initProcessIndex, copyEndBlock);
     154
     155    Value *initM0StartProcessIndex = iBuilder->getProcessedItemCount("m0Start");
     156    Value *totalM0StartItemsCount = iBuilder->CreateAdd(initM0StartProcessIndex, mAvailableItemCount[1]);
     157
     158    Value *initMatchOffset = iBuilder->getScalarField("pendingMatchOffset");
     159    Value *initMatchLength = iBuilder->getScalarField("pendingMatchLength");
     160    Value *initMatchPos = iBuilder->getScalarField("pendingMatchPos");
     161
     162
     163    BasicBlock *matchCopyLoopCon = iBuilder->CreateBasicBlock("matchCopyLoopCon");
     164    iBuilder->CreateBr(matchCopyLoopCon);
     165
     166    iBuilder->SetInsertPoint(matchCopyLoopCon);
     167
     168
     169    PHINode *phiProcessIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
     170    phiProcessIndex->addIncoming(initM0StartProcessIndex, copyEndBlock);
     171
     172    PHINode *phiMatchOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
     173    phiMatchOffset->addIncoming(initMatchOffset, copyEndBlock);
     174
     175    PHINode *phiMatchLength = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
     176    phiMatchLength->addIncoming(initMatchLength, copyEndBlock);
     177
     178    PHINode *phiMatchPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
     179    phiMatchPos->addIncoming(initMatchPos, copyEndBlock);
     180
     181    BasicBlock *loadNextMatchInfoConBlock = iBuilder->CreateBasicBlock("loadNewMatchInfoConBlock");
     182    BasicBlock *loadNextMatchInfoBodyBlock = iBuilder->CreateBasicBlock("loadNewMatchInfoBodyBlock");
     183
     184    BasicBlock *matchCopyConBlock = iBuilder->CreateBasicBlock("matchCopyConBlock");
     185    BasicBlock *matchCopyBodyBlock = iBuilder->CreateBasicBlock("matchCopyBodyBlock");
     186
    107187
    108188    iBuilder->CreateCondBr(
    109             iBuilder->CreateICmpULT(currentProcessIndex, itemsAvailable),
    110             iterLoopBody,
    111             iterLoopExit
    112     );
    113 
    114     // Body
    115     iBuilder->SetInsertPoint(iterLoopBody);
    116 
    117 
    118     Value *currentM0Start = this->generateLoadCircularInput(iBuilder, "m0Start", currentProcessIndex,
    119                                                             iBuilder->getInt64Ty()->getPointerTo());
    120     Value *currentDepositStart = currentM0Start;
    121 
    122     BasicBlock *matchCopyBody = iBuilder->CreateBasicBlock("match_copy_body");
    123     Value *producedItemsCount = iBuilder->getProcessedItemCount("decompressedStream");
    124 
    125     iBuilder->CreateCondBr(
    126             iBuilder->CreateICmpULE(
    127                     iBuilder->CreateSub(currentDepositStart, iBuilder->getInt64(1)),
    128                     producedItemsCount
    129             ),
    130             matchCopyBody,
    131             iterLoopExit
    132     );
    133 
    134     // matchCopyBody
    135     iBuilder->SetInsertPoint(matchCopyBody);
    136     this->generateMatchCopy(iBuilder, currentProcessIndex); // TODO main logic here
    137     BasicBlock *matchCopyFinishBlock = iBuilder->CreateBasicBlock("match_copy_finish");
    138     iBuilder->CreateBr(matchCopyFinishBlock);
    139     iBuilder->SetInsertPoint(matchCopyFinishBlock);
    140 
    141 
    142     Value *m0End = this->generateLoadCircularInput(iBuilder, "m0End", currentProcessIndex,
    143                                                    iBuilder->getInt64Ty()->getPointerTo());
    144     Value *depositEnd = iBuilder->CreateAdd(m0End, iBuilder->getInt64(1));
    145     Value *maxProducedCount = iBuilder->CreateSelect(
    146             iBuilder->CreateICmpUGT(
    147                     producedItemsCount,
    148                     depositEnd
    149             ),
    150             producedItemsCount,
    151             depositEnd
    152     );
    153     iBuilder->setProducedItemCount("outputStream", maxProducedCount);
    154     currentProcessIndex->addIncoming(
    155             iBuilder->CreateAdd(currentProcessIndex, iBuilder->getSize(1)),
    156             matchCopyFinishBlock
    157     );
    158     iBuilder->CreateBr(iterLoopCon);
    159 
    160 
    161     // loop exit
    162     iBuilder->SetInsertPoint(iterLoopExit);
    163     iBuilder->setScalarField("currentProcessIndex", currentProcessIndex);
    164 
    165     iBuilder->CreateBr(exitBlock);
    166 
    167     // Exit
     189            iBuilder->CreateICmpEQ(phiMatchLength, iBuilder->getSize(0)),
     190            loadNextMatchInfoConBlock,
     191            matchCopyConBlock
     192    );
     193
     194
     195    iBuilder->SetInsertPoint(loadNextMatchInfoConBlock);
     196
     197    Value *hasMoreMatchInfo = iBuilder->CreateICmpULT(phiProcessIndex, totalM0StartItemsCount);
     198    iBuilder->CreateCondBr(hasMoreMatchInfo, loadNextMatchInfoBodyBlock, exitBlock);
     199
     200    iBuilder->SetInsertPoint(loadNextMatchInfoBodyBlock);
     201
     202    Value *m0StartBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("m0Start", SIZE_ZERO), iBuilder->getInt64Ty()->getPointerTo());
     203    Value *m0EndBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("m0End", SIZE_ZERO), iBuilder->getInt64Ty()->getPointerTo());
     204    Value *matchOffsetBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("matchOffset", SIZE_ZERO), iBuilder->getInt64Ty()->getPointerTo());
     205
     206
     207    Value *m0StartBaseOffset = iBuilder->CreateURem(initM0StartProcessIndex, SIZE_BIT_BLOCK_WIDTH);
     208//    iBuilder->CallPrintInt("rawPtr", iBuilder->getRawInputPointer("m0Start", SIZE_ZERO));
     209//    iBuilder->CallPrintInt("ptr", m0StartBasePtr);
     210//    iBuilder->CallPrintInt("initM0StartProcessIndex", initM0StartProcessIndex);
     211    Value *m0StartLoadOffset = iBuilder->CreateAdd(m0StartBaseOffset,
     212                                                   iBuilder->CreateSub(phiProcessIndex, initM0StartProcessIndex));
     213
     214    Value *newM0Start = iBuilder->CreateLoad(iBuilder->CreateGEP(m0StartBasePtr, m0StartLoadOffset));
     215    Value *newM0End = iBuilder->CreateLoad(iBuilder->CreateGEP(m0EndBasePtr, m0StartLoadOffset));
     216    Value *newMatchOffset = iBuilder->CreateLoad(iBuilder->CreateGEP(matchOffsetBasePtr, m0StartLoadOffset));
     217
     218    Value *depositStart = newM0Start;
     219//    iBuilder->CallPrintInt("depositStart", depositStart);
     220//    iBuilder->CallPrintInt("newMatchLength", newMatchLength);
     221
     222    Value *depositEnd = iBuilder->CreateAdd(newM0End, iBuilder->getInt64(1));
     223    Value *newMatchLength = iBuilder->CreateSub(depositEnd, depositStart);
     224    phiProcessIndex->addIncoming(iBuilder->CreateAdd(phiProcessIndex, SIZE_ONE), iBuilder->GetInsertBlock());
     225
     226    phiMatchPos->addIncoming(depositStart, iBuilder->GetInsertBlock());
     227    phiMatchOffset->addIncoming(newMatchOffset, iBuilder->GetInsertBlock());
     228    phiMatchLength->addIncoming(newMatchLength, iBuilder->GetInsertBlock());
     229
     230    iBuilder->CreateBr(matchCopyLoopCon);
     231
     232
     233    iBuilder->SetInsertPoint(matchCopyConBlock);
     234    Value *hasNotReachEnd = iBuilder->CreateICmpULT(phiMatchPos, newProducedItemCount);
     235//    iBuilder->CallPrintInt("newProducedItemCount", newProducedItemCount);
     236    iBuilder->CreateCondBr(hasNotReachEnd, matchCopyBodyBlock, exitBlock);
     237
     238    iBuilder->SetInsertPoint(matchCopyBodyBlock);
     239    Value* matchCopyFromPos = iBuilder->CreateSub(phiMatchPos, phiMatchOffset);
     240    Value* rawOutputBasePtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO), iBuilder->getInt8PtrTy());
     241//    iBuilder->CallPrintInt("rawOutputBasePtr", rawOutputBasePtr);
     242//    iBuilder->CallPrintInt("rawOutputBasePtr1", iBuilder->CreateGEP(
     243//            rawOutputBasePtr,
     244//            iBuilder->CreateURem(matchCopyFromPos, iBuilder->CreateMul(outputBufferBlocks, SIZE_BIT_BLOCK_WIDTH))
     245//    ));
     246    Value* matchCopyFromValue = iBuilder->CreateLoad(
     247            iBuilder->CreateGEP(
     248                    rawOutputBasePtr,
     249                    iBuilder->CreateURem(matchCopyFromPos, iBuilder->CreateMul(outputBufferBlocks, SIZE_BIT_BLOCK_WIDTH))
     250            ));
     251
     252    // Output is guaranteed to be a full bit block except for the final block
     253    Value* outputBlockBasePtr = iBuilder->CreatePointerCast(iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO), iBuilder->getInt8PtrTy());
     254    Value* outputTargetPtr = iBuilder->CreateGEP(outputBlockBasePtr, iBuilder->CreateSub(phiMatchPos, previousProducedItemCount));
     255//    iBuilder->CallPrintInt("matchCopyFromValue", matchCopyFromValue);
     256//    iBuilder->CallPrintInt("phiMatchPos", phiMatchPos);
     257//    iBuilder->CallPrintInt("aa", iBuilder->CreateSub(phiMatchPos, previousProducedItemCount));
     258    iBuilder->CreateStore(matchCopyFromValue, outputTargetPtr);
     259
     260    phiProcessIndex->addIncoming(phiProcessIndex, iBuilder->GetInsertBlock());
     261    phiMatchOffset->addIncoming(phiMatchOffset, iBuilder->GetInsertBlock());
     262    phiMatchPos->addIncoming(iBuilder->CreateAdd(phiMatchPos, SIZE_ONE), iBuilder->GetInsertBlock());
     263    phiMatchLength->addIncoming(iBuilder->CreateSub(phiMatchLength, SIZE_ONE), iBuilder->GetInsertBlock());
     264
     265    iBuilder->CreateBr(matchCopyLoopCon);
     266
    168267    iBuilder->SetInsertPoint(exitBlock);
    169 }
    170 
    171 Value *LZ4MatchCopyKernel::generateMatchCopy(const unique_ptr<KernelBuilder> &iBuilder, Value *currentProcessIndex) {
    172 
    173     BasicBlock *entryBlock = iBuilder->GetInsertBlock();
    174 
    175 
    176     Value *m0Start = this->generateLoadCircularInput(iBuilder, "m0Start", currentProcessIndex,
    177                                                      iBuilder->getInt64Ty()->getPointerTo());
    178     Value *depositStart = m0Start;
    179     Value *m0End = this->generateLoadCircularInput(iBuilder, "m0End", currentProcessIndex,
    180                                                    iBuilder->getInt64Ty()->getPointerTo());
    181     Value *depositEnd = iBuilder->CreateAdd(m0End, iBuilder->getInt64(1));
    182 
    183     Value *matchOffset = this->generateLoadCircularInput(iBuilder, "matchOffset", currentProcessIndex,
    184                                                          iBuilder->getInt64Ty()->getPointerTo());
    185 
    186     Value *matchLength = iBuilder->CreateSub(depositEnd, depositStart);
    187 
    188     Value *matchStart = iBuilder->CreateSub(depositStart, matchOffset);
    189 //    iBuilder->CallPrintInt("depositStart", depositStart);
    190 //    iBuilder->CallPrintInt("matchOffset", matchOffset);
    191 //    iBuilder->CallPrintInt("matchStart", matchStart);
    192 //    iBuilder->CallPrintInt("matchLength", matchLength);
    193 
    194 
    195     BasicBlock* copyLoopCon = iBuilder->CreateBasicBlock("copy_loop_con");
    196     BasicBlock* copyLoopBody = iBuilder->CreateBasicBlock("copy_loop_body");
    197     BasicBlock* copyLoopExit = iBuilder->CreateBasicBlock("copy_loop_exit");
    198 
    199     iBuilder->CreateBr(copyLoopCon);
    200     iBuilder->SetInsertPoint(copyLoopCon);
    201 
    202     PHINode* currentCopyIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    203     currentCopyIndex->addIncoming(iBuilder->getSize(0), entryBlock);
    204 
    205     iBuilder->CreateCondBr(iBuilder->CreateICmpULT(currentCopyIndex, matchLength), copyLoopBody, copyLoopExit);
    206 
    207     iBuilder->SetInsertPoint(copyLoopBody);
    208     Value* value = this->generateLoadCircularOutput(iBuilder, "outputStream", iBuilder->CreateAdd(matchStart, currentCopyIndex), iBuilder->getInt8Ty()->getPointerTo());
    209 //    iBuilder->CallPrintInt("value", value);
    210 //    iBuilder->CallPrintInt("storePos", iBuilder->CreateAdd(currentCopyIndex, depositStart));
    211     this->generateStoreCircularOutput(iBuilder, "outputStream", iBuilder->CreateAdd(currentCopyIndex, depositStart),iBuilder->getInt8Ty()->getPointerTo(), value);
    212     currentCopyIndex->addIncoming(iBuilder->CreateAdd(currentCopyIndex, iBuilder->getSize(1)), copyLoopBody);
    213 
    214     iBuilder->CreateBr(copyLoopCon);
    215 
    216     iBuilder->SetInsertPoint(copyLoopExit);
    217 
    218 }
    219 
    220 void LZ4MatchCopyKernel::generateStoreCircularOutput(const unique_ptr<KernelBuilder> &iBuilder, string outputBufferName, Value* offset, Type* pointerType, Value* value) {
     268//    iBuilder->CallPrintInt("test", SIZE_ZERO);
     269    iBuilder->setScalarField("pendingMatchOffset", phiMatchOffset);
     270    iBuilder->setScalarField("pendingMatchLength", phiMatchLength);
     271    iBuilder->setScalarField("pendingMatchPos", phiMatchPos);
     272//    iBuilder->CallPrintInt("pendingMatchLength", phiMatchLength);
     273    iBuilder->setProcessedItemCount("m0Start", phiProcessIndex);
     274    iBuilder->setProcessedItemCount("m0End", phiProcessIndex);
     275    iBuilder->setProcessedItemCount("matchOffset", phiProcessIndex);
     276}
     277
     278
     279void LZ4MatchCopyKernel::generateStoreCircularOutput(const unique_ptr<KernelBuilder> &iBuilder, string outputBufferName,
     280                                                     Value *offset, Type *pointerType, Value *value) {
    221281    size_t inputSize = this->getOutputBufferSize(iBuilder, outputBufferName);
    222     Value* offsetMask = iBuilder->getSize(inputSize - 1);
    223     Value* maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
    224 
    225     Value* outputBufferPtr = iBuilder->getRawOutputPointer(outputBufferName, iBuilder->getSize(0));
     282    Value *offsetMask = iBuilder->getSize(inputSize - 1);
     283    Value *maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
     284
     285    Value *outputBufferPtr = iBuilder->getRawOutputPointer(outputBufferName, iBuilder->getSize(0));
    226286
    227287    outputBufferPtr = iBuilder->CreatePointerCast(outputBufferPtr, pointerType);
     
    229289}
    230290
    231 Value* LZ4MatchCopyKernel::generateLoadCircularOutput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value* offset, Type* pointerType) {
     291Value *LZ4MatchCopyKernel::generateLoadCircularOutput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName,
     292                                                      Value *offset, Type *pointerType) {
    232293    size_t inputSize = this->getOutputBufferSize(iBuilder, inputBufferName);
    233     Value* offsetMask = iBuilder->getSize(inputSize - 1);
    234     Value* maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
    235 
    236     Value* inputBufferPtr = iBuilder->getRawOutputPointer(inputBufferName, iBuilder->getSize(0));
     294    Value *offsetMask = iBuilder->getSize(inputSize - 1);
     295    Value *maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
     296
     297    Value *inputBufferPtr = iBuilder->getRawOutputPointer(inputBufferName, iBuilder->getSize(0));
    237298
    238299    inputBufferPtr = iBuilder->CreatePointerCast(inputBufferPtr, pointerType);
     
    263324        : MultiBlockKernel("lz4MatchCopyKernel",
    264325        // Inputs
    265                               {
    266                                       Binding{iBuilder->getStreamSetTy(1, 8), "decompressedStream"},
    267                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
    268                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
    269                                       Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)},
    270 
    271                               },
     326                           {
     327                                   Binding{iBuilder->getStreamSetTy(1, 8), "decompressedStream", BoundedRate(0, 1), AlwaysConsume()},
     328                                   Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1), AlwaysConsume()},
     329                                   Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1), AlwaysConsume()},
     330                                   Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1), AlwaysConsume()},
     331
     332                           },
    272333        // Outputs
    273                               {Binding{iBuilder->getStreamSetTy(1, 8), "outputStream", UnknownRate()}},
     334                           {Binding{iBuilder->getStreamSetTy(1, 8), OUTPUT_BIT_STREAM_NAME, BoundedRate(0, 1)}},
    274335        // Arguments
    275                               {},
    276                               {},
    277                               {
    278                                       Binding{iBuilder->getSizeTy(), "currentProcessIndex"},
    279                                       Binding{iBuilder->getSizeTy(), "previousCopy"}
    280                               }) {
     336                           {},
     337                           {},
     338                           {
     339                                   Binding{iBuilder->getSizeTy(), "currentProcessIndex"},
     340                                   Binding{iBuilder->getSizeTy(), "pendingMatchPos"},
     341                                   Binding{iBuilder->getSizeTy(), "pendingMatchOffset"},
     342                                   Binding{iBuilder->getSizeTy(), "pendingMatchLength"},
     343                           }) {
    281344//    setNoTerminateAttribute(true);
    282 }
     345    addAttribute(MustExplicitlyTerminate());
     346}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_match_copy_kernel.h

    r5864 r5895  
    2323        size_t getInputBufferSize(const std::unique_ptr<KernelBuilder> &iBuilder, std::string bufferName);
    2424        size_t getOutputBufferSize(const std::unique_ptr<KernelBuilder> &iBuilder, std::string bufferName);
    25         void generateOutputCopy(const std::unique_ptr<KernelBuilder> &iBuilder);
    26         llvm::Value* generateMatchCopy(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value* currentProcessIndex);
     25        void generateOutputCopy(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value* outputBlocks);
     26
     27        llvm::Value* getMaximumMatchCopyBlock(const std::unique_ptr<KernelBuilder> &iBuilder);
     28        llvm::Value* mIsFinalBlock;
    2729    };
    2830}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_numbers_to_bitstream_kernel.cpp

    r5885 r5895  
    3838
    3939
    40         size_t outputBufferSize = this->getAnyBufferSize(iBuilder, OUTPUT_BIT_STREAM_NAME) / iBuilder->getStride();
     40        size_t outputBufferBlocks = this->getAnyBufferSize(iBuilder, OUTPUT_BIT_STREAM_NAME) / iBuilder->getStride();
    4141        Value *outputRawBeginPtr = iBuilder->CreatePointerCast(
    4242                iBuilder->getRawOutputPointer(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO),
    4343                iBuilder->getBitBlockType()->getPointerTo());
    4444        Value *outputCurrentPtr = iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO);
    45 //        outputRawBeginPtr->getType()->print(outs());
    46 //        outputCurrentPtr->getType()->print(outs());
    47 
    4845        Value *offset = iBuilder->CreatePtrDiff(outputCurrentPtr, outputRawBeginPtr);
    49         Value *remainSpace = iBuilder->CreateSub(iBuilder->getSize(outputBufferSize), offset);
     46        Value *remainSpace = iBuilder->CreateSub(iBuilder->getSize(outputBufferBlocks), offset);
    5047//        iBuilder->CallPrintInt("remainSpace",
    5148//                               remainSpace); //TODO workaround here, kernel infrastructure should provide the information about how much data we can produced
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r5874 r5895  
    341341
    342342int LZ4Generator::getDecompressedBufferBlocks() {
    343     const unsigned decompressBufBlocks = 256U * 256U / codegen::BlockSize * 2 * 2;
     343    const unsigned decompressBufBlocks = 256U * 256U / codegen::BlockSize * 2 * 2; // TODO at least *2 since we need to leave 1 for match copy window
    344344    return decompressBufBlocks;
    345345}
Note: See TracChangeset for help on using the changeset viewer.