Changeset 5303


Ignore:
Timestamp:
Feb 5, 2017, 3:25:17 PM (10 months ago)
Author:
cameron
Message:

Refined copy-back buffers, use copyback in p2s_compressed; stdout/filesink mods for circular buffers

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5301 r5303  
    5858
    5959void KernelBuilder::prepareKernelSignature() {
    60     unsigned blockSize = iBuilder->getBitBlockWidth();
    6160    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    6261        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, i);
     
    292291}
    293292
    294 inline const StreamSetBuffer * KernelBuilder::getStreamSetBuffer(const std::string & name) const {
     293const StreamSetBuffer * KernelBuilder::getStreamSetBuffer(const std::string & name) const {
    295294    const unsigned structIdx = getStreamSetIndex(name);
    296295    if (structIdx < mStreamSetInputs.size()) {
     
    301300}
    302301
    303 inline Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
     302Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
    304303    return getScalarField(getSelf(), name + BUFFER_PTR_SUFFIX);
    305304}
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5297 r5303  
    1010
    1111using namespace llvm;
     12using namespace parabix;
    1213
    1314namespace kernel{
     
    176177        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2 * j + 1)), i32Ty);
    177178    }
    178     i16UnitsGenerated = iBuilder->CreateAdd(i16UnitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
    179     setProducedItemCount("i16Stream", i16UnitsGenerated);
     179    Value * i16UnitsFinal = iBuilder->CreateAdd(i16UnitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
     180    setProducedItemCount("i16Stream", i16UnitsFinal);
     181    auto const &b  = getStreamSetBuffer("i16Stream");
     182
     183    if (auto cb = dyn_cast<CircularCopybackBuffer>(b)) {
     184        BasicBlock * copyBack = CreateBasicBlock("copyBack");
     185        BasicBlock * p2sCompressDone = CreateBasicBlock("p2sCompressDone");
     186       
     187        // Check for overflow into the buffer overflow area and copy data back if so.
     188        Value * accessible = cb->getLinearlyAccessibleItems(i16UnitsGenerated);
     189        offset = iBuilder->CreateZExt(offset, iBuilder->getSizeTy());
     190        Value * wraparound = iBuilder->CreateICmpULT(accessible, offset);
     191        iBuilder->CreateCondBr(wraparound, copyBack, p2sCompressDone);
     192       
     193        iBuilder->SetInsertPoint(copyBack);
     194        Value * copyItems = iBuilder->CreateSub(offset, accessible);
     195        cb->createCopyBack(getStreamSetBufferPtr("i16Stream"), copyItems);
     196        iBuilder->CreateBr(p2sCompressDone);
     197        iBuilder->SetInsertPoint(p2sCompressDone);
     198    }
    180199}
    181200   
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r5292 r5303  
    1212
    1313using namespace llvm;
     14using namespace parabix;
    1415
    1516namespace kernel {
    1617           
    1718// Rather than using doBlock logic to write one block at a time, this custom
    18 // doSegment method, writes the entire segment with a single write call.
     19// doSegment method attempts to write the entire segment with a single write call.
     20// However, if the segment spans two memory areas (e.g., because of wraparound),
     21// then two write calls are made.
    1922void StdOutKernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &producerPos) {
    20 
    2123    PointerType * i8PtrTy = iBuilder->getInt8PtrTy();
    2224
     
    2628    Value * processed = getProcessedItemCount("codeUnitBuffer");
    2729    Value * itemsToDo = iBuilder->CreateSub(producerPos[0], processed);
     30    // There may be two memory areas if we are at the physical end of a circular buffer.
     31    auto const &b  = getStreamSetBuffer("codeUnitBuffer");
     32    Value * wraparound = nullptr;
     33    if (isa<CircularBuffer>(b) || isa<CircularCopybackBuffer>(b)) {
     34        Value * accessible = b->getLinearlyAccessibleItems(processed);
     35        wraparound = iBuilder->CreateICmpULT(accessible, itemsToDo);
     36        itemsToDo = iBuilder->CreateSelect(wraparound, accessible, itemsToDo);
     37    }
    2838   
    29     Value * blockNo = getBlockNo();
     39    //Value * blockNo = getBlockNo();
     40    Value * blockNo = iBuilder->CreateUDiv(processed, blockItems);
    3041    Value * byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(processed, blockItems), itemBytes);
    3142    Value * bytePtr = getStreamView(i8PtrTy, "codeUnitBuffer", blockNo, byteOffset);
    3243    iBuilder->CreateWriteCall(iBuilder->getInt32(1), bytePtr, iBuilder->CreateMul(itemsToDo, itemBytes));
    33 
    3444    processed = iBuilder->CreateAdd(processed, itemsToDo);
    3545    setProcessedItemCount("codeUnitBuffer", processed);
    36     setBlockNo(iBuilder->CreateUDiv(processed, blockItems));
    37 
     46    //setBlockNo(iBuilder->CreateUDiv(processed, blockItems));
     47   
     48    // Now we may process the second area (if required).
     49    if (isa<CircularBuffer>(b) || isa<CircularCopybackBuffer>(b)) {
     50        BasicBlock * wrapAroundWrite = CreateBasicBlock("wrapAroundWrite");
     51        BasicBlock * stdoutExit = CreateBasicBlock("stdoutExit");
     52        iBuilder->CreateCondBr(wraparound, wrapAroundWrite, stdoutExit);
     53        iBuilder->SetInsertPoint(wrapAroundWrite);
     54       
     55        // Calculate from the updated value of processed;
     56        blockNo = iBuilder->CreateUDiv(processed, blockItems);
     57        byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(processed, blockItems), itemBytes);
     58        bytePtr = getStreamView(i8PtrTy, "codeUnitBuffer", blockNo, byteOffset);
     59        itemsToDo = iBuilder->CreateSub(producerPos[0], processed);
     60        iBuilder->CreateWriteCall(iBuilder->getInt32(1), bytePtr, iBuilder->CreateMul(itemsToDo, itemBytes));
     61        processed = iBuilder->CreateAdd(processed, itemsToDo);
     62        setProcessedItemCount("codeUnitBuffer", producerPos[0]);
     63        iBuilder->CreateBr(stdoutExit);
     64        iBuilder->SetInsertPoint(stdoutExit);
     65    }
    3866}
    3967
     
    6593    Constant * itemBytes = iBuilder->getSize(mCodeUnitWidth/8);
    6694   
     95    Value * IOstreamPtr = getScalarField("IOstreamPtr");
    6796    Value * processed = getProcessedItemCount("codeUnitBuffer");
    6897    Value * itemsToDo = iBuilder->CreateSub(producerPos[0], processed);
    69     Value * IOstreamPtr = getScalarField("IOstreamPtr");
     98    // There may be two memory areas if we are at the physical end of a circular buffer.
     99    auto const &b  = getStreamSetBuffer("codeUnitBuffer");
     100    Value * wraparound = nullptr;
     101    if (isa<CircularBuffer>(b) || isa<CircularCopybackBuffer>(b)) {
     102        Value * accessible = b->getLinearlyAccessibleItems(processed);
     103        wraparound = iBuilder->CreateICmpULT(accessible, itemsToDo);
     104        itemsToDo = iBuilder->CreateSelect(wraparound, accessible, itemsToDo);
     105    }
    70106   
    71     Value * blockNo = getBlockNo();
     107    Value * blockNo = iBuilder->CreateUDiv(processed, blockItems);
    72108    Value * byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(processed, blockItems), itemBytes);
    73109    Value * bytePtr = getStreamView(i8PtrTy, "codeUnitBuffer", blockNo, byteOffset);
    74110    iBuilder->CreateFWriteCall(bytePtr, itemsToDo, itemBytes, IOstreamPtr);
    75111   
     112   
    76113    processed = iBuilder->CreateAdd(processed, itemsToDo);
    77114    setProcessedItemCount("codeUnitBuffer", processed);
    78     setBlockNo(iBuilder->CreateUDiv(processed, blockItems));
     115    //setBlockNo(iBuilder->CreateUDiv(processed, blockItems));
     116   
     117    // Now we may process the second area (if required).
     118    if (isa<CircularBuffer>(b) || isa<CircularCopybackBuffer>(b)) {
     119        BasicBlock * wrapAroundWrite = CreateBasicBlock("wrapAroundWrite");
     120        BasicBlock * checkFinal = CreateBasicBlock("checkFinal");
     121        iBuilder->CreateCondBr(wraparound, wrapAroundWrite, checkFinal);
     122        iBuilder->SetInsertPoint(wrapAroundWrite);
     123       
     124        // Calculate from the updated value of processed;
     125        blockNo = iBuilder->CreateUDiv(processed, blockItems);
     126        byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(processed, blockItems), itemBytes);
     127        bytePtr = getStreamView(i8PtrTy, "codeUnitBuffer", blockNo, byteOffset);
     128        itemsToDo = iBuilder->CreateSub(producerPos[0], processed);
     129        iBuilder->CreateFWriteCall(bytePtr, itemsToDo, itemBytes, IOstreamPtr);
     130        processed = iBuilder->CreateAdd(processed, itemsToDo);
     131        setProcessedItemCount("codeUnitBuffer", producerPos[0]);
     132        iBuilder->CreateBr(checkFinal);
     133        iBuilder->SetInsertPoint(checkFinal);
     134    }
    79135    iBuilder->CreateCondBr(doFinal, closeFile, fileOutExit);
    80136   
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5301 r5303  
    124124}
    125125
    126 void CircularCopybackBuffer::createCopyBack(Value * self, Value * overFlowItems) {
    127     // Must copy back one full block for each of the streams in the stream set.
     126void CircularCopybackBuffer::createCopyBack(Value * self, Value * overFlowItems) const {
     127    Function * f = iBuilder->GetInsertBlock()->getParent();
     128    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
     129    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
     130    BasicBlock * copyBackDone = BasicBlock::Create(iBuilder->getContext(), "copyBackDone", f, 0);
     131    Type * i8ptr = iBuilder->getInt8PtrTy();
     132    unsigned numStreams = getType()->getArrayNumElements();
     133    auto elemTy = getType()->getArrayElementType();
     134    unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
    128135    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
    129     Constant * blockSizeLess1 = iBuilder->getSize(iBuilder->getBitBlockWidth() - 1);
    130     Value * overFlowBlocks = iBuilder->CreateUDiv(iBuilder->CreateAdd(overFlowItems, blockSizeLess1), blockSize);
    131     Value * overFlowAreaPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, iBuilder->getSize(mBufferBlocks));
    132     DataLayout dl(iBuilder->getModule());
    133     Constant * blockBytes = ConstantInt::get(iBuilder->getSizeTy(), dl.getTypeAllocSize(mStreamSetType) * iBuilder->getBitBlockWidth());
     136    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
     137    Value * overFlowBlocks = iBuilder->CreateUDiv(overFlowItems, blockSize);
     138    Value * partialItems = iBuilder->CreateURem(overFlowItems, blockSize);
     139    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(overFlowBlocks, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
     140    iBuilder->SetInsertPoint(wholeBlockCopy);
     141    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
     142    Constant * blockBytes = iBuilder->getSize(fieldWidth * iBuilder->getBitBlockWidth()/8);
    134143    Value * copyLength = iBuilder->CreateMul(overFlowBlocks, blockBytes);
    135     Type * i8ptr = iBuilder->getInt8Ty()->getPointerTo();
    136     unsigned alignment = iBuilder->getBitBlockWidth() / 8;
    137     iBuilder->CreateMemMove(iBuilder->CreateBitCast(mStreamSetBufferPtr, i8ptr), iBuilder->CreateBitCast(overFlowAreaPtr, i8ptr), copyLength, alignment);
     144    iBuilder->CreateMemMove(iBuilder->CreateBitCast(self, i8ptr), iBuilder->CreateBitCast(overFlowAreaPtr, i8ptr), copyLength, alignment);
     145    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyBackDone);
     146    iBuilder->SetInsertPoint(partialBlockCopy);
     147    Value * partialBlockTargetPtr = iBuilder->CreateGEP(self, overFlowBlocks);
     148    Value * partialBlockSourcePtr = iBuilder->CreateGEP(overFlowAreaPtr, overFlowBlocks);
     149    Value * copyBits = iBuilder->CreateMul(overFlowItems, iBuilder->getSize(fieldWidth));
     150    Value * copyBytes = iBuilder->CreateUDiv(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(8));
     151    for (unsigned strm = 0; strm < numStreams; strm++) {
     152        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
     153        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
     154        iBuilder->CreateMemMove(iBuilder->CreateBitCast(strmTargetPtr, i8ptr), iBuilder->CreateBitCast(strmSourcePtr, i8ptr), copyBytes, alignment);
     155    }
     156    iBuilder->CreateBr(copyBackDone);
     157    iBuilder->SetInsertPoint(copyBackDone);
    138158}
    139159
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5301 r5303  
    138138   
    139139    // Generate copyback code for the given number of overflowItems.
    140     void createCopyBack(llvm::Value * self, llvm::Value * overflowItems);
     140    void createCopyBack(llvm::Value * self, llvm::Value * overflowItems) const;
    141141   
    142142   
Note: See TracChangeset for help on using the changeset viewer.