Changeset 5505 for icGREP


Ignore:
Timestamp:
Jun 13, 2017, 1:48:31 PM (22 months ago)
Author:
cameron
Message:

Multi-block bug fix

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5503 r5505  
    756756    multiBlockFunction->setCallingConv(CallingConv::C);
    757757    multiBlockFunction->setDoesNotThrow();
     758    mCurrentMethod = multiBlockFunction;
     759    kb->SetInsertPoint(BasicBlock::Create(kb->getContext(), "multiBlockEntry", multiBlockFunction, 0));
     760
    758761    auto args = multiBlockFunction->arg_begin();
    759762    args->setName("self");
     
    772775    // Now use the generateMultiBlockLogic method of the MultiBlockKernelBuilder subtype to
    773776    // provide the required multi-block kernel logic.
    774     mCurrentMethod = multiBlockFunction;
    775     kb->SetInsertPoint(BasicBlock::Create(kb->getContext(), "multiBlockEntry", multiBlockFunction, 0));
    776777    generateMultiBlockLogic(kb);
    777778
     
    10781079    }
    10791080
    1080     std::vector<Value *> blockBasePos;
     1081    Value * outputBasePos[outputSetCount];
    10811082    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); i++) {
    10821083        Value * tempBufPtr = kb->CreateGEP(tempParameterArea, kb->getInt32(mStreamSetInputs.size() + i));
     
    10841085        tempBufPtr = kb->CreatePointerCast(tempBufPtr, bufPtrType);
    10851086        producedItemCount[i] = kb->getProducedItemCount(mStreamSetOutputs[i].name);
    1086         blockBasePos.push_back(kb->CreateAnd(producedItemCount[i], blockBaseMask));
    1087         mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), tempBufPtr, outputBlockPtr[i], kb->CreateSub(producedItemCount[i], blockBasePos[i]));
     1087        outputBasePos[i] = kb->CreateAnd(producedItemCount[i], blockBaseMask);
     1088        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), tempBufPtr, outputBlockPtr[i], kb->CreateSub(producedItemCount[i], outputBasePos[i]));
    10881089        Value * itemAddress = kb->CreatePtrToInt(kb->getRawOutputPointer(mStreamSetOutputs[i].name, kb->getInt32(0), producedItemCount[i]), intAddrTy);
    10891090        Value * outputPtr = kb->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, kb->getInt32(0));
     
    10941095
    10951096    kb->CreateCall(multiBlockFunction, tempArgs);
    1096 
     1097   
     1098    //  The items have been processed and output generated to the temporary areas.
     1099    //  Update the processed item count (and hence all the counts derived automatically
     1100    //  therefrom).
     1101    kb->setProcessedItemCount(mStreamSetInputs[0].name, finalItemCountNeeded[0]);
     1102   
    10971103    // Copy back data to the actual output buffers.
    1098 
    10991104    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); i++) {
    11001105        Value * tempBufPtr = kb->CreateGEP(tempParameterArea, kb->getInt32(mStreamSetInputs.size() + i));
    11011106        tempBufPtr = kb->CreatePointerCast(tempBufPtr, mStreamSetOutputBuffers[i]->getPointerType());
    1102         Value * finalItems = kb->getProducedItemCount(mStreamSetOutputs[i].name);
    1103         Value * copyItems = kb->CreateSub(finalItems, blockBasePos[i]);
     1107        Value * finalOutputItems = kb->getProducedItemCount(mStreamSetOutputs[i].name);
     1108        Value * copyItems = kb->CreateSub(finalOutputItems, outputBasePos[i]);
    11041109        // Round up to exact multiple of block size.
    11051110        //copyItems = kb->CreateAnd(kb->CreateAdd(copyItems, kb->getSize(bitBlockWidth - 1)), blockBaseMask);
    1106         Value * writableFromBase = kb->getLinearlyWritableItems(mStreamSetOutputs[i].name, blockBasePos[i]); // must be a whole number of blocks.
     1111        Value * writableFromBase = kb->getLinearlyWritableItems(mStreamSetOutputs[i].name, outputBasePos[i]); // must be a whole number of blocks.
    11071112        Value * allWritable = kb->CreateICmpULE(copyItems, writableFromBase);
    11081113        Value * copyItems1 = kb->CreateSelect(allWritable, copyItems, writableFromBase);
    11091114        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), outputBlockPtr[i], tempBufPtr, copyItems1);
    1110         BasicBlock * copyRemaining = kb->CreateBasicBlock("copyRemaining");
    1111         BasicBlock * copyDone = kb->CreateBasicBlock("copyDone");
    1112         kb->CreateCondBr(allWritable, copyDone, copyRemaining);
    1113         kb->SetInsertPoint(copyRemaining);
     1115        BasicBlock * copyBackRemaining = kb->CreateBasicBlock("copyBackRemaining");
     1116        BasicBlock * copyBackDone = kb->CreateBasicBlock("copyBackDone");
     1117        kb->CreateCondBr(allWritable, copyBackDone, copyBackRemaining);
     1118        kb->SetInsertPoint(copyBackRemaining);
    11141119        Value * copyItems2 = kb->CreateSub(copyItems, copyItems1);
    1115         Value * nextBasePos = kb->CreateAdd(blockBasePos[i], copyItems1);
     1120        Value * nextBasePos = kb->CreateAdd(outputBasePos[i], copyItems1);
    11161121        Type * bufPtrType = mStreamSetOutputBuffers[i]->getPointerType();
    11171122        Value * nextOutputPtr = kb->CreatePointerCast(kb->getRawOutputPointer(mStreamSetOutputs[i].name, kb->getInt32(0), nextBasePos), bufPtrType);
    11181123        tempBufPtr = kb->CreateGEP(tempBufPtr, kb->CreateUDiv(copyItems1, blockSize));
    11191124        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), nextOutputPtr, tempBufPtr, copyItems2);
    1120         kb->CreateBr(copyDone);
    1121         kb->SetInsertPoint(copyDone);
    1122     }
    1123 
    1124     kb->setProcessedItemCount(mStreamSetInputs[0].name, finalItemCountNeeded[0]);
     1125        kb->CreateBr(copyBackDone);
     1126        kb->SetInsertPoint(copyBackDone);
     1127    }
     1128
    11251129
    11261130    //  We've dealt with the partial block processing and copied information back into the
     
    11281132    //
    11291133    stridesRemaining->addIncoming(kb->CreateSub(stridesRemaining, kb->CreateZExt(haveStrides, kb->getSizeTy())), kb->GetInsertBlock());
    1130     kb->CreateCondBr(haveStrides, doSegmentOuterLoop, segmentDone);
     1134    BasicBlock * setTermination = kb->CreateBasicBlock("mBsetTermination");
     1135    kb->CreateCondBr(haveStrides, doSegmentOuterLoop, setTermination);
     1136    kb->SetInsertPoint(setTermination);
     1137    kb->setTerminationSignal();
     1138    kb->CreateBr(segmentDone);
    11311139    kb->SetInsertPoint(segmentDone);
    11321140}
Note: See TracChangeset for help on using the changeset viewer.