Changeset 5194
- Timestamp:
- Oct 15, 2016, 11:22:19 PM (2 years ago)
- Location:
- icGREP/icgrep-devel/icgrep/kernels
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
icGREP/icgrep-devel/icgrep/kernels/kernel.cpp
r5193 r5194 164 164 iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doSegmentFunction, 0)); 165 165 BasicBlock * entryBlock = iBuilder->GetInsertBlock(); 166 BasicBlock * blockLoopCond = BasicBlock::Create(iBuilder->getContext(), "blockLoopCond", doSegmentFunction, 0); 167 BasicBlock * blockLoopBody = BasicBlock::Create(iBuilder->getContext(), "blockLoopBody", doSegmentFunction, 0); 168 BasicBlock * blocksDone = BasicBlock::Create(iBuilder->getContext(), "blocksDone", doSegmentFunction, 0); 169 BasicBlock * checkFinalBlock = BasicBlock::Create(iBuilder->getContext(), "checkFinalBlock", doSegmentFunction, 0); 166 BasicBlock * strideLoopCond = BasicBlock::Create(iBuilder->getContext(), "strideLoopCond", doSegmentFunction, 0); 167 BasicBlock * strideLoopBody = BasicBlock::Create(iBuilder->getContext(), "strideLoopBody", doSegmentFunction, 0); 168 BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), "stridesDone", doSegmentFunction, 0); 169 BasicBlock * checkFinalStride = BasicBlock::Create(iBuilder->getContext(), "checkFinalStride", doSegmentFunction, 0); 170 BasicBlock * checkEndSignals = BasicBlock::Create(iBuilder->getContext(), "checkEndSignals", doSegmentFunction, 0); 170 171 BasicBlock * callFinalBlock = BasicBlock::Create(iBuilder->getContext(), "callFinalBlock", doSegmentFunction, 0); 171 172 BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), "segmentDone", doSegmentFunction, 0); 173 BasicBlock * finalExit = BasicBlock::Create(iBuilder->getContext(), "finalExit", doSegmentFunction, 0); 172 174 Type * const size_ty = iBuilder->getSizeTy(); 173 175 Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride()); … … 193 195 Value * availablePos = producerPos[0]; 194 196 for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) { 195 196 197 LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]); 197 198 producerPos.push_back(p); … … 204 205 iBuilder->CallPrintInt(mKernelName + "_itemsAvail", itemsAvail); 205 206 #endif 206 Value * blocksAvail = iBuilder->CreateUDiv(itemsAvail, stride); 207 Value * stridesToDo = iBuilder->CreateUDiv(blocksToDo, strideBlocks); 208 Value * stridesAvail = iBuilder->CreateUDiv(itemsAvail, stride); 207 209 /* Adjust the number of full blocks to do, based on the available data, if necessary. */ 208 Value * lessThanFullSegment = iBuilder->CreateICmpULT( blocksAvail, blocksToDo);209 blocksToDo = iBuilder->CreateSelect(lessThanFullSegment, blocksAvail, blocksToDo);210 //iBuilder->CallPrintInt(mKernelName + "_ blocksAvail", blocksAvail);211 iBuilder->CreateBr( blockLoopCond);212 213 iBuilder->SetInsertPoint( blockLoopCond);214 PHINode * blocksRemaining = iBuilder->CreatePHI(size_ty, 2, "blocksRemaining");215 blocksRemaining->addIncoming(blocksToDo, entryBlock);216 Value * notDone = iBuilder->CreateICmpUGT( blocksRemaining, ConstantInt::get(size_ty, 0));217 iBuilder->CreateCondBr(notDone, blockLoopBody, blocksDone);218 219 iBuilder->SetInsertPoint( blockLoopBody);210 Value * lessThanFullSegment = iBuilder->CreateICmpULT(stridesAvail, stridesToDo); 211 stridesToDo = iBuilder->CreateSelect(lessThanFullSegment, stridesAvail, stridesToDo); 212 //iBuilder->CallPrintInt(mKernelName + "_stridesAvail", stridesAvail); 213 iBuilder->CreateBr(strideLoopCond); 214 215 iBuilder->SetInsertPoint(strideLoopCond); 216 PHINode * stridesRemaining = iBuilder->CreatePHI(size_ty, 2, "stridesRemaining"); 217 stridesRemaining->addIncoming(stridesToDo, entryBlock); 218 Value * notDone = iBuilder->CreateICmpUGT(stridesRemaining, ConstantInt::get(size_ty, 0)); 219 iBuilder->CreateCondBr(notDone, strideLoopBody, stridesDone); 220 221 iBuilder->SetInsertPoint(strideLoopBody); 220 222 Value * blockNo = getScalarField(self, blockNoScalar); 221 223 222 224 generateDoBlockLogic(self, blockNo); 223 225 setBlockNo(self, iBuilder->CreateAdd(blockNo, strideBlocks)); 224 blocksRemaining->addIncoming(iBuilder->CreateSub(blocksRemaining, ConstantInt::get(size_ty, 1)), blockLoopBody);225 iBuilder->CreateBr( blockLoopCond);226 227 iBuilder->SetInsertPoint( blocksDone);228 processed = iBuilder->CreateAdd(processed, iBuilder->CreateMul( blocksToDo, stride));226 stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, ConstantInt::get(size_ty, 1)), strideLoopBody); 227 iBuilder->CreateBr(strideLoopCond); 228 229 iBuilder->SetInsertPoint(stridesDone); 230 processed = iBuilder->CreateAdd(processed, iBuilder->CreateMul(stridesToDo, stride)); 229 231 setProcessedItemCount(self, processed); 230 iBuilder->CreateCondBr(lessThanFullSegment, checkFinal Block, segmentDone);231 232 iBuilder->SetInsertPoint(checkFinal Block);232 iBuilder->CreateCondBr(lessThanFullSegment, checkFinalStride, segmentDone); 233 234 iBuilder->SetInsertPoint(checkFinalStride); 233 235 234 236 /* We had less than a full segment of data; we may have reached the end of input 235 237 on one of the stream sets. */ 236 238 239 Value * alreadyDone = getTerminationSignal(self); 240 iBuilder->CreateCondBr(alreadyDone, finalExit, checkEndSignals); 241 242 iBuilder->SetInsertPoint(checkEndSignals); 237 243 Value * endOfInput = iBuilder->CreateLoad(endSignalPtrs[0]); 238 244 if (endSignalPtrs.size() > 1) { … … 249 255 iBuilder->SetInsertPoint(callFinalBlock); 250 256 251 Value * remainingItems = iBuilder->Create URem(availablePos, stride);257 Value * remainingItems = iBuilder->CreateSub(availablePos, processed); 252 258 createFinalBlockCall(self, remainingItems); 253 259 setProcessedItemCount(self, availablePos); … … 257 263 mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr); 258 264 } 259 265 setTerminationSignal(self); 260 266 iBuilder->CreateBr(segmentDone); 261 267 … … 273 279 // Must be the last action, for synchronization. 274 280 setLogicalSegmentNo(self, iBuilder->CreateAdd(segmentNo, ConstantInt::get(size_ty, 1))); 281 iBuilder->CreateBr(finalExit); 282 283 iBuilder->SetInsertPoint(finalExit); 275 284 276 285 iBuilder->CreateRetVoid(); … … 312 321 } 313 322 314 // By default, kernels do not terminate early.315 323 Value * KernelBuilder::getTerminationSignal(Value * self) { 316 return ConstantInt::getNullValue(iBuilder->getInt1Ty());324 return getScalarField(self, terminationSignal); 317 325 } 318 326 … … 333 341 } 334 342 335 void KernelBuilder::setTerminationSignal(Value * self, Value * newFieldVal) { 336 llvm::report_fatal_error("This kernel type does not support setTerminationSignal."); 337 } 343 void KernelBuilder::setTerminationSignal(Value * self) { 344 Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(terminationSignal)}); 345 iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt1Ty(), 1), ptr); 346 } 347 338 348 339 349 -
icGREP/icgrep-devel/icgrep/kernels/kernel.h
r5185 r5194 54 54 virtual llvm::Value * getProcessedItemCount(llvm::Value * kernelInstance) override; 55 55 virtual llvm::Value * getProducedItemCount(llvm::Value * kernelInstance) override; 56 virtual llvm::Value * getTerminationSignal(llvm::Value * kernelInstance) override;56 llvm::Value * getTerminationSignal(llvm::Value * kernelInstance); 57 57 58 58 … … 113 113 114 114 llvm::Value * getStreamSetBlockPtr(Value * self, std::string ssName, Value * blockNo); 115 115 116 116 void setBlockNo(Value * self, Value * newFieldVal); 117 117 virtual void setLogicalSegmentNo(llvm::Value * self, Value * newFieldVal); 118 118 virtual void setProcessedItemCount(llvm::Value * self, Value * newFieldVal); 119 119 virtual void setProducedItemCount(llvm::Value * self, Value * newFieldVal); 120 v irtual void setTerminationSignal(llvm::Value * self, Value * newFieldVal);120 void setTerminationSignal(llvm::Value * self); 121 121 122 122 123 123 protected: 124 124 -
icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp
r5175 r5194 38 38 // Create the basic blocks for the thread function. 39 39 BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc, 0); 40 BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentCond", threadFunc, 0); 41 BasicBlock * finalSegmentLoopExit = BasicBlock::Create(iBuilder->getContext(), "partialSegmentCond", threadFunc, 0); 40 BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", threadFunc, 0); 42 41 BasicBlock * exitThreadBlock = BasicBlock::Create(iBuilder->getContext(), "exitThread", threadFunc, 0); 43 42 std::vector<BasicBlock *> segmentWait; 44 43 std::vector<BasicBlock *> segmentLoopBody; 45 std::vector<BasicBlock *> partialSegmentWait;46 std::vector<BasicBlock *> partialSegmentLoopBody;47 44 for (unsigned i = 0; i < kernels.size(); i++) { 48 45 segmentWait.push_back(BasicBlock::Create(iBuilder->getContext(), "segmentWait"+std::to_string(i), threadFunc, 0)); 49 46 segmentLoopBody.push_back(BasicBlock::Create(iBuilder->getContext(), "segmentWait"+std::to_string(i), threadFunc, 0)); 50 partialSegmentWait.push_back(BasicBlock::Create(iBuilder->getContext(), "partialSegmentWait"+std::to_string(i), threadFunc, 0));51 partialSegmentLoopBody.push_back(BasicBlock::Create(iBuilder->getContext(), "partialSegmentLoopBody"+std::to_string(i), threadFunc, 0));52 47 } 53 48 54 49 iBuilder->SetInsertPoint(entryBlock); 55 50 Value * sharedStruct = iBuilder->CreateBitCast(input, PointerType::get(sharedStructType, 0)); 56 Value * myThreadId = ConstantInt::get(size_ty, id); 57 Value * fileSize = iBuilder->CreateLoad(iBuilder->CreateGEP(sharedStruct, {iBuilder->getInt32(0), iBuilder->getInt32(0)})); 51 Constant * myThreadId = ConstantInt::get(size_ty, id); 58 52 std::vector<Value *> instancePtrs; 59 53 for (unsigned i = 0; i < kernels.size(); i++) { … … 65 59 int segmentSize = codegen::SegmentSize; 66 60 Constant * segmentBlocks = ConstantInt::get(size_ty, segmentSize); 67 Constant * segmentBytes = ConstantInt::get(size_ty, iBuilder->getStride() * segmentSize);68 Constant * hypersegmentBytes = ConstantInt::get(size_ty, iBuilder->getStride() * segmentSize * threadNum);69 Constant * const blockSize = ConstantInt::get(size_ty, iBuilder->getStride());70 71 Value * myFirstSegNo = myThreadId; //72 // The offset of my starting segment within the thread group hypersegment.73 Value * myOffset = iBuilder->CreateMul(segmentBytes, myThreadId);74 Value * fullSegLimit = iBuilder->CreateAdd(myOffset, segmentBytes);75 76 61 iBuilder->CreateBr(segmentLoop); 77 62 78 63 iBuilder->SetInsertPoint(segmentLoop); 79 PHINode * remainingBytes = iBuilder->CreatePHI(size_ty, 2, "remainingBytes");80 remainingBytes->addIncoming(fileSize, entryBlock);81 64 PHINode * segNo = iBuilder->CreatePHI(size_ty, 2, "segNo"); 82 segNo->addIncoming(my FirstSegNo, entryBlock);83 84 Value * LT_fullSegment = iBuilder->CreateICmpSLT(remainingBytes, fullSegLimit);85 iBuilder->CreateCondBr( LT_fullSegment, finalSegmentLoopExit, segmentWait[0]);65 segNo->addIncoming(myThreadId, entryBlock); 66 unsigned last_kernel = kernels.size() - 1; 67 Value * alreadyDone = kernels[last_kernel]->getTerminationSignal(instancePtrs[last_kernel]); 68 iBuilder->CreateCondBr(alreadyDone, exitThreadBlock, segmentWait[0]); 86 69 87 70 for (unsigned i = 0; i < kernels.size(); i++) { … … 93 76 iBuilder->SetInsertPoint(segmentLoopBody[i]); 94 77 kernels[i]->createDoSegmentCall(instancePtrs[i], segmentBlocks); 95 if (i == kernels.size() - 1) break;78 if (i == last_kernel) break; 96 79 iBuilder->CreateBr(segmentWait[i+1]); 97 80 } 98 81 99 remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, hypersegmentBytes), segmentLoopBody[kernels.size()-1]); 100 segNo->addIncoming(iBuilder->CreateAdd(segNo, ConstantInt::get(size_ty, threadNum)), segmentLoopBody[kernels.size()-1]); 101 iBuilder->CreateBr(segmentLoop); 102 103 // Now we may have a partial segment, or we may be completely done 104 // because the last segment was handled by a previous thread in the group. 105 iBuilder->SetInsertPoint(finalSegmentLoopExit); 106 Value * alreadyDone = iBuilder->CreateICmpSLT(remainingBytes, myOffset); 107 Value * remainingForMe = iBuilder->CreateSub(remainingBytes, myOffset); 108 Value * blocksToDo = iBuilder->CreateUDiv(remainingForMe, blockSize); 109 iBuilder->CreateCondBr(alreadyDone, exitThreadBlock, partialSegmentWait[0]); 110 111 // Full Block Pipeline loop 112 for (unsigned i = 0; i < kernels.size(); i++) { 113 iBuilder->SetInsertPoint(partialSegmentWait[i]); 114 Value * processedSegmentCount = kernels[i]->getLogicalSegmentNo(instancePtrs[i]); 115 Value * cond = iBuilder->CreateICmpEQ(segNo, processedSegmentCount); 116 iBuilder->CreateCondBr(cond, partialSegmentLoopBody[i], partialSegmentWait[i]); 117 118 iBuilder->SetInsertPoint(partialSegmentLoopBody[i]); 119 kernels[i]->createDoSegmentCall(instancePtrs[i], blocksToDo); 120 kernels[i]->createFinalBlockCall(instancePtrs[i], iBuilder->CreateURem(remainingForMe, blockSize)); 121 if (i == kernels.size() - 1) break; 122 iBuilder->CreateBr(partialSegmentWait[i+1]); 123 } 124 iBuilder->CreateBr(exitThreadBlock); 125 82 segNo->addIncoming(iBuilder->CreateAdd(segNo, ConstantInt::get(size_ty, threadNum)), segmentLoopBody[last_kernel]); 83 Value * endSignal = kernels[last_kernel]->getTerminationSignal(instancePtrs[last_kernel]); 84 iBuilder->CreateCondBr(endSignal, exitThreadBlock, segmentLoop); 85 126 86 iBuilder->SetInsertPoint(exitThreadBlock); 127 87 Value * nullVal = Constant::getNullValue(voidPtrTy); … … 244 204 245 205 // Create the basic blocks for the loop. 246 BasicBlock * segmentCondBlock = nullptr; 247 BasicBlock * segmentBodyBlock = nullptr; 248 if (segmentSize > 1) { 249 segmentCondBlock = BasicBlock::Create(iBuilder->getContext(), "segmentCond", main, 0); 250 segmentBodyBlock = BasicBlock::Create(iBuilder->getContext(), "segmentBody", main, 0); 251 } 252 BasicBlock * fullCondBlock = BasicBlock::Create(iBuilder->getContext(), "fullCond", main, 0); 253 BasicBlock * fullBodyBlock = BasicBlock::Create(iBuilder->getContext(), "fullBody", main, 0); 254 BasicBlock * finalBlock = BasicBlock::Create(iBuilder->getContext(), "final", main, 0); 255 BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exit", main, 0); 256 257 258 Value * initialBufferSize = nullptr; 259 Value * initialBlockNo = nullptr; 260 BasicBlock * initialBlock = nullptr; 261 262 if (segmentSize > 1) { 263 iBuilder->CreateBr(segmentCondBlock); 264 iBuilder->SetInsertPoint(segmentCondBlock); 265 PHINode * remainingBytes = iBuilder->CreatePHI(size_ty, 2, "remainingBytes"); 266 remainingBytes->addIncoming(fileSize, entryBlock); 267 PHINode * blockNo = iBuilder->CreatePHI(size_ty, 2, "blockNo"); 268 blockNo->addIncoming(ConstantInt::get(size_ty, 0), entryBlock); 269 270 Constant * const step = ConstantInt::get(size_ty, iBuilder->getStride() * segmentSize); 271 Value * segmentCondTest = iBuilder->CreateICmpULT(remainingBytes, step); 272 iBuilder->CreateCondBr(segmentCondTest, fullCondBlock, segmentBodyBlock); 273 274 iBuilder->SetInsertPoint(segmentBodyBlock); 275 Value * segBlocks = ConstantInt::get(size_ty, segmentSize); 276 for (unsigned i = 0; i < kernels.size(); i++) { 277 kernels[i]->createDoSegmentCall(instances[i], segBlocks); 278 } 279 remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, step), segmentBodyBlock); 280 blockNo->addIncoming(iBuilder->CreateAdd(blockNo, segBlocks), segmentBodyBlock); 281 282 iBuilder->CreateBr(segmentCondBlock); 283 initialBufferSize = remainingBytes; 284 initialBlockNo = blockNo; 285 initialBlock = segmentCondBlock; 286 } else { 287 initialBufferSize = fileSize; 288 initialBlockNo = ConstantInt::get(size_ty, 0); 289 initialBlock = entryBlock; 290 iBuilder->CreateBr(fullCondBlock); 291 } 292 293 iBuilder->SetInsertPoint(fullCondBlock); 294 PHINode * remainingBytes = iBuilder->CreatePHI(size_ty, 2, "remainingBytes"); 295 remainingBytes->addIncoming(initialBufferSize, initialBlock); 296 PHINode * blockNo = iBuilder->CreatePHI(size_ty, 2, "blockNo"); 297 blockNo->addIncoming(initialBlockNo, initialBlock); 298 299 Constant * const step = ConstantInt::get(size_ty, iBuilder->getStride()); 300 Value * fullCondTest = iBuilder->CreateICmpULT(remainingBytes, step); 301 iBuilder->CreateCondBr(fullCondTest, finalBlock, fullBodyBlock); 302 303 // Full Block Pipeline loop 304 iBuilder->SetInsertPoint(fullBodyBlock); 305 for (unsigned i = 0; i < kernels.size(); i++) { 306 kernels[i]->createDoSegmentCall(instances[i], ConstantInt::get(size_ty, 1)); 307 } 308 309 remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, step), fullBodyBlock); 310 blockNo->addIncoming(iBuilder->CreateAdd(blockNo, ConstantInt::get(size_ty, 1)), fullBodyBlock); 311 iBuilder->CreateBr(fullCondBlock); 312 313 iBuilder->SetInsertPoint(finalBlock); 314 for (unsigned i = 0; i < kernels.size(); i++) { 315 kernels[i]->createFinalBlockCall(instances[i], remainingBytes); 316 } 317 iBuilder->CreateBr(exitBlock); 206 BasicBlock * segmentBlock = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", main, 0); 207 BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exitBlock", main, 0); 208 iBuilder->CreateBr(segmentBlock); 209 iBuilder->SetInsertPoint(segmentBlock); 210 Constant * segBlocks = ConstantInt::get(size_ty, segmentSize * iBuilder->getStride() / iBuilder->getBitBlockWidth()); 211 for (unsigned i = 0; i < kernels.size(); i++) { 212 kernels[i]->createDoSegmentCall(instances[i], segBlocks); 213 } 214 Value * endSignal = kernels[kernels.size()-1]->getTerminationSignal(instances[kernels.size()-1]); 215 iBuilder->CreateCondBr(endSignal, exitBlock, segmentBlock); 318 216 iBuilder->SetInsertPoint(exitBlock); 319 217
Note: See TracChangeset
for help on using the changeset viewer.