Changeset 5188
- Timestamp: Oct 11, 2016, 10:40:35 AM (2 years ago)
- Location: icGREP/icgrep-devel/icgrep/kernels
- Files: 4 edited
Legend:
- Unmodified
- Added
- Removed
-
icGREP/icgrep-devel/icgrep/kernels/kernel.cpp
r5185 r5188 167 167 BasicBlock * blockLoopBody = BasicBlock::Create(iBuilder->getContext(), "blockLoopBody", doSegmentFunction, 0); 168 168 BasicBlock * blocksDone = BasicBlock::Create(iBuilder->getContext(), "blocksDone", doSegmentFunction, 0); 169 BasicBlock * checkFinalBlock = BasicBlock::Create(iBuilder->getContext(), "checkFinalBlock", doSegmentFunction, 0); 170 BasicBlock * callFinalBlock = BasicBlock::Create(iBuilder->getContext(), "callFinalBlock", doSegmentFunction, 0); 171 BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), "segmentDone", doSegmentFunction, 0); 169 172 Type * const size_ty = iBuilder->getSizeTy(); 170 173 Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride()); … … 175 178 Value * blocksToDo = &*(args); 176 179 Value * segmentNo = getLogicalSegmentNo(self); 180 177 181 std::vector<Value *> inbufProducerPtrs; 178 182 std::vector<Value *> endSignalPtrs; 179 183 for (unsigned i = 0; i < mStreamSetInputs.size(); i++) { 180 184 Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].ssName); 181 185 inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr)); 182 } 183 186 endSignalPtrs.push_back(mStreamSetInputBuffers[i]->hasEndOfInputPtr(ssStructPtr)); 187 } 188 189 std::vector<Value *> producerPos; 184 190 /* Determine the actually available data examining all input stream sets. 
*/ 185 LoadInst * producerPos = iBuilder->CreateAlignedLoad(inbufProducerPtrs[0], sizeof(size_t)); 186 producerPos->setOrdering(AtomicOrdering::Acquire); 187 Value * availablePos = producerPos; 191 LoadInst * p = iBuilder->CreateAlignedLoad(inbufProducerPtrs[0], sizeof(size_t)); 192 p->setOrdering(AtomicOrdering::Acquire); 193 producerPos.push_back(p); 194 Value * availablePos = producerPos[0]; 188 195 for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) { 189 LoadInst * producerPos = iBuilder->CreateAlignedLoad(inbufProducerPtrs[i], sizeof(size_t)); 190 producerPos->setOrdering(AtomicOrdering::Acquire); 196 LoadInst * p = iBuilder->CreateAlignedLoad(inbufProducerPtrs[i], sizeof(size_t)); 197 p->setOrdering(AtomicOrdering::Acquire); 198 producerPos.push_back(p); 191 199 /* Set the available position to be the minimum of availablePos and producerPos. */ 192 availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p roducerPos), availablePos, producerPos);200 availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p); 193 201 } 194 202 Value * processed = getProcessedItemCount(self); … … 199 207 Value * blocksAvail = iBuilder->CreateUDiv(itemsAvail, stride); 200 208 /* Adjust the number of full blocks to do, based on the available data, if necessary. 
*/ 201 blocksToDo = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocksToDo, blocksAvail), blocksToDo, blocksAvail); 209 Value * lessThanFullSegment = iBuilder->CreateICmpULT(blocksAvail, blocksToDo); 210 blocksToDo = iBuilder->CreateSelect(lessThanFullSegment, blocksAvail, blocksToDo); 202 211 //iBuilder->CallPrintInt(mKernelName + "_blocksAvail", blocksAvail); 203 212 iBuilder->CreateBr(blockLoopCond); … … 220 229 processed = iBuilder->CreateAdd(processed, iBuilder->CreateMul(blocksToDo, stride)); 221 230 setProcessedItemCount(self, processed); 231 iBuilder->CreateCondBr(lessThanFullSegment, checkFinalBlock, segmentDone); 232 233 iBuilder->SetInsertPoint(checkFinalBlock); 234 235 /* We had less than a full segment of data; we may have reached the end of input 236 on one of the stream sets. */ 237 238 Value * endOfInput = iBuilder->CreateLoad(endSignalPtrs[0]); 239 if (endSignalPtrs.size() > 1) { 240 /* If there is more than one input stream set, then we need to confirm that one of 241 them has both the endSignal set and the length = to availablePos. 
*/ 242 endOfInput = iBuilder->CreateAnd(endOfInput, iBuilder->CreateICmpEQ(availablePos, producerPos[0])); 243 for (unsigned i = 1; i < endSignalPtrs.size(); i++) { 244 Value * e = iBuilder->CreateAnd(iBuilder->CreateLoad(endSignalPtrs[i]), iBuilder->CreateICmpEQ(availablePos, producerPos[i])); 245 endOfInput = iBuilder->CreateOr(endOfInput, e); 246 } 247 } 248 iBuilder->CreateCondBr(endOfInput, callFinalBlock, segmentDone); 249 250 iBuilder->SetInsertPoint(callFinalBlock); 251 252 Value * remainingItems = iBuilder->CreateURem(availablePos, stride); 253 createFinalBlockCall(self, remainingItems); 254 setProcessedItemCount(self, availablePos); 255 256 for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) { 257 Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].ssName); 258 mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr); 259 } 260 261 iBuilder->CreateBr(segmentDone); 262 263 iBuilder->SetInsertPoint(segmentDone); 222 264 Value * produced = getProducedItemCount(self); 223 265 #ifndef NDEBUG … … 465 507 iBuilder->SetInsertPoint(endSignalCheckBlock); 466 508 467 LoadInst * endSignal = iBuilder->CreateAlignedLoad(endSignalPtrs[0], sizeof(size_t)); 468 // iBuilder->CallPrintInt(name + ":endSignal", endSignal); 469 endSignal->setOrdering(AtomicOrdering::Acquire); 509 LoadInst * endSignal = iBuilder->CreateLoad(endSignalPtrs[0]); 470 510 for (unsigned i = 1; i < endSignalPtrs.size(); i++){ 471 LoadInst * endSignal_next = iBuilder->CreateAlignedLoad(endSignalPtrs[i], sizeof(size_t)); 472 endSignal_next->setOrdering(AtomicOrdering::Acquire); 511 LoadInst * endSignal_next = iBuilder->CreateLoad(endSignalPtrs[i]); 473 512 iBuilder->CreateAnd(endSignal, endSignal_next); 474 513 } 475 514 476 iBuilder->CreateCondBr( iBuilder->CreateICmpEQ(endSignal, ConstantInt::get(iBuilder->getInt8Ty(), 1)), endBlock, inputCheckBlock);515 iBuilder->CreateCondBr(endSignal, endBlock, inputCheckBlock); 477 516 478 517 iBuilder->SetInsertPoint(doSegmentBlock); -
icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp
r5185 r5188 44 44 iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doSegmentFunction, 0)); 45 45 Constant * stride = ConstantInt::get(iBuilder->getSizeTy(), iBuilder->getStride()); 46 Constant * strideBytes = ConstantInt::get(iBuilder->getSizeTy(), iBuilder->getStride() *mCodeUnitWidth/8);46 Constant * itemBytes = ConstantInt::get(iBuilder->getSizeTy(), mCodeUnitWidth/8); 47 47 48 48 Function::arg_iterator args = doSegmentFunction->arg_begin(); … … 57 57 producerPos->setOrdering(AtomicOrdering::Acquire); 58 58 //iBuilder->CallPrintInt("producerPos", producerPos); 59 Value * endSignal = iBuilder->CreateLoad(mStreamSetInputBuffers[0]->hasEndOfInputPtr(streamStructPtr)); 59 60 61 Value * blockNo = getScalarField(self, blockNoScalar); 62 //iBuilder->CallPrintInt("blockNo", blockNo); 63 Value * basePtr = getStreamSetBlockPtr(self, "codeUnitBuffer", blockNo); 64 //iBuilder->CallPrintInt("basePtr", iBuilder->CreatePtrToInt(basePtr, iBuilder->getInt64Ty())); 65 60 66 Value * processed = getProcessedItemCount(self); 61 67 Value * itemsAvail = iBuilder->CreateSub(producerPos, processed); … … 63 69 Value * blocksAvail = iBuilder->CreateUDiv(itemsAvail, stride); 64 70 //iBuilder->CallPrintInt("blocksAvail", blocksAvail); 71 72 Value * lessThanFullSegment = iBuilder->CreateICmpULT(blocksAvail, blocksToDo); 73 Value * inFinalSegment = iBuilder->CreateAnd(endSignal, lessThanFullSegment); 65 74 /* Adjust the number of full blocks to do, based on the available data, if necessary. 
*/ 66 blocksToDo = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocksToDo, blocksAvail), blocksToDo, blocksAvail); 67 Value * blockNo = getScalarField(self, blockNoScalar); 68 //iBuilder->CallPrintInt("blockNo", blockNo); 69 Value * basePtr = getStreamSetBlockPtr(self, "codeUnitBuffer", blockNo); 70 //iBuilder->CallPrintInt("basePtr", iBuilder->CreatePtrToInt(basePtr, iBuilder->getInt64Ty())); 71 Value * bytesToDo = iBuilder->CreateMul(blocksToDo, strideBytes); 75 blocksToDo = iBuilder->CreateSelect(lessThanFullSegment, blocksAvail, blocksToDo); 76 77 Value * itemsToDo = iBuilder->CreateMul(blocksToDo, stride); 78 itemsToDo = iBuilder->CreateSelect(inFinalSegment, itemsAvail, itemsToDo); 72 79 //iBuilder->CallPrintInt("bytesToDo", bytesToDo); 73 iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->getInt32(1), iBuilder->CreateBitCast(basePtr, i8PtrTy), bytesToDo}));80 iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->getInt32(1), iBuilder->CreateBitCast(basePtr, i8PtrTy), iBuilder->CreateMul(itemsToDo, itemBytes)})); 74 81 75 82 setScalarField(self, blockNoScalar, iBuilder->CreateAdd(blockNo, blocksToDo)); 76 processed = iBuilder->CreateAdd(processed, i Builder->CreateMul(blocksToDo, stride));83 processed = iBuilder->CreateAdd(processed, itemsToDo); 77 84 setProcessedItemCount(self, processed); 78 85 mStreamSetInputBuffers[0]->setConsumerPos(streamStructPtr, processed); -
icGREP/icgrep-devel/icgrep/kernels/streamset.cpp
r5185 r5188 65 65 66 66 void StreamSetBuffer::setEndOfInput(Value * bufferStructPtr){ 67 iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt 8Ty(), 1), iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)}));67 iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt1Ty(), 1), iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)})); 68 68 } 69 69 … … 75 75 llvm::Value * StreamSetBuffer::allocateBuffer() { 76 76 Type * const size_ty = iBuilder->getSizeTy(); 77 Type * const int 8ty = iBuilder->getInt8Ty();77 Type * const int1ty = iBuilder->getInt1Ty(); 78 78 mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(mStreamSetType.getStreamSetBlockType(iBuilder), ConstantInt::get(iBuilder->getSizeTy(), mBufferBlocks)); 79 79 mStreamSetStructPtr = iBuilder->CreateCacheAlignedAlloca(mStreamSetStructType); … … 82 82 iBuilder->CreateStore(ConstantInt::get(size_ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)})); 83 83 iBuilder->CreateStore(ConstantInt::get(size_ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)})); 84 iBuilder->CreateStore(ConstantInt::get(int 8ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)}));84 iBuilder->CreateStore(ConstantInt::get(int1ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)})); 85 85 iBuilder->CreateStore(mStreamSetBufferPtr, iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)})); 86 86 … … 101 101 102 102 Type * const size_ty = iBuilder->getSizeTy(); 103 Type * const int 8ty = iBuilder->getInt8Ty();103 Type * const int1ty = iBuilder->getInt1Ty(); 104 104 105 105 PointerType * t = getStreamBufferPointerType(); … … 109 109 iBuilder->CreateStore(fileSize, 
iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)})); 110 110 iBuilder->CreateStore(ConstantInt::get(size_ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)})); 111 iBuilder->CreateStore(ConstantInt::get(int 8ty, 1), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)}));111 iBuilder->CreateStore(ConstantInt::get(int1ty, 1), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)})); 112 112 iBuilder->CreateStore(mStreamSetBufferPtr, iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)})); 113 113 } -
icGREP/icgrep-devel/icgrep/kernels/streamset.h
r5185 r5188 81 81 std::vector<Type *>({iBuilder->getSizeTy(), 82 82 iBuilder->getSizeTy(), 83 iBuilder->getInt8Ty(), 83 iBuilder->getInt1Ty(), 84 84 PointerType::get(mStreamSetType.getStreamSetBlockType(iBuilder), AddressSpace)})); 85 85 }
Note: See TracChangeset for help on using the changeset viewer.