Changeset 5958
- Timestamp: Apr 9, 2018, 11:11:01 PM (11 months ago)
- Location: icGREP/icgrep-devel/icgrep/kernels/lz4
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp
r5948 r5958 17 17 18 18 namespace kernel{ 19 19 20 LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder) 20 : MultiBlockKernel("LZ4IndexBuilderKernel", 21 // Inputs 22 { 23 Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)}, 24 Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream"), {DisableTemporaryBuffer(), DisableAvailableItemCountAdjustment(), DisableSufficientChecking()}}, 25 // Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xFX", RateEqualTo("byteStream")}, 26 // Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xXF", RateEqualTo("byteStream")}, 27 28 // block data 29 Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1), 30 AlwaysConsume()}, 31 Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1), 32 AlwaysConsume()}, 33 Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1), 34 AlwaysConsume()} 35 36 }, 37 //Outputs 38 { 39 // Uncompressed_data 40 Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos", 41 BoundedRate(0, 1)}, 42 Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength", 43 BoundedRate(0, 1)}, 44 Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos", 45 BoundedRate(0, 1)}, 46 47 Binding{iBuilder->getStreamSetTy(1, 1), "deletionMarker", BoundedRate(0, 1), {DisableTemporaryBuffer(), DisableSufficientChecking()}}, 48 Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1), DisableSufficientChecking()}, //TODO disable temporary buffer for all output streams 49 Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1), DisableSufficientChecking()}, 50 Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1), DisableSufficientChecking()}, 51 Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1), {DisableTemporaryBuffer()}} 52 }, 53 //Arguments 54 { 55 Binding{iBuilder->getSizeTy(), "fileSize"} 56 }, 57 {}, 58 //Internal states: 59 
{ 60 Binding{iBuilder->getSizeTy(), "blockDataIndex"}, 61 Binding{iBuilder->getInt64Ty(), "m0OutputPos"} 62 }) { 21 : SegmentOrientedKernel("LZ4IndexBuilderKernel", 22 // Inputs 23 { 24 Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)}, 25 Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")}, 26 27 // block data 28 Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1), 29 AlwaysConsume()}, 30 Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1), 31 AlwaysConsume()}, 32 Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1), 33 AlwaysConsume()} 34 35 }, 36 //Outputs 37 { 38 // Uncompressed_data 39 Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos", 40 BoundedRate(0, 1)}, 41 Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength", 42 BoundedRate(0, 1)}, 43 Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos", 44 BoundedRate(0, 1)}, 45 46 Binding{iBuilder->getStreamSetTy(1, 1), "deletionMarker", BoundedRate(0, 1)}, 47 Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)}, 48 Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)}, 49 Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)}, 50 Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)} 51 }, 52 //Arguments 53 { 54 Binding{iBuilder->getSizeTy(), "fileSize"} 55 }, 56 {}, 57 //Internal states: 58 { 59 Binding{iBuilder->getSizeTy(), "blockDataIndex"}, 60 Binding{iBuilder->getInt64Ty(), "m0OutputPos"} 61 }) { 63 62 this->setStride(4 * 1024 * 1024); 64 63 addAttribute(MustExplicitlyTerminate()); 65 64 } 66 65 67 void LZ4IndexBuilderKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *const numOfStrides) { 66 void LZ4IndexBuilderKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) { 67 68 68 BasicBlock* exitBlock = 
iBuilder->CreateBasicBlock("exitBlock"); 69 69 BasicBlock* blockEndConBlock = iBuilder->CreateBasicBlock("blockEndConBlock"); 70 70 71 this->resetPreviousProducedMap(iBuilder, {"deletionMarker", "m0Start", "m0End", "matchOffset", "M0Marker"}); 72 73 Value* blockDataIndex = iBuilder->getScalarField("blockDataIndex"); 74 75 Value* totalNumber = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("blockEnd"), iBuilder->getProcessedItemCount("blockEnd")); 76 Value* totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender")); 77 78 Value* blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex); 71 Value * blockDataIndex = iBuilder->getScalarField("blockDataIndex"); 72 73 Value * totalNumber = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("blockEnd"), iBuilder->getProcessedItemCount("blockEnd")); 74 Value * totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender")); 75 76 Value * blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex); 79 77 80 78 iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock); 81 79 82 80 iBuilder->SetInsertPoint(blockEndConBlock); 83 84 85 Value* blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex); 86 87 BasicBlock* processBlock = iBuilder->CreateBasicBlock("processBlock"); 88 // iBuilder->CallPrintInt("----totalExtender", totalExtender); 89 // iBuilder->CallPrintInt("----blockStart", blockStart); 90 // iBuilder->CallPrintInt("----blockEnd", blockEnd); 91 81 Value * blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex); 82 BasicBlock * processBlock = iBuilder->CreateBasicBlock("processBlock"); 92 83 iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock); 93 // 
iBuilder->CreateBr(processBlock);94 84 95 85 iBuilder->SetInsertPoint(processBlock); … … 99 89 this->generateProcessCompressedBlock(iBuilder, blockStart, blockEnd); 100 90 101 Value * newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));91 Value * newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1)); 102 92 iBuilder->setScalarField("blockDataIndex", newBlockDataIndex); 103 93 iBuilder->setProcessedItemCount("blockEnd", newBlockDataIndex); … … 105 95 iBuilder->setProcessedItemCount("isCompressed", newBlockDataIndex); 106 96 107 108 97 iBuilder->setProcessedItemCount("byteStream", blockEnd); 109 110 111 // iBuilder->setProcessedItemCount("extender", blockEnd);112 // iBuilder->setProcessedItemCount("CC_0xFX", blockEnd);113 // iBuilder->setProcessedItemCount("CC_0xXF", blockEnd);114 115 98 iBuilder->CreateBr(exitBlock); 116 99 … … 121 104 BasicBlock* entryBlock = iBuilder->GetInsertBlock(); 122 105 123 Value* extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0)); 124 // iBuilder->CallPrintInt("token", token); 106 Value * extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0)); 125 107 126 108 BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body"); … … 141 123 phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock); 142 124 143 Value* literalExtensionSize = iBuilder->CreateSub(phiCursorPosAfterLiteral, tokenPos); 144 // iBuilder->CallPrintInt("literalExtensionSize", literalExtensionSize); 145 Value* finalLengthByte = this->generateLoadSourceInputByte(iBuilder, phiCursorPosAfterLiteral); 125 Value * literalExtensionSize = iBuilder->CreateSub(phiCursorPosAfterLiteral, tokenPos); 126 Value * finalLengthByte = this->generateLoadSourceInputByte(iBuilder, phiCursorPosAfterLiteral); 146 127 finalLengthByte = 
iBuilder->CreateZExt(finalLengthByte, iBuilder->getInt64Ty()); 147 Value * literalLengthExtendValue = iBuilder->CreateSelect(128 Value * literalLengthExtendValue = iBuilder->CreateSelect( 148 129 iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)), 149 130 iBuilder->CreateAdd( … … 168 149 // TODO Clear Output Buffer at the beginning instead of marking 0 169 150 this->markCircularOutputBitstream(iBuilder, "deletionMarker", iBuilder->getProducedItemCount("deletionMarker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), true); 170 // iBuilder->CallPrintInt("markStart", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)));171 // iBuilder->CallPrintInt("phiCursorPosAfterLiteral", phiCursorPosAfterLiteral);172 151 this->markCircularOutputBitstream(iBuilder, "deletionMarker", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), offsetPos, false); 173 152 this->increaseScalarField(iBuilder, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields … … 204 183 205 184 Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, matchLengthStartPos); 206 // iBuilder->CallPrintInt("totalExtender", iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender")));207 // iBuilder->CallPrintInt("aaa", oldMatchExtensionSize);208 209 185 extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf)); 210 186 Value* matchExtensionSize = iBuilder->CreateSelect( … … 251 227 iBuilder->CreateShl(iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1))), iBuilder->getSizeTy()), iBuilder->getSize(8)) 252 228 ); 253 this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(),outputPos);254 this->generateStoreNumberOutput(iBuilder, "m0End", 
iBuilder->getInt64Ty()->getPointerTo(),outputEndPos);255 this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(),matchOffset);229 this->generateStoreNumberOutput(iBuilder, "m0Start", outputPos); 230 this->generateStoreNumberOutput(iBuilder, "m0End", outputEndPos); 231 this->generateStoreNumberOutput(iBuilder, "matchOffset", matchOffset); 256 232 this->increaseScalarField(iBuilder, "m0OutputPos", matchLength); 257 233 this->markCircularOutputBitstream(iBuilder, "M0Marker", outputPos, outputEndPos, true, false); … … 259 235 return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE); 260 236 } 261 262 237 263 238 void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) { … … 292 267 //TODO add acceleration here 293 268 Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue); 294 295 // iBuilder->CallPrintInt("tokenPos", phiCursorValue);296 // iBuilder->CallPrintInt("token", token);297 298 269 // Process Literal 299 270 BasicBlock* processLiteralBlock = iBuilder->CreateBasicBlock("processLiteralBlock"); … … 302 273 303 274 Value* offsetPos = this->processLiteral(iBuilder, token, phiCursorValue, blockEnd); 304 // iBuilder->CallPrintInt("offsetPos", offsetPos);305 275 // Process Match 306 276 BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body"); … … 316 286 iBuilder->SetInsertPoint(handleM0BodyBlock); 317 287 Value* nextTokenPos = this->processMatch(iBuilder, offsetPos, token, blockEnd); 318 // iBuilder->CallPrintInt("nextTokenPos", nextTokenPos);319 288 phiCursorValue->addIncoming(nextTokenPos, iBuilder->GetInsertBlock()); 320 289 … … 328 297 // Store final M0 pos to make sure the bit stream will be long enough 329 298 Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos"); 330 // iBuilder->CallPrintInt("finalM0OutputPos", finalM0OutputPos); 331 
this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos); 332 this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos); 333 this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), iBuilder->getInt64(0)); 299 this->generateStoreNumberOutput(iBuilder, "m0Start", finalM0OutputPos); 300 this->generateStoreNumberOutput(iBuilder, "m0End", finalM0OutputPos); 301 this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64(0)); 334 302 iBuilder->setProducedItemCount("M0Marker", finalM0OutputPos); 335 303 // finalM0OutputPos should always be 4MB * n except for the final block … … 341 309 } 342 310 343 Value *LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) { 344 return advanceUntilNextValue(iBuilder, inputName, startPos, true, maxPos); 345 } 346 347 Value *LZ4IndexBuilderKernel::advanceUntilNextOne(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) { 348 return advanceUntilNextValue(iBuilder, inputName, startPos, false, maxPos); 349 } 350 351 Value *LZ4IndexBuilderKernel::advanceUntilNextValue(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, bool isNextZero, Value* maxPos) { 311 Value * LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value * startPos, Value * maxPos) { 312 352 313 unsigned int bitBlockWidth = iBuilder->getBitBlockWidth(); 353 314 Constant* INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(bitBlockWidth); 354 Constant* SIZE_ZERO = iBuilder->getSize(0);355 315 Type* bitBlockType = iBuilder->getBitBlockType(); 356 316 Type* bitBlockWidthIntTy = iBuilder->getIntNTy(bitBlockWidth); 357 358 Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputName), 
INT64_BIT_BLOCK_WIDTH);359 317 360 318 BasicBlock* entryBlock = iBuilder->GetInsertBlock(); … … 376 334 iBuilder->SetInsertPoint(advanceBodyBlock); 377 335 378 379 Value* currentBlockGlobalPos = iBuilder->CreateUDiv(phiCurrentPos, INT64_BIT_BLOCK_WIDTH); 380 Value* currentPosBitBlockIndex = iBuilder->CreateSub(currentBlockGlobalPos, baseInputBlockIndex); 381 382 Value* currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH); 383 384 Value* ptr = iBuilder->getInputStreamBlockPtr(inputName, SIZE_ZERO, currentPosBitBlockIndex); 385 Value* rawPtr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, SIZE_ZERO), bitBlockType->getPointerTo()); 386 Value* ptr2 = iBuilder->CreateGEP(rawPtr, iBuilder->CreateURem(currentBlockGlobalPos, iBuilder->getSize(this->getAnyStreamSetBuffer(inputName)->getBufferBlocks()))); 387 ptr = ptr2; //TODO workaround here 388 389 390 Value* currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy); 391 336 Value * currentBlockGlobalPos = iBuilder->CreateAnd(phiCurrentPos, ConstantExpr::getNeg(INT64_BIT_BLOCK_WIDTH)); 337 Value * currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH); 338 339 Value * ptr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, currentBlockGlobalPos), bitBlockType->getPointerTo()); 340 341 Value * currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy); 392 342 currentBitValue = iBuilder->CreateLShr(currentBitValue, iBuilder->CreateZExt(currentPosBitBlockOffset, bitBlockWidthIntTy)); 393 if (isNextZero) { 394 currentBitValue = iBuilder->CreateNot(currentBitValue); 395 } 396 Value* forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty()); 397 Value* newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount); 343 currentBitValue = iBuilder->CreateNot(currentBitValue); 344 345 Value * 
forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty()); 346 Value * newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount); 398 347 newOffset = iBuilder->CreateUMin(newOffset, INT64_BIT_BLOCK_WIDTH); 399 348 400 Value * actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);401 Value * newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue);349 Value * actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset); 350 Value * newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue); 402 351 if (maxPos) { 403 352 newPos = iBuilder->CreateUMin(maxPos, newPos); … … 406 355 } 407 356 408 phiIsFinish->addIncoming(iBuilder->Create Not(iBuilder->CreateICmpEQ(newOffset, INT64_BIT_BLOCK_WIDTH)), iBuilder->GetInsertBlock());357 phiIsFinish->addIncoming(iBuilder->CreateICmpNE(newOffset, INT64_BIT_BLOCK_WIDTH), iBuilder->GetInsertBlock()); 409 358 phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock()); 410 359 iBuilder->CreateBr(advanceConBlock); … … 414 363 } 415 364 416 Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value *globalOffset) { 417 Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(this->getStride()); 418 Constant* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth()); 419 Constant* SIZE_ZERO = iBuilder->getSize(0); 420 421 // Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_BIT_BLOCK_WIDTH); 422 423 //TODO possible bug here, maybe we need to use iBuilder->getStride() 424 Value* offset = iBuilder->CreateSub(globalOffset, iBuilder->CreateMul(iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_STRIDE_SIZE), SIZE_STRIDE_SIZE)); 425 426 Value* targetBlockIndex = iBuilder->CreateUDiv(offset, SIZE_BIT_BLOCK_WIDTH); 427 Value* localOffset = 
iBuilder->CreateURem(offset, SIZE_BIT_BLOCK_WIDTH); 428 429 //[64 x <4 x i64>]* 430 Value* ptr = iBuilder->getInputStreamBlockPtr(inputBufferName, SIZE_ZERO, targetBlockIndex); 431 ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt64Ty()->getPointerTo()); 432 //GEP here is safe 433 Value* valuePtr = iBuilder->CreateGEP(ptr, localOffset); 365 Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value * globalOffset) { 366 Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(getStride()); 367 Value * processed = iBuilder->getProcessedItemCount(inputBufferName); 368 processed = iBuilder->CreateAnd(processed, ConstantExpr::getNeg(SIZE_STRIDE_SIZE)); 369 Value * offset = iBuilder->CreateSub(globalOffset, processed); 370 Value * valuePtr = iBuilder->getRawInputPointer(inputBufferName, offset); 434 371 return iBuilder->CreateLoad(valuePtr); 435 372 } 436 373 437 Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value *offset) { 438 // The external buffer is always linear accessible, so the GEP here is safe 439 Value *blockStartPtr = iBuilder->CreatePointerCast( 440 iBuilder->getRawInputPointer("byteStream", iBuilder->getInt32(0)), 441 iBuilder->getInt8PtrTy() 442 ); 443 Value *ptr = iBuilder->CreateGEP(blockStartPtr, offset); 374 Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value * offset) { 375 Value * ptr = iBuilder->getRawInputPointer("byteStream", offset); 444 376 return iBuilder->CreateLoad(ptr); 445 377 } … … 463 395 Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(bitBlockWidth); 464 396 Value* SIZE_ONE = iBuilder->getSize(1); 465 Value* SIZE_ZERO = iBuilder->getSize(0);466 397 Type * const INT_BIT_BLOCK_TY = iBuilder->getIntNTy(bitBlockWidth); 467 398 Type * const BIT_BLOCK_TY = iBuilder->getBitBlockType(); … … 469 400 Constant* INT_BIT_BLOCK_ZERO = 
ConstantInt::get(INT_BIT_BLOCK_TY, 0); 470 401 471 Value* previousProduced = this->previousProducedMap.find(bitstreamName)->second;472 Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);473 474 402 BasicBlock *entryBlock = iBuilder->GetInsertBlock(); 475 403 BasicBlock *conBlock = iBuilder->CreateBasicBlock("mark_bit_one_con"); … … 477 405 BasicBlock *exitBlock = iBuilder->CreateBasicBlock("mark_bit_one_exit"); 478 406 479 Value * startBlockLocalIndex = iBuilder->CreateSub(iBuilder->CreateUDiv(start, SIZE_BIT_BLOCK_WIDTH), blockIndexBase);407 Value * startBlockLocalIndex = iBuilder->CreateUDiv(start, SIZE_BIT_BLOCK_WIDTH); 480 408 481 409 iBuilder->CreateBr(conBlock); … … 486 414 PHINode *curBlockLocalIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2); 487 415 curBlockLocalIndex->addIncoming(startBlockLocalIndex, entryBlock); 416 417 488 418 iBuilder->CreateCondBr( 489 iBuilder->CreateICmpULT(iBuilder->CreateMul( iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH), end),419 iBuilder->CreateICmpULT(iBuilder->CreateMul(curBlockLocalIndex, SIZE_BIT_BLOCK_WIDTH), end), 490 420 bodyBlock, 491 421 exitBlock … … 495 425 iBuilder->SetInsertPoint(bodyBlock); 496 426 497 Value *outputLowestBitValue = iBuilder->CreateSelect( 498 iBuilder->CreateICmpULE( 499 iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH), 500 start 501 ), 502 iBuilder->CreateShl(INT_BIT_BLOCK_ONE, iBuilder->CreateZExt(iBuilder->CreateURem(start, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY)), 503 INT_BIT_BLOCK_ONE 504 ); 505 506 Value *hasNotReachEnd = iBuilder->CreateICmpULE( 507 iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_ONE), SIZE_BIT_BLOCK_WIDTH), 508 end 509 ); 510 Value *producedItemsCount = iBuilder->CreateSelect( 511 hasNotReachEnd, 512 iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), 
SIZE_ONE), SIZE_BIT_BLOCK_WIDTH), 513 end 514 ); 515 516 517 Value *outputHighestBitValue = iBuilder->CreateSelect( 518 hasNotReachEnd, 519 INT_BIT_BLOCK_ZERO, 520 iBuilder->CreateShl( 521 INT_BIT_BLOCK_ONE, 522 iBuilder->CreateZExt(iBuilder->CreateURem(end, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY) 523 ) 524 ); 525 526 527 Value *bitMask = iBuilder->CreateSub( 528 outputHighestBitValue, 529 outputLowestBitValue 530 ); 531 532 if (!isOne) { 533 bitMask = iBuilder->CreateNot(bitMask); 534 } 535 536 Value *targetPtr = iBuilder->getOutputStreamBlockPtr(bitstreamName, SIZE_ZERO, curBlockLocalIndex); 537 Value *rawInputPointer = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_ZERO), iBuilder->getBitBlockType()->getPointerTo()); 538 Value * ptr = iBuilder->CreateGEP(rawInputPointer, iBuilder->CreateURem(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks()))); 539 // iBuilder->CallPrintInt("targetPtr", targetPtr); 540 // iBuilder->CallPrintInt("targetPtr2", ptr); 541 targetPtr = ptr; //TODO workaround here 542 543 544 //TODO fixed circular here 545 546 Value *oldValue = iBuilder->CreateLoad(targetPtr); 547 oldValue = iBuilder->CreateBitCast(oldValue, INT_BIT_BLOCK_TY); 548 Value *newValue = NULL; 427 Value * const currentPosition = iBuilder->CreateMul(curBlockLocalIndex, SIZE_BIT_BLOCK_WIDTH); 428 Value * lowestBitPosition = iBuilder->CreateURem(start, SIZE_BIT_BLOCK_WIDTH); 429 lowestBitPosition = iBuilder->CreateZExt(lowestBitPosition, INT_BIT_BLOCK_TY); 430 Value * outputLowestBitValue = iBuilder->CreateShl(INT_BIT_BLOCK_ONE, lowestBitPosition); 431 Value * const hasNotReachedStart = iBuilder->CreateICmpULE(currentPosition, start); 432 outputLowestBitValue = iBuilder->CreateSelect(hasNotReachedStart, outputLowestBitValue, INT_BIT_BLOCK_ONE); 433 434 Value * const nextPosition = iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, SIZE_ONE), 
SIZE_BIT_BLOCK_WIDTH); 435 Value * const hasNotReachEnd = iBuilder->CreateICmpULE(nextPosition, end); 436 Value * producedItemsCount = iBuilder->CreateSelect(hasNotReachEnd, nextPosition, end); 437 Value * highestBitPosition = iBuilder->CreateURem(end, SIZE_BIT_BLOCK_WIDTH); 438 highestBitPosition = iBuilder->CreateZExt(highestBitPosition, INT_BIT_BLOCK_TY); 439 Value * outputHighestBitValue = iBuilder->CreateShl(INT_BIT_BLOCK_ONE, highestBitPosition); 440 outputHighestBitValue = iBuilder->CreateSelect(hasNotReachEnd, INT_BIT_BLOCK_ZERO, outputHighestBitValue); 441 Value * bitMask = iBuilder->CreateSub(outputHighestBitValue, outputLowestBitValue); 442 bitMask = iBuilder->CreateBitCast(bitMask, BIT_BLOCK_TY); 443 444 Value * targetPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, currentPosition), iBuilder->getBitBlockType()->getPointerTo()); 445 Value * oldValue = iBuilder->CreateBlockAlignedLoad(targetPtr); 446 Value * newValue = nullptr; 549 447 if (isOne) { 550 448 newValue = iBuilder->CreateOr(oldValue, bitMask); 551 449 } else { 552 newValue = iBuilder->CreateAnd(oldValue, bitMask);450 newValue = iBuilder->CreateAnd(oldValue, iBuilder->CreateNot(bitMask)); 553 451 } 554 555 iBuilder->CreateStore( 556 iBuilder->CreateBitCast(newValue, BIT_BLOCK_TY), 557 targetPtr 558 ); 452 iBuilder->CreateStore(newValue, targetPtr); 453 559 454 if (setProduced) { 560 455 iBuilder->setProducedItemCount(bitstreamName, producedItemsCount); … … 570 465 571 466 572 573 467 void LZ4IndexBuilderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder, 574 const string &outputBufferName, Type *pointerType, 575 Value *value) { 576 577 Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth()); 578 Value* SIZE_ZERO = iBuilder->getSize(0); 579 Value* SIZE_ONE = iBuilder->getSize(1); 580 581 Value* previousProduced = previousProducedMap.find(outputBufferName)->second; 582 // iBuilder->CallPrintInt("previousProduced", 
previousProduced); 583 584 Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH); 585 Value* outputOffset = iBuilder->getProducedItemCount(outputBufferName); 586 Value* blockIndex = iBuilder->CreateUDiv(outputOffset, SIZE_BIT_BLOCK_WIDTH); 587 588 Value* blockOffset = iBuilder->CreateURem(outputOffset, SIZE_BIT_BLOCK_WIDTH); 589 590 // i8, [8 x <4 x i64>]* 591 // i64, [64 x <4 x i64>]* 592 Value* ptr = iBuilder->getOutputStreamBlockPtr(outputBufferName, SIZE_ZERO, iBuilder->CreateSub(blockIndex, blockIndexBase)); 593 ptr = iBuilder->CreatePointerCast(ptr, pointerType); 594 ptr = iBuilder->CreateGEP(ptr, blockOffset); 595 596 Value* tmpOffset = iBuilder->CreateURem(outputOffset, iBuilder->getSize(this->getAnyStreamSetBuffer(outputBufferName)->getBufferBlocks() * iBuilder->getBitBlockWidth())); 597 Value* outputRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(outputBufferName, SIZE_ZERO), pointerType); 598 Value* ptr2 = iBuilder->CreateGEP(outputRawPtr, tmpOffset); 599 ptr = ptr2; 600 // iBuilder->CallPrintInt("ptr", ptr); 601 // iBuilder->CallPrintInt("ptr2", ptr2); 602 603 // GEP here is safe 604 iBuilder->CreateStore(value, ptr); 605 606 if (outputBufferName == "m0End") { 607 // iBuilder->CallPrintInt("output:m0End", value); 608 } 609 610 iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, SIZE_ONE)); 611 } 612 613 614 void LZ4IndexBuilderKernel::resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder, 615 std::vector<std::string> outputList) { 616 previousProducedMap.clear(); 617 for (auto iter = outputList.begin(); iter != outputList.end(); ++iter) { 618 previousProducedMap.insert(std::make_pair(*iter, iBuilder->getProducedItemCount(*iter))); 619 } 620 } 468 const string & outputBufferName, 469 Value * value) { 470 471 Value * outputOffset = iBuilder->getProducedItemCount(outputBufferName); 472 Value * outputRawPtr = iBuilder->getRawOutputPointer(outputBufferName, 
outputOffset); 473 iBuilder->CreateStore(value, outputRawPtr); 474 iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, iBuilder->getSize(1))); 475 } 476 621 477 } -
icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.h
r5923 r5958 21 21 22 22 namespace kernel { 23 class LZ4IndexBuilderKernel final : public MultiBlockKernel {23 class LZ4IndexBuilderKernel final : public SegmentOrientedKernel { 24 24 public: 25 25 LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder); 26 26 27 27 protected: 28 void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, 29 llvm::Value *const numOfStrides) override; 28 void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) override; 30 29 31 30 private: … … 41 40 llvm::Value *advanceUntilNextZero(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputName, 42 41 llvm::Value *startPos, llvm::Value *maxPos = nullptr); 43 44 llvm::Value *advanceUntilNextOne(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputName,45 llvm::Value *startPos, llvm::Value *maxPos = nullptr);46 47 llvm::Value *advanceUntilNextValue(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputName,48 llvm::Value *startPos, bool isNextZero, llvm::Value *maxPos = nullptr);49 42 50 43 void increaseScalarField(const std::unique_ptr<KernelBuilder> &iBuilder, const std::string &fieldName, … … 68 61 69 62 void generateStoreNumberOutput(const std::unique_ptr<KernelBuilder> &iBuilder, 70 const std::string &outputBufferName, llvm::Type *pointerType,63 const std::string &outputBufferName, 71 64 llvm::Value *value); 72 65 73 void resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder, std::vector<std::string> outputList);74 std::map<std::string, llvm::Value*> previousProducedMap;75 66 }; 76 67 }
Note: See TracChangeset for help on using the changeset viewer.