Changeset 5715
 Timestamp:
 Oct 28, 2017, 5:03:07 PM (18 months ago)
 Location:
 icGREP/icgrep-devel/icgrep
 Files:

 2 edited
Legend:
 Unmodified
 Added
 Removed

icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp
r5713 r5715 199 199 return std::pair<Value *, Value *>{bitCast(carryOut), bitCast(result)}; 200 200 } 201 else {201 else if (shiftAmount <= mBitBlockWidth) { 202 202 // The shift amount is always greater than the popcount of the individual 203 203 // elements that we deal with. This simplifies some of the logic. 204 Value * carry = CreateBitCast(shiftIn, iBitBlock);205 Value * result = allZeroes();204 Value * carry = CreateBitCast(shiftIn, iBitBlock); 205 Value * result = allZeroes(); 206 206 for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) { 207 207 Value * s = mvmd_extract(bitWidth, strm, i); … … 215 215 return std::pair<Value *, Value *>{bitCast(carry), bitCast(result)}; 216 216 } 217 } 218 219 } 217 else { 218 // The shift amount is greater than the total popcount. We will consume popcount 219 // bits from the shiftIn value only, and produce a carry out value of the selected bits. 220 // elements that we deal with. This simplifies some of the logic. 221 Value * carry = CreateBitCast(shiftIn, iBitBlock); 222 Value * result = allZeroes(); 223 Value * carryOut = CreateBitCast(allZeroes(), iBitBlock); 224 Value * generated = getSize(0); 225 for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) { 226 Value * s = mvmd_extract(bitWidth, strm, i); 227 Value * ix = mvmd_extract(bitWidth, index_strm, i); 228 Value * ix_popcnt = CreateCall(popcount_f, {ix}); 229 Value * bits = CreateCall(PEXT_f, {s, ix}); // All these bits are shifted out (appended to carry). 230 result = mvmd_insert(bitWidth, result, CreateCall(PDEP_f, {mvmd_extract(bitWidth, carry, 0), ix}), i); 231 carry = CreateLShr(carry, CreateZExt(ix_popcnt, iBitBlock)); // Remove the carry bits consumed, make room for new bits. 232 carryOut = CreateOr(carryOut, CreateShl(CreateZExt(bits, iBitBlock), generated)); 233 generated = CreateAdd(generated, ix_popcnt); 234 } 235 return std::pair<Value *, Value *>{bitCast(carryOut), bitCast(result)}; 236 } 237 } 238 239 } 
icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp
r5713 r5715 113 113 } 114 114 for (unsigned i = 0; i < mIndexedLongAdvanceTotal; i++) { 115 kernel>addScalar(b>getSizeTy(), " LongAdvancePosition" + std::to_string(i));115 kernel>addScalar(b>getSizeTy(), "IndexedAdvancePosition" + std::to_string(i)); 116 116 } 117 117 } … … 635 635 return result; 636 636 } else { 637 unsigned summaryFrame = mCurrentFrameIndex; 638 if (mIfDepth > 0) { 639 // Skip over summary frame to perform the long indexed advance. 640 mCurrentFrameIndex++; 641 } 642 Type * iBitBlock = b>getIntNTy(b>getBitBlockWidth()); 643 Constant * blockWidth = b>getSize(b>getBitBlockWidth()); 644 Constant * blockWidth_1 = b>getSize(b>getBitBlockWidth()  1); 645 Value * carryPosition = b>getScalarField("IndexedAdvancePosition" + std::to_string(mIndexedLongAdvanceIndex)); 646 Value * carryBlockEndPos = b>CreateAdd(carryPosition, blockWidth_1); 647 unsigned carry_blocks = nearest_pow2(20+ceil_udiv(shiftAmount, b>getBitBlockWidth())); 648 Constant * carryQueueBlocks = b>getSize(carry_blocks); 649 Value * carryBlock = b>CreateTrunc(b>CreateURem(b>CreateUDiv(carryPosition, blockWidth), carryQueueBlocks), b>getInt32Ty()); 650 Value * carryEndBlock = b>CreateTrunc(b>CreateURem(b>CreateUDiv(carryBlockEndPos, blockWidth), carryQueueBlocks), b>getInt32Ty()); 651 Value * lo_GEP = b>CreateGEP(mCurrentFrame, {b>getInt32(0), b>getInt32(mCurrentFrameIndex), carryBlock}); 652 Value * hi_GEP = b>CreateGEP(mCurrentFrame, {b>getInt32(0), b>getInt32(mCurrentFrameIndex), carryEndBlock}); 653 Value * c_lo = b>CreateBitCast(b>CreateBlockAlignedLoad(lo_GEP), iBitBlock); 654 Value * c_hi = b>CreateBitCast(b>CreateBlockAlignedLoad(hi_GEP), iBitBlock); 655 Value * lo_shift = b>CreateZExt(b>CreateURem(carryPosition, blockWidth), iBitBlock); 656 Value * hi_shift = b>CreateZExt(b>CreateSub(blockWidth_1, b>CreateURem(carryBlockEndPos, blockWidth)), iBitBlock); 657 Value * carryIn = b>CreateOr(b>CreateLShr(c_lo, lo_shift), b>CreateShl(c_hi, hi_shift)); 658 Value * carryOut, * result; 659 
std::tie(carryOut, result) = b>bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount); 660 carryOut = b>CreateBitCast(carryOut, iBitBlock); 661 Value * adv = b>mvmd_extract(sizeof(size_t) * 8, b>simd_popcount(b>getBitBlockWidth(), index_strm), 0); 662 b>setScalarField("IndexedAdvancePosition" + std::to_string(mIndexedLongAdvanceIndex), b>CreateAdd(carryPosition, adv)); 663 Value * carryOutPosition = b>CreateAdd(carryPosition, b>getSize(shiftAmount)); 664 Value * carryOutEndPos = b>CreateAdd(carryOutPosition, blockWidth_1); 665 carryBlock = b>CreateTrunc(b>CreateURem(b>CreateUDiv(carryOutPosition, blockWidth), carryQueueBlocks), b>getInt32Ty()); 666 carryEndBlock = b>CreateTrunc(b>CreateURem(b>CreateUDiv(carryOutEndPos, blockWidth), carryQueueBlocks), b>getInt32Ty()); 667 lo_GEP = b>CreateGEP(mCurrentFrame, {b>getInt32(0), b>getInt32(mCurrentFrameIndex), carryBlock}); 668 hi_GEP = b>CreateGEP(mCurrentFrame, {b>getInt32(0), b>getInt32(mCurrentFrameIndex), carryEndBlock}); 669 lo_shift = b>CreateZExt(b>CreateURem(carryOutPosition, blockWidth), iBitBlock); 670 hi_shift = b>CreateZExt(b>CreateSub(blockWidth_1, b>CreateURem(carryOutEndPos, blockWidth)), iBitBlock); 671 c_lo = b>CreateOr(b>CreateBitCast(b>CreateBlockAlignedLoad(lo_GEP), iBitBlock), b>CreateShl(carryOut, lo_shift)); 672 c_hi = b>CreateLShr(carryOut, hi_shift); 673 b>CreateBlockAlignedStore(b>CreateBitCast(c_lo, b>getBitBlockType()), lo_GEP); 674 b>CreateBlockAlignedStore(b>CreateBitCast(c_hi, b>getBitBlockType()), hi_GEP); 637 675 mIndexedLongAdvanceIndex++; 638 llvm::report_fatal_error("IndexedAdvance > BlockSize not yet supported."); 676 mCurrentFrameIndex++; 677 // Now handle the summary. 678 if (mIfDepth > 0) { 679 const auto summaryBlocks = ceil_udiv(shiftAmount, b>getBitBlockWidth()); 680 const auto summarySize = ceil_udiv(summaryBlocks, b>getBitBlockWidth()); 681 for (unsigned i = 0; i < summarySize; i++) { 682 // All ones summary for now. 
683 b>CreateBlockAlignedStore(b>allOnes(), b>CreateGEP(mCurrentFrame, {b>getInt32(0), b>getInt32(summaryFrame), b>getInt32(i)})); 684 } 685 } 686 return result; 639 687 } 640 688 } … … 940 988 const auto blockWidth = b>getBitBlockWidth(); 941 989 const auto blocks = ceil_udiv(amount, blockWidth); 942 type = ArrayType::get(blockTy, nearest_pow2(blocks + ( (loopDepth != 0) ? 1 : 0)));990 type = ArrayType::get(blockTy, nearest_pow2(blocks + (isa<IndexedAdvance>(stmt) ? 20:0) + ((loopDepth != 0) ? 1 : 0))); 943 991 if (LLVM_UNLIKELY(ifDepth > 0 && blocks != 1)) { 944 992 const auto summarySize = ceil_udiv(blocks, blockWidth);
Note: See TracChangeset
for help on using the changeset viewer.