- Timestamp:
- Oct 27, 2017, 9:28:02 AM (16 months ago)
- Location:
- icGREP/icgrep-devel/icgrep
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp
r5710 r5711 615 615 Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) { 616 616 const auto shiftAmount = advance->getAmount(); 617 if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) { 617 Value * popcount_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::ctpop, b->getSizeTy()); 618 Value * PEXT_f = nullptr; 619 Value * PDEP_f = nullptr; 620 unsigned bitWidth = sizeof(size_t) * 8; 621 if (bitWidth == 64) { 622 PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64); 623 PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64); 624 } 625 else if ((bitWidth == 32) && (shiftAmount < 32)) { 626 PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_32); 627 PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32); 628 } 629 else { 630 llvm::report_fatal_error("indexed_advance unsupported bit width"); 631 } 632 if (LLVM_LIKELY(shiftAmount < bitWidth)) { 618 633 Value * const carryIn = getNextCarryIn(b); 619 unsigned bitWidth = sizeof(size_t) * 8;620 Value * popcount_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::ctpop, b->getSizeTy());621 Value * PEXT_f = nullptr;622 Value * PDEP_f = nullptr;623 if (bitWidth == 64) {624 PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64);625 PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64);626 }627 else if ((bitWidth == 32) && (shiftAmount < 32)) {628 PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_32);629 PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32);630 }631 else {632 llvm::report_fatal_error("indexed_advance unsupported bit width");633 }634 634 Value * shiftVal = b->getSize(shiftAmount); 635 635 Value * carry = b->mvmd_extract(bitWidth, carryIn, 0); … … 652 652 setNextCarryOut(b, carryOut); 653 653 return result; 654 } else if (shiftAmount <= b->getBitBlockWidth()) { 655 // A single bitblock still holds all the shifted bits. In this case, we know 656 // that the shift amount is always greater than the popcount of the individual 657 // elements that we deal with. This simplifies some of the logic. 658 Type * iBitBlock = b->getIntNTy(b->getBitBlockWidth()); 659 Value * carryInPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex++), b->getInt32(0)}); 660 Value * carryIn = b->CreateBlockAlignedLoad(carryInPtr); 661 Value * shiftVal = b->getSize(shiftAmount); 662 Value * carry = b->CreateBitCast(carryIn, iBitBlock); 663 Value * result = b->allZeroes(); 664 for (unsigned i = 0; i < b->getBitBlockWidth()/bitWidth; i++) { 665 Value * s = b->mvmd_extract(bitWidth, strm, i); 666 Value * ix = b->mvmd_extract(bitWidth, index_strm, i); 667 Value * ix_popcnt = b->CreateCall(popcount_f, {ix}); 668 Value * bits = b->CreateCall(PEXT_f, {s, ix}); // All these bits are shifted out (appended to carry). 669 result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {b->mvmd_extract(bitWidth, carry, 0), ix}), i); 670 carry = b->CreateLShr(carry, b->CreateZExt(ix_popcnt, iBitBlock)); // Remove the carry bits consumed, make room for new bits. 671 carry = b->CreateOr(carry, b->CreateShl(b->CreateZExt(bits, iBitBlock), b->CreateZExt(b->CreateSub(shiftVal, ix_popcnt), iBitBlock))); 672 } 673 b->CreateBlockAlignedStore(b->CreateBitCast(carry, b->getBitBlockType()), carryInPtr); 674 return result; 654 675 } else { 655 llvm::report_fatal_error("IndexedAdvance > LONG_ADVANCE_BREAKPOINT not yet supported."); 676 mIndexedLongAdvanceIndex++; 677 llvm::report_fatal_error("IndexedAdvance > BlockSize not yet supported."); 656 678 } 657 679 } -
icGREP/icgrep-devel/icgrep/re/re_compiler.cpp
r5710 r5711 399 399 PabloAST * marker_fwd = pb.createAdvance(markerVar(marker), pos); 400 400 return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound")); 401 } else if (!mGraphemeBoundaryRule && isUnicodeUnitLength(repeated) && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition) && (lb < sizeof(size_t) * 8) && AVX2_available()) {401 } else if (!mGraphemeBoundaryRule && isUnicodeUnitLength(repeated) && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition) && (lb <= codegen::BlockSize) && AVX2_available()) { 402 402 PabloAST * cc = markerVar(compile(repeated, pb)); 403 403 PabloAST * cc_lb = consecutive_matches(cc, 1, lb, mFinal, pb); … … 445 445 PabloAST * bounded = pb.createMatchStar(cursor, pb.createOr(masked, nonFinal), "bounded"); 446 446 return makeMarker(MarkerPosition::FinalPostPositionUnit, bounded); 447 } else if (!mGraphemeBoundaryRule && isUnicodeUnitLength(repeated) && ub > 1 && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition)&& (ub < sizeof(size_t) * 8) && AVX2_available()) {447 } else if (!mGraphemeBoundaryRule && isUnicodeUnitLength(repeated) && ub > 1 && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition)&& (ub <= codegen::BlockSize) && AVX2_available()) { 448 448 // log2 upper bound for fixed length (=1) class 449 449 // Create a mask of positions reachable within ub from current marker.
Note: See TracChangeset
for help on using the changeset viewer.