Changeset 5730


Ignore:
Timestamp:
Nov 10, 2017, 12:07:28 PM (18 months ago)
Author:
cameron
Message:

Generic indexed advance

Location:
icGREP/icgrep-devel/icgrep
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r5729 r5730  
    486486
    487487// full shift producing {shiftout, shifted}
    488 std::pair<Value *, Value *> IDISA_Builder::bitblock_indexed_advance(Value * a, Value * index, Value * shiftin, unsigned shift) {
    489     llvm::report_fatal_error("bitblock_indexed_advance unimplemented for this architecture");
    490 }
     488std::pair<Value *, Value *> IDISA_Builder::bitblock_indexed_advance(Value * strm, Value * index_strm, Value * shiftIn, unsigned shiftAmount) { // Generic indexed advance; returns {carry out, result deposited back under index_strm}.
     489    Value * popcount_f = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, getSizeTy()); // NOTE(review): popcount_f is never referenced again in this function.
     490    unsigned bitWidth = sizeof(size_t) * 8; // Field width in bits, taken from the host size_t.
     491    Type * iBitBlock = getIntNTy(getBitBlockWidth()); // Integer type as wide as one bit block.
     492    Value * shiftVal = getSize(shiftAmount); // shiftAmount as a size-typed value for the IR arithmetic below.
     493    Value * extracted_bits = simd_pext(bitWidth, strm, index_strm); // Per-field extract of strm under index_strm (presumably a parallel bit extract -- confirm against simd_pext).
     494    Value * ix_popcounts = simd_popcount(bitWidth, index_strm); // Per-field popcount of index_strm.
     495    // Three cases follow, selected when this code is generated: shiftAmount below one field width,
     496    // shiftAmount up to mBitBlockWidth, and shiftAmount beyond mBitBlockWidth.
     497    if (LLVM_LIKELY(shiftAmount < bitWidth)) {
     498        Value * carry = mvmd_extract(bitWidth, shiftIn, 0); // Incoming carry is read from field 0 of shiftIn.
     499        Value * result = allZeroes();
     500        for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     501            Value * ix_popcnt = mvmd_extract(bitWidth, ix_popcounts, i);
     502            Value * bits = mvmd_extract(bitWidth, extracted_bits, i);
     503            Value * adv = CreateOr(CreateShl(bits, shiftAmount), carry); // Shift this field's bits and OR in the pending carry.
     504            // We have two cases depending on whether the popcount of the index pack is < shiftAmount or not.
     505            Value * popcount_small = CreateICmpULT(ix_popcnt, shiftVal);
     506            Value * carry_if_popcount_small = // popcount < shiftAmount: unconsumed carry bits are kept, new bits placed above them.
     507                CreateOr(CreateShl(bits, CreateSub(shiftVal, ix_popcnt)),
     508                            CreateLShr(carry, ix_popcnt));
     509            Value * carry_if_popcount_large = CreateLShr(bits, CreateSub(ix_popcnt, shiftVal)); // popcount >= shiftAmount: the new carry comes from bits alone.
     510            carry = CreateSelect(popcount_small, carry_if_popcount_small, carry_if_popcount_large);
     511            result = mvmd_insert(bitWidth, result, adv, i);
     512        }
     513        Value * carryOut = mvmd_insert(bitWidth, allZeroes(), carry, 0); // Carry out is placed in field 0 of an otherwise zero block.
     514        return std::pair<Value *, Value *>{bitCast(carryOut), simd_pdep(bitWidth, result, index_strm)};
     515    }
     516    else if (shiftAmount <= mBitBlockWidth) {
     517        // The shift amount is never less than the popcount of the individual
     518        // elements that we deal with.   This simplifies some of the logic.
     519        Value * carry = CreateBitCast(shiftIn, iBitBlock); // Carry is handled as one block-wide integer in this case.
     520        Value * result = allZeroes();
     521        for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     522            Value * ix_popcnt = mvmd_extract(bitWidth, ix_popcounts, i);
     523            Value * bits = mvmd_extract(bitWidth, extracted_bits, i);  // All these bits are shifted out (appended to carry).
     524            result = mvmd_insert(bitWidth, result, mvmd_extract(bitWidth, carry, 0), i); // This field's output is field 0 of the current carry.
     525            carry = CreateLShr(carry, CreateZExt(ix_popcnt, iBitBlock)); // Remove the carry bits consumed, make room for new bits.
     526            carry = CreateOr(carry, CreateShl(CreateZExt(bits, iBitBlock), CreateZExt(CreateSub(shiftVal, ix_popcnt), iBitBlock))); // NOTE(review): with shiftAmount == mBitBlockWidth and ix_popcnt == 0 the shift count equals the iBitBlock width -- confirm this is well-defined.
     527        }
     528        return std::pair<Value *, Value *>{bitCast(carry), simd_pdep(bitWidth, result, index_strm)};
     529    }
     530    else {
     531        // The shift amount is greater than the total popcount.   We will consume popcount
     532        // bits from the shiftIn value only, and produce a carry out value of the selected bits.
     533        // The carry out is built from the extracted bits alone; no shiftIn bits are copied into it.
     534        Value * carry = CreateBitCast(shiftIn, iBitBlock);
     535        Value * result = allZeroes();
     536        Value * carryOut = CreateBitCast(allZeroes(), iBitBlock);
     537        Value * generated = getSize(0); // Running total of index popcounts over the fields processed so far.
     538        for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     539            Value * ix_popcnt = mvmd_extract(bitWidth, ix_popcounts, i);
     540            Value * bits = mvmd_extract(bitWidth, extracted_bits, i);  // All these bits are shifted out (appended to carryOut).
     541            result = mvmd_insert(bitWidth, result, mvmd_extract(bitWidth, carry, 0), i);
     542            carry = CreateLShr(carry, CreateZExt(ix_popcnt, iBitBlock)); // Remove the carry bits consumed.
     543            carryOut = CreateOr(carryOut, CreateShl(CreateZExt(bits, iBitBlock), CreateZExt(generated, iBitBlock))); // Place this field's bits at offset `generated` in carryOut.
     544            generated = CreateAdd(generated, ix_popcnt);
     545        }
     546        return std::pair<Value *, Value *>{bitCast(carryOut), simd_pdep(bitWidth, result, index_strm)};
     547    }
     548}
     549
    491550
    492551Value * IDISA_Builder::bitblock_mask_from(Value * pos) {
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5724 r5730  
    413413            return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
    414414        }
    415         else if (isUnicodeUnitLength(repeated) && AVX2_available()) {
     415        else if (isUnicodeUnitLength(repeated)) {
    416416            PabloAST * cc = markerVar(compile(repeated, pb));
    417417            PabloAST * cc_lb = consecutive_matches(cc, 1, lb, mFinal, pb);
     
    420420            return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
    421421        }
    422         else if (isTypeForLocal(repeated) && AVX2_available()) {
     422        else if (isTypeForLocal(repeated)) {
    423423            CC * firstSymSet = RE_Local::first(repeated);
    424424            std::map<CC *, CC*> followMap;
     
    491491            return makeMarker(MarkerPosition::FinalPostPositionUnit, bounded);
    492492        }
    493         else if (isUnicodeUnitLength(repeated) && AVX2_available()) {
     493        else if (isUnicodeUnitLength(repeated)) {
    494494            // For a regexp which represent a single Unicode codepoint, we can use the mFinal stream
    495495            // as an index stream for an indexed advance operation.
     
    500500            return makeMarker(MarkerPosition::FinalPostPositionUnit, bounded);
    501501        }
    502         else if (isTypeForLocal(repeated) && AVX2_available()) {
     502        else if (isTypeForLocal(repeated)) {
    503503            CC * firstSymSet = RE_Local::first(repeated);
    504504            std::map<CC *, CC*> followMap;
Note: See TracChangeset for help on using the changeset viewer.