Changeset 5832 for icGREP


Ignore:
Timestamp:
Jan 15, 2018, 3:42:27 PM (15 months ago)
Author:
nmedfort
Message:

Bug fix for UntilN

Location:
icGREP/icgrep-devel/icgrep
Files:
7 edited

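Legend:

Unmodified (both the old and the new line number are shown, run together, e.g. `170170` or `4041`)
Added (only the new line number is shown, in the second number column)
Removed (only the old line number is shown, in the first number column)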
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r5828 r5832  
    170170   
    171171std::pair<Value *, Value *> IDISA_AVX2_Builder::bitblock_indexed_advance(Value * strm, Value * index_strm, Value * shiftIn, unsigned shiftAmount) {
    172     Value * popcount_f = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, getSizeTy());
     172    Value * const popcount = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, getSizeTy());
    173173    Value * PEXT_f = nullptr;
    174174    Value * PDEP_f = nullptr;
    175     unsigned bitWidth = sizeof(size_t) * 8;
     175    const unsigned bitWidth = getSizeTy()->getBitWidth();
    176176    if (bitWidth == 64) {
    177177        PEXT_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pext_64);
    178178        PDEP_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pdep_64);
    179     }
    180     else if ((bitWidth == 32)  && (shiftAmount < 32)) {
     179    } else if ((bitWidth == 32)  && (shiftAmount < 32)) {
    181180        PEXT_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pext_32);
    182181        PDEP_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pdep_32);
    183     }
    184     else {
     182    } else {
    185183        llvm::report_fatal_error("indexed_advance unsupported bit width");
    186184    }
    187185    Type * iBitBlock = getIntNTy(getBitBlockWidth());
    188186    Value * shiftVal = getSize(shiftAmount);
     187    const auto n = getBitBlockWidth() / bitWidth;
     188    VectorType * const vecTy = VectorType::get(getSizeTy(), n);
    189189    if (LLVM_LIKELY(shiftAmount < bitWidth)) {
    190190        Value * carry = mvmd_extract(bitWidth, shiftIn, 0);
    191         Value * result = allZeroes();
    192         for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     191        Value * result = UndefValue::get(vecTy);
     192        for (unsigned i = 0; i < n; i++) {
    193193            Value * s = mvmd_extract(bitWidth, strm, i);
    194194            Value * ix = mvmd_extract(bitWidth, index_strm, i);
    195             Value * ix_popcnt = CreateCall(popcount_f, {ix});
     195            Value * ix_popcnt = CreateCall(popcount, {ix});
    196196            Value * bits = CreateCall(PEXT_f, {s, ix});
    197197            Value * adv = CreateOr(CreateShl(bits, shiftAmount), carry);
    198198            // We have two cases depending on whether the popcount of the index pack is < shiftAmount or not.
    199199            Value * popcount_small = CreateICmpULT(ix_popcnt, shiftVal);
    200             Value * carry_if_popcount_small = 
     200            Value * carry_if_popcount_small =
    201201                CreateOr(CreateShl(bits, CreateSub(shiftVal, ix_popcnt)),
    202202                            CreateLShr(carry, ix_popcnt));
     
    212212        // elements that we deal with.   This simplifies some of the logic.
    213213        Value * carry = CreateBitCast(shiftIn, iBitBlock);
    214         Value * result = allZeroes();
    215         for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     214        Value * result = UndefValue::get(vecTy);
     215        for (unsigned i = 0; i < n; i++) {
    216216            Value * s = mvmd_extract(bitWidth, strm, i);
    217217            Value * ix = mvmd_extract(bitWidth, index_strm, i);
    218             Value * ix_popcnt = CreateCall(popcount_f, {ix});
     218            Value * ix_popcnt = CreateCall(popcount, {ix});
    219219            Value * bits = CreateCall(PEXT_f, {s, ix});  // All these bits are shifted out (appended to carry).
    220220            result = mvmd_insert(bitWidth, result, CreateCall(PDEP_f, {mvmd_extract(bitWidth, carry, 0), ix}), i);
     
    229229        // elements that we deal with.   This simplifies some of the logic.
    230230        Value * carry = CreateBitCast(shiftIn, iBitBlock);
    231         Value * result = allZeroes();
    232         Value * carryOut = CreateBitCast(allZeroes(), iBitBlock);
     231        Value * result = UndefValue::get(vecTy);
     232        Value * carryOut = ConstantInt::getNullValue(iBitBlock);
    233233        Value * generated = getSize(0);
    234         for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     234        for (unsigned i = 0; i < n; i++) {
    235235            Value * s = mvmd_extract(bitWidth, strm, i);
    236236            Value * ix = mvmd_extract(bitWidth, index_strm, i);
    237             Value * ix_popcnt = CreateCall(popcount_f, {ix});
     237            Value * ix_popcnt = CreateCall(popcount, {ix});
    238238            Value * bits = CreateCall(PEXT_f, {s, ix});  // All these bits are shifted out (appended to carry).
    239239            result = mvmd_insert(bitWidth, result, CreateCall(PDEP_f, {mvmd_extract(bitWidth, carry, 0), ix}), i);
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r5830 r5832  
    476476// full shift producing {shiftout, shifted}
    477477std::pair<Value *, Value *> IDISA_Builder::bitblock_indexed_advance(Value * strm, Value * index_strm, Value * shiftIn, unsigned shiftAmount) {
    478     unsigned bitWidth = sizeof(size_t) * 8;
    479     Type * iBitBlock = getIntNTy(getBitBlockWidth());
    480     Value * shiftVal = getSize(shiftAmount);
     478    const unsigned bitWidth = getSizeTy()->getBitWidth();
     479    Type * const iBitBlock = getIntNTy(getBitBlockWidth());
     480    Value * const shiftVal = getSize(shiftAmount);
    481481    Value * extracted_bits = simd_pext(bitWidth, strm, index_strm);
    482482    Value * ix_popcounts = simd_popcount(bitWidth, index_strm);
    483 
    484    
     483    const auto n = getBitBlockWidth() / bitWidth;
     484    VectorType * const vecTy = VectorType::get(getSizeTy(), n);
    485485    if (LLVM_LIKELY(shiftAmount < bitWidth)) {
    486486        Value * carry = mvmd_extract(bitWidth, shiftIn, 0);
    487         Value * result = allZeroes();
    488         for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     487        Value * result = UndefValue::get(vecTy);
     488        for (unsigned i = 0; i < n; i++) {
    489489            Value * ix_popcnt = mvmd_extract(bitWidth, ix_popcounts, i);
    490490            Value * bits = mvmd_extract(bitWidth, extracted_bits, i);
     
    492492            // We have two cases depending on whether the popcount of the index pack is < shiftAmount or not.
    493493            Value * popcount_small = CreateICmpULT(ix_popcnt, shiftVal);
    494             Value * carry_if_popcount_small = 
     494            Value * carry_if_popcount_small =
    495495                CreateOr(CreateShl(bits, CreateSub(shiftVal, ix_popcnt)),
    496496                            CreateLShr(carry, ix_popcnt));
     
    506506        // elements that we deal with.   This simplifies some of the logic.
    507507        Value * carry = CreateBitCast(shiftIn, iBitBlock);
    508         Value * result = allZeroes();
    509         for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     508        Value * result = UndefValue::get(vecTy);
     509        for (unsigned i = 0; i < n; i++) {
    510510            Value * ix_popcnt = mvmd_extract(bitWidth, ix_popcounts, i);
    511511            Value * bits = mvmd_extract(bitWidth, extracted_bits, i);  // All these bits are shifted out (appended to carry).
     
    521521        // elements that we deal with.   This simplifies some of the logic.
    522522        Value * carry = CreateBitCast(shiftIn, iBitBlock);
    523         Value * result = allZeroes();
    524         Value * carryOut = CreateBitCast(allZeroes(), iBitBlock);
     523        Value * result = UndefValue::get(vecTy);
     524        Value * carryOut = ConstantInt::getNullValue(iBitBlock);
    525525        Value * generated = getSize(0);
    526         for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     526        for (unsigned i = 0; i < n; i++) {
    527527            Value * ix_popcnt = mvmd_extract(bitWidth, ix_popcounts, i);
    528528            Value * bits = mvmd_extract(bitWidth, extracted_bits, i);  // All these bits are shifted out (appended to carry).
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r5828 r5832  
    106106    llvm::Value * simd_popcount(unsigned fw, llvm::Value * a) {
    107107        if (LLVM_UNLIKELY(fw < 8)) {
     108            assert ("field width is less than 8" && false);
    108109            llvm::report_fatal_error("Unsupported field width: popcount " + std::to_string(fw));
    109110        }
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.cpp

    r5464 r5832  
    6666}
    6767
    68 Value * IDISA_SSE_Builder::hsimd_signmask(unsigned fw, Value * a) {
     68Value * IDISA_SSE_Builder::hsimd_signmask(const unsigned fw, Value * a) {
    6969    // SSE special cases using Intrinsic::x86_sse_movmsk_ps (fw=32 only)
    7070    if (fw == 32) {
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5831 r5832  
    142142            Value * const startPtr = iBuilder->getRawInputPointer("InputStream", matchRecordStart);
    143143            Value * const endPtr = iBuilder->getRawInputPointer("InputStream", matchRecordEnd);
    144             iBuilder->CreateCall(dispatcher, {accumulator, matchRecordNum, startPtr, endPtr});
     144            const auto matchRecNumArg = ++dispatcher->getArgumentList().begin();
     145            Value * const matchRecNum = iBuilder->CreateZExtOrTrunc(matchRecordNum, matchRecNumArg->getType());
     146            iBuilder->CreateCall(dispatcher, {accumulator, matchRecNum, startPtr, endPtr});
    145147            Value * remaining_matches = iBuilder->CreateResetLowestBit(phiMatchWord);
    146148            phiMatchWord->addIncoming(remaining_matches, loop_final_block);
  • icGREP/icgrep-devel/icgrep/kernels/until_n.cpp

    r5831 r5832  
    3737*/
    3838
    39     const unsigned packSize = b->getSizeTy()->getBitWidth();
     39    IntegerType * const sizeTy = b->getSizeTy();
     40    const unsigned packSize = sizeTy->getBitWidth();
    4041    Constant * const ZERO = b->getSize(0);
    4142    Constant * const ONE = b->getSize(1);
     
    4344    Constant * const PACK_SIZE = b->getSize(packSize);
    4445    Constant * const PACKS_PER_BLOCK = b->getSize(packsPerBlock);
    45     Value * const ZEROES = b->allZeroes();
    46     Type * packTy = b->getIntNTy(packSize);
     46    VectorType * const vTy = VectorType::get(sizeTy, packsPerBlock);
     47    Value * const ZEROES = Constant::getNullValue(vTy);
    4748
    4849    BasicBlock * const entry = b->GetInsertBlock();
    4950    BasicBlock * const strideLoop = b->CreateBasicBlock("strideLoop");
    5051
     52    Value * const allAvailableItems = b->getAvailableItemCount("bits");
     53
    5154    b->CreateBr(strideLoop);
    5255    b->SetInsertPoint(strideLoop);
    53     PHINode * const strideIndex = b->CreatePHI(b->getSizeTy(), 2);
     56    PHINode * const strideIndex = b->CreatePHI(sizeTy, 2);
    5457    strideIndex->addIncoming(ZERO, entry);
    5558
     
    6467        b->CreateBlockAlignedStore(inputValue, outputPtr);
    6568        Value * markers = b->CreateNot(b->simd_eq(packSize, inputValue, ZEROES));
    66         Value * blockMask = b->CreateZExtOrTrunc(b->hsimd_signmask(packSize, markers), packTy);
     69        Value * blockMask = b->CreateZExtOrTrunc(b->hsimd_signmask(packSize, markers), sizeTy);
    6770        if (i) {
    6871            blockMask = b->CreateShl(blockMask, i * packsPerBlock);
     
    9093    groupMarkers->addIncoming(groupMask, processGroups);
    9194
    92     Value * const groupIndex = b->CreateZExtOrTrunc(b->CreateCountForwardZeroes(groupMarkers), b->getSizeTy());
     95    Value * const groupIndex = b->CreateZExtOrTrunc(b->CreateCountForwardZeroes(groupMarkers), sizeTy);
    9396    Value * const blockIndex = b->CreateNUWAdd(baseOffset, b->CreateUDiv(groupIndex, PACKS_PER_BLOCK));
    9497    Value * const packOffset = b->CreateURem(groupIndex, PACKS_PER_BLOCK);
    9598    Value * const groupPtr = b->getInputStreamBlockPtr("bits", ZERO, blockIndex);
    9699    Value * const groupValue = b->CreateBlockAlignedLoad(groupPtr);
    97     Value * const packBits = b->CreateExtractElement(groupValue, packOffset);
    98 
     100    Value * const packBits = b->CreateExtractElement(b->CreateBitCast(groupValue, vTy), packOffset);
    99101    //Type * packPtrTy = packTy->getPointerTo();
    100102    //Value * const packPtr = b->CreateGEP(b->CreatePointerCast(groupPtr, packPtrTy), packOffset);
    101103    //Value * const packBits = b->CreateLoad(packPtr);
    102     Value * const packCount = b->CreateZExtOrTrunc(b->CreatePopcount(packBits), b->getSizeTy());
     104    Value * const packCount = b->CreateZExtOrTrunc(b->CreatePopcount(packBits), sizeTy);
    103105    Value * const observedUpTo = b->CreateNUWAdd(observed, packCount);
    104106
     
    126128
    127129    b->SetInsertPoint(findNthBit);
    128     PHINode * const remainingPositions = b->CreatePHI(bitsToFind->getType(), 2);
    129     remainingPositions->addIncoming(bitsToFind, seenNOrMore);
     130    PHINode * const remainingBitsToFind = b->CreatePHI(bitsToFind->getType(), 2);
     131    remainingBitsToFind->addIncoming(bitsToFind, seenNOrMore);
    130132    PHINode * const remainingBits = b->CreatePHI(packBits->getType(), 2);
    131133    remainingBits->addIncoming(packBits, seenNOrMore);
    132     Value * const nextRemainingPositions = b->CreateNUWSub(remainingPositions, ONE);
    133     remainingPositions->addIncoming(nextRemainingPositions, findNthBit);
    134134    Value * const nextRemainingBits = b->CreateResetLowestBit(remainingBits);
    135135    remainingBits->addIncoming(nextRemainingBits, findNthBit);
    136 
    137     b->CreateLikelyCondBr(b->CreateIsNull(nextRemainingPositions), foundNthBit, findNthBit);
     136    Value * const nextRemainingBitsToFind = b->CreateNUWSub(remainingBitsToFind, ONE);
     137    remainingBitsToFind->addIncoming(nextRemainingBitsToFind, findNthBit);
     138    b->CreateLikelyCondBr(b->CreateIsNull(nextRemainingBitsToFind), foundNthBit, findNthBit);
    138139
    139140    // If we've found the n-th bit, end the segment after clearing the markers
     
    141142    Value * const inputPtr = b->getInputStreamBlockPtr("bits", ZERO, blockIndex);
    142143    Value * const inputValue = b->CreateBlockAlignedLoad(inputPtr);
    143     Value * const packPosition = b->CreateZExtOrTrunc(b->CreateCountForwardZeroes(remainingBits), b->getSizeTy());
     144    Value * const packPosition = b->CreateZExtOrTrunc(b->CreateCountForwardZeroes(remainingBits), sizeTy);
    144145    Value * const basePosition = b->CreateNUWMul(packOffset, PACK_SIZE);
    145146    Value * const blockOffset = b->CreateNUWAdd(b->CreateOr(basePosition, packPosition), ONE);
     
    160161    b->CreateLikelyCondBr(b->CreateICmpEQ(nextStrideIndex, numOfStrides), segmentDone, strideLoop);
    161162
    162     Constant * const FULL_STRIDE = b->getSize(packSize * packSize);
    163 
    164163    b->SetInsertPoint(segmentDone);
    165     PHINode * const produced = b->CreatePHI(b->getSizeTy(), 2);
     164    PHINode * const produced = b->CreatePHI(sizeTy, 2);
    166165    produced->addIncoming(positionOfNthItem, foundNthBit);
    167     produced->addIncoming(FULL_STRIDE, nextStride);
    168 
     166    produced->addIncoming(allAvailableItems, nextStride);
    169167    Value * producedCount = b->getProducedItemCount("uptoN");
    170     producedCount = b->CreateNUWAdd(producedCount, b->CreateNUWMul(FULL_STRIDE, strideIndex));
    171168    producedCount = b->CreateNUWAdd(producedCount, produced);
    172169    b->setProducedItemCount("uptoN", producedCount);
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r5831 r5832  
    499499            Value * EOFbit = b->getScalarField("EOFbit");
    500500            Value * EOFmask = b->getScalarField("EOFmask");
    501             Value * const to_count = b->simd_and(b->simd_or(b->simd_not(EOFmask), EOFbit), compileExpression(b, c->getExpr()));
    502             const unsigned counterSize = b->getSizeTy()->getBitWidth();
     501            Value * const to_count = b->simd_and(b->simd_or(b->simd_not(EOFmask), EOFbit), compileExpression(b, c->getExpr()));           
    503502            const auto f = mAccumulator.find(c);
    504503            if (LLVM_UNLIKELY(f == mAccumulator.end())) {
    505504                report_fatal_error("Unknown accumulator: " + c->getName().str());
    506505            }
    507             Value * ptr = b->getScalarFieldPtr(f->second);
     506            Value * const ptr = b->getScalarFieldPtr(f->second);
    508507            const auto alignment = getPointerElementAlignment(ptr);
    509             Value * countSoFar = b->CreateAlignedLoad(ptr, alignment, c->getName() + "_accumulator");
    510             auto fields = (b->getBitBlockWidth() / counterSize);
    511             Value * fieldCounts = b->simd_popcount(counterSize, to_count);
     508            Value * const countSoFar = b->CreateAlignedLoad(ptr, alignment, c->getName() + "_accumulator");
     509            const auto fieldWidth = b->getSizeTy()->getBitWidth();
     510            auto fields = (b->getBitBlockWidth() / fieldWidth);
     511            Value * fieldCounts = b->simd_popcount(fieldWidth, to_count);
    512512            while (fields > 1) {
    513                 fields = fields/2;
    514                 fieldCounts = b->CreateAdd(fieldCounts, b->mvmd_srli(counterSize, fieldCounts, fields));
    515             }
    516             value = b->CreateAdd(b->mvmd_extract(counterSize, fieldCounts, 0), countSoFar, "countSoFar");
     513                fields /= 2;
     514                fieldCounts = b->CreateAdd(fieldCounts, b->mvmd_srli(fieldWidth, fieldCounts, fields));
     515            }
     516            value = b->CreateAdd(b->mvmd_extract(fieldWidth, fieldCounts, 0), countSoFar, "countSoFar");
    517517            b->CreateAlignedStore(value, ptr, alignment);
    518518        } else if (const Lookahead * l = dyn_cast<Lookahead>(stmt)) {
Note: See TracChangeset for help on using the changeset viewer.