Ignore:
Timestamp:
Feb 27, 2018, 11:57:51 AM (19 months ago)
Author:
nmedfort
Message:

Temporary workaround for Xiangyu

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/attributes.h

    r5873 r5882  
    9191        // uses of the streams and zero out any non-regions from the data.
    9292
     93        AlwaysConsume,
     94
     95        // Always consume the input (i.e., use the lower bound, rather than the upper bound,
     96        // to determine whether there is enough data to execute a stride.)
     97
    9398        /** OUTPUT STREAM ATTRIBUTES **/
    9499
     
    213218    };
    214219
     220    KindId getKind() const {
     221        return mKind;
     222    }
     223
    215224    bool isAdd() const {
    216225        return mKind == KindId::Add;
     
    246255
    247256protected:
    248 
    249     KindId getKind() const {
    250         return mKind;
    251     }
    252257
    253258    friend struct AttributeSet;
     
    255260    friend Attribute Add1();
    256261    friend Attribute Principal();
     262    friend Attribute AlwaysConsume();
    257263    friend Attribute RoundUpTo(const unsigned);
    258264    friend Attribute LookAhead(const unsigned);
     
    324330}
    325331
     332inline Attribute AlwaysConsume() {
     333    return Attribute(Attribute::KindId::AlwaysConsume, 0);
     334}
     335
    326336inline Attribute Principal() {
    327337    return Attribute(Attribute::KindId::Principal, 0);
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5873 r5882  
    708708            }
    709709            Value * arraySize = b->getInt64(ceiling(ub));
    710 
    711             auto name = input.getName();
    712710            if (input.isSwizzled()) {
    713711                // TODO workaround to use larger temporary buffer size for swizzled buffer
     
    796794        #ifdef DEBUG_LOG
    797795        b->CallPrintInt(getName() + "_" + name + "_avail", mAvailableItemCount[i]);
    798         b->CallPrintInt(getName() + "_" + name + "_processed", processed);
     796        b->CallPrintInt(getName() + "_" + name + "_processed0", processed);
    799797        #endif
    800798        mInitialProcessedItemCount[i] = processed;
     
    815813        mAvailableItemCount[i] = unprocessed;
    816814        linearlyAccessible[i] = accessible;
    817         inputStrideSize[i] = getStrideSize(b, input.getRate());
    818         Value * const accessibleStrides = b->CreateUDiv(accessible, inputStrideSize[i]);
     815
     816        const auto ub = getUpperBound(input.getRate());
     817        inputStrideSize[i] = b->getSize(ceiling(ub * mStride));
     818        Value * accessibleStrides = b->CreateUDiv(accessible, inputStrideSize[i]);
     819
     820        if (LLVM_UNLIKELY(input.hasAttribute(Attribute::KindId::AlwaysConsume))) {
     821            const auto lb = getLowerBound(input.getRate());
     822            Value * const lowerbound = b->getSize(ceiling(lb * mStride));
     823            Value * const lowerboundStrides = b->CreateZExt(b->CreateICmpUGE(unprocessed, lowerbound), b->getSizeTy());
     824            Value * const tryLowerbound = b->CreateICmpULT(accessibleStrides, lowerboundStrides);
     825            inputStrideSize[i] = b->CreateSelect(tryLowerbound, lowerbound, inputStrideSize[i]);
     826            accessibleStrides = b->CreateSelect(tryLowerbound, lowerboundStrides, accessibleStrides);
     827        }
     828
    819829        numOfStrides = b->CreateUMin(numOfStrides, accessibleStrides);
    820830    }
     
    10091019        Value * const produced = b->getProducedItemCount(name);
    10101020        #ifdef DEBUG_LOG
    1011         b->CallPrintInt(getName() + "_" + name + "_produced", produced);
     1021        b->CallPrintInt(getName() + "_" + name + "_produced0", produced);
    10121022        #endif
    10131023        Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(produced, LOG_2_BLOCK_WIDTH));
     
    10191029        if (outputStrideSize[i]) {
    10201030            linearlyWritable[i] = b->getLinearlyWritableItems(name, produced);
     1031            #ifdef DEBUG_LOG
     1032            b->CallPrintInt(getName() + "_" + name + "_writable", linearlyWritable[i]);
     1033            #endif
    10211034            Value * writableStrides = b->CreateUDiv(linearlyWritable[i], outputStrideSize[i]);
    10221035            numOfStrides = b->CreateUMin(numOfStrides, writableStrides);
     
    10601073        }
    10611074        mAvailableItemCount[i] = linearlyAccessible[i];
     1075        #ifdef DEBUG_LOG
     1076        b->CallPrintInt(getName() + "_" + input.getName() + "_accessible", linearlyAccessible[i]);
     1077        #endif
    10621078    }
    10631079
     
    10721088            b->setProcessedItemCount(input.getName(), ic);
    10731089        }
     1090        #ifdef DEBUG_LOG
     1091        b->CallPrintInt(getName() + "_" + input.getName() + "_processed", b->getProcessedItemCount(input.getName()));
     1092        #endif
    10741093    }
    10751094
     
    10821101            b->setProducedItemCount(output.getName(), ic);
    10831102        }
     1103        #ifdef DEBUG_LOG
     1104        b->CallPrintInt(getName() + "_" + output.getName() + "_produced", b->getProducedItemCount(output.getName()));
     1105        #endif
    10841106    }
    10851107
     
    12871309    unsigned last = inputSetCount;
    12881310
     1311    bool hasFixedRateInput = false; // <- temporary workaround
    12891312    for (unsigned i = 0; i < inputSetCount; ++i) {
    12901313        const ProcessingRate & pr = mStreamSetInputs[i].getRate();
    12911314        if (pr.isFixed()) {
    12921315            rateLCM = lcm(rateLCM, pr.getRate());
     1316            hasFixedRateInput = true;
    12931317            if (mStreamSetInputs[i].isPrincipal()) {
    12941318                assert ("A kernel cannot have multiple principle input streams" && (first == 0 && last == inputSetCount));
     
    13571381        const ProcessingRate & pr = output.getRate();
    13581382        Value * produced = nullptr;
    1359         if (pr.isFixed() && output.nonDeferred()) {
     1383        if (hasFixedRateInput && pr.isFixed() && output.nonDeferred()) {
    13601384            assert (baseInitialProcessedItemCount && scaledInverseOfAvailItemCount);
    13611385            const auto rate = pr.getRate();
     
    13691393            Value * const ic = CreateUDivCeil(b, scaledInverseOfAvailItemCount, rateLCM / pr.getRate());
    13701394            produced = b->CreateAdd(p, ic);
     1395            #ifdef DEBUG_LOG
     1396            b->CallPrintInt(getName() + "_" + name + "_produced'", produced);
     1397            #endif           
    13711398        } else { // check if we have an attribute; if so, get the current produced count and adjust it
    13721399            bool noAttributes = true;
     
    13891416            }
    13901417        }
     1418        #ifdef DEBUG_LOG
     1419        b->CallPrintInt(getName() + "_" + name + "_produced\"", produced);
     1420        #endif
    13911421        b->setProducedItemCount(name, produced);
    13921422    }
     
    16521682    }
    16531683
     1684    #ifdef DEBUG_LOG
     1685    b->CallPrintInt(getName() + "_remainingItems", remainingItems);
     1686    #endif
    16541687    generateFinalBlockMethod(b, remainingItems); // may be implemented by the BlockOrientedKernel subtype
    16551688
Note: See TracChangeset for help on using the changeset viewer.