Changeset 6071


Ignore:
Timestamp:
Jun 8, 2018, 8:51:15 AM (4 months ago)
Author:
cameron
Message:

u32u8.cpp initial check-in

Location:
icGREP/icgrep-devel/icgrep
Files:
1 added
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r6064 r6071  
    121121add_executable(icgrep icgrep.cpp util/file_select.cpp grep_interface.cpp)
    122122add_executable(u8u16 u8u16.cpp)
     123add_executable(u32u8 u32u8.cpp kernels/pdep_kernel.cpp)
    123124add_executable(base64 base64.cpp kernels/radix64.cpp)
    124125add_executable(wc wc.cpp util/file_select.cpp)
     
    138139target_link_libraries (icgrep GrepEngine UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    139140target_link_libraries (u8u16 PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
     141target_link_libraries (u32u8 PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    140142target_link_libraries (base64 PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    141143target_link_libraries (wc GrepEngine UCDlib PabloADT RegExpCompiler  CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r6063 r6071  
    236236    }
    237237    Value * shuffle = simd_sub(fw, ConstantVector::get({Idxs, field_count}), simd_fill(fw, shift));
    238     Value * rslt = mvmd_shuffle2(fw, fwCast(fw, a), fwCast(fw, b), shuffle);
     238    Value * rslt = mvmd_shuffle2(fw, fwCast(fw, b), fwCast(fw, a), shuffle);
    239239    return rslt;
    240240}
  • icGREP/icgrep-devel/icgrep/character_deposit.cpp

    r6055 r6071  
    132132    // Deposit
    133133    StreamSetBuffer * depositedBits = pxDriver.addBuffer<DynamicBuffer>(iBuilder, iBuilder->getStreamSetTy(8), bufferBlocks, 1);
    134     // Kernel * pdepK = pxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
    135     //pxDriver.makeKernelCall(pdepK, {depositMarker, compressedBits}, {depositedBits});
     134    Kernel * pdepK = pxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
     135    pxDriver.makeKernelCall(pdepK, {depositMarker, compressedBits}, {depositedBits});
    136136   
    137     StreamDepositCompiler depositCompiler(pxDriver, iBuilder->getStreamSetTy(8), bufferBlocks);
    138     depositCompiler.makeCall(depositMarker, compressedBits, depositedBits);
     137    //StreamDepositCompiler depositCompiler(pxDriver, 8, 0, 8, bufferBlocks);
     138    //depositCompiler.makeCall(depositMarker, compressedBits, depositedBits);
    139139
    140140
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.cpp

    r6055 r6071  
    11/*
    2  *  Copyright (c) 2017 International Characters.
     2 *  Copyright (c) 2018 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 */
     
    142142}
    143143   
    144 StreamExpandKernel::StreamExpandKernel(const std::unique_ptr<kernel::KernelBuilder> & kb, const unsigned fieldWidth, const unsigned streamCount)
    145 : MultiBlockKernel("streamExpand" + std::to_string(fieldWidth) + "_" + std::to_string(streamCount),
     144StreamExpandKernel::StreamExpandKernel(const std::unique_ptr<kernel::KernelBuilder> & kb, const unsigned fieldWidth, unsigned sourceStreamCount, unsigned selectedStreamBase, unsigned selectedStreamCount)
     145: MultiBlockKernel("streamExpand" + std::to_string(fieldWidth) + "_" + std::to_string(sourceStreamCount) + "_" + std::to_string(selectedStreamBase) + "_" + std::to_string(selectedStreamCount),
    146146                   {Binding{kb->getStreamSetTy(), "marker", FixedRate(), Principal()},
    147                        Binding{kb->getStreamSetTy(streamCount), "source", PopcountOf("marker")}},
    148                    {Binding{kb->getStreamSetTy(streamCount), "output", FixedRate()}},
     147                       Binding{kb->getStreamSetTy(sourceStreamCount), "source", PopcountOf("marker")}},
     148                   {Binding{kb->getStreamSetTy(selectedStreamCount), "output", FixedRate()}},
    149149                   {}, {}, {})
    150150, mFieldWidth(fieldWidth)
    151 , mStreamCount(streamCount) {
    152     for (unsigned i = 0; i < streamCount; i++) {
    153         addScalar(kb->getBitBlockType(), "pendingSourceBlock_" + std::to_string(i));
     151, mSelectedStreamBase(selectedStreamBase)
     152, mSelectedStreamCount(selectedStreamCount) {
     153    for (unsigned i = 0; i < mSelectedStreamCount; i++) {
     154        addScalar(kb->getBitBlockType(), "pendingSourceBlock_" + std::to_string(mSelectedStreamBase + i));
    154155    }
    155156}
     
    175176    Value * processedSourceItems = b->getProcessedItemCount("source");
    176177    Value * sourceOffset = b->CreateURem(processedSourceItems, bwConst);
    177    
    178     std::vector<Value *> pendingData(mStreamCount);
    179     for (unsigned i = 0; i < mStreamCount; i++) {
    180         pendingData[i] = b->getScalarField("pendingSourceBlock_" + std::to_string(i));
     178    std::vector<Value *> pendingData(mSelectedStreamCount);
     179    for (unsigned i = 0; i < mSelectedStreamCount; i++) {
     180        pendingData[i] = b->getScalarField("pendingSourceBlock_" + std::to_string(mSelectedStreamBase + i));
    181181    }
    182182   
     
    186186    PHINode * blockNoPhi = b->CreatePHI(b->getSizeTy(), 2);
    187187    PHINode * pendingItemsPhi = b->CreatePHI(b->getSizeTy(), 2);
    188     PHINode * pendingDataPhi[mStreamCount];
     188    PHINode * pendingDataPhi[mSelectedStreamCount];
    189189    blockNoPhi->addIncoming(ZERO, entry);
    190190    pendingItemsPhi->addIncoming(sourceOffset, entry);
    191     for (unsigned i = 0; i < mStreamCount; i++) {
     191    for (unsigned i = 0; i < mSelectedStreamCount; i++) {
    192192        pendingDataPhi[i] = b->CreatePHI(b->getBitBlockType(), 2);
    193193        pendingDataPhi[i]->addIncoming(pendingData[i], entry);
     
    205205    // B = b->simd_sll(fw, b->mvmd_dsll(fw, source, pending, field_offset_hi), shift_fwd);
    206206    // all_source_bits = simd_or(A, B);
    207     Value * pendingOffset = b->CreateURem(pendingBlockEnd, bwConst);
     207    Value * pendingOffset = b->CreateURem(pendingItemsPhi, bwConst);
    208208    Value * field_offset_lo =  b->CreateUDiv(pendingOffset, fwConst);
    209209    Value * bit_offset = b->simd_fill(fw, b->CreateURem(pendingOffset, fwConst));
    210    
    211210    // Carefully avoid a shift by the full fieldwidth (which gives a poison value).
    212211    // field_offset_lo + 1 unless the bit_offset is 0, in which case it is just field_offset_lo.
     
    214213    // fw - bit_offset, unless bit_offset is 0, in which case, the shift_fwd is 0.
    215214    Value * shift_fwd = b->CreateURem(b->CreateSub(fwSplat, bit_offset), fwSplat);
    216    
     215
    217216    // Once all source bits are assembled, they need to be distributed to the
    218217    // output fields in accord with the popcounts of the deposit mask fields.
     
    234233    Value * source_shift_lo = b->CreateAnd(partialSum, fw_sub1Splat);  // parallel URem
    235234    Value * source_shift_hi = b->CreateAnd(b->CreateSub(fwSplat, source_shift_lo), fw_sub1Splat);
    236    
     235
    237236    // Now load and process source streams.
    238     for (unsigned i = 0; i < mStreamCount; i++) {
    239         Value * source = b->loadInputStreamBlock("source", b->getInt32(i), srcBlockNo);
     237    for (unsigned i = 0; i < mSelectedStreamCount; i++) {
     238        Value * source = b->loadInputStreamBlock("source", b->getInt32(mSelectedStreamBase + i), srcBlockNo);
    240239        Value * A = b->simd_srlv(fw, b->mvmd_dsll(fw, source, pendingDataPhi[i], field_offset_lo), bit_offset);
    241240        Value * B = b->simd_sllv(fw, b->mvmd_dsll(fw, source, pendingDataPhi[i], field_offset_hi), shift_fwd);
    242241        Value * full_source_block = b->simd_or(A, B);
    243        
    244242        Value * C = b->simd_srlv(fw, b->mvmd_shuffle(fw, full_source_block, source_field_lo), source_shift_lo);
    245243        Value * D = b->simd_sllv(fw, b->mvmd_shuffle(fw, full_source_block, source_field_hi), source_shift_hi);
     
    262260    b->SetInsertPoint(expansionDone);
    263261    // Update kernel state.
    264     for (unsigned i = 0; i < mStreamCount; i++) {
    265         b->setScalarField("pendingSourceBlock_" + std::to_string(i), b->bitCast(pendingDataPhi[i]));
     262    for (unsigned i = 0; i < mSelectedStreamCount; i++) {
     263        b->setScalarField("pendingSourceBlock_" + std::to_string(mSelectedStreamBase + i), b->bitCast(pendingDataPhi[i]));
    266264    }
    267265}
     
    269267FieldDepositKernel::FieldDepositKernel(const std::unique_ptr<kernel::KernelBuilder> & kb, const unsigned fieldWidth, const unsigned streamCount)
    270268: MultiBlockKernel("FieldDeposit" + std::to_string(fieldWidth) + "_" + std::to_string(streamCount),
    271                    {Binding{kb->getStreamSetTy(), "depositMask"}, Binding{kb->getStreamSetTy(streamCount), "inputStreamSet"}},
     269                   {Binding{kb->getStreamSetTy(1), "depositMask"}, Binding{kb->getStreamSetTy(streamCount), "inputStreamSet"}},
    272270                   {Binding{kb->getStreamSetTy(streamCount), "outputStreamSet"}},
    273271                   {}, {}, {})
     
    298296}
    299297
    300 PDEPFieldDepositKernel::PDEPFieldDepositKernel(const std::unique_ptr<kernel::KernelBuilder> & kb, const unsigned fieldWidth, const unsigned streamCount)
    301 : MultiBlockKernel("PDEPFieldDeposit" + std::to_string(fieldWidth) + "_" + std::to_string(streamCount),
     298PDEPFieldDepositKernel::PDEPFieldDepositKernel(const std::unique_ptr<kernel::KernelBuilder> & kb, const unsigned fieldWidth, const unsigned streamCount, std::string suffix)
     299: MultiBlockKernel("PDEPFieldDeposit" + std::to_string(fieldWidth) + "_" + std::to_string(streamCount) + suffix,
    302300                   {Binding{kb->getStreamSetTy(), "depositMask"}, Binding{kb->getStreamSetTy(streamCount), "inputStreamSet"}},
    303301                   {Binding{kb->getStreamSetTy(streamCount), "outputStreamSet"}},
     
    305303, mPDEPWidth(fieldWidth)
    306304, mStreamCount(streamCount) {
    307     if ((fieldWidth != 32) && (fieldWidth != 64)) llvm::report_fatal_error("Unsupported PDEP width for PDEPFieldCompressKernel");
     305    if ((fieldWidth != 32) && (fieldWidth != 64)) llvm::report_fatal_error("Unsupported PDEP width for PDEPFieldDepositKernel");
    308306}
    309307
     
    327325    blockOffsetPhi->addIncoming(ZERO, entry);
    328326    std::vector<Value *> mask(fieldsPerBlock);
    329     Value * extractionMaskPtr = kb->getInputStreamBlockPtr("depositMask", ZERO, blockOffsetPhi);
    330     extractionMaskPtr = kb->CreatePointerCast(extractionMaskPtr, fieldPtrTy);
     327//  When operating on fields individually, we can use vector load/store with
     328//  extract/insert element operations, or we can use individual field loads
     329//  and stores.   Individual field operations require fewer total operations,
     330//  but more memory instructions.   It may be that vector load/extract is better,
     331//  while field store is better.   Vector insert then store creates long dependence
     332//  chains.
     333//
     334#define PREFER_FIELD_STORES_OVER_INSERT_ELEMENT
     335#ifdef PREFER_FIELD_LOADS_OVER_EXTRACT_ELEMENT
     336    Value * depositMaskPtr = kb->getInputStreamBlockPtr("depositMask", ZERO, blockOffsetPhi);
     337    depositMaskPtr = kb->CreatePointerCast(depositMaskPtr, fieldPtrTy);
    331338    for (unsigned i = 0; i < fieldsPerBlock; i++) {
    332         mask[i] = kb->CreateLoad(kb->CreateGEP(extractionMaskPtr, kb->getInt32(i)));
    333     }
     339        mask[i] = kb->CreateLoad(kb->CreateGEP(depositMaskPtr, kb->getInt32(i)));
     340    }
     341#else
     342    Value * depositMask = kb->fwCast(mPDEPWidth, kb->loadInputStreamBlock("depositMask", ZERO, blockOffsetPhi));
     343    for (unsigned i = 0; i < fieldsPerBlock; i++) {
     344        mask[i] = kb->CreateExtractElement(depositMask, kb->getInt32(i));
     345    }
     346#endif
    334347    for (unsigned j = 0; j < mStreamCount; ++j) {
     348#ifdef PREFER_FIELD_LOADS_OVER_EXTRACT_ELEMENT
    335349        Value * inputPtr = kb->getInputStreamBlockPtr("inputStreamSet", kb->getInt32(j), blockOffsetPhi);
    336350        inputPtr = kb->CreatePointerCast(inputPtr, fieldPtrTy);
     351#else
     352        Value * inputStrm = kb->fwCast(mPDEPWidth, kb->loadInputStreamBlock("inputStreamSet", kb->getInt32(j), blockOffsetPhi));
     353#endif
     354#ifdef PREFER_FIELD_STORES_OVER_INSERT_ELEMENT
    337355        Value * outputPtr = kb->getOutputStreamBlockPtr("outputStreamSet", kb->getInt32(j), blockOffsetPhi);
    338356        outputPtr = kb->CreatePointerCast(outputPtr, fieldPtrTy);
     357#else
     358        Value * outputStrm = kb->fwCast(mPDEPWidth, kb->allZeroes());
     359#endif
    339360        for (unsigned i = 0; i < fieldsPerBlock; i++) {
     361#ifdef PREFER_FIELD_LOADS_OVER_EXTRACT_ELEMENT
    340362            Value * field = kb->CreateLoad(kb->CreateGEP(inputPtr, kb->getInt32(i)));
     363#else
     364            Value * field = kb->CreateExtractElement(inputStrm, kb->getInt32(i));
     365#endif
    341366            Value * compressed = kb->CreateCall(PDEP_func, {field, mask[i]});
     367#ifdef PREFER_FIELD_STORES_OVER_INSERT_ELEMENT
    342368            kb->CreateStore(compressed, kb->CreateGEP(outputPtr, kb->getInt32(i)));
    343369        }
     370#else
     371            outputStrm = kb->CreateInsertElement(outputStrm, compressed, kb->getInt32(i));
     372        }
     373        kb->storeOutputStreamBlock("outputStreamSet", kb->getInt32(j), blockOffsetPhi, outputStrm);
     374#endif
    344375    }
    345376    Value * nextBlk = kb->CreateAdd(blockOffsetPhi, kb->getSize(1));
     
    354385        llvm::report_fatal_error("StreamDepositCompiler needs a non-zero bufferBlocks parameter (for now).");
    355386    }
    356     auto & iBuilder = mDriver.getBuilder();
    357     unsigned N = IDISA::getNumOfStreams(ssType);
    358     if (IDISA::getStreamFieldWidth(ssType) != 1) {
    359         llvm::report_fatal_error("StreamDepositCompiler only compresses bit streams (for now)");
    360     }
    361     parabix::StreamSetBuffer * expandedStreams = mDriver.addBuffer<parabix::StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(N), mBufferBlocks);
    362     Kernel * streamK = mDriver.addKernelInstance<StreamExpandKernel>(iBuilder, mFieldWidth, N);
     387    auto & b = mDriver.getBuilder();
     388    unsigned N = mSelectedStreamCount;
     389    parabix::StreamSetBuffer * expandedStreams = mDriver.addBuffer<parabix::StaticBuffer>(b, b->getStreamSetTy(N), mBufferBlocks);
     390    Kernel * streamK = mDriver.addKernelInstance<StreamExpandKernel>(b, mFieldWidth, mSourceStreamCount, mSelectedStreamBase, N);
    363391    mDriver.makeKernelCall(streamK, {depositMask, inputs}, {expandedStreams});
    364392
    365393    Kernel * depositK = nullptr;
    366394    if (AVX2_available()) {
    367         depositK = mDriver.addKernelInstance<PDEPFieldDepositKernel>(iBuilder, mFieldWidth, N);
     395        depositK = mDriver.addKernelInstance<PDEPFieldDepositKernel>(b, mFieldWidth, N, std::to_string(mSelectedStreamBase));
    368396    } else {
    369         depositK = mDriver.addKernelInstance<FieldDepositKernel>(iBuilder, mFieldWidth, N);
     397        depositK = mDriver.addKernelInstance<FieldDepositKernel>(b, mFieldWidth, N);
    370398    }
    371399    mDriver.makeKernelCall(depositK, {depositMask, expandedStreams}, {outputs});
     
    373401
    374402}
     403
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.h

    r6045 r6071  
    5353class StreamExpandKernel final : public MultiBlockKernel {
    5454public:
    55     StreamExpandKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw, unsigned streamCount);
     55    StreamExpandKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw, unsigned sourceStreamCount, unsigned selectedStreamBase, unsigned selectedStreamCount);
    5656    bool isCachable() const override { return true; }
    5757    bool hasSignature() const override { return false; }
     
    6060private:
    6161    const unsigned mFieldWidth;
    62     const unsigned mStreamCount;
     62    const unsigned mSelectedStreamBase;
     63    const unsigned mSelectedStreamCount;
    6364};
    6465
     
    7778class PDEPFieldDepositKernel final : public MultiBlockKernel {
    7879public:
    79     PDEPFieldDepositKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw, unsigned streamCount);
     80    PDEPFieldDepositKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw, unsigned streamCount, std::string suffix);
    8081    bool isCachable() const override { return true; }
    8182    bool hasSignature() const override { return false; }
     
    8990class StreamDepositCompiler {
    9091public:
    91     StreamDepositCompiler(Driver & driver, llvm::Type * streamSetType, unsigned bufferBlocks = 0) :
    92     mDriver(driver), ssType(streamSetType), mBufferBlocks(bufferBlocks), mFieldWidth(64) {}
     92    StreamDepositCompiler(Driver & driver, unsigned sourceStreamCount, unsigned selectedStreamBase, unsigned selectedStreamCount, unsigned bufferBlocks = 0) :
     93        mDriver(driver),
     94        mSourceStreamCount(sourceStreamCount),
     95        mSelectedStreamBase(selectedStreamBase),
     96        mSelectedStreamCount(selectedStreamCount),
     97        mBufferBlocks(bufferBlocks), mFieldWidth(64) {}
    9398    void setDepositFieldWidth(unsigned fw) {mFieldWidth = fw;}
    9499    void makeCall(parabix::StreamSetBuffer * mask, parabix::StreamSetBuffer * inputs, parabix::StreamSetBuffer * outputs);
    95100private:
    96101    Driver & mDriver;
    97     llvm::Type * ssType;
     102    const unsigned mSourceStreamCount;
     103    const unsigned mSelectedStreamBase;
     104    const unsigned mSelectedStreamCount;
    98105    unsigned mBufferBlocks;
    99106    unsigned mFieldWidth;
Note: See TracChangeset for help on using the changeset viewer.