Changeset 5857


Ignore:
Timestamp:
Feb 2, 2018, 3:49:44 PM (10 months ago)
Author:
xwa163
Message:
  1. Fix crash of pdep_kernel
  2. Add initial version of character_deposit
Location:
icGREP/icgrep-devel/icgrep
Files:
1 added
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5851 r5857  
    120120add_executable(core combine/core.cpp combine/regexGen.cpp combine/stringGen.cpp combine/propGen.cpp combine/icgrep-test/icgrep-test.cpp grep_interface.cpp grep_engine.cpp kernels/scanmatchgen.cpp kernels/u8u32_kernel.cpp kernels/delmask_kernel.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/linebreak_kernel.cpp kernels/streams_merge.cpp kernels/grep_kernel.cpp kernels/until_n.cpp)
    121121add_executable(character_deletion character_deletion.cpp kernels/cc_kernel.cpp)
     122add_executable(character_deposit character_deposit.cpp kernels/cc_kernel.cpp kernels/pdep_kernel.cpp)
    122123
    123124#Exclude CoRE from default build.
     
    132133target_link_libraries (core RegExpCompiler ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES})
    133134target_link_libraries (character_deletion PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
     135target_link_libraries (character_deposit PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    134136
    135137IF(ENABLE_MULTIPLEXING)
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.cpp

    r5836 r5857  
    1212namespace kernel {
    1313
    14 PDEPkernel::PDEPkernel(const std::unique_ptr<kernel::KernelBuilder> & kb, unsigned streamCount, unsigned swizzleFactor, unsigned PDEP_width)
    15 : MultiBlockKernel("PDEPdel",
     14PDEPkernel::PDEPkernel(const std::unique_ptr<kernel::KernelBuilder> & kb, unsigned streamCount, unsigned swizzleFactor, unsigned PDEP_width, std::string name)
     15: MultiBlockKernel(name + "",
    1616                  {Binding{kb->getStreamSetTy(), "PDEPmarkerStream", BoundedRate(0, 1)},
    1717                   Binding{kb->getStreamSetTy(streamCount), "sourceStreamSet", BoundedRate(0, 1)}},
    18                   {Binding{kb->getStreamSetTy(streamCount), "outputStreamSet"}},
     18                  {Binding{kb->getStreamSetTy(streamCount), "outputStreamSet", RateEqualTo("PDEPmarkerStream")}},
    1919                  {}, {}, {})
    2020, mSwizzleFactor(swizzleFactor)
     
    2727void PDEPkernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & kb, Value * const numOfStrides) {
    2828    BasicBlock * entry = kb->GetInsertBlock();
     29//    kb->CallPrintInt("--------------" + this->getName() + " doMultiBlock Start:", kb->getSize(0));
    2930    BasicBlock * checkLoopCond = kb->CreateBasicBlock("checkLoopCond");
    3031    BasicBlock * checkSourceCount = kb->CreateBasicBlock("checkSourceCount");
     
    3334
    3435    Value * itemsToDo = mAvailableItemCount[0];
     36
    3537    Value * sourceItemsAvail = mAvailableItemCount[1];
     38//    kb->CallPrintInt("itemsToDo:", itemsToDo);
     39//    kb->CallPrintInt("sourceItemsAvail:", sourceItemsAvail);
     40
    3641
    3742    Value * PDEPStrmPtr = kb->getInputStreamBlockPtr("PDEPmarkerStream", kb->getInt32(0)); // mStreamBufferPtr[0];
     
    4045    Value * outputStreamPtr = kb->getOutputStreamBlockPtr("outputStreamSet", kb->getInt32(0)); // mStreamBufferPtr[2];
    4146
    42     Constant * blockWidth = kb->getSize(kb->getBitBlockWidth());
    43     Value * blocksToDo = kb->CreateUDivCeil(itemsToDo, blockWidth); // 1 if this is the final block
     47//    kb->CallPrintInt("aaa", outputStreamPtr)
     48
     49    Constant * blockWidth = kb->getSize(kb->getBitBlockWidth()); // 256
     50    Value * blocksToDo = kb->CreateUDivCeil(itemsToDo, blockWidth); // 1 if this is the final block TODO the assumption is incorrect here
    4451    Value * processedSourceBits = kb->getProcessedItemCount("sourceStreamSet");
    4552    Value * base_src_blk_idx = kb->CreateUDiv(processedSourceBits, blockWidth);
     
    5966    PHINode * blocksToDoPhi = kb->CreatePHI(kb->getSizeTy(), 2);
    6067    PHINode * blockOffsetPhi = kb->CreatePHI(kb->getSizeTy(), 2); // block offset from the base block, e.g. 0, 1, 2, ...
    61     PHINode * updatedProcessedBitsPhi = kb->CreatePHI(kb->getSizeTy(), 2);
     68    PHINode * updatedProcessedSourceBitsPhi = kb->CreatePHI(kb->getSizeTy(), 2);
    6269    PHINode * sourceItemsRemaining = kb->CreatePHI(kb->getSizeTy(), 2);
    6370    blocksToDoPhi->addIncoming(blocksToDo, entry);
    6471    blockOffsetPhi->addIncoming(kb->getSize(0), entry);
    65     updatedProcessedBitsPhi->addIncoming(processedSourceBits, entry);
     72    updatedProcessedSourceBitsPhi->addIncoming(processedSourceBits, entry);
    6673    sourceItemsRemaining->addIncoming(sourceItemsAvail, entry);
    6774
     
    7178    kb->SetInsertPoint(checkSourceCount);
    7279    // Extract the values we will use in the main processing loop
    73     Value * updatedProcessedBits = updatedProcessedBitsPhi;
     80    Value * updatedProcessedSourceBits = updatedProcessedSourceBitsPhi;
    7481    Value * updatedSourceItems = sourceItemsRemaining;
    75     Value * PDEP_ms_blk = kb->CreateBlockAlignedLoad(kb->CreateGEP(PDEPStrmPtr, {blockOffsetPhi, kb->getInt32(0)}));
     82    Value * PDEP_ms_blk = kb->CreateBlockAlignedLoad(kb->CreateGEP(PDEPStrmPtr, blockOffsetPhi));
    7683
    7784    const auto PDEP_masks = get_PDEP_masks(kb, PDEP_ms_blk, mPDEPWidth);   
     
    8289        total_count = kb->CreateAdd(total_count, mask_popcounts[j]);
    8390    }
     91//    kb->CallPrintInt("total_count", total_count);
     92//    kb->CallPrintInt("sourceItemsRemaining", sourceItemsRemaining);
    8493    kb->CreateCondBr(kb->CreateICmpULE(total_count, sourceItemsRemaining), processBlock, terminate);
    8594    kb->SetInsertPoint(processBlock);
     
    8897    for (unsigned i = 0; i < mSwizzleFactor; i++) {
    8998        // Do block and swizzle index calculations, then combine the "current" and "next" swizzles
    90         Value * current_blk_idx = kb->CreateSub(kb->CreateUDiv(updatedProcessedBits, blockWidth), base_src_blk_idx); // blk index == stream set block index
    91         Value * current_swizzle_idx = kb->CreateUDiv(kb->CreateURem(updatedProcessedBits, blockWidth), pdepWidth);
    92         Value * ahead_pdep_width_less_1 = kb->CreateAdd(pdepWidth_1, updatedProcessedBits);
    93        
     99
     100        Value * current_blk_idx = kb->CreateSub(kb->CreateUDiv(updatedProcessedSourceBits, blockWidth), base_src_blk_idx); // blk index == stream set block index
     101        Value * current_swizzle_idx = kb->CreateUDiv(kb->CreateURem(updatedProcessedSourceBits, blockWidth), pdepWidth);
     102        Value * ahead_pdep_width_less_1 = kb->CreateAdd(pdepWidth_1, updatedProcessedSourceBits);
     103
    94104        Value * next_blk_idx = kb->CreateSub(kb->CreateUDiv(ahead_pdep_width_less_1, blockWidth), base_src_blk_idx);
    95105        Value * next_swizzle_idx = kb->CreateUDiv(kb->CreateURem(ahead_pdep_width_less_1, blockWidth), pdepWidth);
    96106
     107//        kb->CallPrintInt("current_blk_idx", current_blk_idx);
     108//        kb->CallPrintInt("current_swizzle_idx", current_swizzle_idx);
     109
    97110        // Load current and next BitBlocks/swizzles
    98         Value * current_swizzle_ptr = kb->CreateGEP(inputSwizzlesPtr, {current_blk_idx, current_swizzle_idx});
    99         Value * next_swizzle_ptr = kb->CreateGEP(inputSwizzlesPtr, {next_blk_idx, next_swizzle_idx});
     111        // TODO can not guarantee the two GEP is correct, need to check later
     112        Value * current_swizzle_ptr = kb->CreateGEP(inputSwizzlesPtr, kb->CreateAdd(kb->CreateMul(current_blk_idx, kb->getSize(mSwizzleFactor)), current_swizzle_idx));
     113        Value * next_swizzle_ptr = kb->CreateGEP(inputSwizzlesPtr, kb->CreateAdd(kb->CreateMul(next_blk_idx, kb->getSize(mSwizzleFactor)), next_swizzle_idx));
     114
    100115        Value * current_swizzle = kb->CreateBlockAlignedLoad(current_swizzle_ptr);//Constant::getNullValue(cast<PointerType>(current_swizzle_ptr->getType())->getElementType());
    101116        Value * next_swizzle = kb->CreateBlockAlignedLoad(next_swizzle_ptr);//Constant::getNullValue(cast<PointerType>(current_swizzle_ptr->getType())->getElementType());
     117//        kb->CallPrintInt("ptr", current_swizzle_ptr);
     118//        kb->CallPrintRegister("current_swizzle", current_swizzle);
    102119
    103120        // Combine the two swizzles to guarantee we'll have enough source bits for the PDEP operation
    104         Value * shift_amount = kb->CreateURem(updatedProcessedBits, pdepWidth);
     121        Value * shift_amount = kb->CreateURem(updatedProcessedSourceBits, pdepWidth);
    105122        Value * remaining_bits = kb->CreateLShr(current_swizzle, kb->simd_fill(mPDEPWidth, shift_amount)); // shift away bits that have already been used
     123
    106124        Value * borrowed_bits = kb->CreateShl(next_swizzle,
    107                                              kb->simd_fill(mPDEPWidth, kb->CreateSub(pdepWidth, shift_amount))); // shift next swizzle left by width of first swizzle
     125                                              kb->simd_fill(mPDEPWidth, kb->CreateSub(pdepWidth, shift_amount))); // shift next swizzle left by width of first swizzle
    108126        Value * combined = kb->CreateOr(remaining_bits, borrowed_bits); // combine current swizzle and next swizzle
    109127
     
    111129        Value * result_swizzle = Constant::getNullValue(segments->getType());
    112130        // Apply PDEP to each mPDEPWidth segment of the combined swizzle using the current PDEP mask
     131
     132
     133
     134
    113135        Value * PDEP_mask = PDEP_masks[i];
    114         for (unsigned j = 0; j < mSwizzleFactor; j++) { 
     136        for (unsigned j = 0; j < mSwizzleFactor; j++) {
    115137            Value * source_field = kb->CreateExtractElement(segments, j);
    116             Value * PDEP_field = kb->CreateCall(PDEP_func, {source_field, PDEP_mask}); 
     138            Value * PDEP_field = kb->CreateCall(PDEP_func, {source_field, PDEP_mask});
    117139            result_swizzle = kb->CreateInsertElement(result_swizzle, PDEP_field, j);
    118140        }
    119141
    120142        // Store the result
    121         kb->CreateBlockAlignedStore(result_swizzle, kb->CreateGEP(outputStreamPtr, {blockOffsetPhi, kb->getSize(i)}));
    122         updatedProcessedBits = kb->CreateAdd(updatedProcessedBits, mask_popcounts[i]);
     143        auto outputPos = kb->CreateGEP(outputStreamPtr, kb->CreateAdd(kb->CreateMul(blockOffsetPhi, kb->getSize(mSwizzleFactor)), kb->getSize(i)));
     144        kb->CreateBlockAlignedStore(result_swizzle, outputPos);
     145        updatedProcessedSourceBits = kb->CreateAdd(updatedProcessedSourceBits, mask_popcounts[i]);
    123146        updatedSourceItems = kb->CreateSub(updatedSourceItems, mask_popcounts[i]);
    124147    }
    125148
    126     updatedProcessedBitsPhi->addIncoming(updatedProcessedBits, processBlock);
     149    updatedProcessedSourceBitsPhi->addIncoming(updatedProcessedSourceBits, processBlock);
    127150    blocksToDoPhi->addIncoming(kb->CreateSub(blocksToDoPhi, kb->getSize(1)), processBlock);
    128151    blockOffsetPhi->addIncoming(kb->CreateAdd(blockOffsetPhi, kb->getSize(1)), processBlock);
     
    133156    Value * itemsDone = kb->CreateMul(blockOffsetPhi, blockWidth);
    134157    itemsDone = kb->CreateSelect(kb->CreateICmpULT(itemsToDo, itemsDone), itemsToDo, itemsDone);
    135     kb->setProcessedItemCount("PDEPmarkerStream", kb->CreateAdd(itemsDone, kb->getProcessedItemCount("PDEPmarkerStream")));   
    136     kb->setProcessedItemCount("sourceStreamSet", updatedProcessedBitsPhi);
     158    kb->setProcessedItemCount("PDEPmarkerStream", kb->CreateAdd(itemsDone, kb->getProcessedItemCount("PDEPmarkerStream")));
     159    kb->setProcessedItemCount("sourceStreamSet", updatedProcessedSourceBitsPhi);
    137160
     161
     162//    kb->CallPrintInt("itemsDone:", itemsDone);
     163//    kb->CallPrintInt("produced:", kb->getProducedItemCount("outputStreamSet"));
     164//    kb->CallPrintInt("--------------" + this->getName() + " doMultiBlock End:", kb->getSize(0));
    138165}
    139166
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.h

    r5836 r5857  
    88#include "kernel.h"
    99#include <llvm/IR/Value.h>
     10#include <string>
    1011namespace IDISA { class IDISA_Builder; }
    1112/*
     
    6869class PDEPkernel : public MultiBlockKernel {
    6970public:
    70     PDEPkernel(const std::unique_ptr<kernel::KernelBuilder> & kb, unsigned streamCount, unsigned swizzleFactor, unsigned PDEP_width = 64);
     71    PDEPkernel(const std::unique_ptr<kernel::KernelBuilder> & kb, unsigned streamCount, unsigned swizzleFactor, unsigned PDEP_width = 64, std::string name = "PDEPdel");
    7172    bool isCachable() const override { return true; }
    7273    bool hasSignature() const override { return false; }
Note: See TracChangeset for help on using the changeset viewer.