Changeset 5000


Ignore:
Timestamp:
Apr 3, 2016, 4:31:22 PM (20 months ago)
Author:
nmedfort
Message:

Redesigned buffer system to allow the pipeline to control selection of the current input and output streams; DoBlock functions containing lookahead now take multiple input stream arguments. Selection and passing occur automatically. Some work on Symbol Table.

Location:
icGREP/icgrep-devel/icgrep
Files:
1 added
12 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r4995 r5000  
    5757SET(PABLO_SRC pablo/pabloAST.cpp pablo/ps_if.cpp pablo/ps_while.cpp pablo/function.cpp pablo/codegenstate.cpp pablo/builder.cpp pablo/symbol_generator.cpp pablo/printer_pablos.cpp pablo/pablo_toolchain.cpp)
    5858SET(PABLO_SRC ${PABLO_SRC} pablo/pablo_compiler.cpp pablo/carry_manager.cpp pablo/carry_data.cpp IDISA/idisa_builder.cpp IDISA/idisa_avx_builder.cpp IDISA/idisa_i64_builder.cpp IDISA/idisa_sse_builder.cpp IDISA/idisa_target.cpp)
    59 SET(PABLO_SRC ${PABLO_SRC} kernels/s2p_kernel.cpp kernels/scanmatchgen.cpp kernels/kernel.cpp kernels/pipeline.cpp)
     59SET(PABLO_SRC ${PABLO_SRC} kernels/s2p_kernel.cpp kernels/kernel.cpp kernels/instance.cpp)
    6060SET(PABLO_SRC ${PABLO_SRC} pablo/analysis/pabloverifier.cpp)
    6161SET(PABLO_SRC ${PABLO_SRC} pablo/optimizers/pablo_simplifier.cpp pablo/optimizers/codemotionpass.cpp)
     
    6767add_library(PabloADT ${PABLO_SRC})
    6868add_library(RegExpADT re/re_re.cpp re/re_cc.cpp re/re_rep.cpp re/re_diff.cpp re/re_intersect.cpp re/printer_re.cpp)
    69 add_library(RegExpCompiler re/re_parser.cpp re/parsefailure.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp)
     69add_library(RegExpCompiler re/re_parser.cpp re/parsefailure.cpp grep_engine.cpp kernels/pipeline.cpp kernels/scanmatchgen.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp)
    7070add_library(CCADT cc/cc_compiler.cpp utf8_encoder.cpp UCD/CaseFolding_txt.cpp)
    7171add_library(UCDlib UCD/unicode_set.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp UCD/UnicodeNameData.cpp)
     
    108108ENDIF()
    109109
    110 add_executable(icgrep icgrep.cpp toolchain.cpp grep_engine.cpp object_cache.cpp ${PRECOMPILED_FILES})
     110add_executable(icgrep icgrep.cpp toolchain.cpp grep_engine.cpp object_cache.cpp kernels/pipeline.cpp kernels/scanmatchgen.cpp ${PRECOMPILED_FILES})
    111111add_executable(casefold casefold.cpp kernels/p2s_kernel.cpp kernels/stdout_kernel.cpp kernels/casefold_pipeline.cpp)
    112112add_executable(symtbl symboltable.cpp kernels/symboltablepipeline.cpp kernels/stdout_kernel.cpp toolchain.cpp grep_engine.cpp object_cache.cpp ${PRECOMPILED_FILES})
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r4995 r5000  
    914914kernels/lane_s2p_kernel.cpp
    915915kernels/lane_s2p_kernel.h
     916lane_icgrep.cpp
     917lane_grep_engine.h
     918lane_grep_engine.cpp
     919kernels/instance.cpp
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r4990 r5000  
    2121                                       "These options control the regular expression source.");
    2222
    23 // static cl::OptionCategory bGrepOutputOptions("Output Options",
    24 //                                       "These options control the output.");
    25 
    2623static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<regex> <input file ...>"), cl::OneOrMore);
    2724
    28 // static cl::opt<bool> CountOnly("c", cl::desc("Count and display the matching lines per file only."), cl::cat(bGrepOutputOptions));
    29 // static cl::alias CountOnlyLong("count", cl::desc("Alias for -c"), cl::aliasopt(CountOnly));
    30 // static cl::opt<bool> NormalizeLineBreaks("normalize-line-breaks", cl::desc("Normalize line breaks to std::endl."), cl::init(false),  cl::cat(bGrepOutputOptions));
    31 
    32 // static cl::opt<bool> ShowFileNames("H", cl::desc("Show the file name with each matching line."), cl::cat(bGrepOutputOptions));
    33 // static cl::alias ShowFileNamesLong("with-filename", cl::desc("Alias for -H"), cl::aliasopt(ShowFileNames));
    34 
    3525static cl::opt<bool> CaseInsensitive("i", cl::desc("Ignore case distinctions in the pattern and the file."), cl::cat(aRegexSourceOptions));
    36 // static cl::opt<bool> ShowLineNumbers("n", cl::desc("Show the line number with each matching line."), cl::cat(bGrepOutputOptions));
    37 // static cl::alias ShowLineNumbersLong("line-number", cl::desc("Alias for -n"), cl::aliasopt(ShowLineNumbers));
    3826
    3927static cl::list<std::string> regexVector("e", cl::desc("Regular expression"), cl::ZeroOrMore, cl::cat(aRegexSourceOptions));
     
    4230
    4331static cl::opt<int> Threads("t", cl::desc("Total number of threads."), cl::init(1));
    44 
    45 
    4632
    4733static unsigned firstInputFile = 1;  // Normal case when first positional arg is a regex.
  • icGREP/icgrep-devel/icgrep/kernels/casefold_pipeline.cpp

    r4991 r5000  
    3838
    3939void PipelineBuilder::CreateKernels(PabloFunction * function){
    40     mS2PKernel = new KernelBuilder("s2p", mMod, iBuilder, SegmentSize);
    41     mP2SKernel = new KernelBuilder("p2s", mMod, iBuilder, SegmentSize);
    42     mCaseFoldKernel = new KernelBuilder("casefold", mMod, iBuilder, SegmentSize);
    43     mStdOutKernel = new KernelBuilder("stddout", mMod, iBuilder, SegmentSize);
     40    mS2PKernel = new KernelBuilder(iBuilder, "s2p", SegmentSize);
     41    mP2SKernel = new KernelBuilder(iBuilder, "p2s", SegmentSize);
     42    mCaseFoldKernel = new KernelBuilder(iBuilder, "casefold", SegmentSize);
     43    mStdOutKernel = new KernelBuilder(iBuilder, "stddout", SegmentSize);
    4444
    4545    generateS2PKernel(mMod, iBuilder, mS2PKernel);
     
    6666Function *  PipelineBuilder::ExecuteKernels() {
    6767    Type * const int64ty = iBuilder->getInt64Ty();
    68     Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    6968    Type * const inputType = PointerType::get(ArrayType::get(StructType::get(mMod->getContext(), std::vector<Type *>({ArrayType::get(mBitBlockType, 8)})), 1), 0);
    7069   
     
    9796
    9897    Instance * s2pInstance = mS2PKernel->instantiate(inputStream);
    99     Instance * caseFoldInstance = mCaseFoldKernel->instantiate(s2pInstance->getOutputStreamSet());
    100     Instance * p2sInstance = mP2SKernel->instantiate(caseFoldInstance->getOutputStreamSet());
    101     Instance * stdOutInstance = mStdOutKernel->instantiate(p2sInstance->getOutputStreamSet());
     98    Instance * caseFoldInstance = mCaseFoldKernel->instantiate(s2pInstance->getResultSet());
     99    Instance * p2sInstance = mP2SKernel->instantiate(caseFoldInstance->getResultSet());
     100    Instance * stdOutInstance = mStdOutKernel->instantiate(p2sInstance->getResultSet());
    102101
    103102    stdOutInstance->setInternalState("RemainingBytes", bufferSize);  // The total number of bytes to be sent to stdout.
  • icGREP/icgrep-devel/icgrep/kernels/instance.h

    r4992 r5000  
    55#include <kernels/kernel.h>
    66#include <util/slab_allocator.h>
     7#include <llvm/Support/raw_ostream.h>
    78
    89namespace kernel {
     
    1011class Instance {
    1112    friend class KernelBuilder;
     13    using InputStreamMap = KernelBuilder::InputStreamMap;
    1214    using Allocator = SlabAllocator<Instance>;
    1315public:
    1416
    15     void CreateDoBlockCall() {
    16         mDefinition->CreateDoBlockCall(mMemory);
    17     }
     17    llvm::Value * CreateDoBlockCall();
    1818
    1919    llvm::Value * getInternalState(const std::string & name) {
    20         return mDefinition->getInternalState(mMemory, name);
     20        return mDefinition->getInternalState(mKernelState, name);
    2121    }
    2222
    2323    void setInternalState(const std::string & name, llvm::Value * value) {
    24         mDefinition->setInternalState(mMemory, name, value);
     24        mDefinition->setInternalState(mKernelState, name, value);
    2525    }
    2626
    2727    llvm::Value * getInternalState(const unsigned index) {
    28         return mDefinition->getInternalState(mMemory, index);
     28        return getInternalState(iBuilder->getInt32(index));
    2929    }
    3030
    3131    llvm::Value * getInternalState(llvm::Value * const index) {
    32         return mDefinition->getInternalState(mMemory, index);
     32        return mDefinition->getInternalState(mKernelState, index);
    3333    }
    3434
    3535    void setInternalState(const unsigned index, llvm::Value * value) {
    36         mDefinition->setInternalState(mMemory, index, value);
     36        setInternalState(iBuilder->getInt32(index), value);
    3737    }
    3838
    3939    void setInternalState(llvm::Value * const index, llvm::Value * value) {
    40         mDefinition->setInternalState(mMemory, index, value);
     40        mDefinition->setInternalState(mKernelState, index, value);
     41    }
     42
     43    inline llvm::Value * getInputStreamSet(const unsigned streamOffset = 0) {
     44        return getStreamSet(mDefinition->getInputStreamType(), mInputStreamSet, streamOffset, mInputBufferSize);
    4145    }
    4246
    4347    llvm::Value * getInputStream(const unsigned index, const unsigned streamOffset = 0) {
    44         return mDefinition->getInputStream(mMemory, index, streamOffset);
     48        return getInputStream(iBuilder->getInt32(index), streamOffset);
    4549    }
    4650
    4751    llvm::Value * getInputStream(llvm::Value * const index, const unsigned streamOffset = 0) {
    48         return mDefinition->getInputStream(mMemory, index, streamOffset);
     52        return mDefinition->getInputStream(getInputStreamSet(streamOffset), index);
    4953    }
    5054
     
    5458
    5559    llvm::Value * getInputScalar(const unsigned index) {
    56         return mDefinition->getInputScalar(mMemory, index);
     60        return getInputScalar(iBuilder->getInt32(index));
    5761    }
    5862
    5963    llvm::Value * getInputScalar(llvm::Value * const index) {
    60         return mDefinition->getInputScalar(mMemory, index);
     64        return mDefinition->getInputScalar(mInputScalarSet, index);
     65    }
     66
     67    llvm::Type * getInputScalarType() const {
     68        return mDefinition->getInputScalarType();
     69    }
     70
     71    inline llvm::Value * getOutputStreamSet(const unsigned streamOffset = 0) {
     72        return getStreamSet(mDefinition->getOutputStreamType(), mOutputStreamSet, streamOffset, mOutputBufferSize);
    6173    }
    6274
    6375    llvm::Value * getOutputStream(const unsigned index, const unsigned streamOffset = 0) {
    64         return mDefinition->getOutputStream(mMemory, index, streamOffset);
     76        return getOutputStream(iBuilder->getInt32(index), streamOffset);
    6577    }
    6678
    6779    llvm::Value * getOutputStream(llvm::Value * const index, const unsigned streamOffset = 0) {
    68         return mDefinition->getOutputStream(mMemory, index, streamOffset);
     80        return mDefinition->getOutputStream(getOutputStreamSet(streamOffset), index);
    6981    }
    7082
    71     void clearOutputStreamSet(const unsigned streamOffset = 0) {
    72         mDefinition->clearOutputStreamSet(mMemory, streamOffset);
    73     }
    74 
    75     inline std::pair<llvm::Value *, unsigned> getOutputStreamSet() const {
    76         return std::make_pair(mMemory, mDefinition->getBufferSize());
    77     }
     83    void clearOutputStreamSet();
    7884
    7985    llvm::Value * getOutputScalar(const unsigned index) {
    80         return mDefinition->getOutputScalar(mMemory, index);
     86        return getOutputScalar(iBuilder->getInt32(index));
    8187    }
    8288
    8389    llvm::Value * getOutputScalar(llvm::Value * const index) {
    84         return mDefinition->getOutputScalar(mMemory, index);
     90        return mDefinition->getOutputScalar(mOutputScalarSet, index);
    8591    }
    8692
    8793    llvm::Value * getBlockNo() {
    88         return mDefinition->getBlockNo(mMemory);
     94        return mDefinition->getBlockNo(mKernelState);
    8995    }
    9096
    91     unsigned getBufferSize() const {
    92         return mDefinition->getBufferSize();
     97    inline std::pair<llvm::Value *, unsigned> getResultSet() const {
     98        return std::make_pair(mOutputStreamSet, mOutputBufferSize);
    9399    }
    94100
     
    103109protected:
    104110
    105     Instance(KernelBuilder * definition, llvm::AllocaInst * space)
     111    Instance(KernelBuilder * const definition, llvm::Value * const kernelState,
     112             llvm::Value * const inputScalarSet, llvm::Value * const inputStreamSet, const unsigned inputBufferSize,
     113             llvm::Value * const outputScalarSet, llvm::Value * const outputStreamSet, const unsigned outputBufferSize)
    106114    : mDefinition(definition)
    107     , mMemory(space) {
     115    , iBuilder(definition->iBuilder)
     116    , mKernelState(kernelState)
     117    , mInputScalarSet(inputScalarSet)
     118    , mInputStreamSet(inputStreamSet)
     119    , mInputBufferSize(inputBufferSize)
     120    , mOutputScalarSet(outputScalarSet)
     121    , mOutputStreamSet(outputStreamSet)
     122    , mOutputBufferSize(outputBufferSize) {
    108123
    109124    }
    110125
     126    llvm::Value * getStreamSet(Type * const type, llvm::Value * const base, const unsigned index, const unsigned bufferSize);
     127
    111128private:
    112     KernelBuilder * const mDefinition;
    113     llvm::AllocaInst * const mMemory;
    114     static Allocator mAllocator;
     129    KernelBuilder * const                           mDefinition;
     130    IDISA::IDISA_Builder * const                    iBuilder;
     131    llvm::Value * const                             mKernelState;
     132    llvm::Value * const                             mInputScalarSet;
     133    llvm::Value * const                             mInputStreamSet;
     134    const unsigned                                  mInputBufferSize;
     135    llvm::Value * const                             mOutputScalarSet;
     136    llvm::Value * const                             mOutputStreamSet;
     137    const unsigned                                  mOutputBufferSize;
     138    static Allocator                                mAllocator;
    115139};
    116140
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r4995 r5000  
    1515using namespace pablo;
    1616
    17 inline bool isPowerOfTwo(const unsigned x) {
    18     return (x != 0) && (x & (x - 1)) == 0;
    19 }
    20 
    2117namespace kernel {
    2218
    23 enum : unsigned {
    24     INTERNAL_STATE = 0
    25     , INPUT_STREAM_SET = 1
    26     , OUTPUT_STREAM_SET = 2
    27     , OUTPUT_SCALAR_SET = 3
    28 };
    29 
    3019// sets name & sets internal state to the kernel superclass state
    31 KernelBuilder::KernelBuilder(std::string name, Module * m, IDISA::IDISA_Builder * b, const unsigned bufferSize)
    32 : mMod(m)
    33 , iBuilder(b)
     20KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder, std::string && name, const unsigned defaultBufferSize)
     21: iBuilder(builder)
    3422, mKernelName(name)
    35 , mBitBlockType(b->getBitBlockType())
    36 , mBufferSize(bufferSize)
     23, mDefaultBufferSize(defaultBufferSize)
     24, mBitBlockType(builder->getBitBlockType())
    3725, mBlockNoIndex(0) {
    38     assert (mBufferSize > 0);
    39     mBlockNoIndex = addInternalState(b->getInt64Ty(), "BlockNo");
    40 }
    41 
    42 SlabAllocator<Instance> Instance::mAllocator; // static allocator declaration; should probably be in a "instance.cpp"
     26    assert (mDefaultBufferSize > 0);
     27    mBlockNoIndex = iBuilder->getInt32(addInternalState(builder->getInt64Ty(), "BlockNo"));
     28}
    4329
    4430/** ------------------------------------------------------------------------------------------------------------- *
     
    5743    }
    5844    const unsigned index = addInternalState(type);
    59     mInternalStateNameMap.emplace(name, index);
     45    mInternalStateNameMap.emplace(name, iBuilder->getInt32(index));
    6046    return index;
    6147}
     
    6450 * @brief getInternalState
    6551 ** ------------------------------------------------------------------------------------------------------------- */
    66 Value * KernelBuilder::getInternalState(Value * const instance, const unsigned index) {
    67     assert (index < mInternalState.size());
    68     return getInternalState(instance, iBuilder->getInt32(index));
    69 }
    70 
    71 Value * KernelBuilder::getInternalState(Value * const instance, disable_implicit_conversion<Value *> index) {
    72     assert (index->getType()->isIntegerTy());
    73     return iBuilder->CreateGEP(instance, {iBuilder->getInt64(0), iBuilder->getInt32(INTERNAL_STATE), index});
    74 }
    75 
    76 Value * KernelBuilder::getInternalState(Value * const instance, const std::string & name) {
     52Value * KernelBuilder::getInternalState(Value * const kernelState, disable_implicit_conversion<Value *> index) {
     53    assert (index->getType()->isIntegerTy());   
     54    assert (kernelState->getType()->getPointerElementType() == mKernelStateType);
     55    return iBuilder->CreateGEP(kernelState, {iBuilder->getInt32(0), index});
     56}
     57
     58Value * KernelBuilder::getInternalState(Value * const kernelState, const std::string & name) {
    7759    const auto f = mInternalStateNameMap.find(name);
    7860    if (LLVM_UNLIKELY(f == mInternalStateNameMap.end())) {
    7961        throw std::runtime_error("Kernel does not contain internal state " + name);
    8062    }
    81     return getInternalState(instance, f->second);
     63    return getInternalState(kernelState, f->second);
    8264}
    8365
     
    8567 * @brief setInternalState
    8668 ** ------------------------------------------------------------------------------------------------------------- */
    87 void KernelBuilder::setInternalState(Value * const instance, const std::string & name, Value * const value) {
    88     Value * ptr = getInternalState(instance, name);
     69void KernelBuilder::setInternalState(Value * const kernelState, const std::string & name, Value * const value) {
     70    Value * ptr = getInternalState(kernelState, name);
    8971    assert (ptr->getType()->getPointerElementType() == value->getType());
    9072    if (value->getType() == iBuilder->getBitBlockType()) {
     
    9577}
    9678
    97 void KernelBuilder::setInternalState(Value * const instance, const unsigned index, Value * const value) {
    98     assert (index < mInternalState.size());
    99     return setInternalState(instance, iBuilder->getInt32(index), value);
    100 }
    101 
    102 void KernelBuilder::setInternalState(Value * const instance, disable_implicit_conversion<Value *> index, Value * const value) {
    103     Value * ptr = getInternalState(instance, index);
     79void KernelBuilder::setInternalState(Value * const kernelState, disable_implicit_conversion<Value *> index, Value * const value) {
     80    Value * ptr = getInternalState(kernelState, index);
    10481    assert (ptr->getType()->getPointerElementType() == value->getType());
    10582    if (value->getType() == iBuilder->getBitBlockType()) {
     
    130107 * @brief getInputStream
    131108 ** ------------------------------------------------------------------------------------------------------------- */
    132 Value * KernelBuilder::getInputStream(Value * const instance, const unsigned index, const unsigned streamOffset) {
    133     assert (index < mInputStream.size());
    134     return getInputStream(instance, iBuilder->getInt32(index), streamOffset);
    135 }
    136 
    137 Value * KernelBuilder::getInputStream(Value * const instance, disable_implicit_conversion<Value *> index, const unsigned streamOffset) {
    138     assert (instance && index);
    139     assert (index->getType()->isIntegerTy());
    140     Value * const inputStreamSet = iBuilder->CreateLoad(iBuilder->CreateGEP(instance,
    141         {iBuilder->getInt32(0), iBuilder->getInt32(INPUT_STREAM_SET), iBuilder->getInt32(0)}));
    142     Value * modFunction = iBuilder->CreateLoad(iBuilder->CreateGEP(instance,
    143         {iBuilder->getInt32(0), iBuilder->getInt32(INPUT_STREAM_SET), iBuilder->getInt32(1)}));
    144     Value * offset = iBuilder->CreateLoad(getBlockNo(instance));
    145     if (streamOffset) {
    146         offset = iBuilder->CreateAdd(offset, ConstantInt::get(offset->getType(), streamOffset));
    147     }
    148     if (LLVM_LIKELY(isa<ConstantInt>(index.get()) || inputStreamSet->getType()->getPointerElementType()->isArrayTy())) {
    149         return iBuilder->CreateGEP(inputStreamSet, { iBuilder->CreateCall(modFunction, offset), index });
    150     } else {
    151         throw std::runtime_error("Cannot access the input stream with a non-constant value unless all input stream types are identical!");
    152     }
     109Value * KernelBuilder::getInputStream(Value * const inputStreamSet, disable_implicit_conversion<Value *> index) {
     110    assert ("Parameters cannot be null!" && (inputStreamSet != nullptr && index != nullptr));
     111    assert ("Stream index must be an integer!" && index->getType()->isIntegerTy());
     112    assert ("Illegal input stream set provided!" && inputStreamSet->getType()->getPointerElementType() == mInputStreamType);
     113    if (LLVM_LIKELY(isa<ConstantInt>(index.get()) || getInputStreamType()->isArrayTy())) {
     114        return iBuilder->CreateGEP(inputStreamSet, { iBuilder->getInt32(0), index });
     115    }
     116    #ifndef NDEBUG
     117    iBuilder->getModule()->dump();
     118    #endif
     119    throw std::runtime_error("Cannot access the input stream with a non-constant value unless all input stream types are identical!");
    153120}
    154121
     
    169136 * @brief getInputScalar
    170137 ** ------------------------------------------------------------------------------------------------------------- */
    171 Value * KernelBuilder::getInputScalar(Value * const instance, const unsigned) {
    172     assert (instance);
    173     throw std::runtime_error("currently not supported!");
    174 }
    175 
    176 Value * KernelBuilder::getInputScalar(Value * const instance, disable_implicit_conversion<Value *>) {
    177     assert (instance);
     138Value * KernelBuilder::getInputScalar(Value * const inputScalarSet, disable_implicit_conversion<Value *>) {
     139    assert (inputScalarSet);
    178140    throw std::runtime_error("currently not supported!");
    179141}
     
    202164 * @brief getOutputStream
    203165 ** ------------------------------------------------------------------------------------------------------------- */
    204 Value * KernelBuilder::getOutputStream(Value * const instance, const unsigned index, const unsigned streamOffset) {
    205     assert (index < mOutputStream.size());
    206     return getOutputStream(instance, iBuilder->getInt32(index), streamOffset);
    207 }
    208 
    209 Value * KernelBuilder::getOutputStream(Value * const instance, disable_implicit_conversion<Value *> index, const unsigned streamOffset) {
    210     assert (instance && index);
    211     assert (index->getType()->isIntegerTy());
    212     if (LLVM_LIKELY(isa<ConstantInt>(index.get()))) {
    213         return iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(OUTPUT_STREAM_SET), getStreamOffset(instance, streamOffset), index});
    214     } else {
    215         Value * const outputStreamSet = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(OUTPUT_STREAM_SET)});
    216         if (LLVM_LIKELY(outputStreamSet->getType()->getPointerElementType()->isArrayTy())) {
    217             return iBuilder->CreateGEP(outputStreamSet, {getStreamOffset(instance, streamOffset), index});
    218         }
     166Value * KernelBuilder::getOutputStream(Value * const outputStreamSet, disable_implicit_conversion<Value *> index) {
     167    assert ("Parameters cannot be null!" && (outputStreamSet != nullptr && index != nullptr));
     168    assert ("Stream index must be an integer!" && index->getType()->isIntegerTy());
     169    assert ("Illegal output stream set provided!" && outputStreamSet->getType()->getPointerElementType() == getOutputStreamType());
     170    if (LLVM_LIKELY(isa<ConstantInt>(index.get()) || getOutputStreamType()->isArrayTy())) {
     171        return iBuilder->CreateGEP(outputStreamSet, { iBuilder->getInt32(0), index });
    219172    }
    220173    throw std::runtime_error("Cannot access the output stream with a non-constant value unless all output stream types are identical!");
     
    224177 * @brief getOutputScalar
    225178 ** ------------------------------------------------------------------------------------------------------------- */
    226 Value * KernelBuilder::getOutputScalar(Value * const instance, const unsigned) {
     179Value * KernelBuilder::getOutputScalar(Value * const outputScalarSet, disable_implicit_conversion<Value *> ) {
    227180    throw std::runtime_error("currently not supported!");
    228181}
    229182
    230 Value * KernelBuilder::getOutputScalar(Value * const instance, disable_implicit_conversion<Value *> ) {
    231     throw std::runtime_error("currently not supported!");
    232 }
    233 
    234183/** ------------------------------------------------------------------------------------------------------------- *
    235184 * @brief packDataTypes
    236185 ** ------------------------------------------------------------------------------------------------------------- */
    237 llvm::Type * KernelBuilder::packDataTypes(const std::vector<llvm::Type *> & types) {
    238     bool canPackIntoArray = !types.empty();
     186Type * KernelBuilder::packDataTypes(const std::vector<llvm::Type *> & types) {
     187    if (types.empty()) {
     188        return nullptr;
     189    }
    239190    for (Type * type : types) {
    240191        if (type != types.front()) { // use canLosslesslyBitcastInto ?
    241             canPackIntoArray = false;
    242             break;
     192            return StructType::get(iBuilder->getContext(), types);
    243193        }
    244194    }
    245     if (canPackIntoArray) {
    246         return ArrayType::get(types.front(), types.size());
    247     } else {
    248         return StructType::get(mMod->getContext(), types);
    249     }
     195    return ArrayType::get(types.front(), types.size());
    250196}
    251197
     
    253199 * @brief prepareFunction
    254200 ** ------------------------------------------------------------------------------------------------------------- */
    255 Function * KernelBuilder::prepareFunction() {
    256 
    257     PointerType * modFunctionType = PointerType::get(FunctionType::get(iBuilder->getInt64Ty(), {iBuilder->getInt64Ty()}, false), 0);
    258     mInputStreamType = PointerType::get(packDataTypes(mInputStream), 0);
    259     mInputScalarType = PointerType::get(packDataTypes(mInputScalar), 0);
     201Function * KernelBuilder::prepareFunction(std::vector<unsigned> && inputStreamOffsets) {
     202
     203    mKernelStateType = StructType::create(iBuilder->getContext(), mInternalState, mKernelName);
     204    mInputScalarType = packDataTypes(mInputScalar);
     205    mInputStreamType = packDataTypes(mInputStream);
     206    mOutputScalarType = packDataTypes(mInputScalar);
    260207    mOutputStreamType = packDataTypes(mOutputStream);
    261     Type * outputScalarType = packDataTypes(mOutputScalar);
    262     Type * internalStateType = packDataTypes(mInternalState);
    263     Type * inputStateType = StructType::create(mMod->getContext(), { mInputStreamType, modFunctionType});
    264     Type * outputBufferType = ArrayType::get(mOutputStreamType, mBufferSize);
    265     mKernelStateType = StructType::create(mMod->getContext(), {internalStateType, inputStateType, outputBufferType, outputScalarType}, mKernelName);
    266 
    267     FunctionType * const functionType = FunctionType::get(iBuilder->getVoidTy(), {PointerType::get(mKernelStateType, 0)}, false);
    268     mDoBlock = Function::Create(functionType, GlobalValue::ExternalLinkage, mKernelName + "_DoBlock", mMod);
    269     mDoBlock->setCallingConv(CallingConv::C);   
    270     mDoBlock->setDoesNotCapture(1);
     208    mInputStreamOffsets = inputStreamOffsets;
     209
     210    std::vector<Type *> params;
     211    params.push_back(mKernelStateType->getPointerTo());
     212    if (mInputScalarType) {
     213        params.push_back(mInputScalarType->getPointerTo());
     214    }
     215    if (mInputStreamType) {
     216        for (unsigned i = 0; i < mInputStreamOffsets.size(); ++i) {
     217            params.push_back(mInputStreamType->getPointerTo());
     218        }
     219    }
     220    if (mOutputScalarType) {
     221        params.push_back(mOutputScalarType->getPointerTo());
     222    }
     223    if (mOutputStreamType) {
     224        params.push_back(mOutputStreamType->getPointerTo());
     225    }
     226
     227    // A pointer value is captured if the function makes a copy of any part of the pointer that outlives
     228    // the call (e.g., stored in a global or, depending on the context, when returned by the function.)
     229    // Since this does not occur in either our DoBlock or Constructor, all parameters are marked nocapture.
     230
     231    FunctionType * const functionType = FunctionType::get(iBuilder->getVoidTy(), params, false);
     232    mDoBlock = Function::Create(functionType, GlobalValue::ExternalLinkage, mKernelName + "_DoBlock", iBuilder->getModule());
     233    mDoBlock->setCallingConv(CallingConv::C);
     234    for (unsigned i = 1; i <= params.size(); ++i) {
     235        mDoBlock->setDoesNotCapture(i);
     236    }
    271237    mDoBlock->setDoesNotThrow();
    272 
    273238    Function::arg_iterator args = mDoBlock->arg_begin();
    274     mKernelState = args++;
    275     mKernelState->setName("this");
    276 
    277     iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mDoBlock, 0));
    278 
     239    mKernelStateParam = args++;
     240    mKernelStateParam->setName("this");
     241    if (mInputScalarType) {
     242        mInputScalarParam = args++;
     243        mInputScalarParam->setName("inputScalars");
     244    }
     245    if (mInputStreamType) {
     246        for (const unsigned offset : mInputStreamOffsets) {
     247            Value * const inputStreamSet = args++;
     248            inputStreamSet->setName("inputStreamSet" + std::to_string(offset));
     249            mInputStreamParam.emplace(offset, inputStreamSet);
     250        }
     251    }
     252    if (mOutputScalarType) {
     253        mOutputScalarParam = args++;
     254        mOutputScalarParam->setName("outputScalars");
     255    }
     256    if (mOutputStreamType) {
     257        mOutputStreamParam = args;
     258        mOutputStreamParam->setName("outputStreamSet");
     259    }
     260    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", mDoBlock, 0));
    279261    return mDoBlock;
    280262}
     
    284266 ** ------------------------------------------------------------------------------------------------------------- */
    285267void KernelBuilder::finalize() {
    286 
    287268    // Finish the actual function
    288269    Value * blockNo = getBlockNo();
     
    292273    iBuilder->CreateRetVoid();
    293274
    294     eliminateRedundantMemoryOperations(mDoBlock);
    295 
    296     // Generate the zero initializer
    297     PointerType * modFunctionType = PointerType::get(FunctionType::get(iBuilder->getInt64Ty(), {iBuilder->getInt64Ty()}, false), 0);
    298     FunctionType * constructorType = FunctionType::get(iBuilder->getVoidTy(), {PointerType::get(mKernelStateType, 0), mInputStreamType, modFunctionType}, false);
    299 
    300     mConstructor = Function::Create(constructorType, GlobalValue::ExternalLinkage, mKernelName + "_Constructor", mMod);
    301     mConstructor->setCallingConv(CallingConv::C);
    302     mDoBlock->setDoesNotCapture(1);
    303     mConstructor->addAttribute(AttributeSet::FunctionIndex, Attribute::InlineHint);
    304     mDoBlock->setDoesNotThrow();
    305 
    306     auto args = mConstructor->arg_begin();
    307     mKernelState = args++;
    308     mKernelState->setName("this");
    309     Value * const inputStream = args++;
    310     inputStream->setName("inputStream");
    311     Value * const modFunction = args++;
    312     modFunction->setName("modFunction");
    313 
    314     iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mConstructor, 0));
    315     for (unsigned i = 0; i < mInternalState.size(); ++i) {
    316         Type * const type = mInternalState[i];
    317         if (type->isSized()) {
    318             setInternalState(i, Constant::getNullValue(type));
    319         } else {
    320             Value * const ptr = getInternalState(i);
    321             Value * const size = iBuilder->CreatePtrDiff(iBuilder->CreateGEP(ptr, iBuilder->getInt32(1)), ptr);
    322             iBuilder->CallPrintInt(mKernelName + "_zeroinit_" + std::to_string(i), size);
    323             iBuilder->CreateMemSet(ptr, iBuilder->getInt8(0), size, 4);
    324         }
    325     }
    326 
    327     Value * const input = iBuilder->CreateGEP(mKernelState, {iBuilder->getInt32(0), iBuilder->getInt32(INPUT_STREAM_SET)});
    328     iBuilder->CreateStore(inputStream, iBuilder->CreateGEP(input, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
    329     iBuilder->CreateStore(modFunction, iBuilder->CreateGEP(input, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
    330     iBuilder->CreateRetVoid();
    331 
    332 //    if (mOutputStreamType->getStructNumElements()) {
    333 //        PointerType * outputStreamType = PointerType::get(mOutputStreamType, 0);
    334 //        FunctionType * type = FunctionType::get(outputStreamType, {outputStreamType, PointerType::get(blockNo->getType(), 0)}, false);
    335 //        mStreamSetFunction = Function::Create(type, Function::ExternalLinkage, mKernelName + "_StreamSet", mMod);
    336 //        auto arg = mStreamSetFunction->arg_begin();
    337 //        Value * stream = arg++;
    338 //        stream->setName("stream");
    339 //        mStreamSetFunction->addAttribute(1, Attribute::NoCapture);
    340 //        mStreamSetFunction->addAttribute(2, Attribute::NoCapture);
    341 //        mStreamSetFunction->addAttribute(AttributeSet::FunctionIndex, Attribute::InlineHint);
    342 //        mStreamSetFunction->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
    343 //        Value * offset = arg;
    344 //        BasicBlock * entry = BasicBlock::Create(mMod->getContext(), "entry", mStreamSetFunction);
    345 //        iBuilder->SetInsertPoint(entry);
    346 //        if (mBufferSize != 1) {
    347 //            offset = iBuilder->CreateLoad(offset);
    348 //            if (isPowerOfTwo(mBufferSize)) {
    349 //                offset = iBuilder->CreateAnd(offset, iBuilder->getInt64(mBufferSize - 1));
    350 //            } else if (mBufferSize > 2) {
    351 //                offset = iBuilder->CreateURem(offset, iBuilder->getInt64(mBufferSize));
    352 //            }
    353 //            stream = iBuilder->CreateGEP(stream, offset);
    354 //        }
    355 //        iBuilder->CreateRet(stream);
    356 //    }
    357 
     275    mKernelStateParam = nullptr;
     276    mInputScalarParam = nullptr;
     277    mInputStreamParam.clear();
     278    mOutputScalarParam = nullptr;
     279    mOutputStreamParam = nullptr;
    358280    iBuilder->ClearInsertionPoint();
    359281}
    360282
    361283/** ------------------------------------------------------------------------------------------------------------- *
    362  * @brief eliminateRedundantMemoryOperations
    363  ** ------------------------------------------------------------------------------------------------------------- */
    364 inline void KernelBuilder::eliminateRedundantMemoryOperations(Function * const function) {
    365 
    366 
     284 * @brief instantiate
     285 *
     286 * Allocate the memory for this kernel and its output scalars and streams; only the kernel state is explicitly zero-initialized here
     287 ** ------------------------------------------------------------------------------------------------------------- */
     288Instance * KernelBuilder::instantiate(std::pair<Value *, unsigned> && inputStreamSet, const unsigned outputBufferSize) {
     289    AllocaInst * const kernelState = iBuilder->CreateAlloca(mKernelStateType);
     290    iBuilder->CreateStore(Constant::getNullValue(mKernelStateType), kernelState);
     291    AllocaInst * outputScalars = nullptr;
     292    if (mOutputScalarType) {
     293        outputScalars = iBuilder->CreateAlloca(mOutputScalarType);
     294    }
     295    AllocaInst * outputStreamSets = nullptr;
     296    if (mOutputStreamType) {
     297        outputStreamSets = iBuilder->CreateAlloca(mOutputStreamType, iBuilder->getInt32(outputBufferSize));
     298    }
     299    return new Instance(this, kernelState, nullptr, std::get<0>(inputStreamSet), std::get<1>(inputStreamSet), outputScalars, outputStreamSets, outputBufferSize);
    367300}
    368301
     
    372305 * Generate a new instance of this kernel and call the default constructor to initialize it
    373306 ** ------------------------------------------------------------------------------------------------------------- */
    374 Instance * KernelBuilder::instantiate(std::pair<Value *, unsigned> && inputStream) {
    375     AllocaInst * const memory = iBuilder->CreateAlloca(mKernelStateType);
    376     Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(OUTPUT_STREAM_SET)};
    377     Value * ptr = iBuilder->CreateGEP(std::get<0>(inputStream), indices);
    378     iBuilder->CreateCall3(mConstructor, memory, iBuilder->CreatePointerCast(ptr, mInputStreamType), CreateModFunction(std::get<1>(inputStream)));
    379     return new Instance(this, memory);
    380 }
    381 
    382 /** ------------------------------------------------------------------------------------------------------------- *
    383  * @brief instantiate
    384  *
    385  * Generate a new instance of this kernel and call the default constructor to initialize it
    386  ** ------------------------------------------------------------------------------------------------------------- */
    387 Instance * KernelBuilder::instantiate(llvm::Value * const inputStream) {
    388     AllocaInst * const memory = iBuilder->CreateAlloca(mKernelStateType);
    389     iBuilder->CreateCall3(mConstructor, memory, iBuilder->CreatePointerCast(inputStream, mInputStreamType), CreateModFunction(0));
    390     return new Instance(this, memory);
    391 }
    392 
    393 /** ------------------------------------------------------------------------------------------------------------- *
    394  * @brief instantiate
    395  *
    396  * Generate a new instance of this kernel and call the default constructor to initialize it
    397  ** ------------------------------------------------------------------------------------------------------------- */
    398 Instance * KernelBuilder::instantiate(std::initializer_list<llvm::Value *> inputStreams) {
    399     if (mInputStreamType->getStructNumElements() != inputStreams.size()) {
    400         throw std::runtime_error(mKernelName + ".instantiate expected " + std::to_string(inputStreams.size()) +
    401                                  "elements but was given " + std::to_string(mInputStreamType->getStructNumElements()));
    402     }
    403     AllocaInst * const memory = iBuilder->CreateAlloca(mKernelStateType);
    404     AllocaInst * inputStruct = iBuilder->CreateAlloca(mInputStreamType, 0);
    405     unsigned i = 0;
    406     for (Value * inputStream : inputStreams) {
    407         Value * ptr = iBuilder->CreateGEP(inputStruct, { iBuilder->getInt32(0), iBuilder->getInt32(i++)});
    408         iBuilder->CreateStore(inputStream, ptr);
    409     }
    410     iBuilder->CreateCall3(mConstructor, memory, iBuilder->CreatePointerCast(inputStruct, mInputStreamType), CreateModFunction(0));
    411     return new Instance(this, memory);
    412 }
    413 
    414 /** ------------------------------------------------------------------------------------------------------------- *
    415  * @brief CreateDoBlockCall
    416  ** ------------------------------------------------------------------------------------------------------------- */
    417 void KernelBuilder::CreateDoBlockCall(Value * const instance) {
    418     assert (mDoBlock && instance);
    419     iBuilder->CreateCall(mDoBlock, instance);
    420 }
    421 
    422 /** ------------------------------------------------------------------------------------------------------------- *
    423  * @brief clearOutputStreamSet
    424  *
    425  * Zero out the i + streamOffset stream set memory, where i is the current stream set indicated by the BlockNo.
    426  ** ------------------------------------------------------------------------------------------------------------- */
    427 void KernelBuilder::clearOutputStreamSet(Value * const instance, const unsigned streamOffset) {
    428     Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(OUTPUT_STREAM_SET), getStreamOffset(instance, streamOffset)};
    429     Value * ptr = iBuilder->CreateGEP(instance, indices);
    430     unsigned size = 0;
    431     for (unsigned i = 0; i < mOutputStreamType->getStructNumElements(); ++i) {
    432         size += mOutputStreamType->getStructElementType(i)->getPrimitiveSizeInBits();
    433     }
    434     iBuilder->CreateMemSet(ptr, iBuilder->getInt8(0), size / 8, 4);
    435 }
    436 
    437 /** ------------------------------------------------------------------------------------------------------------- *
    438  * @brief offset
    439  *
    440  * Compute the stream index of the given offset value.
    441  ** ------------------------------------------------------------------------------------------------------------- */
    442 Value * KernelBuilder::getStreamOffset(Value * const instance, const unsigned index) {
    443     Value * offset = nullptr;
    444     if (mBufferSize > 1) {
    445         offset = iBuilder->CreateLoad(getBlockNo(instance));
    446         if (index) {
    447             offset = iBuilder->CreateAdd(offset, iBuilder->getInt64(index));
    448         }
    449         if (isPowerOfTwo(mBufferSize)) {
    450             offset = iBuilder->CreateAnd(offset, iBuilder->getInt64(mBufferSize - 1));
    451         } else {
    452             offset = iBuilder->CreateURem(offset, iBuilder->getInt64(mBufferSize));
    453         }
    454     } else {
    455         offset = iBuilder->getInt64(index);
    456     }
    457     return offset;
    458 }
    459 
    460 /** ------------------------------------------------------------------------------------------------------------- *
    461  * @brief CreateModFunction
    462  *
    463  * Generate a "modulo" function that dictates the local offset of a given blockNo
    464  ** ------------------------------------------------------------------------------------------------------------- */
    465 inline Function * KernelBuilder::CreateModFunction(const unsigned size) {
    466     const std::string name((size == 0) ? "continuous" : "finite" + std::to_string(size));
    467     Function * function = mMod->getFunction(name);
    468     if (function) {
    469         return function;
    470     }
    471     const auto ip = iBuilder->saveIP();
    472     FunctionType * type = FunctionType::get(iBuilder->getInt64Ty(), {iBuilder->getInt64Ty()}, false);
    473     function = Function::Create(type, Function::ExternalLinkage, name, mMod);
    474     Value * offset = function->arg_begin();
    475     offset->setName("index");
    476     BasicBlock * entry = BasicBlock::Create(mMod->getContext(), "entry", function);
    477     iBuilder->SetInsertPoint(entry);
    478     if (size) {
    479         if (size == 1) {
    480             offset = iBuilder->getInt64(0);
    481         } else if (isPowerOfTwo(size)) {
    482             offset = iBuilder->CreateAnd(offset, iBuilder->getInt64(size - 1));
    483         } else {
    484             offset = iBuilder->CreateURem(offset, iBuilder->getInt64(size));
    485         }
    486     }
    487     iBuilder->CreateRet(offset);
    488     iBuilder->restoreIP(ip);
    489     return function;
     307Instance * KernelBuilder::instantiate(std::initializer_list<llvm::Value *> inputStreams) {   
     308    throw std::runtime_error("Not supported!");
     309//    AllocaInst * inputStruct = iBuilder->CreateAlloca(mInputStreamType);
     310//    unsigned i = 0;
     311//    for (Value * inputStream : inputStreams) {
     312//        Value * ptr = iBuilder->CreateGEP(inputStruct, { iBuilder->getInt32(0), iBuilder->getInt32(i++)});
     313//        iBuilder->CreateStore(iBuilder->CreatePointerCast(inputStream, ptr);
     314//    }
     315//    return instantiate(std::make_pair(inputStruct, 0));
    490316}
    491317
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r4995 r5000  
    99#include <vector>
    1010#include <boost/container/flat_map.hpp>
     11#include <IDISA/idisa_builder.h>
    1112
    1213namespace llvm {
     
    2829}
    2930
    30 namespace IDISA {
    31     class IDISA_Builder;
    32 }
    33 
    3431namespace kernel {
    3532
     
    3936    friend class Instance;
    4037    friend llvm::Function * generateScanWordRoutine(llvm::Module *, IDISA::IDISA_Builder *, unsigned, KernelBuilder *, bool);
    41     using NameMap = boost::container::flat_map<std::string, unsigned>;
     38    using InputStreamMap = boost::container::flat_map<unsigned, llvm::Value *>;
     39    using NameMap = boost::container::flat_map<std::string, llvm::ConstantInt *>;
    4240public:
    43     // sets name & sets internal state to the kernel superclass state
    44     KernelBuilder(std::string name, llvm::Module * m, IDISA::IDISA_Builder * b, const unsigned bufferSize = 1);
     41
     42    KernelBuilder(IDISA::IDISA_Builder * builder, std::string && name, const unsigned defaultBufferSize);
    4543
    4644    template<typename T>
    4745    struct disable_implicit_conversion {
    48         inline disable_implicit_conversion(T const value) : _value(value) {}
     46        inline disable_implicit_conversion(T const value) : _value(value) { assert(_value); }
    4947        inline disable_implicit_conversion(std::nullptr_t) = delete;
    5048        inline disable_implicit_conversion(unsigned) = delete;
     
    6866    unsigned addOutputScalar(llvm::Type * const type);
    6967
    70     llvm::Function * prepareFunction();
     68    inline llvm::Function * prepareFunction() {
     69        return prepareFunction({0});
     70    }
     71
     72    llvm::Function * prepareFunction(std::vector<unsigned> && inputStreamOffsets);
     73
     74    inline llvm::Value * getInternalState(const std::string & name) {
     75        return getInternalState(mKernelStateParam, name);
     76    }
     77
     78    inline void setInternalState(const std::string & name, llvm::Value * value) {
     79        setInternalState(mKernelStateParam, name, value);
     80    }
     81
     82    inline llvm::Value * getInternalState(const unsigned index) {
     83        assert (index < mInternalState.size());
     84        return getInternalState(mKernelStateParam, iBuilder->getInt32(index));
     85    }
     86
     87    inline llvm::Value * getInternalState(disable_implicit_conversion<llvm::Value *> const index) {
     88        return getInternalState(mKernelStateParam, index);
     89    }
     90
     91    void setInternalState(const unsigned index, llvm::Value * value) {
     92        assert (index < mInternalState.size());
     93        setInternalState(mKernelStateParam, iBuilder->getInt32(index), value);
     94    }
     95
     96    void setInternalState(disable_implicit_conversion<llvm::Value *> const index, llvm::Value * value) {
     97        setInternalState(mKernelStateParam, index, value);
     98    }
     99    inline llvm::Type * getKernelStateType() const{
     100        return mKernelStateType;
     101    }
    71102
    72103    inline llvm::Value * getInputStream(const unsigned index, const unsigned streamOffset = 0) {
    73         return getInputStream(mKernelState, index, streamOffset);
     104        assert (index < getNumOfInputStreams());
     105        return getInputStream(iBuilder->getInt32(index), streamOffset);
    74106    }
    75107
    76108    inline llvm::Value * getInputStream(disable_implicit_conversion<llvm::Value *> index, const unsigned streamOffset = 0) {
    77         return getInputStream(mKernelState, index, streamOffset);
     109        const auto f = mInputStreamParam.find(streamOffset);
     110        if (LLVM_UNLIKELY(f == mInputStreamParam.end())) {
     111            throw std::runtime_error("Kernel compilation error: No input stream parameter for stream offset " + std::to_string(streamOffset));
     112        }
     113        return getInputStream(f->second, index);
     114    }
     115
     116    inline unsigned getNumOfInputStreams() const {
     117        return mInputStream.size();
     118    }
     119
     120    inline llvm::Type * getInputStreamType() const {
     121        return mInputStreamType;
    78122    }
    79123
    80124    inline llvm::Value * getInputScalar(const unsigned index) {
    81         return getInputScalar(mKernelState, index);
     125        assert (index < getNumOfInputScalars());
     126        return getInputScalar(iBuilder->getInt32(index));
    82127    }
    83128
    84129    inline llvm::Value * getInputScalar(disable_implicit_conversion<llvm::Value *> const index) {
    85         return getInputScalar(mKernelState, index);
    86     }
    87 
    88     llvm::Value * getInternalState(const std::string & name) {
    89         return getInternalState(mKernelState, name);
    90     }
    91 
    92     void setInternalState(const std::string & name, llvm::Value * value) {
    93         setInternalState(mKernelState, name, value);
    94     }
    95 
    96     llvm::Value * getInternalState(const unsigned index) {
    97         return getInternalState(mKernelState, index);
    98     }
    99 
    100     llvm::Value * getInternalState(disable_implicit_conversion<llvm::Value *> const index) {
    101         return getInternalState(mKernelState, index);
    102     }
    103 
    104     void setInternalState(const unsigned index, llvm::Value * value) {
    105         setInternalState(mKernelState, index, value);
    106     }
    107 
    108     void setInternalState(disable_implicit_conversion<llvm::Value *> const index, llvm::Value * value) {
    109         setInternalState(mKernelState, index, value);
    110     }
    111 
    112     llvm::Value * getOutputStream(const unsigned index, const unsigned streamOffset = 0) {
    113         return getOutputStream(mKernelState, index, streamOffset);
    114     }
    115 
    116     llvm::Value * getOutputStream(disable_implicit_conversion<llvm::Value *> const index, const unsigned streamOffset = 0) {
    117         return getOutputStream(mKernelState, index, streamOffset);
     130        return getInputScalar(mInputScalarParam, index);
     131    }
     132
     133    inline unsigned getNumOfInputScalars() const {
     134        return mInputScalar.size();
     135    }
     136
     137    inline llvm::Type * getInputScalarType() const {
     138        return mInputScalarType;
     139    }
     140
     141    inline llvm::Value * getOutputStream(const unsigned index) {
     142        assert (index < getNumOfOutputStreams());
     143        return getOutputStream(mOutputStreamParam, iBuilder->getInt32(index));
     144    }
     145
     146    inline llvm::Value * getOutputStream(disable_implicit_conversion<llvm::Value *> const index) {
     147        return getOutputStream(mOutputStreamParam, index);
    118148    }
    119149
     
    122152    }
    123153
    124     llvm::Value * getOutputScalar(const unsigned index) {
    125         return getOutputScalar(mKernelState, index);
    126     }
    127 
    128     llvm::Value * getOutputScalar(disable_implicit_conversion<llvm::Value *> const index) {
    129         return getOutputScalar(mKernelState, index);
     154    inline llvm::Type * getOutputStreamType() const {
     155        return mOutputStreamType;
     156    }
     157
     158    inline llvm::Value * getOutputScalar(const unsigned index) {
     159        assert (index < getNumOfOutputScalars());
     160        return getOutputScalar(mOutputScalarParam, iBuilder->getInt32(index));
     161    }
     162
     163    inline llvm::Value * getOutputScalar(disable_implicit_conversion<llvm::Value *> const index) {
     164        return getOutputScalar(mOutputScalarParam, index);
    130165    }
    131166
     
    134169    }
    135170
    136     llvm::Value * getBlockNo() {
    137         return getBlockNo(mKernelState);
    138     }
    139 
    140     llvm::Type * getInputStreamType() const;
    141 
    142     void setInputBufferSize(const unsigned bufferSize);
    143 
    144     unsigned getInputBufferSize() const;
    145 
    146     unsigned getBufferSize() const;
     171    inline llvm::Type * getOutputScalarType() const {
     172        return mOutputStreamType;
     173    }
     174
     175    inline llvm::Value * getBlockNo() {
     176        return getBlockNo(mKernelStateParam);
     177    }
     178
     179    unsigned getDefaultBufferSize() const;
    147180
    148181    void finalize();
    149182
    150     kernel::Instance * instantiate(llvm::Value * const inputStream);
     183    kernel::Instance * instantiate(std::pair<llvm::Value *, unsigned> && inputStreamSet) {
     184        return instantiate(std::move(inputStreamSet), getDefaultBufferSize());
     185    }
     186
     187    kernel::Instance * instantiate(std::pair<llvm::Value *, unsigned> && inputStreamSet, const unsigned outputBufferSize);
     188
     189    kernel::Instance * instantiate(llvm::Value * const inputStream) {
     190        return instantiate(std::make_pair(inputStream, 0));
     191    }
    151192
    152193    kernel::Instance * instantiate(std::initializer_list<llvm::Value *> inputStreams);
    153194
    154     kernel::Instance * instantiate(std::pair<llvm::Value *, unsigned> && inputStream);
    155 
    156     llvm::Type * getKernelStateType() const;
    157 
    158195    llvm::Value * getKernelState() const;
    159196
    160197    llvm::Function * getDoBlockFunction() const;
    161198
    162     void clearOutputStreamSet(llvm::Value * const instance, const unsigned streamOffset = 0);
    163 
    164199protected:
    165200
    166     llvm::Type * packDataTypes(const std::vector<llvm::Type *> & types);
    167 
    168     llvm::Value * getInputStream(llvm::Value * const instance, const unsigned index, const unsigned streamOffset);
    169 
    170     llvm::Value * getInputStream(llvm::Value * const instance, disable_implicit_conversion<llvm::Value *> index, const unsigned streamOffset);
    171 
    172     llvm::Value * getInputScalar(llvm::Value * const instance, const unsigned index);
    173 
    174     llvm::Value * getInputScalar(llvm::Value * const instance, disable_implicit_conversion<llvm::Value *> index);
    175 
    176     llvm::Value * getInternalState(llvm::Value * const instance, const std::string & name);
    177 
    178     void setInternalState(llvm::Value * const instance, const std::string & name, llvm::Value * const value);
    179 
    180     llvm::Value * getInternalState(llvm::Value * const instance, const unsigned index);
    181 
    182     llvm::Value * getInternalState(llvm::Value * const instance, disable_implicit_conversion<llvm::Value *> index);
    183 
    184     void setInternalState(llvm::Value * const instance, const unsigned index, llvm::Value * const value);
    185 
    186     void setInternalState(llvm::Value * const instance, disable_implicit_conversion<llvm::Value *> index, llvm::Value * const value);
    187 
    188     llvm::Value * getOutputStream(llvm::Value * const instance, const unsigned index, const unsigned streamOffset);
    189 
    190     llvm::Value * getOutputStream(llvm::Value * const instance, disable_implicit_conversion<llvm::Value *> index, const unsigned streamOffset);
    191 
    192     llvm::Value * getOutputScalar(llvm::Value * const instance, const unsigned index);
    193 
    194     llvm::Value * getOutputScalar(llvm::Value * const instance, disable_implicit_conversion<llvm::Value *> index);
    195 
    196     llvm::Value * getStreamOffset(llvm::Value * const instance, const unsigned index);
     201    Type * packDataTypes(const std::vector<llvm::Type *> & types);
     202
     203    llvm::Value * getInputStream(llvm::Value * const inputStreamSet, disable_implicit_conversion<llvm::Value *> index);
     204
     205    llvm::Value * getInputScalar(llvm::Value * const inputScalarSet, disable_implicit_conversion<llvm::Value *> index);
     206
     207    llvm::Value * getInternalState(llvm::Value * const kernelState, const std::string & name);
     208
     209    void setInternalState(llvm::Value * const kernelState, const std::string & name, llvm::Value * const value);
     210
     211    llvm::Value * getInternalState(llvm::Value * const kernelState, disable_implicit_conversion<llvm::Value *> index);
     212
     213    void setInternalState(llvm::Value * const kernelState, const unsigned index, llvm::Value * const value);
     214
     215    void setInternalState(llvm::Value * const kernelState, disable_implicit_conversion<llvm::Value *> index, llvm::Value * const value);
     216
     217    llvm::Value * getOutputStream(llvm::Value * const outputStreamSet, disable_implicit_conversion<llvm::Value *> index);
     218
     219    llvm::Value * getOutputScalar(llvm::Value * const outputScalarSet, disable_implicit_conversion<llvm::Value *> index);
    197220
    198221    llvm::Value * getBlockNo(llvm::Value * const instance);
     
    200223    llvm::Function * getOutputStreamSetFunction() const;
    201224
    202     void CreateDoBlockCall(llvm::Value * const instance);
    203 
    204     llvm::Function * CreateModFunction(const unsigned size);
    205 
    206     void eliminateRedundantMemoryOperations(llvm::Function * const function);
     225    const std::vector<unsigned> & getInputStreamOffsets() const {
     226        return mInputStreamOffsets;
     227    }
    207228
    208229private:
    209     llvm::Module *                      mMod;
    210     IDISA::IDISA_Builder *              iBuilder;
    211     std::string                                                 mKernelName;
     230
     231    IDISA::IDISA_Builder * const        iBuilder;
     232    const std::string                   mKernelName;
     233    unsigned                            mDefaultBufferSize;
     234
    212235    llvm::Type *                        mBitBlockType;
     236    llvm::ConstantInt *                 mBlockNoIndex;
    213237    llvm::Function *                                    mConstructor;
    214238    llvm::Function *                                    mDoBlock;
    215239
    216     unsigned                            mBufferSize;
    217 
    218240    llvm::Type *                        mKernelStateType;
     241    llvm::Type *                        mInputScalarType;
    219242    llvm::Type *                        mInputStreamType;
    220     llvm::Type *                        mInputScalarType;
     243    llvm::Type *                        mOutputScalarType;
    221244    llvm::Type *                        mOutputStreamType;
    222245
    223     llvm::Value *                       mKernelState;
    224     unsigned                            mBlockNoIndex;
    225 
     246    llvm::Value *                       mKernelStateParam;
     247    llvm::Value *                       mInputScalarParam;
     248    InputStreamMap                      mInputStreamParam;
     249    llvm::Value *                       mOutputScalarParam;
     250    llvm::Value *                       mOutputStreamParam;
     251
     252    std::vector<llvm::Type *>           mInputScalar;
     253    std::vector<std::string>            mInputScalarName;   
    226254    std::vector<llvm::Type *>           mInputStream;
    227255    std::vector<std::string>            mInputStreamName;
    228     std::vector<llvm::Type *>           mInputScalar;
    229     std::vector<std::string>            mInputScalarName;   
     256    std::vector<unsigned>               mInputStreamOffsets;
     257    std::vector<llvm::Type *>           mOutputScalar;
    230258    std::vector<llvm::Type *>           mOutputStream;
    231     std::vector<llvm::Type *>           mOutputScalar;
    232259    std::vector<llvm::Type *>                   mInternalState;
    233260    NameMap                             mInternalStateNameMap;
     
    238265}
    239266
    240 inline llvm::Type * KernelBuilder::getKernelStateType() const{
    241     return mKernelStateType;
    242 }
    243 
    244267inline llvm::Value * KernelBuilder::getKernelState() const {
    245     return mKernelState;
    246 }
    247 
    248 inline llvm::Type * KernelBuilder::getInputStreamType() const {
    249     return mInputStreamType;
     268    return mKernelStateParam;
    250269}
    251270
     
    254273}
    255274
    256 inline unsigned KernelBuilder::getBufferSize() const {
    257     return mBufferSize;
     275inline unsigned KernelBuilder::getDefaultBufferSize() const {
     276    return mDefaultBufferSize;
    258277}
    259278
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r4991 r5000  
    3737
    3838void PipelineBuilder::CreateKernels(PabloFunction * function, bool isNameExpression){
    39     mS2PKernel = new KernelBuilder("s2p", mMod, iBuilder, SegmentSize);
    40     mICgrepKernel = new KernelBuilder("icgrep", mMod, iBuilder, SegmentSize);
    41     mScanMatchKernel = new KernelBuilder("scanMatch", mMod, iBuilder, SegmentSize);
     39    mS2PKernel = new KernelBuilder(iBuilder, "s2p", SegmentSize);
     40    mICgrepKernel = new KernelBuilder(iBuilder, "icgrep", SegmentSize);
     41    mScanMatchKernel = new KernelBuilder(iBuilder, "scanMatch", SegmentSize);
    4242    generateS2PKernel(mMod, iBuilder, mS2PKernel);
    4343    generateScanMatch(mMod, iBuilder, 64, mScanMatchKernel, isNameExpression);
     
    9595
    9696    Instance * s2pInstance = mS2PKernel->instantiate(inputStream);
    97     Instance * icGrepInstance = mICgrepKernel->instantiate(s2pInstance->getOutputStreamSet());
    98     Instance * scanMatchInstance = mScanMatchKernel->instantiate(icGrepInstance->getOutputStreamSet());
     97    Instance * icGrepInstance = mICgrepKernel->instantiate(s2pInstance->getResultSet());
     98    Instance * scanMatchInstance = mScanMatchKernel->instantiate(icGrepInstance->getResultSet());
    9999
    100100    Value * ptr = iBuilder->CreateBitCast(inputStream, int8PtrTy);
  • icGREP/icgrep-devel/icgrep/kernels/symboltablepipeline.cpp

    r4995 r5000  
    183183    Type * const transposedVectorType = VectorType::get(iBuilder->getInt8Ty(), iBuilder->getBitBlockWidth() / 8);
    184184
    185     unsigned minKeyLength = 0;
    186 
    187185    Type * startArrayType = ArrayType::get(iBuilder->getInt32Ty(), iBuilder->getBitBlockWidth() + gatherCount);
    188186    Type * endArrayType = ArrayType::get(iBuilder->getInt32Ty(), gatherCount);
    189187    Type * groupType = StructType::get(iBuilder->getInt32Ty(), startArrayType, iBuilder->getInt32Ty(), endArrayType, nullptr);
    190188    const unsigned baseIdx = kBuilder->addInternalState(iBuilder->getInt8PtrTy(), "Base");
    191     const unsigned positionArrayIdx = kBuilder->addInternalState(ArrayType::get(groupType, endpoints.size()), "Positions");
     189    const unsigned gatherPositionArrayIdx = kBuilder->addInternalState(ArrayType::get(groupType, endpoints.size()), "Positions");
    192190
    193191    for (unsigned maxKeyLength : endpoints) {
    194192        kBuilder->addInputStream(1, "startStream" + std::to_string(maxKeyLength));
    195193        kBuilder->addInputStream(1, "endStream" + std::to_string(maxKeyLength));
    196         kBuilder->addOutputStream(((maxKeyLength + 3) / 4) * 4);
     194        kBuilder->addOutputStream(4); // ((maxKeyLength + 3) / 4) * 4
    197195    }
    198196    kBuilder->addInputStream(1, "startStreamN");
     
    222220    BasicBlock * exit = BasicBlock::Create(mMod->getContext(), "exit", function, 0);
    223221
     222
     223    // ENTRY BLOCK
     224    iBuilder->SetInsertPoint(entry);
    224225    Type * const int32PtrTy = PointerType::get(iBuilder->getInt32Ty(), 0);
    225     FunctionType * const functionType = FunctionType::get(iBuilder->getVoidTy(), {iBuilder->getInt8PtrTy(), int32PtrTy, int32PtrTy, iBuilder->getInt32Ty(), int32PtrTy}, false);
    226     Value * const gatherFunctionPtrArray = iBuilder->CreateAlloca(PointerType::get(functionType, 0), iBuilder->getInt32(endpoints.size()));
     226    FunctionType * const gatherFunctionType = FunctionType::get(iBuilder->getVoidTy(), {iBuilder->getInt8PtrTy(), int32PtrTy, int32PtrTy, iBuilder->getInt32Ty(), iBuilder->getInt8PtrTy()}, false);
     227    Value * const gatherFunctionPtrArray = iBuilder->CreateAlloca(PointerType::get(gatherFunctionType, 0), iBuilder->getInt32(endpoints.size()));
     228
    227229    unsigned i = 0;
    228     minKeyLength = 0;
     230    unsigned minKeyLength = 0;
    229231    for (unsigned maxKeyLength : endpoints) {
    230         const unsigned minCount = (minKeyLength / 4);
    231         const unsigned maxCount = ((maxKeyLength + 3) / 4);
    232         Value * ptr = iBuilder->CreateGEP(gatherFunctionPtrArray, iBuilder->getInt32(i++));
    233         iBuilder->CreateStore(generateGatherFunction(transposedVectorType, minCount, maxCount), ptr);
     232        Function * f = generateGatherFunction(minKeyLength, maxKeyLength, transposedVectorType);
     233        mGatherFunction.push_back(f);
     234        iBuilder->CreateStore(f, iBuilder->CreateGEP(gatherFunctionPtrArray, iBuilder->getInt32(i++)));
    234235        minKeyLength = maxKeyLength;
    235236    }
     
    237238    //TODO: this won't work on files > 2^32 bytes yet; needs an intermediate flush then a recalculation of the base pointer.
    238239    Value * const base = iBuilder->CreateLoad(kBuilder->getInternalState(baseIdx), "base");
    239     Value * const positionArray = kBuilder->getInternalState(positionArrayIdx);
     240    Value * const positionArray = kBuilder->getInternalState(gatherPositionArrayIdx);
    240241
    241242    Value * blockPos = iBuilder->CreateLoad(kBuilder->getBlockNo());
     
    255256    // if two positions cannot be in the same vector element, we could possibly do some work in parallel here.
    256257
    257     iBuilder->CallPrintInt(" ---- groupIV ---- ", groupIV);
    258 
    259258    Value * index = iBuilder->CreateMul(groupIV, iBuilder->getInt32(2));
    260259    Value * startStreamPtr = kBuilder->getInputStream(index);
    261260    Value * startStream = iBuilder->CreateBlockAlignedLoad(startStreamPtr);
    262     iBuilder->CallPrintRegister("startStream", startStream);
    263261    startStream = iBuilder->CreateBitCast(startStream, scanWordVectorType, "startStream");
    264262
     
    266264    Value * endStreamPtr = kBuilder->getInputStream(index);
    267265    Value * endStream = iBuilder->CreateBlockAlignedLoad(endStreamPtr);
    268     iBuilder->CallPrintRegister("endStream", endStream);
    269266    endStream = iBuilder->CreateBitCast(endStream, scanWordVectorType, "endStream");
    270267
     
    275272    Value * endIndex = iBuilder->CreateLoad(endIndexPtr, "endIndex");
    276273    Value * endArray = iBuilder->CreateGEP(positionArray, {iBuilder->getInt32(0), groupIV, iBuilder->getInt32(3)}, "endArray");
    277 
    278     Value * const buffer = kBuilder->getOutputStream(groupIV);
    279274
    280275    iBuilder->CreateBr(startOuterCond);
     
    313308    startFieldPhi->addIncoming(generateResetLowestBit(iBuilder, startFieldPhi), startInnerBody);
    314309    startPos = iBuilder->CreateTruncOrBitCast(iBuilder->CreateOr(startPos, startBlockOffset), iBuilder->getInt32Ty());
    315     Value * startAddr = iBuilder->CreateGEP(startArray, {iBuilder->getInt32(0), startIndexPhi3});
    316     iBuilder->CallPrintInt("> startIndex ", startIndexPhi3);
    317     iBuilder->CallPrintInt("> startPos ", startPos);
    318     iBuilder->CreateStore(startPos, startAddr);
     310    iBuilder->CreateStore(startPos, iBuilder->CreateGEP(startArray, {iBuilder->getInt32(0), startIndexPhi3}));
    319311    startIndexPhi3->addIncoming(iBuilder->CreateAdd(startIndexPhi3, ConstantInt::get(startIndexPhi3->getType(), 1)), startInnerBody);
    320312    iBuilder->CreateBr(startInnerCond);
     
    362354    endFieldPhi->addIncoming(updatedEndFieldPhi, gather);
    363355    endPos = iBuilder->CreateTruncOrBitCast(iBuilder->CreateOr(endPos, endBlockOffset), iBuilder->getInt32Ty());
    364     Value * endAddr = iBuilder->CreateGEP(endArray, {iBuilder->getInt32(0), endIndexPhi2});
    365     iBuilder->CallPrintInt("> endIndex ", endIndexPhi2);
    366     iBuilder->CallPrintInt("> endPos ", endPos);
    367     iBuilder->CreateStore(endPos, endAddr);
     356    iBuilder->CreateStore(endPos, iBuilder->CreateGEP(endArray, {iBuilder->getInt32(0), endIndexPhi2}));
    368357    Value * updatedEndIndexPhi = iBuilder->CreateAdd(endIndexPhi2, ConstantInt::get(endIndexPhi2->getType(), 1));
    369358    endIndexPhi2->addIncoming(updatedEndIndexPhi, endInnerBody);
     
    374363    iBuilder->SetInsertPoint(gather);
    375364
    376     iBuilder->CallPrintInt(" **** gathering **** ", groupIV);
    377 
    378365    Value * startArrayPtr = iBuilder->CreatePointerCast(startArray, PointerType::get(iBuilder->getInt32Ty(), 0));
    379366    Value * endArrayPtr = iBuilder->CreatePointerCast(endArray, PointerType::get(iBuilder->getInt32Ty(), 0));
    380     Value * const bufferPtr = iBuilder->CreatePointerCast(buffer, PointerType::get(iBuilder->getInt32Ty(), 0));
    381367    Value * gatherFunctionPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(gatherFunctionPtrArray, groupIV));
    382 
    383     iBuilder->CreateCall5(gatherFunctionPtr, base, startArrayPtr, endArrayPtr, iBuilder->getInt32(32), bufferPtr);
    384 
    385     // ... call hashing function ...
     368    Value * outputBuffer = iBuilder->CreatePointerCast(kBuilder->getOutputStream(groupIV), iBuilder->getInt8PtrTy());
     369    iBuilder->CreateCall5(gatherFunctionPtr, base, startArrayPtr, endArrayPtr, iBuilder->getInt32(32), outputBuffer);
     370
    386371    Value * remainingArrayPtr = iBuilder->CreateGEP(startArrayPtr, iBuilder->getInt32(gatherCount));
    387372    Value * remainingCount = iBuilder->CreateSub(startIndexPhi3, iBuilder->getInt32(gatherCount));
     
    404389 * @brief generateGatherFunction
    405390 ** ------------------------------------------------------------------------------------------------------------- */
    406 Function * SymbolTableBuilder::generateGatherFunction(Type * const resultType, const unsigned minCount, const unsigned maxCount) {
    407 
    408     assert (maxCount > minCount);
    409 
    410     const std::string functionName = "gather_" + std::to_string(minCount) + "_" + std::to_string(maxCount);
     391Function * SymbolTableBuilder::generateGatherFunction(const unsigned minKeyLength, const unsigned maxKeyLength, Type * const resultType) {
     392
     393    assert (minKeyLength < maxKeyLength);
     394
     395    const std::string functionName = "gather_" + std::to_string(minKeyLength) + "_to_" + std::to_string(maxKeyLength);
    411396    Function * function = mMod->getFunction(functionName);
    412397    if (function == nullptr) {
    413398
    414399        const auto ip = iBuilder->saveIP();
     400
     401        const unsigned minCount = (minKeyLength / 4);
     402        const unsigned maxCount = ((maxKeyLength + 3) / 4);
    415403
    416404        const unsigned vectorWidth = iBuilder->getBitBlockWidth() / 32;
     
    419407
    420408        Type * const int32PtrTy = PointerType::get(iBuilder->getInt32Ty(), 0);
    421         FunctionType * const functionType = FunctionType::get(iBuilder->getVoidTy(), {iBuilder->getInt8PtrTy(), int32PtrTy, int32PtrTy, iBuilder->getInt32Ty(), int32PtrTy}, false);
     409        FunctionType * const functionType = FunctionType::get(iBuilder->getVoidTy(), {iBuilder->getInt8PtrTy(), int32PtrTy, int32PtrTy, iBuilder->getInt32Ty(), iBuilder->getInt8PtrTy()}, false);
    422410        function = Function::Create(functionType, GlobalValue::ExternalLinkage, functionName, mMod);
    423411        function->setCallingConv(CallingConv::C);
     
    436424        Value * const numOfKeys = args++;
    437425        numOfKeys->setName("numOfKeys");
    438         Value * buffer = args++;
    439         buffer->setName("buffer");
     426        Value * result = args++;
     427        result->setName("result");
    440428
    441429        BasicBlock * entry = BasicBlock::Create(mMod->getContext(), "entry", function, 0);
     
    452440        // ENTRY
    453441        iBuilder->SetInsertPoint(entry);
    454         Value * const untransposedBuffer = iBuilder->CreateAlloca(gatherVectorArrayType, iBuilder->getInt32(4), "untransposedBuffer");
    455 
    456         iBuilder->CallPrintInt("base", base);
    457         iBuilder->CallPrintInt("startArray", startArray);
    458         iBuilder->CallPrintInt("endArray", endArray);
    459         iBuilder->CallPrintInt("numOfKeys", numOfKeys);
    460         iBuilder->CallPrintInt("buffer", buffer);
    461 
     442        AllocaInst * const buffer = iBuilder->CreateAlloca(resultType, iBuilder->getInt32(maxCount * 4), "buffer");
     443        iBuilder->CreateStore(Constant::getNullValue(buffer->getAllocatedType()), buffer);
     444        AllocaInst * const untransposedBuffer = iBuilder->CreateAlloca(gatherVectorArrayType, iBuilder->getInt32(4), "tmp");
     445        iBuilder->CreateStore(Constant::getNullValue(untransposedBuffer->getAllocatedType()), untransposedBuffer);
    462446        iBuilder->CreateBr(gatherCond);
    463447
     
    468452        PHINode * gatherIV = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
    469453        gatherIV->addIncoming(iBuilder->getInt32(0), entry);
    470         iBuilder->CallPrintInt(" --- gatherIV", gatherIV);
    471454        Value * gatherLoopTest = iBuilder->CreateICmpNE(gatherIV, iBuilder->getInt32(4));
    472455        iBuilder->CreateCondBr(gatherLoopTest, partialGatherCond, transposeCond);
     
    474457        // PARTIAL GATHER COND
    475458        iBuilder->SetInsertPoint(partialGatherCond);
    476         iBuilder->CallPrintInt(" --- remainingLanes", remainingLanes);
    477         Value * partialGatherLoopTest = iBuilder->CreateICmpSGE(remainingLanes, iBuilder->getInt32(vectorWidth));
     459        Value * partialGatherLoopTest = iBuilder->CreateICmpUGE(remainingLanes, iBuilder->getInt32(vectorWidth));
    478460        iBuilder->CreateCondBr(partialGatherLoopTest, gatherBody, partialGatherBody);
    479461
    480462        // PARTIAL GATHER BODY
    481463        iBuilder->SetInsertPoint(partialGatherBody);
     464        iBuilder->CallPrintInt(functionName + ".remainingLanes", remainingLanes);
    482465        Type * registerType = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
    483466        Value * maskedLanes = iBuilder->CreateSub(iBuilder->getInt32(vectorWidth), remainingLanes);
    484467        maskedLanes = iBuilder->CreateMul(maskedLanes, iBuilder->getInt32(32));
    485468        maskedLanes = iBuilder->CreateZExt(maskedLanes, registerType);
    486         maskedLanes = iBuilder->CreateLShr(Constant::getAllOnesValue(registerType), maskedLanes);       
     469        maskedLanes = iBuilder->CreateLShr(Constant::getAllOnesValue(registerType), maskedLanes);
    487470        maskedLanes = iBuilder->CreateBitCast(maskedLanes, gatherVectorType);
     471
    488472        iBuilder->CreateBr(gatherBody);
    489473
     
    493477        activeLanes->addIncoming(Constant::getAllOnesValue(gatherVectorType), partialGatherCond);
    494478        activeLanes->addIncoming(maskedLanes, partialGatherBody);
    495         iBuilder->CallPrintRegister(" --- activeLanes", activeLanes);
     479
     480        iBuilder->CallPrintRegister(functionName + ".activeLanes", activeLanes);
    496481
    497482        startArray = iBuilder->CreateBitCast(startArray, PointerType::get(gatherVectorType, 0));
    498483        Value * startPos = iBuilder->CreateAlignedLoad(iBuilder->CreateGEP(startArray, gatherIV), 4);
    499484        for (unsigned blockCount = 0; blockCount < minCount; ++blockCount) {
    500 
    501             iBuilder->CallPrintRegister(" --- startPosF" + std::to_string(blockCount), startPos);
    502485            Value * tokenData = generateMaskedGather(base, startPos, activeLanes);
    503486            startPos = iBuilder->CreateAdd(startPos, four);
    504             iBuilder->CallPrintRegister(" --- tokenDataF" + std::to_string(blockCount), tokenData);
    505487            iBuilder->CreateAlignedStore(tokenData, iBuilder->CreateGEP(untransposedBuffer, {iBuilder->getInt32(blockCount), gatherIV}), 4);
    506488        }
     
    510492        for (unsigned blockCount = minCount; blockCount < maxCount; ++blockCount) {
    511493
    512             iBuilder->CallPrintRegister(" --- startPosP" + std::to_string(blockCount), startPos);
    513 
    514494            // if we have not fully gathered the data for this key
    515495            Value * atLeastOneByte = iBuilder->CreateSExt(iBuilder->CreateICmpULT(startPos, endPos), startPos->getType());
    516496            atLeastOneByte = iBuilder->CreateAnd(atLeastOneByte, activeLanes);
    517             iBuilder->CallPrintRegister(" --- atLeastOneByte" + std::to_string(blockCount), atLeastOneByte);
    518497
    519498            // gather it ...
    520499            Value * tokenData = generateMaskedGather(base, startPos, atLeastOneByte);
    521             iBuilder->CallPrintRegister(" --- tokenDataP" + std::to_string(blockCount), tokenData);
    522500            // and compute how much data is remaining.
    523501            Value * remaining = iBuilder->CreateSub(endPos, startPos);
    524502
    525             iBuilder->CallPrintRegister(" --- remaining" + std::to_string(blockCount), remaining);
    526 
    527503            // if this token only has 1 to 3 bytes remaining ...
    528504            Value * atLeastFourBytes = iBuilder->CreateSExt(iBuilder->CreateICmpUGE(remaining, four), remaining->getType());
    529 
    530             iBuilder->CallPrintRegister(" --- atLeastFourBytes" + std::to_string(blockCount), atLeastFourBytes);
    531 
    532505
    533506            // determine how many bits do *not* belong to the token
     
    535508            remaining = iBuilder->CreateShl(remaining, ConstantInt::get(remaining->getType(), 3));
    536509
    537             iBuilder->CallPrintRegister(" --- remaining" + std::to_string(blockCount), remaining);
    538 
    539510            // then mask them out prior to storing the value
    540511            Value * partialTokenMask = iBuilder->CreateLShr(ConstantInt::getAllOnesValue(remaining->getType()), remaining);
    541512            partialTokenMask = iBuilder->CreateOr(partialTokenMask, atLeastFourBytes);
    542513
    543             iBuilder->CallPrintRegister(" --- partialTokenMask" + std::to_string(blockCount), partialTokenMask);
    544 
    545514            tokenData = iBuilder->CreateAnd(partialTokenMask, tokenData);
    546 
    547             iBuilder->CallPrintRegister(" --- tokenDataM" + std::to_string(blockCount), tokenData);
    548 
    549515            Value * untransposedBufferPtr = iBuilder->CreateGEP(untransposedBuffer, {iBuilder->getInt32(blockCount), gatherIV});
    550 
    551             iBuilder->CallPrintInt(" --- untransposedBufferPtr" + std::to_string(blockCount), untransposedBufferPtr);
    552 
    553516            iBuilder->CreateAlignedStore(tokenData, untransposedBufferPtr, 4);
    554517            if (blockCount < (maxCount - 1)) {
     
    597560        Value * offset = iBuilder->CreateShl(transposeIV, ConstantInt::get(transposeIV->getType(), 2));
    598561        transposeIV->addIncoming(iBuilder->CreateAdd(transposeIV, iBuilder->getInt32(1)), transposeBody);
    599         buffer = iBuilder->CreateBitCast(buffer, PointerType::get(resultType, 0));
     562
    600563        for (unsigned i = 0; i < 4; ++i) {
    601564            Value * index = offset;
     
    603566                index = iBuilder->CreateAdd(offset, iBuilder->getInt32(i));
    604567            }
    605             Value * ptr = iBuilder->CreateGEP(buffer, index);
    606             iBuilder->CreateAlignedStore(value[i], ptr, 4);
     568            iBuilder->CallPrintRegister(functionName, value[i]);
     569            iBuilder->CreateAlignedStore(value[i], iBuilder->CreateGEP(buffer, index), 4);
    607570        }
    608         iBuilder->CreateBr(transposeCond);
     571
     572        Value * emptyGatherTest = iBuilder->CreateICmpUGT(remainingLanes, iBuilder->getInt32(0));
     573        iBuilder->CreateCondBr(emptyGatherTest, transposeCond, exit);
    609574
    610575        // EXIT
    611576        iBuilder->SetInsertPoint(exit);
     577
     578        // ... call hashing function ...
     579
     580
    612581        iBuilder->CreateRetVoid();
    613582
     
    637606    const auto bufferSize = ((mLongestLookahead + iBuilder->getBitBlockWidth() - 1) / iBuilder->getBitBlockWidth()) + 1;
    638607
    639     mS2PKernel = new KernelBuilder("s2p", mMod, iBuilder, 1);
    640     mLeadingKernel = new KernelBuilder("leading", mMod, iBuilder, bufferSize);
    641     mSortingKernel = new KernelBuilder("sorting", mMod, iBuilder, bufferSize);
    642     mGatherKernel = new KernelBuilder("gathering", mMod, iBuilder, 1);
    643     mStdOutKernel = new KernelBuilder("stddout", mMod, iBuilder, 1);
     608    mS2PKernel = new KernelBuilder(iBuilder, "s2p", 1);
     609    mLeadingKernel = new KernelBuilder(iBuilder, "leading", bufferSize);
     610    mSortingKernel = new KernelBuilder(iBuilder, "sorting", bufferSize);
     611    mGatherKernel = new KernelBuilder(iBuilder, "gathering", 1);
     612    mStdOutKernel = new KernelBuilder(iBuilder, "stddout", 1);
    644613
    645614    generateS2PKernel(mMod, iBuilder, mS2PKernel);
     
    691660    BasicBlock * finalBodyBlock = BasicBlock::Create(mMod->getContext(),  "finalBody", main, 0);
    692661
    693     BasicBlock * exitBlock = BasicBlock::Create(mMod->getContext(), "exit", main, 0);
     662    BasicBlock * remainingBlock = BasicBlock::Create(mMod->getContext(), "remaining", main, 0);
    694663
    695664    Instance * s2pInstance = mS2PKernel->instantiate(inputStream);
    696     Instance * leadingInstance = mLeadingKernel->instantiate(s2pInstance->getOutputStreamSet());
    697     Instance * sortingInstance = mSortingKernel->instantiate(leadingInstance->getOutputStreamSet());
    698     Instance * gatheringInstance = mGatherKernel->instantiate(sortingInstance->getOutputStreamSet());
    699     Instance * stdOutInstance = mStdOutKernel->instantiate(gatheringInstance->getOutputStreamSet());
     665    Instance * leadingInstance = mLeadingKernel->instantiate(s2pInstance->getResultSet());
     666    Instance * sortingInstance = mSortingKernel->instantiate(leadingInstance->getResultSet());
     667    Instance * gatheringInstance = mGatherKernel->instantiate(sortingInstance->getResultSet());
     668    Instance * stdOutInstance = mStdOutKernel->instantiate(gatheringInstance->getResultSet());
    700669
    701670    gatheringInstance->setInternalState("Base", iBuilder->CreateBitCast(inputStream, iBuilder->getInt8PtrTy()));
     
    711680    Value * safetyCheck = iBuilder->CreateICmpUGE(bufferSize, blockSize);
    712681    if (blockSize == requiredBytes) {
    713         iBuilder->CreateCondBr(safetyCheck, leadingTestBlock, exitBlock); // fix this to be a special case
     682        iBuilder->CreateCondBr(safetyCheck, leadingTestBlock, remainingBlock); // fix this to be a special case
    714683    } else {
    715684        throw std::runtime_error("Not supported yet!");
     
    731700
    732701    iBuilder->SetInsertPoint(leadingBodyBlock);
     702
    733703    s2pInstance->CreateDoBlockCall();
    734704    leadingInstance->CreateDoBlockCall();
     
    745715
    746716    iBuilder->SetInsertPoint(regularBodyBlock);
     717
    747718    s2pInstance->CreateDoBlockCall();
    748719    leadingInstance->CreateDoBlockCall();
     
    777748    remainingFullBlocks->addIncoming(iBuilder->getInt64(leadingBlocks), partialBlock);
    778749    Value * remainingFullBlocksCond = iBuilder->CreateICmpUGT(remainingFullBlocks, ConstantInt::getNullValue(intType));
    779     iBuilder->CreateCondBr(remainingFullBlocksCond, finalBodyBlock, exitBlock);
     750    iBuilder->CreateCondBr(remainingFullBlocksCond, finalBodyBlock, remainingBlock);
    780751
    781752    iBuilder->SetInsertPoint(finalBodyBlock);
     753
    782754    leadingInstance->clearOutputStreamSet();
    783755    sortingInstance->CreateDoBlockCall();
     
    787759
    788760
    789 
    790 
    791761    iBuilder->CreateBr(finalTestBlock);
    792     iBuilder->SetInsertPoint(exitBlock);
     762
     763
     764    // perform a final partial gather on all length groups ...
     765    iBuilder->SetInsertPoint(remainingBlock);
     766
     767    Value * const base = iBuilder->CreateLoad(gatheringInstance->getInternalState("Base"));
     768    Value * positionArray = gatheringInstance->getInternalState("Positions");
     769
     770    for (unsigned i = 0; i < mGatherFunction.size(); ++i) {
     771        BasicBlock * nonEmptyGroup = BasicBlock::Create(mMod->getContext(), "", main, 0);
     772
     773        BasicBlock * nextNonEmptyGroup = BasicBlock::Create(mMod->getContext(), "", main, 0);
     774
     775        ConstantInt * groupIV = iBuilder->getInt32(i);
     776        Value * startIndexPtr = iBuilder->CreateGEP(positionArray, {iBuilder->getInt32(0), groupIV, iBuilder->getInt32(0)}, "startIndexPtr");
     777        Value * startIndex = iBuilder->CreateLoad(startIndexPtr, "remaining");
     778        Value * cond = iBuilder->CreateICmpNE(startIndex, ConstantInt::getNullValue(startIndex->getType()));
     779        iBuilder->CreateCondBr(cond, nonEmptyGroup, nextNonEmptyGroup);
     780
     781        iBuilder->SetInsertPoint(nonEmptyGroup);
     782        Value * startArray = iBuilder->CreateGEP(positionArray, {iBuilder->getInt32(0), groupIV, iBuilder->getInt32(1)}, "startArray");
     783        Value * startArrayPtr = iBuilder->CreatePointerCast(startArray, PointerType::get(iBuilder->getInt32Ty(), 0));
     784        Value * endArray = iBuilder->CreateGEP(positionArray, {iBuilder->getInt32(0), groupIV, iBuilder->getInt32(3)}, "endArray");
     785        Value * endArrayPtr = iBuilder->CreatePointerCast(endArray, PointerType::get(iBuilder->getInt32Ty(), 0));
     786        Value * outputBuffer = iBuilder->CreatePointerCast(gatheringInstance->getOutputStream(groupIV), iBuilder->getInt8PtrTy());
     787        iBuilder->CreateCall5(mGatherFunction.at(i), base, startArrayPtr, endArrayPtr, startIndex, outputBuffer);
     788        iBuilder->CreateBr(nextNonEmptyGroup);
     789
     790        iBuilder->SetInsertPoint(nextNonEmptyGroup);
     791    }
    793792    iBuilder->CreateRetVoid();
    794793
  • icGREP/icgrep-devel/icgrep/kernels/symboltablepipeline.h

    r4995 r5000  
    3333
    3434    void generateGatherKernel(KernelBuilder * kBuilder, const std::vector<unsigned> & endpoints, const unsigned scanWordBitWidth = 64);
    35     Function * generateGatherFunction(Type * const transposedVectorType, const unsigned minCount, const unsigned maxCount);
     35    Function * generateGatherFunction(const unsigned minKeyLength, const unsigned maxKeyLength, Type * const resultType);
    3636
    3737    Value * generateMaskedGather(Value * const base, Value * const vindex, Value * const mask);
     
    4747
    4848    unsigned                            mLongestLookahead;
     49
     50    std::vector<Function *>             mGatherFunction;
     51
    4952    llvm::Type *                        mBitBlockType;
    5053    int                                 mBlockSize;
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r4995 r5000  
    8484    #endif
    8585 
    86     Examine(*function);
     86    Examine(function);
    8787
    8888    mCarryManager = new CarryManager(iBuilder);
     
    114114    mCarryManager->initialize(function, mKernelBuilder);
    115115
    116     mFunction = mKernelBuilder->prepareFunction();
     116    mFunction = mKernelBuilder->prepareFunction({mInputStreamOffset.begin(), mInputStreamOffset.end()});
    117117
    118118    mCarryManager->reset();
     
    120120    for (unsigned j = 0; j < function->getNumOfParameters(); ++j) {
    121121        Value * inputVal = mKernelBuilder->getInputStream(j);
     122        const Var * const var = function->getParameter(j);
    122123        if (DumpTrace) {
    123             iBuilder->CallPrintRegister("param" + std::to_string(j + 1), iBuilder->CreateBlockAlignedLoad(inputVal));
    124         }
    125         mMarkerMap.insert(std::make_pair(function->getParameter(j), inputVal));
     124            iBuilder->CallPrintRegister(var->getName()->to_string(), iBuilder->CreateBlockAlignedLoad(inputVal));
     125        }
     126        mMarkerMap.insert(std::make_pair(var, inputVal));
    126127    }
    127128
     
    139140}
    140141
    141 inline void PabloCompiler::Examine(PabloFunction & function) {
     142inline void PabloCompiler::Examine(const PabloFunction * const function) {
    142143    mWhileDepth = 0;
    143144    mIfDepth = 0;
    144145    mMaxWhileDepth = 0;
    145     Examine(function.getEntryBlock());
    146 }
    147 
    148 void PabloCompiler::Examine(PabloBlock * block) {
    149     for (Statement * stmt : *block) {
    150         if (LLVM_UNLIKELY(isa<If>(stmt))) {
    151             Examine(cast<If>(stmt)->getBody());
    152         } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
    153             mMaxWhileDepth = std::max(mMaxWhileDepth, ++mWhileDepth);
    154             Examine(cast<While>(stmt)->getBody());
    155             --mWhileDepth;
    156         }
     146    LookaheadOffsetMap offsetMap;
     147    Examine(function->getEntryBlock(), offsetMap);
     148    mInputStreamOffset.clear();
     149    for (const auto & oi : offsetMap) {
     150        for (const auto offset : oi.second) {
     151            mInputStreamOffset.insert(offset / iBuilder->getBitBlockWidth());
     152        }
     153    }
     154}
     155
     156void PabloCompiler::Examine(const PabloBlock * const block, LookaheadOffsetMap & offsetMap) {
     157    for (const Statement * stmt : *block) {
     158         boost::container::flat_set<unsigned> offsets;
     159        if (LLVM_UNLIKELY(isa<Lookahead>(stmt))) {
     160            const Lookahead * const la = cast<Lookahead>(stmt);
     161            assert (isa<Var>(la->getExpr()));
     162            offsets.insert(la->getAmount());
     163            offsets.insert(la->getAmount() + iBuilder->getBitBlockWidth() - 1);
     164        } else {
     165            for (unsigned i = 0; i < stmt->getNumOperands(); ++i) {
     166                const PabloAST * expr = stmt->getOperand(i);
     167                if (isa<Var>(expr)) {
     168                    offsets.insert(0);
     169                } else if (LLVM_LIKELY(isa<Statement>(expr) && !isa<Assign>(expr) && !isa<Next>(expr))) {
     170                    const auto f = offsetMap.find(expr);
     171                    assert (f != offsetMap.end());
     172                    const auto & o = f->second;
     173                    offsets.insert(o.begin(), o.end());
     174                }
     175            }
     176            if (LLVM_UNLIKELY(isa<If>(stmt))) {
     177                Examine(cast<If>(stmt)->getBody(), offsetMap);
     178            } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
     179                mMaxWhileDepth = std::max(mMaxWhileDepth, ++mWhileDepth);
     180                Examine(cast<While>(stmt)->getBody(), offsetMap);
     181                --mWhileDepth;
     182            }
     183        }
     184        offsetMap.emplace(stmt, offsets);
    157185    }
    158186}
     
    384412            throw std::runtime_error("Lookahead has an illegal Var operand");
    385413        }
    386         const unsigned offset = l->getAmount() / iBuilder->getBitBlockWidth();
     414        const unsigned offset0 = (l->getAmount() / iBuilder->getBitBlockWidth());
     415        const unsigned offset1 = ((l->getAmount() + iBuilder->getBitBlockWidth() - 1) / iBuilder->getBitBlockWidth());
    387416        const unsigned shift = (l->getAmount() % iBuilder->getBitBlockWidth());
    388         Value * const v0 = iBuilder->CreateBlockAlignedLoad(mKernelBuilder->getInputStream(index, offset));
    389         Value * const v1 = iBuilder->CreateBlockAlignedLoad(mKernelBuilder->getInputStream(index, offset + 1));
     417        Value * const v0 = iBuilder->CreateBlockAlignedLoad(mKernelBuilder->getInputStream(index, offset0));
     418        Value * const v1 = iBuilder->CreateBlockAlignedLoad(mKernelBuilder->getInputStream(index, offset1));
    390419        if (LLVM_UNLIKELY((shift % 8) == 0)) { // Use a single whole-byte shift, if possible.
    391420            expr = iBuilder->mvmd_dslli(8, v1, v0, (shift / 8));
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.h

    r4974 r5000  
    1717#include <IDISA/idisa_builder.h>
    1818#include <kernels/kernel.h>
     19#include <boost/container/flat_set.hpp>
    1920
    2021namespace llvm {
     
    2425    class VectorType;
    2526    class PointerType;
    26     class ConstantAggregateZero;
    2727    class Constant;
    2828    class FunctionType;
     
    4444
    4545class PabloCompiler {
    46 
     46    using IntSet = boost::container::flat_set<unsigned>;
    4747    using MarkerMap = std::unordered_map<const PabloAST *, Value *>;
    48 
     48    using LookaheadOffsetMap = std::unordered_map<const PabloAST *, IntSet>;
    4949public:
    5050    PabloCompiler(Module * m, IDISA::IDISA_Builder * b);
     
    5555private:
    5656
    57     void Examine(PabloFunction & function);
    58     void Examine(PabloBlock * block);
     57    void Examine(const PabloFunction * const function);
     58    void Examine(const PabloBlock * const block, LookaheadOffsetMap & offsetMap);
    5959
    6060    void compileBlock(const PabloBlock * const block);
     
    6666
    6767    MarkerMap                           mMarkerMap;
    68 
     68    IntSet                              mInputStreamOffset;
    6969    Module *                            mMod;
    7070    IDISA::IDISA_Builder *              iBuilder;
Note: See TracChangeset for help on using the changeset viewer.