Ignore:
Timestamp:
Jun 19, 2016, 3:00:47 PM (3 years ago)
Author:
cameron
Message:

New kernel infrastructure

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
2 deleted
9 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5060 r5063  
    1414
    1515void KernelInterface::addKernelDeclarations(Module * client) {
    16     errs() << "KernelInterface::addKernelDeclarations\n";
    1716    Module * saveModule = iBuilder->getModule();
    1817    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5051 r5063  
    55
    66#include "kernel.h"
    7 #include <pablo/function.h>
    8 #include <IDISA/idisa_builder.h>
    9 #include <kernels/instance.h>
    10 #include <tuple>
    11 #include <boost/functional/hash_fwd.hpp>
    12 #include <unordered_map>
     7#include <llvm/IR/Module.h>
     8#include <llvm/IR/Type.h>
     9#include <llvm/IR/Value.h>
     10#include <llvm/Support/raw_ostream.h>
    1311
    1412using namespace llvm;
    15 using namespace pablo;
     13using namespace kernel;
    1614
    17 namespace kernel {
    18 
    19 // sets name & sets internal state to the kernel superclass state
    20 KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder, std::string && name, const unsigned defaultBufferSize)
    21 : iBuilder(builder)
    22 , mKernelName(name)
    23 , mDefaultBufferSize(defaultBufferSize)
    24 , mBitBlockType(builder->getBitBlockType())
    25 , mBlockNoIndex(0)
    26 , mKernelStateType(nullptr) {
    27     assert (mDefaultBufferSize > 0);
    28 }
    29 
    30 /** ------------------------------------------------------------------------------------------------------------- *
    31  * @brief addInternalState
    32  ** ------------------------------------------------------------------------------------------------------------- */
    33 unsigned KernelBuilder::addInternalState(Type * const type) {
    34     assert (type);
    35     const unsigned index = mInternalState.size();
    36     mInternalState.push_back(type);
    37     return index;
    38 }
    39 
    40     unsigned KernelBuilder::addInternalState(llvm::Type * const type, std::string name) {
    41         if (LLVM_UNLIKELY(mInternalStateNameMap.count(name) != 0)) {
    42             throw std::runtime_error("Kernel already contains internal state '" + name + "'");
    43         }
    44         const unsigned index = addInternalState(type);
    45         mInternalStateNameMap.emplace(name, iBuilder->getInt32(index));
    46         return index;
     15KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
     16                                 std::string kernelName,
     17                                 std::vector<StreamSetBinding> stream_inputs,
     18                                 std::vector<StreamSetBinding> stream_outputs,
     19                                 std::vector<ScalarBinding> scalar_parameters,
     20                                 std::vector<ScalarBinding> scalar_outputs,
     21                                 std::vector<ScalarBinding> internal_scalars) :
     22    KernelInterface(builder, kernelName, stream_inputs, stream_outputs, scalar_parameters, scalar_outputs, internal_scalars) {
     23   
     24    for (auto binding : scalar_parameters) {
     25        addScalar(binding.scalarType, binding.scalarName);
    4726    }
    48    
    49 /** ------------------------------------------------------------------------------------------------------------- *
    50  * @brief getInternalState
    51  ** ------------------------------------------------------------------------------------------------------------- */
    52 Value * KernelBuilder::getInternalStateInternal(Value * const kernelState, const std::string & name) {
    53     const auto f = mInternalStateNameMap.find(name);
    54     if (LLVM_UNLIKELY(f == mInternalStateNameMap.end())) {
    55         throw std::runtime_error("Kernel does not contain internal state " + name);
     27    for (auto binding : scalar_outputs) {
     28        addScalar(binding.scalarType, binding.scalarName);
    5629    }
    57     return getInternalStateInternal(kernelState, f->second);
    58 }
    59 
    60 Value * KernelBuilder::getInternalStateInternal(Value * const kernelState, disable_implicit_conversion<Value *> index) {
    61     assert (index->getType()->isIntegerTy());
    62     assert (kernelState->getType()->getPointerElementType() == mKernelStateType);
    63     return iBuilder->CreateGEP(kernelState, {iBuilder->getInt32(0), index});
    64 }
    65 
    66 /** ------------------------------------------------------------------------------------------------------------- *
    67  * @brief setInternalState
    68  ** ------------------------------------------------------------------------------------------------------------- */
    69 void KernelBuilder::setInternalStateInternal(Value * const kernelState, const std::string & name, Value * const value) {
    70     Value * ptr = getInternalStateInternal(kernelState, name);
    71     assert (ptr->getType()->getPointerElementType() == value->getType());
    72     if (value->getType() == iBuilder->getBitBlockType()) {
    73         iBuilder->CreateBlockAlignedStore(value, ptr);
    74     } else {
    75         iBuilder->CreateStore(value, ptr);
     30    for (auto binding : internal_scalars) {
     31        addScalar(binding.scalarType, binding.scalarName);
    7632    }
    7733}
    7834
    79 void KernelBuilder::setInternalStateInternal(Value * const kernelState, disable_implicit_conversion<Value *> index, Value * const value) {
    80     Value * ptr = getInternalStateInternal(kernelState, index);
    81     assert (ptr->getType()->getPointerElementType() == value->getType());
    82     if (value->getType() == iBuilder->getBitBlockType()) {
    83         iBuilder->CreateBlockAlignedStore(value, ptr);
    84     } else {
    85         iBuilder->CreateStore(value, ptr);
     // Append a scalar field of type `t` to the kernel state layout.
     // The new field's position (the size of mKernelFields before the push) is
     // recorded in mInternalStateNameMap under `scalarName` as an i32 constant
     // obtained from iBuilder.
     // Throws std::runtime_error if mKernelStateType is already non-null, i.e.
     // the kernel state struct type has been created.
     // NOTE(review): duplicate names are not rejected here. If emplace() leaves
     // an existing entry untouched, the type is still pushed onto mKernelFields
     // and the new field has no name mapping — confirm whether this is intended.
     35void KernelBuilder::addScalar(Type * t, std::string scalarName) {
     36    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
     37        throw std::runtime_error("Illegal addition of kernel field after kernel state finalized: " + scalarName);
    8638    }
     39    unsigned index = mKernelFields.size();
     40    mKernelFields.push_back(t);
     41    mInternalStateNameMap.emplace(scalarName, iBuilder->getInt32(index));
    8742}
    8843
    89 /** ------------------------------------------------------------------------------------------------------------- *
    90  * @brief addInputStream
    91  ** ------------------------------------------------------------------------------------------------------------- */
    92 void KernelBuilder::addInputStream(const unsigned fields, std::string && name) {
    93     assert (fields > 0 && !name.empty());
    94     mInputStreamName.push_back(name);
    95     if (fields == 1) {
    96         mInputStream.push_back(mBitBlockType);
    97     } else {
    98         mInputStream.push_back(ArrayType::get(mBitBlockType, fields));
    99     }
     // Create the kernel state struct type, named mKernelName, from the field
     // types accumulated in mKernelFields, and store it in mKernelStateType.
     // After this call mKernelStateType is expected to be non-null, so addScalar
     // will refuse further fields.
     44void KernelBuilder::finalizeKernelStateType() {
     45    mKernelStateType = StructType::create(getGlobalContext(), mKernelFields, mKernelName);
    10046}
    10147
    102 void KernelBuilder::addInputStream(const unsigned fields) {
    103     addInputStream(fields, mKernelName + "_InputStream_" + std::to_string(mInputStream.size()));
     // Build a fresh Module named mKernelName and generate this kernel into it.
     // The kernel state type is finalized first if that has not happened yet.
     // iBuilder is pointed at the new module while generateKernel() runs; its
     // previous module and insert point are saved on entry and restored before
     // returning. Ownership of the new module is returned to the caller.
     // NOTE(review): the restore is not exception-safe. generateKernel() can
     // throw (via getScalarIndex), which would leave iBuilder pointing at a
     // module that is destroyed during unwinding — confirm callers tolerate this.
     48std::unique_ptr<Module> KernelBuilder::createKernelModule() {
     49    Module * saveModule = iBuilder->getModule();
     50    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     51    if (mKernelStateType == nullptr) finalizeKernelStateType();
     52    std::unique_ptr<Module> theModule = make_unique<Module>(mKernelName, getGlobalContext());
     53    Module * m = theModule.get();
     54    iBuilder->setModule(m);
     55    generateKernel();
     56    iBuilder->setModule(saveModule);
     57    iBuilder->restoreIP(savePoint);
     58    return theModule;
    10459}
    10560
    106 /** ------------------------------------------------------------------------------------------------------------- *
    107  * @brief getInputStream
    108  ** ------------------------------------------------------------------------------------------------------------- */
    109 Value * KernelBuilder::getInputStreamInternal(Value * const inputStreamSet, disable_implicit_conversion<Value *> index) {
    110     assert ("Parameters cannot be null!" && (inputStreamSet != nullptr && index != nullptr));
    111     assert ("Stream index must be an integer!" && index->getType()->isIntegerTy());
    112     assert ("Illegal input stream set provided!" && inputStreamSet->getType()->getPointerElementType() == mInputStreamType);
    113     if (LLVM_LIKELY(isa<ConstantInt>(index.get()) || getInputStreamType()->isArrayTy())) {
    114         return iBuilder->CreateGEP(inputStreamSet, { iBuilder->getInt32(0), index });
     // Generate the kernel into iBuilder's current module.
     //  1. Calls addKernelDeclarations() on that module.
     //  2. For every binding in mScalarOutputs, fills in the function named
     //     mKernelName + accumulator_infix + scalarName: it loads the field of
     //     that name from the state pointed to by its first argument and returns
     //     the loaded value.
     //  3. Fills in the function named mKernelName + init_suffix: it stores an
     //     all-zero mKernelStateType value through its first argument, then
     //     stores each following argument, in order, into the field named by the
     //     corresponding mScalarInputs binding, and returns void.
     // The builder's insert point is saved on entry and restored at the end.
     // NOTE(review): the results of m->getFunction() are used without a null
     // check; presumably addKernelDeclarations() declares these functions —
     // confirm. getScalarIndex() throws std::runtime_error for an unknown name,
     // in which case the insert point is not restored.
     61void KernelBuilder::generateKernel() {
     62    Module * m = iBuilder->getModule();
     63    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     64    addKernelDeclarations(m);
     65    // Implement the accumulator get functions
     66    for (auto binding : mScalarOutputs) {
     67        auto fnName = mKernelName + accumulator_infix + binding.scalarName;
     68        Function * accumFn = m->getFunction(fnName);
     69        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.scalarName, accumFn, 0));
     70        Value * self = &*(accumFn->arg_begin());
     71        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.scalarName)});
     72        Value * retVal = iBuilder->CreateLoad(ptr);
     73        iBuilder->CreateRet(retVal);
    11574    }
    116     throw std::runtime_error("Cannot access the input stream with a non-constant value unless all input stream types are identical!");
     75    // Implement the initializer function
     76    Function * initFunction = m->getFunction(mKernelName + init_suffix);
     77    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "Init_entry", initFunction, 0));
     78   
     79    Function::arg_iterator args = initFunction->arg_begin();
     80    Value * self = &*(args++);
     81    iBuilder->CreateStore(Constant::getNullValue(mKernelStateType), self);
     82    for (auto binding : mScalarInputs) {
     83        Value * parm = &*(args++);
     84        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.scalarName)});
     85        iBuilder->CreateStore(parm, ptr);
     86    }
     87    iBuilder->CreateRetVoid();
     88    iBuilder->restoreIP(savePoint);
     89}
     90
     // Give the kernel's FinalBlock function (mKernelName + finalBlock_suffix in
     // module `m`) a body that simply forwards to its DoBlock function
     // (mKernelName + doBlock_suffix) and returns void.
     // The call passes FinalBlock's first argument (self) followed by every
     // argument after the second one; the second argument ("remaining") is
     // dropped. iBuilder's module and insert point are saved on entry and
     // restored before returning.
     // NOTE(review): both m->getFunction() results are used without a null
     // check; presumably the functions were declared in `m` beforehand — confirm.
     91void KernelBuilder::addTrivialFinalBlockMethod(Module * m) {
     92    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     93    Module * saveModule = iBuilder->getModule();
     94    iBuilder->setModule(m);
     95    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     96    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
     97    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
     98    // Final Block arguments: self, remaining, then the standard DoBlock args.
     99    Function::arg_iterator args = finalBlockFunction->arg_begin();
     100    Value * self = &*(args++);
     101    /* Skip "remaining" arg */ args++;
     102    std::vector<Value *> doBlockArgs = {self};
     103    while (args != finalBlockFunction->arg_end()){
     104        doBlockArgs.push_back(&*args++);
     105    }
     106    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
     107    iBuilder->CreateRetVoid();
     108    iBuilder->setModule(saveModule);
     109    iBuilder->restoreIP(savePoint);
     110}
     111
     // Look up the value stored in mInternalStateNameMap for `fieldName`
     // (addScalar stores the field's index there as an i32 constant).
     // Throws std::runtime_error if no entry with that name exists.
     112Value * KernelBuilder::getScalarIndex(std::string fieldName) {
     113    const auto f = mInternalStateNameMap.find(fieldName);
     114    if (LLVM_UNLIKELY(f == mInternalStateNameMap.end())) {
     115        throw std::runtime_error("Kernel does not contain internal state: " + fieldName);
     116    }
     117    return f->second;
     118}
     119
     // Emit, at iBuilder's current insert point, a GEP to the field named
     // `fieldName` within the object `self` points to, followed by a load of
     // that field; returns the loaded value.
     // `self` is presumably a pointer to the kernel state struct — verify against
     // callers. Propagates std::runtime_error from getScalarIndex() when the name
     // is unknown.
     120Value * KernelBuilder::getScalarField(Value * self, std::string fieldName) {
     121    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(fieldName)});
     122    return iBuilder->CreateLoad(ptr);
     123}
     124
     // Emit, at iBuilder's current insert point, a GEP to the field named
     // `fieldName` within the object `self` points to, followed by a store of
     // `newFieldVal` into that field.
     // `self` is presumably a pointer to the kernel state struct — verify against
     // callers. Propagates std::runtime_error from getScalarIndex() when the name
     // is unknown.
     125void KernelBuilder::setScalarField(Value * self, std::string fieldName, Value * newFieldVal) {
     126    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(fieldName)});
     127    iBuilder->CreateStore(newFieldVal, ptr);
    117128}
    118129
    119130
    120 /** ------------------------------------------------------------------------------------------------------------- *
    121  * @brief addOutputStream
    122  ** ------------------------------------------------------------------------------------------------------------- */
    123 unsigned KernelBuilder::addOutputStream(const unsigned fields) {
    124     assert (fields > 0);
    125     const unsigned index = mOutputStream.size();
    126     mOutputStream.push_back((fields == 1) ? mBitBlockType : ArrayType::get(mBitBlockType, fields));
    127     return index;
    128 }
    129 
    130 /** ------------------------------------------------------------------------------------------------------------- *
    131  * @brief getOutputStream
    132  ** ------------------------------------------------------------------------------------------------------------- */
    133 Value * KernelBuilder::getOutputStreamInternal(Value * const outputStreamSet, disable_implicit_conversion<Value *> index) {
    134     assert ("Parameters cannot be null!" && (outputStreamSet != nullptr && index != nullptr));
    135     assert ("Stream index must be an integer!" && index->getType()->isIntegerTy());
    136     assert ("Illegal output stream set provided!" && outputStreamSet->getType()->getPointerElementType() == getOutputStreamType());
    137     if (LLVM_LIKELY(isa<ConstantInt>(index.get()) || getOutputStreamType()->isArrayTy())) {
    138         return iBuilder->CreateGEP(outputStreamSet, { iBuilder->getInt32(0), index });
     131Value * KernelBuilder::getParameter(Function * f, std::string paramName) {
     132    for (Function::arg_iterator argIter = f->arg_begin(), end = f->arg_end(); argIter != end; argIter++) {
     133        Value * arg = &*argIter;
     134        if (arg->getName() == paramName) return arg;
    139135    }
    140     throw std::runtime_error("Cannot access the output stream with a non-constant value unless all output stream types are identical!");
    141 }
    142 
    143 /** ------------------------------------------------------------------------------------------------------------- *
    144  * @brief packDataTypes
    145  ** ------------------------------------------------------------------------------------------------------------- */
    146 Type * KernelBuilder::packDataTypes(const std::vector<llvm::Type *> & types) {
    147     if (types.empty()) {
    148         return nullptr;
    149     }
    150     for (Type * type : types) {
    151         if (type != types.front()) { // use canLosslesslyBitcastInto ?
    152             return StructType::get(iBuilder->getContext(), types);
    153         }
    154     }
    155     return ArrayType::get(types.front(), types.size());
    156 }
    157 
    158 /** ------------------------------------------------------------------------------------------------------------- *
    159  * @brief prepareFunction
    160  ** ------------------------------------------------------------------------------------------------------------- */
    161 Function * KernelBuilder::prepareFunction(std::vector<unsigned> && inputStreamOffsets) {
    162 
    163     mBlockNoIndex = iBuilder->getInt32(addInternalState(iBuilder->getInt64Ty(), "BlockNo"));
    164 
    165     if (!mKernelStateType) {
    166         mKernelStateType = StructType::create(iBuilder->getContext(), mInternalState, mKernelName);
    167     }
    168     mInputStreamType = packDataTypes(mInputStream);
    169     mOutputStreamType = packDataTypes(mOutputStream);
    170     mInputStreamOffsets = inputStreamOffsets;
    171 
    172     std::vector<Type *> params;
    173     params.push_back(mKernelStateType->getPointerTo());
    174     if (mInputStreamType) {
    175         for (unsigned i = 0; i < mInputStreamOffsets.size(); ++i) {
    176             params.push_back(mInputStreamType->getPointerTo());
    177         }
    178     }
    179     if (mOutputStreamType) {
    180         params.push_back(mOutputStreamType->getPointerTo());
    181     }
    182 
    183     // A pointer value is captured if the function makes a copy of any part of the pointer that outlives
    184     // the call (e.g., stored in a global or, depending on the context, when returned by the function.)
    185     // Since this does not occur in either our DoBlock or Constructor, all parameters are marked nocapture.
    186 
    187     FunctionType * const functionType = FunctionType::get(iBuilder->getVoidTy(), params, false);
    188     mDoBlock = Function::Create(functionType, GlobalValue::ExternalLinkage, mKernelName + "_DoBlock", iBuilder->getModule());
    189     mDoBlock->setCallingConv(CallingConv::C);
    190     for (unsigned i = 1; i <= params.size(); ++i) {
    191         mDoBlock->setDoesNotCapture(i);
    192     }
    193     mDoBlock->setDoesNotThrow();
    194     Function::arg_iterator args = mDoBlock->arg_begin();
    195     mKernelStateParam = &*(args++);
    196     mKernelStateParam->setName("this");
    197     if (mInputStreamType) {
    198         for (const unsigned offset : mInputStreamOffsets) {
    199             Value * const inputStreamSet = &*(args++);
    200             inputStreamSet->setName("inputStreamSet" + std::to_string(offset));
    201             mInputStreamParam.emplace(offset, inputStreamSet);
    202         }
    203     }
    204     if (mOutputStreamType) {
    205         mOutputStreamParam = &*args;
    206         mOutputStreamParam->setName("outputStreamSet");
    207     }
    208     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", mDoBlock, 0));
    209     return mDoBlock;
    210 }
    211 
    212 void KernelBuilder::setInstanceParameters(std::vector<ParameterBinding> parms) {
    213     mInstanceParameters = parms;
    214     mInstanceParametersOffset = mInternalState.size();
    215     for (auto binding : mInstanceParameters) {
    216         addInternalState(binding.parameterType, binding.parameterName);
    217     }
    218 }
    219 
    220 
    221 Function *  KernelBuilder::createInitMethod() {
    222     if (!mKernelStateType) {
    223         mKernelStateType = StructType::create(iBuilder->getContext(), mInternalState, mKernelName);
    224     }
    225     std::vector<Type *> initParameters = {PointerType::getUnqual(mKernelStateType)};
    226     for (auto binding : mInstanceParameters) {
    227         initParameters.push_back(binding.parameterType);
    228     }
    229     FunctionType * mInitFunctionType = FunctionType::get(iBuilder->getVoidTy(), initParameters, false);
    230     Function * mInitFunction = Function::Create(mInitFunctionType, GlobalValue::ExternalLinkage, mKernelName + "_Init", iBuilder->getModule());
    231     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "Init_entry", mInitFunction, 0));
    232 
    233     Function::arg_iterator args = mInitFunction->arg_begin();
    234     Value * self = &*(args++);
    235     self->setName("self");
    236     for (auto binding : mInstanceParameters) {
    237         Value * parm = &*(args++);
    238         parm->setName(binding.parameterName);
    239     }
    240 
    241     iBuilder->CreateStore(Constant::getNullValue(mKernelStateType), self);
    242     args = mInitFunction->arg_begin();
    243     args++;   // skip self argument.
    244     for (auto binding : mInstanceParameters) {
    245         Value * parm = &*(args++);
    246         setInternalStateInternal(self, binding.parameterName, parm);
    247     }
    248     iBuilder->CreateRetVoid();
    249     return mInitFunction;
     136    throw std::runtime_error("Method does not have parameter: " + paramName);
    250137}
    251138
    252139
    253140
    254 
    255 
    256 
    257 /** ------------------------------------------------------------------------------------------------------------- *
    258  * @brief finalize
    259  ** ------------------------------------------------------------------------------------------------------------- */
    260 void KernelBuilder::finalize() {
    261     // Finish the actual function
    262     Value * blockNo = getBlockNo();
    263     Value * value = iBuilder->CreateLoad(blockNo);
    264     value = iBuilder->CreateAdd(value, ConstantInt::get(value->getType(), 1));
    265     iBuilder->CreateStore(value, blockNo);
    266     iBuilder->CreateRetVoid();
    267 
    268     mKernelStateParam = nullptr;
    269     mInputStreamParam.clear();
    270     mOutputStreamParam = nullptr;
    271     iBuilder->ClearInsertionPoint();
    272 }
    273 
    274 /** ------------------------------------------------------------------------------------------------------------- *
    275  * @brief instantiate
    276  *
    277  * Allocate and zero initialize the memory for this kernel and its output scalars and streams
    278  ** ------------------------------------------------------------------------------------------------------------- */
    279 Instance * KernelBuilder::instantiate(std::pair<Value *, unsigned> && inputStreamSet, const unsigned outputBufferSize) {
    280     AllocaInst * const kernelState = iBuilder->CreateAlloca(mKernelStateType);
    281     iBuilder->CreateStore(Constant::getNullValue(mKernelStateType), kernelState);
    282     AllocaInst * outputStreamSets = nullptr;
    283     if (mOutputStreamType) {
    284         outputStreamSets = iBuilder->CreateAlloca(mOutputStreamType, iBuilder->getInt32(outputBufferSize));
    285     }
    286     return new Instance(this, kernelState, std::get<0>(inputStreamSet), std::get<1>(inputStreamSet), outputStreamSets, outputBufferSize);
    287 }
    288 
    289 /** ------------------------------------------------------------------------------------------------------------- *
    290  * @brief instantiate
    291  *
    292  * Generate a new instance of this kernel and call the default constructor to initialize it
    293  ** ------------------------------------------------------------------------------------------------------------- */
    294 Instance * KernelBuilder::instantiate(std::initializer_list<llvm::Value *> inputStreams) {   
    295     AllocaInst * inputStruct = iBuilder->CreateAlloca(mInputStreamType);
    296     unsigned i = 0;
    297     for (Value * inputStream : inputStreams) {
    298         Value * ptr = iBuilder->CreateGEP(inputStruct, { iBuilder->getInt32(0), iBuilder->getInt32(i++)});
    299         iBuilder->CreateStore(inputStream, ptr);
    300     }
    301     return instantiate(std::make_pair(inputStruct, 0));
    302 }
    303 
    304 Value * KernelBuilder::getInputStreamParam(const unsigned streamOffset) const {
    305     const auto f = mInputStreamParam.find(streamOffset);
    306     if (LLVM_UNLIKELY(f == mInputStreamParam.end())) {
    307         throw std::runtime_error("Kernel compilation error: No input stream parameter for stream offset " + std::to_string(streamOffset));
    308     }
    309     return f->second;
    310 }
    311    
    312 llvm::Value * make_New(IDISA::IDISA_Builder * iBuilder, std::string kernel_name, std::vector<Value *> args) {
    313     Module * m = iBuilder->getModule();
    314     Type * kernelType = m->getTypeByName(kernel_name);
    315     if (!kernelType) {
    316         throw std::runtime_error("Cannot find kernel type " + kernel_name);
    317     }
    318     Value * kernelInstance = iBuilder->CreateAlloca(kernelType);
    319     std::vector<Value *> init_args = {kernelInstance};
    320     for (auto a : args) {
    321         init_args.push_back(a);
    322     }
    323     //iBuilder->CreateStore(Constant::getNullValue(kernelType), kernelInstance);
    324     Function * initMethod = m->getFunction(kernel_name + "_Init");
    325     if (!initMethod) {
    326         //throw std::runtime_error("Cannot find " + kernel_name + "_Init");
    327         iBuilder->CreateStore(Constant::getNullValue(kernelType), kernelInstance);
    328         return kernelInstance;
    329     }
    330     iBuilder->CreateCall(initMethod, init_args);
    331     return kernelInstance;
    332 }
    333     llvm::Value * make_DoBlock_Call(IDISA::IDISA_Builder * iBuilder, std::string kernel_name, std::vector<Value *> args) {
    334         Module * m = iBuilder->getModule();
    335         Function * doBlockMethod = m->getFunction(kernel_name + "_DoBlock");
    336         if (!doBlockMethod) {
    337             throw std::runtime_error("Cannot find " + kernel_name + "_DoBlock");
    338         }
    339         return iBuilder->CreateCall(doBlockMethod, args);
    340     }
    341    
    342     llvm::Value * make_FinalBlock_Call(IDISA::IDISA_Builder * iBuilder, std::string kernel_name, std::vector<Value *> args) {
    343         Module * m = iBuilder->getModule();
    344         Function * finalBlockMethod = m->getFunction(kernel_name + "_FinalBlock");
    345         if (!finalBlockMethod) {
    346             throw std::runtime_error("Cannot find " + kernel_name + "_FinalBlock");
    347         }
    348         return iBuilder->CreateCall(finalBlockMethod, args);
    349     }
    350    
    351    
    352 
    353 } // end of namespace kernel
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5051 r5063  
    1 #ifndef KERNEL_H
    2 #define KERNEL_H
    31/*
    42 *  Copyright (c) 2016 International Characters.
     
    64 */
    75
    8 #include <string>
     6#ifndef KERNEL_BUILDER_H
     7#define KERNEL_BUILDER_H
     8
     9
     10#include "interface.h"
    911#include <vector>
     12#include <llvm/IR/Type.h>
     13#include <IDISA/idisa_builder.h>
    1014#include <boost/container/flat_map.hpp>
    11 #include <IDISA/idisa_builder.h>
    1215
    13 namespace llvm {
    14     class Value;
    15     class Module;
    16     class ExecutionEngine;
    17     class VectorType;
    18     class PointerType;
    19     class Constant;
    20     class FunctionType;
    21     class Function;
    22     class BasicBlock;
    23     class Type;
    24 }
    25 
    26 namespace pablo {
    27     class PabloAST;
    28     class PabloFunction;
    29 }
    30 
    31 template<typename T>
    32 struct disable_implicit_conversion {
    33     inline disable_implicit_conversion(T const value) : _value(value) { assert(_value); }
    34     inline disable_implicit_conversion(std::nullptr_t) = delete;
    35     inline disable_implicit_conversion(unsigned) = delete;
    36     operator T() const { return _value; }
    37     T operator-> () const { return _value; }
    38     T get() const { return _value; }
    39 private:
    40     T const  _value;
    41 };
    4216
    4317namespace kernel {
    44     struct ParameterBinding {
    45         llvm::Type * parameterType;
    46         std::string parameterName;
    47     };
    4818   
     19class KernelBuilder : public KernelInterface {
     20    using NameMap = boost::container::flat_map<std::string, llvm::ConstantInt *>;
     21
     22public:
     23    KernelBuilder(IDISA::IDISA_Builder * builder,
     24                    std::string kernelName,
     25                    std::vector<StreamSetBinding> stream_inputs,
     26                    std::vector<StreamSetBinding> stream_outputs,
     27                    std::vector<ScalarBinding> scalar_parameters,
     28                    std::vector<ScalarBinding> scalar_outputs,
     29                    std::vector<ScalarBinding> internal_scalars);
    4930   
     31    // Add an additional scalar field to the KernelState struct.
     32    // Must occur before any call to addKernelDeclarations or createKernelModule.
     33    void addScalar(llvm::Type * t, std::string scalarName);
    5034   
    51 class Instance;
    52 
    53 class KernelBuilder {
    54     friend class Instance;
    55     friend llvm::Function * generateScanWordRoutine(llvm::Module *, IDISA::IDISA_Builder *, unsigned, KernelBuilder *, bool);
    56     using InputStreamMap = boost::container::flat_map<unsigned, llvm::Value *>;
    57     using NameMap = boost::container::flat_map<std::string, llvm::ConstantInt *>;
    58 public:
    59 
    60     KernelBuilder(IDISA::IDISA_Builder * builder, std::string && name, const unsigned defaultBufferSize);
    61 
     35    void finalizeKernelStateType();
    6236   
    63     void setInstanceParameters(std::vector<ParameterBinding> binding);
    64 
    65     unsigned addInternalState(llvm::Type * const type);
    66     unsigned addInternalState(llvm::Type * const type, std::string name);
    67 
    68     void addInputStream(const unsigned fields);
    69     void addInputStream(const unsigned fields, std::string && name);
    70 
    71     unsigned addOutputStream(const unsigned fields);
    72 
     37    // Create a module for the kernel, including the kernel state type and
     38    // all required methods.  The init and accumulator output methods will be
     39    // defined, while the doBlock and finalBlock methods will initially be empty.
     40    //
     41    virtual std::unique_ptr<llvm::Module> createKernelModule();
    7342   
     43    // Generate Kernel to the current module.
     44    virtual void generateKernel();
    7445   
    75     llvm::Function * createInitMethod();
    76 
     46    // Add a FinalBlock method that simply calls DoBlock without additional
     47    // preparation.
     48    void addTrivialFinalBlockMethod(Module * m);
    7749   
    78     inline llvm::Function * prepareFunction() {
    79         return prepareFunction({0});
    80     }
    81 
    82     llvm::Function * prepareFunction(std::vector<unsigned> && inputStreamOffsets);
    83 
    84     inline llvm::Value * getInternalState(const std::string & name) {
    85         return getInternalStateInternal(mKernelStateParam, name);
    86     }
    87 
    88     inline void setInternalState(const std::string & name, llvm::Value * value) {
    89         setInternalStateInternal(mKernelStateParam, name, value);
    90     }
    91 
    92     inline llvm::Value * getInternalState(const unsigned index) {
    93         assert (index < mInternalState.size());
    94         return getInternalStateInternal(mKernelStateParam, iBuilder->getInt32(index));
    95     }
    96 
    97     inline llvm::Value * getInternalState(disable_implicit_conversion<llvm::Value *> const index) {
    98         return getInternalStateInternal(mKernelStateParam, index);
    99     }
    100 
    101     void setInternalState(const unsigned index, llvm::Value * value) {
    102         assert (index < mInternalState.size());
    103         setInternalStateInternal(mKernelStateParam, iBuilder->getInt32(index), value);
    104     }
    105 
    106     void setInternalState(disable_implicit_conversion<llvm::Value *> const index, llvm::Value * value) {
    107         setInternalStateInternal(mKernelStateParam, index, value);
    108     }
    109 
    110     inline llvm::Type * getKernelStateType() const{
    111         return mKernelStateType;
    112     }
    113 
    114     inline llvm::Value * getInputStream(const unsigned index, const unsigned streamOffset = 0) {
    115         return getInputStreamInternal(getInputStreamParam(streamOffset), iBuilder->getInt32(index));
    116     }
    117 
    118     inline llvm::Value * getInputStream(disable_implicit_conversion<llvm::Value *> index, const unsigned streamOffset = 0) {
    119         return getInputStreamInternal(getInputStreamParam(streamOffset), index);
    120     }
    121 
    122     inline unsigned getNumOfInputStreams() const {
    123         return mInputStream.size();
    124     }
    125 
    126     inline llvm::Type * getInputStreamType() const {
    127         return mInputStreamType;
    128     }
    129 
    130     inline llvm::Value * getOutputStream(const unsigned index) {
    131         assert (index < getNumOfOutputStreams());
    132         return getOutputStreamInternal(mOutputStreamParam, iBuilder->getInt32(index));
    133     }
    134 
    135     inline llvm::Value * getOutputStream(disable_implicit_conversion<llvm::Value *> const index) {
    136         return getOutputStreamInternal(mOutputStreamParam, index);
    137     }
    138 
    139     inline unsigned getNumOfOutputStreams() const {
    140         return mOutputStream.size();
    141     }
    142 
    143     inline llvm::Type * getOutputStreamType() const {
    144         return mOutputStreamType;
    145     }
    146 
    147     inline llvm::Value * getBlockNo() {
    148         return getBlockNoInternal(mKernelStateParam);
    149     }
    150 
    151     unsigned getDefaultBufferSize() const;
    152 
    153     void finalize();
    154 
    155     kernel::Instance * instantiate(std::pair<llvm::Value *, unsigned> && inputStreamSet) {
    156         return instantiate(std::move(inputStreamSet), getDefaultBufferSize());
    157     }
    158 
    159     kernel::Instance * instantiate(std::pair<llvm::Value *, unsigned> && inputStreamSet, const unsigned outputBufferSize);
    160 
    161     kernel::Instance * instantiate(llvm::Value * const inputStream) {
    162         return instantiate(std::make_pair(inputStream, 0));
    163     }
    164 
    165     kernel::Instance * instantiate(std::initializer_list<llvm::Value *> inputStreams);
    166 
    167     llvm::Value * getKernelState() const;
    168 
    169     llvm::Function * getDoBlockFunction() const;
     50    // Run-time access of Kernel State and parameters of methods for
     51    // use in implementing kernels.
     52   
     53    // Get the index of a named scalar field within the kernel state struct.
     54    llvm::Value * getScalarIndex(std::string);
     55   
     56    // Get the value of a scalar field for a given instance.
     57    llvm::Value * getScalarField(llvm::Value * self, std::string fieldName);
     58   
     59    // Set the value of a scalar field for a given instance.
     60    void setScalarField(llvm::Value * self, std::string fieldName, llvm::Value * newFieldVal);
     61   
     62    // Get a parameter by name.
     63    llvm::Value * getParameter(llvm::Function * f, std::string paramName);
    17064
    17165protected:
    17266
    173     Type * packDataTypes(const std::vector<llvm::Type *> & types);
    174 
    175     llvm::Value * getInputStreamInternal(llvm::Value * const inputStreamSet, disable_implicit_conversion<llvm::Value *> index);
    176 
    177     llvm::Value * getInternalStateInternal(llvm::Value * const kernelState, const std::string & name);
    178 
    179     void setInternalStateInternal(llvm::Value * const kernelState, const std::string & name, llvm::Value * const value);
    180 
    181     llvm::Value * getInternalStateInternal(llvm::Value * const kernelState, disable_implicit_conversion<llvm::Value *> index);
    182 
    183     void setInternalStateInternal(llvm::Value * const kernelState, const unsigned index, llvm::Value * const value);
    184 
    185     void setInternalStateInternal(llvm::Value * const kernelState, disable_implicit_conversion<llvm::Value *> index, llvm::Value * const value);
    186 
    187     llvm::Value * getOutputStreamInternal(llvm::Value * const outputStreamSet, disable_implicit_conversion<llvm::Value *> index);
    188 
    189     llvm::Value * getBlockNoInternal(llvm::Value * const instance) {
    190         return getInternalStateInternal(instance, mBlockNoIndex);
    191     }
    192 
    193     llvm::Function * getOutputStreamSetFunction() const;
    194 
    195     llvm::Value * getInputStreamParam(const unsigned streamOffset) const;
    196 
    197     const std::vector<unsigned> & getInputStreamOffsets() const {
    198         return mInputStreamOffsets;
    199     }
    200 
    201 private:
    202 
    203     IDISA::IDISA_Builder * const        iBuilder;
    204     const std::string                   mKernelName;
    205     unsigned                            mDefaultBufferSize;
    206 
    207     llvm::Type *                        mBitBlockType;
    208     llvm::ConstantInt *                 mBlockNoIndex;
    209     llvm::Function *                                    mConstructor;
    210     llvm::Function *                                    mDoBlock;
    211 
    212     std::vector<ParameterBinding>           mInstanceParameters;
    213     unsigned                            mInstanceParametersOffset;
    214    
    215     llvm::Type *                        mKernelStateType;
    216     llvm::Type *                        mInputStreamType;
    217     llvm::Type *                        mOutputStreamType;
    218 
    219     llvm::Value *                       mKernelStateParam;
    220     InputStreamMap                      mInputStreamParam;
    221     llvm::Value *                       mOutputStreamParam;
    222 
    223     std::vector<std::string>            mInputScalarName;   
    224     std::vector<llvm::Type *>           mInputStream;
    225     std::vector<std::string>            mInputStreamName;
    226     std::vector<unsigned>               mInputStreamOffsets;
    227     std::vector<llvm::Type *>           mOutputStream;
    228     std::vector<llvm::Type *>                   mInternalState;
    229     NameMap                             mInternalStateNameMap;
     67    std::vector<llvm::Type *>  mKernelFields;
     68    NameMap                    mInternalStateNameMap;
    23069};
    231 
    232 inline llvm::Function * KernelBuilder::getDoBlockFunction() const {
    233     return mDoBlock;
    23470}
    235 
    236 inline llvm::Value * KernelBuilder::getKernelState() const {
    237     return mKernelStateParam;
    238 }
    239 
    240 inline unsigned KernelBuilder::getDefaultBufferSize() const {
    241     return mDefaultBufferSize;
    242 }
    243    
    244 llvm::Value * make_New(IDISA::IDISA_Builder * iBuilder, std::string kernel_name, std::vector<Value *> args);
    245 
    246     llvm::Value * make_DoBlock_Call(IDISA::IDISA_Builder * iBuilder, std::string kernel_name, std::vector<Value *> args);
    247     llvm::Value * make_FinalBlock_Call(IDISA::IDISA_Builder * iBuilder, std::string kernel_name, std::vector<Value *> args);
    248    
    249 } // end of namespace kernel
    250 
    251 #endif // KERNEL_H
     71#endif
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5052 r5063  
    1111#include <kernels/scanmatchgen.h>
    1212#include <kernels/s2p_kernel.h>
    13 #include <kernels/instance.h>
    1413
    1514#include <pablo/function.h>
    16 #include <pablo/pablo_compiler.h>
     15#include <pablo/pablo_kernel.h>
    1716#include <pablo/pablo_toolchain.h>
    1817
    1918#include <llvm/IR/Intrinsics.h>
     19#include "llvm/Support/SourceMgr.h"
     20#include "llvm/IRReader/IRReader.h"
     21#include "llvm/Linker/Linker.h"
     22
     23
    2024
    2125using namespace pablo;
     
    3135
    3236PipelineBuilder::~PipelineBuilder() {
    33     delete mS2PKernel;
    34     delete mICgrepKernel;
    35     delete mScanMatchKernel;
    36 }
    37 
    38 void PipelineBuilder::CreateKernels(PabloFunction * function, bool UTF_16, bool isNameExpression){
    39     mS2PKernel = new KernelBuilder(iBuilder, "s2p", codegen::SegmentSize);
    40     mICgrepKernel = new KernelBuilder(iBuilder, "icgrep", codegen::SegmentSize);
    41     mScanMatchKernel = new KernelBuilder(iBuilder, "scanMatch", codegen::SegmentSize);
    42     if (UTF_16) {
    43         generateS2P_16Kernel(mMod, iBuilder, mS2PKernel);
    44     }
    45     else {
    46         generateS2PKernel(mMod, iBuilder, mS2PKernel);
    47     }
    48     generateScanMatch(mMod, iBuilder, 64, mScanMatchKernel, isNameExpression);
    49     pablo_function_passes(function);
    50     PabloCompiler pablo_compiler(mMod, iBuilder);
    51     try {
    52         pablo_compiler.setKernel(mICgrepKernel);
    53         pablo_compiler.compile(function);
    54         delete function;
    55         releaseSlabAllocatorMemory();
    56     } catch (std::runtime_error e) {
    57         delete function;
    58         releaseSlabAllocatorMemory();
    59         std::cerr << "Runtime error: " << e.what() << std::endl;
    60         exit(1);
    61     }
    6237}
    6338
     
    6742}
    6843
    69 inline Value * Cal_Count(Instance * icGrepInstance, IDISA::IDISA_Builder * iBuilder) {
    70     Value * match = icGrepInstance->getOutputStream(0, 0);
    71     Value * matches = iBuilder->CreateLoad(match, false, "match");
     44inline Value * Cal_Count(Value * match_ptr, IDISA::IDISA_Builder * iBuilder) {
     45    Value * matches = iBuilder->CreateLoad(match_ptr, false, "match");
    7246    return generatePopcount(iBuilder, matches);
    7347}
    7448
    75 Function * PipelineBuilder::ExecuteKernels(bool CountOnly, bool UTF_16) {
     49Function * PipelineBuilder::ExecuteKernels(PabloFunction * function, bool isNameExpression, bool CountOnly, bool UTF_16) {
     50   
     51    s2pKernel  s2pk(iBuilder);
     52    scanMatchKernel scanMatchK(iBuilder, 64, false);
     53
     54    s2pk.generateKernel();
     55    scanMatchK.generateKernel();
     56   
     57    //std::unique_ptr<Module> s2pM = s2pk.createKernelModule();
     58    //std::unique_ptr<Module> scanMatchM = scanMatchK.createKernelModule();
     59   
     60    //s2pk.addKernelDeclarations(mMod);
     61    //scanMatchK.addKernelDeclarations(mMod);
     62
     63    pablo_function_passes(function);
     64    PabloKernel  icgrepK(iBuilder, "icgrep", function, {"matchedLineCount"});
     65    icgrepK.prepareKernel();
     66    icgrepK.generateKernel();
     67
     68    //std::unique_ptr<Module> icgrepM = icgrepK.createKernelModule();
     69    //icgrepK.addKernelDeclarations(mMod);
     70   
    7671    Type * const int64ty = iBuilder->getInt64Ty();
    7772    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    78     Type * const inputType = PointerType::get(ArrayType::get(StructType::get(mMod->getContext(), std::vector<Type *>({ArrayType::get(mBitBlockType, (UTF_16 ? 16 : 8))})), 1), 0);
     73    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, (UTF_16 ? 16 : 8)), 1), 0);
    7974    Type * const resultTy = CountOnly ? int64ty : iBuilder->getVoidTy();
    8075    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", resultTy, inputType, int64ty, int64ty, nullptr));
     
    9085
    9186    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
    92 
    93 
    9487    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    95     BasicBlock * segmentCondBlock = nullptr;
    96     BasicBlock * segmentBodyBlock = nullptr;
    97     const unsigned segmentSize = codegen::SegmentSize;
    98     if (segmentSize > 1) {
    99         segmentCondBlock = BasicBlock::Create(mMod->getContext(), "segmentCond", main, 0);
    100         segmentBodyBlock = BasicBlock::Create(mMod->getContext(), "segmentBody", main, 0);
    101     }
    10288    BasicBlock * fullCondBlock = BasicBlock::Create(mMod->getContext(), "fullCond", main, 0);
    10389    BasicBlock * fullBodyBlock = BasicBlock::Create(mMod->getContext(), "fullBody", main, 0);
    10490    BasicBlock * finalBlock = BasicBlock::Create(mMod->getContext(), "final", main, 0);
    105     BasicBlock * finalPartialBlock = BasicBlock::Create(mMod->getContext(), "partial", main, 0);
    106     BasicBlock * finalEmptyBlock = BasicBlock::Create(mMod->getContext(), "empty", main, 0);
    107     BasicBlock * exitBlock = BasicBlock::Create(mMod->getContext(), "exit", main, 0);
    10891
    109     Value * count = nullptr;
    110     if (CountOnly) {
    111         count = iBuilder->CreateAlloca(mBitBlockType, nullptr, "count");
    112         iBuilder->CreateStore(ConstantInt::getNullValue(mBitBlockType), count);
     92   
     93    const unsigned segmentSize = 1;// or codegen::SegmentSize
     94   
     95    StreamSetBuffer ByteStream(iBuilder, StreamSetType(1, (UTF_16 ? 16 : 8)), 0);
     96    StreamSetBuffer BasisBits(iBuilder, StreamSetType((UTF_16 ? 16 : 8), 1), segmentSize);
     97    StreamSetBuffer MatchResults(iBuilder, StreamSetType(2, 1), segmentSize);
     98   
     99    ByteStream.setStreamSetBuffer(inputStream);
     100    BasisBits.allocateBuffer();
     101    MatchResults.allocateBuffer();
     102
     103    Value * initialBufferSize = bufferSize;
     104    Value * initialBlockNo = iBuilder->getInt64(0);
     105    BasicBlock * initialBlock = entryBlock;
     106   
     107    Value * s2pInstance = s2pk.createInstance({});
     108    Value * icgrepInstance = icgrepK.createInstance({});
     109    Value * scanMatchInstance = nullptr;
     110    if (!CountOnly) {
     111        scanMatchInstance = scanMatchK.createInstance({iBuilder->CreateBitCast(inputStream, int8PtrTy), bufferSize, fileIdx});
    113112    }
    114 
    115     Instance * s2pInstance = mS2PKernel->instantiate(inputStream);
    116     Instance * icGrepInstance = mICgrepKernel->instantiate(s2pInstance->getOutputStreamBuffer());
    117     Instance * scanMatchInstance = nullptr;
     113    iBuilder->CreateBr(fullCondBlock);
    118114   
    119     if (!CountOnly) {
    120         scanMatchInstance = mScanMatchKernel->instantiate(icGrepInstance->getOutputStreamBuffer());
    121         scanMatchInstance->setInternalState("FileBuf", iBuilder->CreateBitCast(inputStream, int8PtrTy));
    122         scanMatchInstance->setInternalState("FileSize", bufferSize);
    123         scanMatchInstance->setInternalState("FileIdx", fileIdx);
    124     }
    125     Value * initialBufferSize = nullptr;
    126     BasicBlock * initialBlock = nullptr;
    127 
    128     if (segmentSize > 1) {
    129         iBuilder->CreateBr(segmentCondBlock);
    130         iBuilder->SetInsertPoint(segmentCondBlock);
    131         PHINode * remainingBytes = iBuilder->CreatePHI(int64ty, 2, "remainingBytes");
    132         remainingBytes->addIncoming(bufferSize, entryBlock);
    133         Constant * const step = ConstantInt::get(int64ty, mBlockSize * segmentSize * (UTF_16 ? 2 : 1));
    134         Value * segmentCondTest = iBuilder->CreateICmpULT(remainingBytes, step);
    135         iBuilder->CreateCondBr(segmentCondTest, fullCondBlock, segmentBodyBlock);
    136         iBuilder->SetInsertPoint(segmentBodyBlock);
    137         for (unsigned i = 0; i < segmentSize; ++i) {
    138             s2pInstance->CreateDoBlockCall();
    139         }
    140         for (unsigned i = 0; i < segmentSize; ++i) {
    141             Value * match = (icGrepInstance->getOutputStream(0, 0));
    142             icGrepInstance->CreateDoBlockCall();
    143             Value * temp = iBuilder->CreateLoad(match);
    144             Value * matches = iBuilder->CreateBitCast(temp, iBuilder->getIntNTy(mBlockSize));
    145             Value * popcount_for = generatePopcount(iBuilder, matches);
    146             if(CountOnly){
    147                 Value * temp_count = iBuilder->CreateLoad(count);
    148                 Value * prev_count = iBuilder->CreateBitCast(temp_count, iBuilder->getIntNTy(mBlockSize));
    149                 Value * add_for = iBuilder->CreateAdd(prev_count, popcount_for);
    150                 Value * add = iBuilder->CreateBitCast(add_for, mBitBlockType);
    151                 iBuilder->CreateStore(add, count);
    152             }
    153         }
    154         if (!CountOnly) {
    155             for (unsigned i = 0; i < segmentSize; ++i) {
    156                 scanMatchInstance->CreateDoBlockCall();
    157             }
    158         }
    159         remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, step), segmentBodyBlock);
    160         iBuilder->CreateBr(segmentCondBlock);
    161         initialBufferSize = remainingBytes;
    162         initialBlock = segmentCondBlock;
    163     } else {
    164         initialBufferSize = bufferSize;
    165         initialBlock = entryBlock;
    166         iBuilder->CreateBr(fullCondBlock);
    167     }
    168 
    169115    iBuilder->SetInsertPoint(fullCondBlock);
    170116    PHINode * remainingBytes = iBuilder->CreatePHI(int64ty, 2, "remainingBytes");
    171117    remainingBytes->addIncoming(initialBufferSize, initialBlock);
    172 
     118    PHINode * blockNo = iBuilder->CreatePHI(int64ty, 2, "blockNo");
     119    blockNo->addIncoming(initialBlockNo, initialBlock);
     120   
    173121    Constant * const step = ConstantInt::get(int64ty, mBlockSize * (UTF_16 ? 2 : 1));
    174122    Value * fullCondTest = iBuilder->CreateICmpULT(remainingBytes, step);
    175123    iBuilder->CreateCondBr(fullCondTest, finalBlock, fullBodyBlock);
    176124
     125    // Full Block Pipeline loop
    177126    iBuilder->SetInsertPoint(fullBodyBlock);
    178     s2pInstance->CreateDoBlockCall();
    179     icGrepInstance->CreateDoBlockCall();
    180     if (CountOnly) {
    181         Value * popcount = Cal_Count(icGrepInstance, iBuilder);
    182         Value * temp_count = iBuilder->CreateLoad(count);
    183         Value * add = iBuilder->CreateAdd(temp_count, popcount);
    184         iBuilder->CreateStore(add, count);
    185     } else {
    186         scanMatchInstance->CreateDoBlockCall();
     127   
     128    Value * byteStreamPtr = ByteStream.getBlockPointer(blockNo);
     129    Value * basisBitsPtr = BasisBits.getBlockPointer(blockNo);
     130    Value * matchResultsPtr = MatchResults.getBlockPointer(blockNo);
     131    s2pk.createDoBlockCall(s2pInstance, {byteStreamPtr, basisBitsPtr});
     132    icgrepK.createDoBlockCall(icgrepInstance, {basisBitsPtr, matchResultsPtr});
     133    if (!CountOnly) {
     134
     135        scanMatchK.createDoBlockCall(scanMatchInstance, {matchResultsPtr});
    187136    }
    188 
    189137    remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, step), fullBodyBlock);
     138    blockNo->addIncoming(iBuilder->CreateAdd(blockNo, iBuilder->getInt64(1)), fullBodyBlock);
    190139    iBuilder->CreateBr(fullCondBlock);
    191140
    192141    iBuilder->SetInsertPoint(finalBlock);
    193     Value * emptyBlockCond = iBuilder->CreateICmpEQ(remainingBytes, ConstantInt::get(int64ty, 0));
    194     iBuilder->CreateCondBr(emptyBlockCond, finalEmptyBlock, finalPartialBlock);
     142    byteStreamPtr = ByteStream.getBlockPointer(blockNo);
     143    basisBitsPtr = BasisBits.getBlockPointer(blockNo);
     144    matchResultsPtr = MatchResults.getBlockPointer(blockNo);
     145    s2pk.createFinalBlockCall(s2pInstance, remainingBytes, {byteStreamPtr, basisBitsPtr});
     146    icgrepK.createFinalBlockCall(icgrepInstance, remainingBytes, {basisBitsPtr, matchResultsPtr});
     147    if (CountOnly) {
     148        Value * matchCount = icgrepK.createGetAccumulatorCall(icgrepInstance, "matchedLineCount");
     149        iBuilder->CreateRet(matchCount);
     150    }
     151    else {
     152        scanMatchK.createFinalBlockCall(scanMatchInstance, remainingBytes, {matchResultsPtr});
     153        iBuilder->CreateRetVoid();
     154    }
     155   
     156    //Linker L(*mMod);
     157    //L.linkInModule(std::move(s2pM));
     158    //L.linkInModule(std::move(scanMatchM));
     159    //L.linkInModule(std::move(icgrepM));
    195160
    196161
    197     iBuilder->SetInsertPoint(finalPartialBlock);
    198     s2pInstance->CreateDoBlockCall();
    199     iBuilder->CreateBr(exitBlock);
    200 
    201     iBuilder->SetInsertPoint(finalEmptyBlock);
    202     s2pInstance->clearOutputStreamSet();
    203     iBuilder->CreateBr(exitBlock);
    204 
    205     iBuilder->SetInsertPoint(exitBlock);
    206 
    207     Value * remainingByte = iBuilder->CreateZExt(remainingBytes, iBuilder->getIntNTy(mBlockSize));
    208     Value * remainingUnit = iBuilder->CreateLShr(remainingByte, ConstantInt::get(iBuilder->getIntNTy(mBlockSize), 1));
    209     Value * EOFmark = iBuilder->CreateShl(ConstantInt::get(iBuilder->getIntNTy(mBlockSize), 1), UTF_16 ? remainingUnit : remainingByte);
    210     icGrepInstance->setInternalState("EOFmark", iBuilder->CreateBitCast(EOFmark, mBitBlockType));
    211 
    212     icGrepInstance->CreateDoBlockCall();
    213     if (CountOnly) {
    214         Value * popcount1 = Cal_Count(icGrepInstance, iBuilder);
    215         Value * temp_count1 = iBuilder->CreateLoad(count);
    216         Value * result = iBuilder->CreateAdd(temp_count1, popcount1);
    217         for (unsigned width = (mBlockSize / 64); width > 1; width /= 2) {
    218             std::vector<Constant *> mask(width / 2);
    219             for (unsigned i = 0; i < (width / 2); ++i) {
    220                 mask[i] = iBuilder->getInt32(i);
    221             }
    222             Value * const undef = UndefValue::get(VectorType::get(int64ty, width));
    223             Value * const lh = iBuilder->CreateShuffleVector(result, undef, ConstantVector::get(mask));
    224             for (unsigned i = 0; i < (width / 2); ++i) {
    225                 mask[i] = iBuilder->getInt32(i + (width / 2));
    226             }
    227             Value * const rh = iBuilder->CreateShuffleVector(result, undef, ConstantVector::get(mask));
    228             result = iBuilder->CreateAdd(lh, rh);
    229         }
    230         iBuilder->CreateRet(iBuilder->CreateExtractElement(result, iBuilder->getInt32(0)));
    231     } else {
    232         scanMatchInstance->CreateDoBlockCall();
    233         iBuilder->CreateRetVoid();
    234     }
    235162    return main;
    236163}
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.h

    r5046 r5063  
    77
    88#include <IDISA/idisa_builder.h>
    9 #include "kernel.h"
     9#include <kernels/kernel.h>
    1010
    1111namespace llvm {
     
    1818    class PabloFunction;
    1919    class PabloBlock;
     20    class PabloKernel;
    2021}
    2122
     
    3031    ~PipelineBuilder();
    3132
    32     void CreateKernels(pablo::PabloFunction * function, bool UTF_16, bool isNameExpression);
    33     llvm::Function * ExecuteKernels(bool CountOnly, bool UTF_16);
     33    llvm::Function * ExecuteKernels(pablo::PabloFunction * function, bool isNameExpression, bool CountOnly, bool UTF_16);
    3434
    3535private:
    3636    llvm::Module *                      mMod;
    3737    IDISA::IDISA_Builder *              iBuilder;
    38     KernelBuilder *                     mS2PKernel;
    39     KernelBuilder *                     mICgrepKernel;   
    40     KernelBuilder *                     mScanMatchKernel;
    4138    llvm::Type *                        mBitBlockType;
    4239    int                                 mBlockSize;
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r5051 r5063  
    66#include <kernels/kernel.h>
    77#include <IDISA/idisa_builder.h>
     8#include <llvm/Support/raw_ostream.h>
    89
    910namespace kernel {
     11using namespace llvm;
    1012
    1113const int PACK_LANES = 1;
     
    5355}
    5456
    55 void s2p(IDISA::IDISA_Builder * iBuilder, Value * input, Value * output[]) {
    56     Value * bit[8];
    57     for (unsigned i = 0; i < 8; i++) {
    58         bit[i] = iBuilder->CreateBlockAlignedLoad(input, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
     57/* Alternative transposition model, but small field width packs are problematic. */
     58#if 0
     59void s2p_ideal(IDISA::IDISA_Builder * iBuilder, Value * input[], Value * output[]) {
     60    Value * hi_nybble[4];
     61    Value * lo_nybble[4];
     62    for (unsigned i = 0; i<4; i++) {
     63        Value * s0 = input[2*i];
     64        Value * s1 = input[2*i+1];
     65        hi_nybble[i] = iBuilder->hsimd_packh(8, s0, s1);
     66        lo_nybble[i] = iBuilder->hsimd_packl(8, s0, s1);
    5967    }
    60     s2p(iBuilder, bit, output);
     68    Value * pair01[2];
     69    Value * pair23[2];
     70    Value * pair45[2];
     71    Value * pair67[2];
     72    for (unsigned i = 0; i<2; i++) {
     73        pair01[i] = iBuilder->hsimd_packh(4, hi_nybble[2*i], hi_nybble[2*i+1]);
     74        pair23[i] = iBuilder->hsimd_packl(4, hi_nybble[2*i], hi_nybble[2*i+1]);
     75        pair45[i] = iBuilder->hsimd_packh(4, lo_nybble[2*i], lo_nybble[2*i+1]);
     76        pair67[i] = iBuilder->hsimd_packl(4, lo_nybble[2*i], lo_nybble[2*i+1]);
     77    }
     78    output[0] = iBuilder->hsimd_packh(2, pair01[0], pair01[1]);
     79    output[1] = iBuilder->hsimd_packl(2, pair01[0], pair01[1]);
     80    output[2] = iBuilder->hsimd_packh(2, pair23[0], pair23[1]);
     81    output[3] = iBuilder->hsimd_packl(2, pair23[0], pair23[1]);
     82    output[4] = iBuilder->hsimd_packh(2, pair45[0], pair45[1]);
     83    output[5] = iBuilder->hsimd_packl(2, pair45[0], pair45[1]);
     84    output[6] = iBuilder->hsimd_packh(2, pair67[0], pair67[1]);
     85    output[7] = iBuilder->hsimd_packl(2, pair67[0], pair67[1]);
    6186}
     87#endif
     88   
     89   
     90#if 0
    6291
    63 void generateS2PKernel(Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
    64     kBuilder->addInputStream(8, "byte_pack");
    65     for(unsigned i = 0; i < 8; ++i) {
    66         kBuilder->addOutputStream(1);
    67     }
    68     kBuilder->prepareFunction();
    69     Value * output[8];
    70 
    71     Value * ptr = kBuilder->getInputStream(0);
    72     //iBuilder->CallPrintInt("ptr", iBuilder->CreatePtrToInt(ptr, iBuilder->getInt64Ty()));
    73     s2p(iBuilder, ptr, output);
    74     for (unsigned j = 0; j < 8; ++j) {
    75         //iBuilder->CallPrintRegister("bit" + std::to_string(j + 1), output[j]);
    76         iBuilder->CreateBlockAlignedStore(output[j], kBuilder->getOutputStream(j));
    77     }
    78     kBuilder->finalize();
    79 }
    8092
    8193void generateS2P_16Kernel(Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
     
    105117    kBuilder->finalize();
    106118}
    107        
    108 void generateS2P_idealKernel(Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
    109     kBuilder->addInputStream(8, "byte_pack");
    110     for(unsigned i = 0; i < 8; ++i) {
    111         kBuilder->addOutputStream(1);
    112     }
    113     kBuilder->prepareFunction();
    114     Value * input = kBuilder->getInputStream(0);
    115     Value * output[8];
    116     Value * hi_nybble[4];
    117     Value * lo_nybble[4];
    118     for (unsigned i = 0; i<4; i++) {
    119         Value * s0 = iBuilder->CreateBlockAlignedLoad(input, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i)});
    120         Value * s1 = iBuilder->CreateBlockAlignedLoad(input, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i + 1)});
    121         hi_nybble[i] = iBuilder->hsimd_packh(8, s0, s1);
    122         lo_nybble[i] = iBuilder->hsimd_packl(8, s0, s1);
    123     }
    124     Value * pair01[2];
    125     Value * pair23[2];
    126     Value * pair45[2];
    127     Value * pair67[2];
    128     for (unsigned i = 0; i<2; i++) {
    129         pair01[i] = iBuilder->hsimd_packh(4, hi_nybble[2*i], hi_nybble[2*i+1]);
    130         pair23[i] = iBuilder->hsimd_packl(4, hi_nybble[2*i], hi_nybble[2*i+1]);
    131         pair45[i] = iBuilder->hsimd_packh(4, lo_nybble[2*i], lo_nybble[2*i+1]);
    132         pair67[i] = iBuilder->hsimd_packl(4, lo_nybble[2*i], lo_nybble[2*i+1]);
    133     }
    134     output[0] = iBuilder->hsimd_packh(2, pair01[0], pair01[1]);
    135     output[1] = iBuilder->hsimd_packl(2, pair01[0], pair01[1]);
    136     output[2] = iBuilder->hsimd_packh(2, pair23[0], pair23[1]);
    137     output[3] = iBuilder->hsimd_packl(2, pair23[0], pair23[1]);
    138     output[4] = iBuilder->hsimd_packh(2, pair45[0], pair45[1]);
    139     output[5] = iBuilder->hsimd_packl(2, pair45[0], pair45[1]);
    140     output[6] = iBuilder->hsimd_packh(2, pair67[0], pair67[1]);
    141     output[7] = iBuilder->hsimd_packl(2, pair67[0], pair67[1]);
    142 
    143     s2p(iBuilder, kBuilder->getInputStream(0), output);
    144     for (unsigned j = 0; j < 8; ++j) {
    145         iBuilder->CreateBlockAlignedStore(output[j], kBuilder->getOutputStream(j));
    146     }
    147     kBuilder->finalize();
    148 }
    149119   
    150 std::unique_ptr<llvm::Module> s2pKernel::createKernelModule() {
    151     std::unique_ptr<llvm::Module> theModule = KernelInterface::createKernelModule();
     120#endif
    152121   
    153     /***********************
    154      WARNING iBuilder has a different module than theModule at this point.
    155     ***********************/
    156     Function * doBlockFunction = theModule.get()->getFunction(mKernelName + "_DoBlock");
    157    
    158     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    159    
    160     Value * byteStreamBlock_ptr = getParameter(doBlockFunction, "byteStream");
    161     Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");
    162     Value * s_bytepack[8];
    163     for (unsigned i = 0; i < 8; i++) {
    164         s_bytepack[i] = iBuilder->CreateBlockAlignedLoad(byteStreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i)});
    165     }
    166     Value * p_bitblock[8];
    167     s2p(iBuilder, s_bytepack, p_bitblock);
    168     for (unsigned j = 0; j < 8; ++j) {
    169         iBuilder->CreateBlockAlignedStore(p_bitblock[j], basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
    170     }
    171     iBuilder->CreateRetVoid();
    172 
     122void s2pKernel::generateFinalBlockMethod() {
    173123    /* Now prepare the s2p final block function:
    174124     assumption: if remaining bytes is greater than 0, it is safe to read a full block of bytes.
    175125     if remaining bytes is zero, no read should be performed (e.g. for mmapped buffer).
    176126     */
    177     Function * finalBlockFunction = theModule.get()->getFunction(mKernelName + "_FinalBlock");
     127    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     128    Module * m = iBuilder->getModule();
     129    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     130    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
    178131    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
    179 
     132   
    180133    Value * self = getParameter(finalBlockFunction, "self");
    181134    Value * remainingBytes = getParameter(finalBlockFunction, "remainingBytes");
    182     byteStreamBlock_ptr = getParameter(finalBlockFunction, "byteStream");
    183     basisBitsBlock_ptr = getParameter(finalBlockFunction, "basisBits");
     135    Value * byteStreamBlock_ptr = getParameter(finalBlockFunction, "byteStream");
     136    Value * basisBitsBlock_ptr = getParameter(finalBlockFunction, "basisBits");
    184137   
    185138    BasicBlock * finalPartialBlock = BasicBlock::Create(iBuilder->getContext(), "partial", finalBlockFunction, 0);
     
    200153    iBuilder->SetInsertPoint(exitBlock);
    201154    iBuilder->CreateRetVoid();
     155    iBuilder->restoreIP(savePoint);
     156}
    202157
    203     return theModule;
     158void s2pKernel::generateKernel() {
     159    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     160    if (mKernelStateType == nullptr) finalizeKernelStateType();
     161    KernelBuilder::generateKernel();
     162    generateFinalBlockMethod();
     163
     164    Module * m = iBuilder->getModule();
     165    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     166   
     167    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     168   
     169    Value * byteStreamBlock_ptr = getParameter(doBlockFunction, "byteStream");
     170    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");
     171    Value * s_bytepack[8];
     172    for (unsigned i = 0; i < 8; i++) {
     173        s_bytepack[i] = iBuilder->CreateBlockAlignedLoad(byteStreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i)});
     174    }
     175    Value * p_bitblock[8];
     176    s2p(iBuilder, s_bytepack, p_bitblock);
     177    for (unsigned j = 0; j < 8; ++j) {
     178        iBuilder->CreateBlockAlignedStore(p_bitblock[j], basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
     179    }
     180    iBuilder->CreateRetVoid();
     181    iBuilder->restoreIP(savePoint);
    204182}
    205183
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.h

    r5051 r5063  
    88#include "streamset.h"
    99#include "interface.h"
     10#include "kernel.h"
    1011
    1112namespace llvm { class Module; }
     
    1718class KernelBuilder;
    1819
    19 void generateS2PKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
    20 void generateS2P_16Kernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
    21 void generateS2P_idealKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
     20//void generateS2P_16Kernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
    2221
    2322
    2423
    25 class s2pKernel : public KernelInterface {
     24class s2pKernel : public KernelBuilder {
    2625public:
    2726    s2pKernel(IDISA::IDISA_Builder * iBuilder) :
    28     KernelInterface(iBuilder, "s2p",
    29                     {StreamSetBinding{StreamSetType(1, 8), "byteStream"}},
    30                     {StreamSetBinding{StreamSetType(8, 1), "basisBits"}},
    31                     {}, {}, {}) {}
     27    KernelBuilder(iBuilder, "s2p",
     28                  {StreamSetBinding{StreamSetType(1, 8), "byteStream"}},
     29                  {StreamSetBinding{StreamSetType(8, 1), "basisBits"}},
     30                  {}, {}, {}) {}
    3231   
    33     std::unique_ptr<llvm::Module> createKernelModule() override;
     32    void generateFinalBlockMethod();
     33    void generateKernel() override;
     34   
     35};
    3436
    35 };
     37   
     38
    3639}
    3740#endif
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5056 r5063  
    3939}
    4040
    41 Function * generateScanWordRoutine(Module * m, IDISA::IDISA_Builder * iBuilder, unsigned scanwordBitWidth, KernelBuilder * const kBuilder, bool isNameExpression) {
    42 
    43     Function * function = m->getFunction("scan_matches_in_scanword");
    44     if (LLVM_UNLIKELY(function != nullptr)) {
    45         return function;
    46     }
    47 
    48     LLVMContext & ctxt = m->getContext();
    49     Type * T = iBuilder->getIntNTy(scanwordBitWidth);
    50     Type * S = PointerType::get(iBuilder->getIntNTy(8), 0);
    51     Type * returnType = StructType::get(ctxt, std::vector<Type *>({T, T}));
    52     FunctionType * functionType = FunctionType::get(returnType, std::vector<Type *>({PointerType::get(kBuilder->getKernelStateType(), 0), T, T, T, T, T}), false);
    53 
    54     SmallVector<AttributeSet, 6> Attrs;
    55     Attrs.push_back(AttributeSet::get(ctxt, ~0U, std::vector<Attribute::AttrKind>({ Attribute::NoUnwind, Attribute::UWTable })));
    56     Attrs.push_back(AttributeSet::get(ctxt, 1, std::vector<Attribute::AttrKind>({})));
    57     Attrs.push_back(AttributeSet::get(ctxt, 2, std::vector<Attribute::AttrKind>({})));
    58     Attrs.push_back(AttributeSet::get(ctxt, 3, std::vector<Attribute::AttrKind>({})));
    59     Attrs.push_back(AttributeSet::get(ctxt, 4, std::vector<Attribute::AttrKind>({})));
    60     Attrs.push_back(AttributeSet::get(ctxt, 5, std::vector<Attribute::AttrKind>({})));
    61     AttributeSet AttrSet = AttributeSet::get(ctxt, Attrs);
    62 
    63     function = Function::Create(functionType, GlobalValue::ExternalLinkage, "scan_matches_in_scanword", m);
    64     function->setCallingConv(CallingConv::C);
    65     function->setAttributes(AttrSet);
    66     function->addFnAttr(llvm::Attribute::AlwaysInline);
    67 
    68     Function::arg_iterator args = function->arg_begin();
    69     Value * instance = &*(args++);
    70     instance->setName("this");
    71     Value * matches_input_parm = &*(args++);
    72     matches_input_parm->setName("matches");
    73     Value * record_breaks_input_parm = &*(args++);
    74     record_breaks_input_parm->setName("breaks");
    75     Value * scanwordPos = &*(args++);
    76     scanwordPos->setName("scanwordPos");
    77     Value * recordStart_input_parm = &*(args++);
    78     recordStart_input_parm->setName("pendingLineStart");
    79     Value * recordNum_input_parm = &*(args++);
    80     recordNum_input_parm->setName("lineNum");
    81 
    82     Constant * matchProcessor;
    83     if (isNameExpression) {
    84         matchProcessor = m->getOrInsertFunction("insert_codepoints", Type::getVoidTy(ctxt), T, T, T, S, nullptr);
    85     } else {
    86         matchProcessor = m->getOrInsertFunction("wrapped_report_match", Type::getVoidTy(ctxt), T, T, T, S, T, T, nullptr);
    87     }
    88     iBuilder->SetInsertPoint(BasicBlock::Create(ctxt, "entry", function,0));
    89 
    90     BasicBlock * entry_block = iBuilder->GetInsertBlock();
    91     BasicBlock * matches_test_block = BasicBlock::Create(ctxt, "matches_test_block", function, 0);
    92     BasicBlock * process_matches_loop_entry = BasicBlock::Create(ctxt, "process_matches_loop", function, 0);
    93     BasicBlock * prior_breaks_block = BasicBlock::Create(ctxt, "prior_breaks_block", function, 0);
    94     BasicBlock * loop_final_block = BasicBlock::Create(ctxt, "loop_final_block", function, 0);
    95     BasicBlock * matches_done_block = BasicBlock::Create(ctxt, "matches_done_block", function, 0);
    96     BasicBlock * remaining_breaks_block = BasicBlock::Create(ctxt, "remaining_breaks_block", function, 0);
    97     BasicBlock * return_block = BasicBlock::Create(ctxt, "return_block", function, 0);
    98 
    99 
    100     // The match scanner works with a loop involving four variables:
    101     // (a) the bit stream scanword of matches marking the ends of selected records,
    102     // (b) the bit stream scanword of record_breaks marking the ends of all records,
    103     // (c) the integer lastRecordNum indicating the number of records processed so far,
    104     // (d) the index lastRecordStart indicating the file position of the last record.
    105     // We set up a loop structure, in which a set of 4 phi nodes initialize these
    106     // variables from either the input to the scanner or the computed values within
    107     // the loop body.
    108 
    109 
    110     iBuilder->CreateBr(matches_test_block);
    111 
    112     // LOOP Test Block
    113     iBuilder->SetInsertPoint(matches_test_block);
    114     PHINode * matches_phi = iBuilder->CreatePHI(T, 2, "matches");
    115     PHINode * record_breaks_phi = iBuilder->CreatePHI(T, 2, "record_breaks");
    116     PHINode * recordNum_phi = iBuilder->CreatePHI(T, 2, "recordNum");
    117     PHINode * recordStart_phi = iBuilder->CreatePHI(T, 2, "recordStart");
    118     matches_phi->addIncoming(matches_input_parm, entry_block);
    119     record_breaks_phi->addIncoming(record_breaks_input_parm, entry_block);
    120     recordNum_phi->addIncoming(recordNum_input_parm, entry_block);
    121     recordStart_phi->addIncoming(recordStart_input_parm, entry_block);
    122     Value * have_matches_cond = iBuilder->CreateICmpNE(matches_phi, ConstantInt::get(T, 0));
    123     iBuilder->CreateCondBr(have_matches_cond, process_matches_loop_entry, matches_done_block);
    124 
    125     // LOOP BODY
    126     // The loop body is entered if we have more matches to process.
    127     iBuilder->SetInsertPoint(process_matches_loop_entry);
    128     Value * prior_breaks = iBuilder->CreateAnd(generateForwardZeroesMask(iBuilder, matches_phi), record_breaks_phi);
    129     // Within the loop we have a conditional block that is executed if there are any prior
    130     // record breaks.
    131     Value * prior_breaks_cond = iBuilder->CreateICmpNE(prior_breaks, ConstantInt::get(T, 0));
    132     iBuilder->CreateCondBr(prior_breaks_cond, prior_breaks_block, loop_final_block);
    133 
    134     // PRIOR_BREAKS_BLOCK
    135     // If there are prior breaks, we count them and compute the record start position.
    136     iBuilder->SetInsertPoint(prior_breaks_block);
    137     Value * matchRecordNum = iBuilder->CreateAdd(generatePopcount(iBuilder, prior_breaks), recordNum_phi);
    138     Value * reverseDistance = generateCountReverseZeroes(iBuilder, prior_breaks);
    139     Value * width = ConstantInt::get(T, scanwordBitWidth);
    140     Value * matchRecordStart = iBuilder->CreateAdd(scanwordPos, iBuilder->CreateSub(width, reverseDistance));
    141     iBuilder->CreateBr(loop_final_block);
    142 
    143     // LOOP FINAL BLOCK
    144     // The prior breaks, if any, have been counted.  Set up phi nodes for recordNum
    145     // and recordStart, whose values depend on whether prior_breaks_block was executed.
    146     iBuilder->SetInsertPoint(loop_final_block);
    147     PHINode * matchRecordNum_phi = iBuilder->CreatePHI(T, 2, "matchRecordNum");
    148     PHINode * matchRecordStart_phi = iBuilder->CreatePHI(T, 2, "matchRecordStart");
    149     matchRecordNum_phi->addIncoming(recordNum_phi, process_matches_loop_entry);
    150     matchRecordNum_phi->addIncoming(matchRecordNum, prior_breaks_block);
    151     matchRecordStart_phi->addIncoming(recordStart_phi, process_matches_loop_entry);
    152     matchRecordStart_phi->addIncoming(matchRecordStart, prior_breaks_block);   
    153     Value * matchRecordEnd = iBuilder->CreateAdd(scanwordPos, generateCountForwardZeroes(iBuilder, matches_phi));
    154 
    155     Value * fileBuf = iBuilder->CreateLoad(kBuilder->getInternalStateInternal(instance, "FileBuf"));
    156     if (isNameExpression) {
    157         iBuilder->CreateCall(matchProcessor, std::vector<Value *>({matchRecordNum_phi, matchRecordStart_phi, matchRecordEnd, fileBuf}));
    158     } else {
    159         Value * fileSize = iBuilder->CreateLoad(kBuilder->getInternalStateInternal(instance, "FileSize"));
    160         Value * fileIdx = iBuilder->CreateLoad(kBuilder->getInternalStateInternal(instance, "FileIdx"));
    161         iBuilder->CreateCall(matchProcessor, std::vector<Value *>({matchRecordNum_phi, matchRecordStart_phi, matchRecordEnd, fileBuf, fileSize, fileIdx}));
    162     }
    163 
    164     Value * remaining_matches = generateResetLowestBit(iBuilder, matches_phi);
    165     Value * remaining_breaks = iBuilder->CreateXor(record_breaks_phi, prior_breaks);
    166     matches_phi->addIncoming(remaining_matches, loop_final_block);
    167     record_breaks_phi->addIncoming(remaining_breaks, loop_final_block);
    168     recordNum_phi->addIncoming(matchRecordNum_phi, loop_final_block);
    169     recordStart_phi->addIncoming(matchRecordStart_phi, loop_final_block);
    170     iBuilder->CreateBr(matches_test_block);
    171 
    172 
    173     // LOOP EXIT/MATCHES_DONE
    174     iBuilder->SetInsertPoint(matches_done_block);
    175     // When the matches are done, there may be additional record breaks remaining
    176     Value * more_breaks_cond = iBuilder->CreateICmpNE(record_breaks_phi, ConstantInt::get(T, 0));
    177     iBuilder->CreateCondBr(more_breaks_cond, remaining_breaks_block, return_block);
    178 
    179     // REMAINING_BREAKS_BLOCK: process remaining record breaks after all matches are processed
    180     iBuilder->SetInsertPoint(remaining_breaks_block);
    181     Value * break_count = generatePopcount(iBuilder, record_breaks_phi);
    182     Value * final_record_num = iBuilder->CreateAdd(recordNum_phi, break_count);
    183     Value * reverseZeroes = generateCountReverseZeroes(iBuilder, record_breaks_phi);
    184     Value * pendingLineStart = iBuilder->CreateAdd(scanwordPos, iBuilder->CreateSub(width, reverseZeroes));
    185     iBuilder->CreateBr(return_block);
    186 
    187     // RETURN block
    188     iBuilder->SetInsertPoint(return_block);
    189     PHINode * finalRecordCount_phi = iBuilder->CreatePHI(T, 2, "finalRecordCount");
    190     PHINode * finalRecordStart_phi = iBuilder->CreatePHI(T, 2, "finalRecordStart");
    191     finalRecordCount_phi->addIncoming(recordNum_phi, matches_done_block);
    192     finalRecordCount_phi->addIncoming(final_record_num, remaining_breaks_block);
    193     finalRecordStart_phi->addIncoming(recordStart_phi, matches_done_block);
    194     finalRecordStart_phi->addIncoming(pendingLineStart, remaining_breaks_block);
    195     Value * retVal = UndefValue::get(returnType);
    196     retVal = iBuilder->CreateInsertValue(retVal, finalRecordStart_phi, 0);
    197     retVal = iBuilder->CreateInsertValue(retVal, finalRecordCount_phi, 1);
    198     iBuilder->CreateRet(retVal);
    199 
    200     return function;
    201 }
    202 
    203 
    204 void generateScanMatch(Module * m, IDISA::IDISA_Builder * iBuilder, unsigned scanWordBitWidth, KernelBuilder * kBuilder, bool isNameExpression) {
    205    
    206     Type * T = iBuilder->getIntNTy(scanWordBitWidth);
    207     Type * S = PointerType::get(iBuilder->getIntNTy(8), 0);
    208 
    209     const unsigned fieldCount = iBuilder->getBitBlockWidth() / scanWordBitWidth;
    210     Type * scanwordVectorType =  VectorType::get(T, fieldCount);
    211 
    212     kBuilder->addInputStream(1, "matches");
    213     kBuilder->addInputStream(1, "breaks");
    214     //use index
    215     const unsigned lineStart = kBuilder->addInternalState(T, "LineStart");
    216     const unsigned lineNum = kBuilder->addInternalState(T, "LineNum");
    217     kBuilder->addInternalState(S, "FileBuf");
    218     kBuilder->addInternalState(T, "FileSize");
    219     kBuilder->addInternalState(T, "FileIdx");
    220     Function * function = kBuilder->prepareFunction();
    221 
    222 
    223     Function * scanWordFunction = generateScanWordRoutine(m, iBuilder, scanWordBitWidth, kBuilder, isNameExpression);
    224    
    225     iBuilder->SetInsertPoint(&function->getEntryBlock());
    226 
    227     Value * kernelStuctParam = kBuilder->getKernelState();
    228 
    229     Value * scanwordPos = iBuilder->CreateLoad(kBuilder->getInternalState("BlockNo"));
    230     scanwordPos = iBuilder->CreateMul(scanwordPos, ConstantInt::get(scanwordPos->getType(), iBuilder->getBitBlockWidth()));
    231    
    232     Value * recordStart = iBuilder->CreateBlockAlignedLoad(kBuilder->getInternalState(lineStart));
    233     Value * recordNum = iBuilder->CreateBlockAlignedLoad(kBuilder->getInternalState(lineNum));
    234     Value * matchWordVector = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(0)), scanwordVectorType);
    235     Value * breakWordVector = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(1)), scanwordVectorType);
    236     for(unsigned i = 0; i < fieldCount; ++i){
    237         Value * matchWord = iBuilder->CreateExtractElement(matchWordVector, ConstantInt::get(T, i));
    238         Value * recordBreaksWord = iBuilder->CreateExtractElement(breakWordVector, ConstantInt::get(T, i));
    239         Value * wordResult = iBuilder->CreateCall(scanWordFunction, {kernelStuctParam, matchWord, recordBreaksWord, scanwordPos, recordStart, recordNum});
    240         scanwordPos = iBuilder->CreateAdd(scanwordPos, ConstantInt::get(T, scanWordBitWidth));
    241         recordStart = iBuilder->CreateExtractValue(wordResult, std::vector<unsigned>({0}));
    242         recordNum = iBuilder->CreateExtractValue(wordResult, std::vector<unsigned>({1}));
    243     }
    244     kBuilder->setInternalState(lineStart, recordStart);
    245     kBuilder->setInternalState(lineNum, recordNum);
    246     kBuilder->finalize();
    247 
    248 }
    24941       
    250 std::unique_ptr<llvm::Module> scanMatchKernel::createKernelModule() {
    251     std::unique_ptr<llvm::Module> theModule = KernelInterface::createKernelModule();
    252    
    253     Function * scanWordFunction = generateScanWordRoutine(theModule.get());
     42void scanMatchKernel::generateKernel() {
     43    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     44    if (mKernelStateType == nullptr) finalizeKernelStateType();
     45    KernelBuilder::generateKernel();
     46    Module * m = iBuilder->getModule();
     47   
     48    Function * scanWordFunction = generateScanWordRoutine(m);
    25449    const unsigned fieldCount = iBuilder->getBitBlockWidth() / mScanwordBitWidth;
    25550    Type * T = iBuilder->getIntNTy(mScanwordBitWidth);
    25651    Type * scanwordVectorType =  VectorType::get(T, fieldCount);
    25752
    258     Function * doBlockFunction = theModule.get()->getFunction(mKernelName + "_DoBlock");
     53    Function * doBlockFunction = m->getFunction(mKernelName + "_DoBlock");
    25954
    26055    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     
    28479   
    28580    // scanMatch FinalBlock function simply dispatches to the DoBlock function
    286     Function * finalBlockFunction = theModule.get()->getFunction(mKernelName + "_FinalBlock");
    287     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
    288     Value * self = getParameter(finalBlockFunction, "self");
    289     Value * matchResults = getParameter(finalBlockFunction, "matchResults");
    290     iBuilder->CreateCall(doBlockFunction, {self, matchResults});
    291     iBuilder->CreateRetVoid();
    292     //
    293     return theModule;
     81    addTrivialFinalBlockMethod(m);
     82    iBuilder->restoreIP(savePoint);
    29483}
    29584
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.h

    r5055 r5063  
    77
    88#include "streamset.h"
    9 #include "interface.h"
     9#include "kernel.h"
    1010
    1111namespace llvm { class Module; class Function;}
     
    1414
    1515namespace kernel {
    16 
    17 class KernelBuilder;
    18 
    19 void generateScanMatch(llvm::Module * m, IDISA::IDISA_Builder * iBuilder, unsigned scanWordBitWidth, KernelBuilder * kBuilder, bool isNameExpression);
    20 
    2116   
    22    
    23 class scanMatchKernel : public KernelInterface {
     17class scanMatchKernel : public KernelBuilder {
    2418public:
    2519    scanMatchKernel(IDISA::IDISA_Builder * iBuilder, unsigned scanwordBitWidth, bool isNameExpression) :
    26     KernelInterface(iBuilder, "scanMatch",
     20    KernelBuilder(iBuilder, "scanMatch",
    2721                    {StreamSetBinding{StreamSetType(2, 1), "matchResults"}},
    2822                    {},
     
    3428    mIsNameExpression(isNameExpression) {}
    3529       
    36     std::unique_ptr<llvm::Module> createKernelModule() override;
     30    void generateKernel() override;
    3731
    3832private:
Note: See TracChangeset for help on using the changeset viewer.