Changeset 5063 for icGREP/icgrep-devel


Ignore:
Timestamp:
Jun 19, 2016, 3:00:47 PM (3 years ago)
Author:
cameron
Message:

New kernel infrastructure

Location:
icGREP/icgrep-devel/icgrep
Files:
2 deleted
23 edited

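Legend:

Unmodified (line is prefixed with both its old and new line numbers, e.g. "5959" = old line 59, new line 59)
Added (line is prefixed with its new line number only)
Removed (line is prefixed with its old line number only)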
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5059 r5063  
    5959SET(PABLO_SRC pablo/pabloAST.cpp pablo/ps_if.cpp pablo/ps_while.cpp pablo/function.cpp pablo/codegenstate.cpp pablo/builder.cpp pablo/symbol_generator.cpp pablo/printer_pablos.cpp pablo/pablo_toolchain.cpp pablo/passes/flattenif.cpp)
    6060SET(PABLO_SRC ${PABLO_SRC} pablo/pablo_compiler.cpp pablo/carry_manager.cpp pablo/carry_data.cpp pablo/pablo_kernel.cpp)
    61 SET(PABLO_SRC ${PABLO_SRC} kernels/s2p_kernel.cpp kernels/kernel.cpp kernels/instance.cpp kernels/streamset.cpp kernels/interface.cpp)
     61SET(PABLO_SRC ${PABLO_SRC} kernels/s2p_kernel.cpp kernels/kernel.cpp kernels/streamset.cpp kernels/interface.cpp)
    6262SET(PABLO_SRC ${PABLO_SRC} pablo/analysis/pabloverifier.cpp)
    6363SET(PABLO_SRC ${PABLO_SRC} pablo/optimizers/pablo_simplifier.cpp pablo/optimizers/codemotionpass.cpp)
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5052 r5063  
    122122    pablo::PabloFunction * function = re::re2pablo_compiler(encoding, re_ast);
    123123   
    124 
    125     pipelineBuilder.CreateKernels(function, UTF_16, isNameExpression);
    126     #ifndef NDEBUG
    127     std::cerr << "CreateKernels complete\n";
    128     #endif
    129 
    130     llvm::Function * grepIR = pipelineBuilder.ExecuteKernels(CountOnly, UTF_16);
    131     #ifndef NDEBUG
    132     std::cerr << "ExecuteKernels complete\n";
    133     #endif
     124    llvm::Function * grepIR = pipelineBuilder.ExecuteKernels(function, isNameExpression, CountOnly, UTF_16);
    134125
    135126    mEngine = JIT_to_ExecutionEngine(M);
     
    137128    icgrep_Linking(M, mEngine);
    138129
    139     #ifndef NDEBUG
    140     std::cerr << "icgrep_Linking complete\n";
     130#ifndef NDEBUG
    141131    verifyModule(*M, &dbgs());
    142     #endif
     132#endif
    143133
    144134    mEngine->finalizeObject();
    145     #ifndef NDEBUG
    146     std::cerr << "finalizeObject complete\n";
    147     #endif
    148135    delete idb;
    149136
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5052 r5063  
    243243        }
    244244    }
     245   
    245246    PrintResult(CountOnly, total_CountOnly);
    246247   
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5060 r5063  
    1414
    1515void KernelInterface::addKernelDeclarations(Module * client) {
    16     errs() << "KernelInterface::addKernelDeclarations\n";
    1716    Module * saveModule = iBuilder->getModule();
    1817    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5051 r5063  
    55
    66#include "kernel.h"
    7 #include <pablo/function.h>
    8 #include <IDISA/idisa_builder.h>
    9 #include <kernels/instance.h>
    10 #include <tuple>
    11 #include <boost/functional/hash_fwd.hpp>
    12 #include <unordered_map>
     7#include <llvm/IR/Module.h>
     8#include <llvm/IR/Type.h>
     9#include <llvm/IR/Value.h>
     10#include <llvm/Support/raw_ostream.h>
    1311
    1412using namespace llvm;
    15 using namespace pablo;
     13using namespace kernel;
    1614
    17 namespace kernel {
    18 
    19 // sets name & sets internal state to the kernel superclass state
    20 KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder, std::string && name, const unsigned defaultBufferSize)
    21 : iBuilder(builder)
    22 , mKernelName(name)
    23 , mDefaultBufferSize(defaultBufferSize)
    24 , mBitBlockType(builder->getBitBlockType())
    25 , mBlockNoIndex(0)
    26 , mKernelStateType(nullptr) {
    27     assert (mDefaultBufferSize > 0);
    28 }
    29 
    30 /** ------------------------------------------------------------------------------------------------------------- *
    31  * @brief addInternalState
    32  ** ------------------------------------------------------------------------------------------------------------- */
    33 unsigned KernelBuilder::addInternalState(Type * const type) {
    34     assert (type);
    35     const unsigned index = mInternalState.size();
    36     mInternalState.push_back(type);
    37     return index;
    38 }
    39 
    40     unsigned KernelBuilder::addInternalState(llvm::Type * const type, std::string name) {
    41         if (LLVM_UNLIKELY(mInternalStateNameMap.count(name) != 0)) {
    42             throw std::runtime_error("Kernel already contains internal state '" + name + "'");
    43         }
    44         const unsigned index = addInternalState(type);
    45         mInternalStateNameMap.emplace(name, iBuilder->getInt32(index));
    46         return index;
     15KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
     16                                 std::string kernelName,
     17                                 std::vector<StreamSetBinding> stream_inputs,
     18                                 std::vector<StreamSetBinding> stream_outputs,
     19                                 std::vector<ScalarBinding> scalar_parameters,
     20                                 std::vector<ScalarBinding> scalar_outputs,
     21                                 std::vector<ScalarBinding> internal_scalars) :
     22    KernelInterface(builder, kernelName, stream_inputs, stream_outputs, scalar_parameters, scalar_outputs, internal_scalars) {
     23   
     24    for (auto binding : scalar_parameters) {
     25        addScalar(binding.scalarType, binding.scalarName);
    4726    }
    48    
    49 /** ------------------------------------------------------------------------------------------------------------- *
    50  * @brief getInternalState
    51  ** ------------------------------------------------------------------------------------------------------------- */
    52 Value * KernelBuilder::getInternalStateInternal(Value * const kernelState, const std::string & name) {
    53     const auto f = mInternalStateNameMap.find(name);
    54     if (LLVM_UNLIKELY(f == mInternalStateNameMap.end())) {
    55         throw std::runtime_error("Kernel does not contain internal state " + name);
     27    for (auto binding : scalar_outputs) {
     28        addScalar(binding.scalarType, binding.scalarName);
    5629    }
    57     return getInternalStateInternal(kernelState, f->second);
    58 }
    59 
    60 Value * KernelBuilder::getInternalStateInternal(Value * const kernelState, disable_implicit_conversion<Value *> index) {
    61     assert (index->getType()->isIntegerTy());
    62     assert (kernelState->getType()->getPointerElementType() == mKernelStateType);
    63     return iBuilder->CreateGEP(kernelState, {iBuilder->getInt32(0), index});
    64 }
    65 
    66 /** ------------------------------------------------------------------------------------------------------------- *
    67  * @brief setInternalState
    68  ** ------------------------------------------------------------------------------------------------------------- */
    69 void KernelBuilder::setInternalStateInternal(Value * const kernelState, const std::string & name, Value * const value) {
    70     Value * ptr = getInternalStateInternal(kernelState, name);
    71     assert (ptr->getType()->getPointerElementType() == value->getType());
    72     if (value->getType() == iBuilder->getBitBlockType()) {
    73         iBuilder->CreateBlockAlignedStore(value, ptr);
    74     } else {
    75         iBuilder->CreateStore(value, ptr);
     30    for (auto binding : internal_scalars) {
     31        addScalar(binding.scalarType, binding.scalarName);
    7632    }
    7733}
    7834
    79 void KernelBuilder::setInternalStateInternal(Value * const kernelState, disable_implicit_conversion<Value *> index, Value * const value) {
    80     Value * ptr = getInternalStateInternal(kernelState, index);
    81     assert (ptr->getType()->getPointerElementType() == value->getType());
    82     if (value->getType() == iBuilder->getBitBlockType()) {
    83         iBuilder->CreateBlockAlignedStore(value, ptr);
    84     } else {
    85         iBuilder->CreateStore(value, ptr);
     35void KernelBuilder::addScalar(Type * t, std::string scalarName) {
     36    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
     37        throw std::runtime_error("Illegal addition of kernel field after kernel state finalized: " + scalarName);
    8638    }
     39    unsigned index = mKernelFields.size();
     40    mKernelFields.push_back(t);
     41    mInternalStateNameMap.emplace(scalarName, iBuilder->getInt32(index));
    8742}
    8843
    89 /** ------------------------------------------------------------------------------------------------------------- *
    90  * @brief addInputStream
    91  ** ------------------------------------------------------------------------------------------------------------- */
    92 void KernelBuilder::addInputStream(const unsigned fields, std::string && name) {
    93     assert (fields > 0 && !name.empty());
    94     mInputStreamName.push_back(name);
    95     if (fields == 1) {
    96         mInputStream.push_back(mBitBlockType);
    97     } else {
    98         mInputStream.push_back(ArrayType::get(mBitBlockType, fields));
    99     }
     44void KernelBuilder::finalizeKernelStateType() {
     45    mKernelStateType = StructType::create(getGlobalContext(), mKernelFields, mKernelName);
    10046}
    10147
    102 void KernelBuilder::addInputStream(const unsigned fields) {
    103     addInputStream(fields, mKernelName + "_InputStream_" + std::to_string(mInputStream.size()));
     48std::unique_ptr<Module> KernelBuilder::createKernelModule() {
     49    Module * saveModule = iBuilder->getModule();
     50    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     51    if (mKernelStateType == nullptr) finalizeKernelStateType();
     52    std::unique_ptr<Module> theModule = make_unique<Module>(mKernelName, getGlobalContext());
     53    Module * m = theModule.get();
     54    iBuilder->setModule(m);
     55    generateKernel();
     56    iBuilder->setModule(saveModule);
     57    iBuilder->restoreIP(savePoint);
     58    return theModule;
    10459}
    10560
    106 /** ------------------------------------------------------------------------------------------------------------- *
    107  * @brief getInputStream
    108  ** ------------------------------------------------------------------------------------------------------------- */
    109 Value * KernelBuilder::getInputStreamInternal(Value * const inputStreamSet, disable_implicit_conversion<Value *> index) {
    110     assert ("Parameters cannot be null!" && (inputStreamSet != nullptr && index != nullptr));
    111     assert ("Stream index must be an integer!" && index->getType()->isIntegerTy());
    112     assert ("Illegal input stream set provided!" && inputStreamSet->getType()->getPointerElementType() == mInputStreamType);
    113     if (LLVM_LIKELY(isa<ConstantInt>(index.get()) || getInputStreamType()->isArrayTy())) {
    114         return iBuilder->CreateGEP(inputStreamSet, { iBuilder->getInt32(0), index });
     61void KernelBuilder::generateKernel() {
     62    Module * m = iBuilder->getModule();
     63    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     64    addKernelDeclarations(m);
     65    // Implement the accumulator get functions
     66    for (auto binding : mScalarOutputs) {
     67        auto fnName = mKernelName + accumulator_infix + binding.scalarName;
     68        Function * accumFn = m->getFunction(fnName);
     69        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.scalarName, accumFn, 0));
     70        Value * self = &*(accumFn->arg_begin());
     71        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.scalarName)});
     72        Value * retVal = iBuilder->CreateLoad(ptr);
     73        iBuilder->CreateRet(retVal);
    11574    }
    116     throw std::runtime_error("Cannot access the input stream with a non-constant value unless all input stream types are identical!");
     75    // Implement the initializer function
     76    Function * initFunction = m->getFunction(mKernelName + init_suffix);
     77    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "Init_entry", initFunction, 0));
     78   
     79    Function::arg_iterator args = initFunction->arg_begin();
     80    Value * self = &*(args++);
     81    iBuilder->CreateStore(Constant::getNullValue(mKernelStateType), self);
     82    for (auto binding : mScalarInputs) {
     83        Value * parm = &*(args++);
     84        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.scalarName)});
     85        iBuilder->CreateStore(parm, ptr);
     86    }
     87    iBuilder->CreateRetVoid();
     88    iBuilder->restoreIP(savePoint);
     89}
     90
     91void KernelBuilder::addTrivialFinalBlockMethod(Module * m) {
     92    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     93    Module * saveModule = iBuilder->getModule();
     94    iBuilder->setModule(m);
     95    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     96    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
     97    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
     98    // Final Block arguments: self, remaining, then the standard DoBlock args.
     99    Function::arg_iterator args = finalBlockFunction->arg_begin();
     100    Value * self = &*(args++);
     101    /* Skip "remaining" arg */ args++;
     102    std::vector<Value *> doBlockArgs = {self};
     103    while (args != finalBlockFunction->arg_end()){
     104        doBlockArgs.push_back(&*args++);
     105    }
     106    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
     107    iBuilder->CreateRetVoid();
     108    iBuilder->setModule(saveModule);
     109    iBuilder->restoreIP(savePoint);
     110}
     111
     112Value * KernelBuilder::getScalarIndex(std::string fieldName) {
     113    const auto f = mInternalStateNameMap.find(fieldName);
     114    if (LLVM_UNLIKELY(f == mInternalStateNameMap.end())) {
     115        throw std::runtime_error("Kernel does not contain internal state: " + fieldName);
     116    }
     117    return f->second;
     118}
     119
     120Value * KernelBuilder::getScalarField(Value * self, std::string fieldName) {
     121    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(fieldName)});
     122    return iBuilder->CreateLoad(ptr);
     123}
     124
     125void KernelBuilder::setScalarField(Value * self, std::string fieldName, Value * newFieldVal) {
     126    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(fieldName)});
     127    iBuilder->CreateStore(newFieldVal, ptr);
    117128}
    118129
    119130
    120 /** ------------------------------------------------------------------------------------------------------------- *
    121  * @brief addOutputStream
    122  ** ------------------------------------------------------------------------------------------------------------- */
    123 unsigned KernelBuilder::addOutputStream(const unsigned fields) {
    124     assert (fields > 0);
    125     const unsigned index = mOutputStream.size();
    126     mOutputStream.push_back((fields == 1) ? mBitBlockType : ArrayType::get(mBitBlockType, fields));
    127     return index;
    128 }
    129 
    130 /** ------------------------------------------------------------------------------------------------------------- *
    131  * @brief getOutputStream
    132  ** ------------------------------------------------------------------------------------------------------------- */
    133 Value * KernelBuilder::getOutputStreamInternal(Value * const outputStreamSet, disable_implicit_conversion<Value *> index) {
    134     assert ("Parameters cannot be null!" && (outputStreamSet != nullptr && index != nullptr));
    135     assert ("Stream index must be an integer!" && index->getType()->isIntegerTy());
    136     assert ("Illegal output stream set provided!" && outputStreamSet->getType()->getPointerElementType() == getOutputStreamType());
    137     if (LLVM_LIKELY(isa<ConstantInt>(index.get()) || getOutputStreamType()->isArrayTy())) {
    138         return iBuilder->CreateGEP(outputStreamSet, { iBuilder->getInt32(0), index });
     131Value * KernelBuilder::getParameter(Function * f, std::string paramName) {
     132    for (Function::arg_iterator argIter = f->arg_begin(), end = f->arg_end(); argIter != end; argIter++) {
     133        Value * arg = &*argIter;
     134        if (arg->getName() == paramName) return arg;
    139135    }
    140     throw std::runtime_error("Cannot access the output stream with a non-constant value unless all output stream types are identical!");
    141 }
    142 
    143 /** ------------------------------------------------------------------------------------------------------------- *
    144  * @brief packDataTypes
    145  ** ------------------------------------------------------------------------------------------------------------- */
    146 Type * KernelBuilder::packDataTypes(const std::vector<llvm::Type *> & types) {
    147     if (types.empty()) {
    148         return nullptr;
    149     }
    150     for (Type * type : types) {
    151         if (type != types.front()) { // use canLosslesslyBitcastInto ?
    152             return StructType::get(iBuilder->getContext(), types);
    153         }
    154     }
    155     return ArrayType::get(types.front(), types.size());
    156 }
    157 
    158 /** ------------------------------------------------------------------------------------------------------------- *
    159  * @brief prepareFunction
    160  ** ------------------------------------------------------------------------------------------------------------- */
    161 Function * KernelBuilder::prepareFunction(std::vector<unsigned> && inputStreamOffsets) {
    162 
    163     mBlockNoIndex = iBuilder->getInt32(addInternalState(iBuilder->getInt64Ty(), "BlockNo"));
    164 
    165     if (!mKernelStateType) {
    166         mKernelStateType = StructType::create(iBuilder->getContext(), mInternalState, mKernelName);
    167     }
    168     mInputStreamType = packDataTypes(mInputStream);
    169     mOutputStreamType = packDataTypes(mOutputStream);
    170     mInputStreamOffsets = inputStreamOffsets;
    171 
    172     std::vector<Type *> params;
    173     params.push_back(mKernelStateType->getPointerTo());
    174     if (mInputStreamType) {
    175         for (unsigned i = 0; i < mInputStreamOffsets.size(); ++i) {
    176             params.push_back(mInputStreamType->getPointerTo());
    177         }
    178     }
    179     if (mOutputStreamType) {
    180         params.push_back(mOutputStreamType->getPointerTo());
    181     }
    182 
    183     // A pointer value is captured if the function makes a copy of any part of the pointer that outlives
    184     // the call (e.g., stored in a global or, depending on the context, when returned by the function.)
    185     // Since this does not occur in either our DoBlock or Constructor, all parameters are marked nocapture.
    186 
    187     FunctionType * const functionType = FunctionType::get(iBuilder->getVoidTy(), params, false);
    188     mDoBlock = Function::Create(functionType, GlobalValue::ExternalLinkage, mKernelName + "_DoBlock", iBuilder->getModule());
    189     mDoBlock->setCallingConv(CallingConv::C);
    190     for (unsigned i = 1; i <= params.size(); ++i) {
    191         mDoBlock->setDoesNotCapture(i);
    192     }
    193     mDoBlock->setDoesNotThrow();
    194     Function::arg_iterator args = mDoBlock->arg_begin();
    195     mKernelStateParam = &*(args++);
    196     mKernelStateParam->setName("this");
    197     if (mInputStreamType) {
    198         for (const unsigned offset : mInputStreamOffsets) {
    199             Value * const inputStreamSet = &*(args++);
    200             inputStreamSet->setName("inputStreamSet" + std::to_string(offset));
    201             mInputStreamParam.emplace(offset, inputStreamSet);
    202         }
    203     }
    204     if (mOutputStreamType) {
    205         mOutputStreamParam = &*args;
    206         mOutputStreamParam->setName("outputStreamSet");
    207     }
    208     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", mDoBlock, 0));
    209     return mDoBlock;
    210 }
    211 
    212 void KernelBuilder::setInstanceParameters(std::vector<ParameterBinding> parms) {
    213     mInstanceParameters = parms;
    214     mInstanceParametersOffset = mInternalState.size();
    215     for (auto binding : mInstanceParameters) {
    216         addInternalState(binding.parameterType, binding.parameterName);
    217     }
    218 }
    219 
    220 
    221 Function *  KernelBuilder::createInitMethod() {
    222     if (!mKernelStateType) {
    223         mKernelStateType = StructType::create(iBuilder->getContext(), mInternalState, mKernelName);
    224     }
    225     std::vector<Type *> initParameters = {PointerType::getUnqual(mKernelStateType)};
    226     for (auto binding : mInstanceParameters) {
    227         initParameters.push_back(binding.parameterType);
    228     }
    229     FunctionType * mInitFunctionType = FunctionType::get(iBuilder->getVoidTy(), initParameters, false);
    230     Function * mInitFunction = Function::Create(mInitFunctionType, GlobalValue::ExternalLinkage, mKernelName + "_Init", iBuilder->getModule());
    231     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "Init_entry", mInitFunction, 0));
    232 
    233     Function::arg_iterator args = mInitFunction->arg_begin();
    234     Value * self = &*(args++);
    235     self->setName("self");
    236     for (auto binding : mInstanceParameters) {
    237         Value * parm = &*(args++);
    238         parm->setName(binding.parameterName);
    239     }
    240 
    241     iBuilder->CreateStore(Constant::getNullValue(mKernelStateType), self);
    242     args = mInitFunction->arg_begin();
    243     args++;   // skip self argument.
    244     for (auto binding : mInstanceParameters) {
    245         Value * parm = &*(args++);
    246         setInternalStateInternal(self, binding.parameterName, parm);
    247     }
    248     iBuilder->CreateRetVoid();
    249     return mInitFunction;
     136    throw std::runtime_error("Method does not have parameter: " + paramName);
    250137}
    251138
    252139
    253140
    254 
    255 
    256 
    257 /** ------------------------------------------------------------------------------------------------------------- *
    258  * @brief finalize
    259  ** ------------------------------------------------------------------------------------------------------------- */
    260 void KernelBuilder::finalize() {
    261     // Finish the actual function
    262     Value * blockNo = getBlockNo();
    263     Value * value = iBuilder->CreateLoad(blockNo);
    264     value = iBuilder->CreateAdd(value, ConstantInt::get(value->getType(), 1));
    265     iBuilder->CreateStore(value, blockNo);
    266     iBuilder->CreateRetVoid();
    267 
    268     mKernelStateParam = nullptr;
    269     mInputStreamParam.clear();
    270     mOutputStreamParam = nullptr;
    271     iBuilder->ClearInsertionPoint();
    272 }
    273 
    274 /** ------------------------------------------------------------------------------------------------------------- *
    275  * @brief instantiate
    276  *
    277  * Allocate and zero initialize the memory for this kernel and its output scalars and streams
    278  ** ------------------------------------------------------------------------------------------------------------- */
    279 Instance * KernelBuilder::instantiate(std::pair<Value *, unsigned> && inputStreamSet, const unsigned outputBufferSize) {
    280     AllocaInst * const kernelState = iBuilder->CreateAlloca(mKernelStateType);
    281     iBuilder->CreateStore(Constant::getNullValue(mKernelStateType), kernelState);
    282     AllocaInst * outputStreamSets = nullptr;
    283     if (mOutputStreamType) {
    284         outputStreamSets = iBuilder->CreateAlloca(mOutputStreamType, iBuilder->getInt32(outputBufferSize));
    285     }
    286     return new Instance(this, kernelState, std::get<0>(inputStreamSet), std::get<1>(inputStreamSet), outputStreamSets, outputBufferSize);
    287 }
    288 
    289 /** ------------------------------------------------------------------------------------------------------------- *
    290  * @brief instantiate
    291  *
    292  * Generate a new instance of this kernel and call the default constructor to initialize it
    293  ** ------------------------------------------------------------------------------------------------------------- */
    294 Instance * KernelBuilder::instantiate(std::initializer_list<llvm::Value *> inputStreams) {   
    295     AllocaInst * inputStruct = iBuilder->CreateAlloca(mInputStreamType);
    296     unsigned i = 0;
    297     for (Value * inputStream : inputStreams) {
    298         Value * ptr = iBuilder->CreateGEP(inputStruct, { iBuilder->getInt32(0), iBuilder->getInt32(i++)});
    299         iBuilder->CreateStore(inputStream, ptr);
    300     }
    301     return instantiate(std::make_pair(inputStruct, 0));
    302 }
    303 
    304 Value * KernelBuilder::getInputStreamParam(const unsigned streamOffset) const {
    305     const auto f = mInputStreamParam.find(streamOffset);
    306     if (LLVM_UNLIKELY(f == mInputStreamParam.end())) {
    307         throw std::runtime_error("Kernel compilation error: No input stream parameter for stream offset " + std::to_string(streamOffset));
    308     }
    309     return f->second;
    310 }
    311    
    312 llvm::Value * make_New(IDISA::IDISA_Builder * iBuilder, std::string kernel_name, std::vector<Value *> args) {
    313     Module * m = iBuilder->getModule();
    314     Type * kernelType = m->getTypeByName(kernel_name);
    315     if (!kernelType) {
    316         throw std::runtime_error("Cannot find kernel type " + kernel_name);
    317     }
    318     Value * kernelInstance = iBuilder->CreateAlloca(kernelType);
    319     std::vector<Value *> init_args = {kernelInstance};
    320     for (auto a : args) {
    321         init_args.push_back(a);
    322     }
    323     //iBuilder->CreateStore(Constant::getNullValue(kernelType), kernelInstance);
    324     Function * initMethod = m->getFunction(kernel_name + "_Init");
    325     if (!initMethod) {
    326         //throw std::runtime_error("Cannot find " + kernel_name + "_Init");
    327         iBuilder->CreateStore(Constant::getNullValue(kernelType), kernelInstance);
    328         return kernelInstance;
    329     }
    330     iBuilder->CreateCall(initMethod, init_args);
    331     return kernelInstance;
    332 }
    333     llvm::Value * make_DoBlock_Call(IDISA::IDISA_Builder * iBuilder, std::string kernel_name, std::vector<Value *> args) {
    334         Module * m = iBuilder->getModule();
    335         Function * doBlockMethod = m->getFunction(kernel_name + "_DoBlock");
    336         if (!doBlockMethod) {
    337             throw std::runtime_error("Cannot find " + kernel_name + "_DoBlock");
    338         }
    339         return iBuilder->CreateCall(doBlockMethod, args);
    340     }
    341    
    342     llvm::Value * make_FinalBlock_Call(IDISA::IDISA_Builder * iBuilder, std::string kernel_name, std::vector<Value *> args) {
    343         Module * m = iBuilder->getModule();
    344         Function * finalBlockMethod = m->getFunction(kernel_name + "_FinalBlock");
    345         if (!finalBlockMethod) {
    346             throw std::runtime_error("Cannot find " + kernel_name + "_FinalBlock");
    347         }
    348         return iBuilder->CreateCall(finalBlockMethod, args);
    349     }
    350    
    351    
    352 
    353 } // end of namespace kernel
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5051 r5063  
    1 #ifndef KERNEL_H
    2 #define KERNEL_H
    31/*
    42 *  Copyright (c) 2016 International Characters.
     
    64 */
    75
    8 #include <string>
     6#ifndef KERNEL_BUILDER_H
     7#define KERNEL_BUILDER_H
     8
     9
     10#include "interface.h"
    911#include <vector>
     12#include <llvm/IR/Type.h>
     13#include <IDISA/idisa_builder.h>
    1014#include <boost/container/flat_map.hpp>
    11 #include <IDISA/idisa_builder.h>
    1215
    13 namespace llvm {
    14     class Value;
    15     class Module;
    16     class ExecutionEngine;
    17     class VectorType;
    18     class PointerType;
    19     class Constant;
    20     class FunctionType;
    21     class Function;
    22     class BasicBlock;
    23     class Type;
    24 }
    25 
    26 namespace pablo {
    27     class PabloAST;
    28     class PabloFunction;
    29 }
    30 
    31 template<typename T>
    32 struct disable_implicit_conversion {
    33     inline disable_implicit_conversion(T const value) : _value(value) { assert(_value); }
    34     inline disable_implicit_conversion(std::nullptr_t) = delete;
    35     inline disable_implicit_conversion(unsigned) = delete;
    36     operator T() const { return _value; }
    37     T operator-> () const { return _value; }
    38     T get() const { return _value; }
    39 private:
    40     T const  _value;
    41 };
    4216
    4317namespace kernel {
    44     struct ParameterBinding {
    45         llvm::Type * parameterType;
    46         std::string parameterName;
    47     };
    4818   
     19class KernelBuilder : public KernelInterface {
     20    using NameMap = boost::container::flat_map<std::string, llvm::ConstantInt *>;
     21
     22public:
     23    KernelBuilder(IDISA::IDISA_Builder * builder,
     24                    std::string kernelName,
     25                    std::vector<StreamSetBinding> stream_inputs,
     26                    std::vector<StreamSetBinding> stream_outputs,
     27                    std::vector<ScalarBinding> scalar_parameters,
     28                    std::vector<ScalarBinding> scalar_outputs,
     29                    std::vector<ScalarBinding> internal_scalars);
    4930   
     31    // Add an additional scalar field to the KernelState struct.
     32    // Must occur before any call to addKernelDeclarations or createKernelModule.
     33    void addScalar(llvm::Type * t, std::string scalarName);
    5034   
    51 class Instance;
    52 
    53 class KernelBuilder {
    54     friend class Instance;
    55     friend llvm::Function * generateScanWordRoutine(llvm::Module *, IDISA::IDISA_Builder *, unsigned, KernelBuilder *, bool);
    56     using InputStreamMap = boost::container::flat_map<unsigned, llvm::Value *>;
    57     using NameMap = boost::container::flat_map<std::string, llvm::ConstantInt *>;
    58 public:
    59 
    60     KernelBuilder(IDISA::IDISA_Builder * builder, std::string && name, const unsigned defaultBufferSize);
    61 
     35    void finalizeKernelStateType();
    6236   
    63     void setInstanceParameters(std::vector<ParameterBinding> binding);
    64 
    65     unsigned addInternalState(llvm::Type * const type);
    66     unsigned addInternalState(llvm::Type * const type, std::string name);
    67 
    68     void addInputStream(const unsigned fields);
    69     void addInputStream(const unsigned fields, std::string && name);
    70 
    71     unsigned addOutputStream(const unsigned fields);
    72 
     37    // Create a module for the kernel, including the kernel state type and
     38    // all required methods.  The init and accumulator output methods will be
     39    // defined, while the doBlock and finalBlock methods will initially be empty.
     40    //
     41    virtual std::unique_ptr<llvm::Module> createKernelModule();
    7342   
     43    // Generate the kernel into the current module.
     44    virtual void generateKernel();
    7445   
    75     llvm::Function * createInitMethod();
    76 
     46    // Add a FinalBlock method that simply calls DoBlock without additional
     47    // preparation.
     48    void addTrivialFinalBlockMethod(Module * m);
    7749   
    78     inline llvm::Function * prepareFunction() {
    79         return prepareFunction({0});
    80     }
    81 
    82     llvm::Function * prepareFunction(std::vector<unsigned> && inputStreamOffsets);
    83 
    84     inline llvm::Value * getInternalState(const std::string & name) {
    85         return getInternalStateInternal(mKernelStateParam, name);
    86     }
    87 
    88     inline void setInternalState(const std::string & name, llvm::Value * value) {
    89         setInternalStateInternal(mKernelStateParam, name, value);
    90     }
    91 
    92     inline llvm::Value * getInternalState(const unsigned index) {
    93         assert (index < mInternalState.size());
    94         return getInternalStateInternal(mKernelStateParam, iBuilder->getInt32(index));
    95     }
    96 
    97     inline llvm::Value * getInternalState(disable_implicit_conversion<llvm::Value *> const index) {
    98         return getInternalStateInternal(mKernelStateParam, index);
    99     }
    100 
    101     void setInternalState(const unsigned index, llvm::Value * value) {
    102         assert (index < mInternalState.size());
    103         setInternalStateInternal(mKernelStateParam, iBuilder->getInt32(index), value);
    104     }
    105 
    106     void setInternalState(disable_implicit_conversion<llvm::Value *> const index, llvm::Value * value) {
    107         setInternalStateInternal(mKernelStateParam, index, value);
    108     }
    109 
    110     inline llvm::Type * getKernelStateType() const{
    111         return mKernelStateType;
    112     }
    113 
    114     inline llvm::Value * getInputStream(const unsigned index, const unsigned streamOffset = 0) {
    115         return getInputStreamInternal(getInputStreamParam(streamOffset), iBuilder->getInt32(index));
    116     }
    117 
    118     inline llvm::Value * getInputStream(disable_implicit_conversion<llvm::Value *> index, const unsigned streamOffset = 0) {
    119         return getInputStreamInternal(getInputStreamParam(streamOffset), index);
    120     }
    121 
    122     inline unsigned getNumOfInputStreams() const {
    123         return mInputStream.size();
    124     }
    125 
    126     inline llvm::Type * getInputStreamType() const {
    127         return mInputStreamType;
    128     }
    129 
    130     inline llvm::Value * getOutputStream(const unsigned index) {
    131         assert (index < getNumOfOutputStreams());
    132         return getOutputStreamInternal(mOutputStreamParam, iBuilder->getInt32(index));
    133     }
    134 
    135     inline llvm::Value * getOutputStream(disable_implicit_conversion<llvm::Value *> const index) {
    136         return getOutputStreamInternal(mOutputStreamParam, index);
    137     }
    138 
    139     inline unsigned getNumOfOutputStreams() const {
    140         return mOutputStream.size();
    141     }
    142 
    143     inline llvm::Type * getOutputStreamType() const {
    144         return mOutputStreamType;
    145     }
    146 
    147     inline llvm::Value * getBlockNo() {
    148         return getBlockNoInternal(mKernelStateParam);
    149     }
    150 
    151     unsigned getDefaultBufferSize() const;
    152 
    153     void finalize();
    154 
    155     kernel::Instance * instantiate(std::pair<llvm::Value *, unsigned> && inputStreamSet) {
    156         return instantiate(std::move(inputStreamSet), getDefaultBufferSize());
    157     }
    158 
    159     kernel::Instance * instantiate(std::pair<llvm::Value *, unsigned> && inputStreamSet, const unsigned outputBufferSize);
    160 
    161     kernel::Instance * instantiate(llvm::Value * const inputStream) {
    162         return instantiate(std::make_pair(inputStream, 0));
    163     }
    164 
    165     kernel::Instance * instantiate(std::initializer_list<llvm::Value *> inputStreams);
    166 
    167     llvm::Value * getKernelState() const;
    168 
    169     llvm::Function * getDoBlockFunction() const;
     50    // Run-time access of Kernel State and parameters of methods for
     51    // use in implementing kernels.
     52   
     53    // Get the index of a named scalar field within the kernel state struct.
     54    llvm::Value * getScalarIndex(std::string);
     55   
     56    // Get the value of a scalar field for a given instance.
     57    llvm::Value * getScalarField(llvm::Value * self, std::string fieldName);
     58   
     59    // Set the value of a scalar field for a given instance.
     60    void setScalarField(llvm::Value * self, std::string fieldName, llvm::Value * newFieldVal);
     61   
     62    // Get a parameter by name.
     63    llvm::Value * getParameter(llvm::Function * f, std::string paramName);
    17064
    17165protected:
    17266
    173     Type * packDataTypes(const std::vector<llvm::Type *> & types);
    174 
    175     llvm::Value * getInputStreamInternal(llvm::Value * const inputStreamSet, disable_implicit_conversion<llvm::Value *> index);
    176 
    177     llvm::Value * getInternalStateInternal(llvm::Value * const kernelState, const std::string & name);
    178 
    179     void setInternalStateInternal(llvm::Value * const kernelState, const std::string & name, llvm::Value * const value);
    180 
    181     llvm::Value * getInternalStateInternal(llvm::Value * const kernelState, disable_implicit_conversion<llvm::Value *> index);
    182 
    183     void setInternalStateInternal(llvm::Value * const kernelState, const unsigned index, llvm::Value * const value);
    184 
    185     void setInternalStateInternal(llvm::Value * const kernelState, disable_implicit_conversion<llvm::Value *> index, llvm::Value * const value);
    186 
    187     llvm::Value * getOutputStreamInternal(llvm::Value * const outputStreamSet, disable_implicit_conversion<llvm::Value *> index);
    188 
    189     llvm::Value * getBlockNoInternal(llvm::Value * const instance) {
    190         return getInternalStateInternal(instance, mBlockNoIndex);
    191     }
    192 
    193     llvm::Function * getOutputStreamSetFunction() const;
    194 
    195     llvm::Value * getInputStreamParam(const unsigned streamOffset) const;
    196 
    197     const std::vector<unsigned> & getInputStreamOffsets() const {
    198         return mInputStreamOffsets;
    199     }
    200 
    201 private:
    202 
    203     IDISA::IDISA_Builder * const        iBuilder;
    204     const std::string                   mKernelName;
    205     unsigned                            mDefaultBufferSize;
    206 
    207     llvm::Type *                        mBitBlockType;
    208     llvm::ConstantInt *                 mBlockNoIndex;
    209     llvm::Function *                                    mConstructor;
    210     llvm::Function *                                    mDoBlock;
    211 
    212     std::vector<ParameterBinding>           mInstanceParameters;
    213     unsigned                            mInstanceParametersOffset;
    214    
    215     llvm::Type *                        mKernelStateType;
    216     llvm::Type *                        mInputStreamType;
    217     llvm::Type *                        mOutputStreamType;
    218 
    219     llvm::Value *                       mKernelStateParam;
    220     InputStreamMap                      mInputStreamParam;
    221     llvm::Value *                       mOutputStreamParam;
    222 
    223     std::vector<std::string>            mInputScalarName;   
    224     std::vector<llvm::Type *>           mInputStream;
    225     std::vector<std::string>            mInputStreamName;
    226     std::vector<unsigned>               mInputStreamOffsets;
    227     std::vector<llvm::Type *>           mOutputStream;
    228     std::vector<llvm::Type *>                   mInternalState;
    229     NameMap                             mInternalStateNameMap;
     67    std::vector<llvm::Type *>  mKernelFields;
     68    NameMap                    mInternalStateNameMap;
    23069};
    231 
    232 inline llvm::Function * KernelBuilder::getDoBlockFunction() const {
    233     return mDoBlock;
    23470}
    235 
    236 inline llvm::Value * KernelBuilder::getKernelState() const {
    237     return mKernelStateParam;
    238 }
    239 
    240 inline unsigned KernelBuilder::getDefaultBufferSize() const {
    241     return mDefaultBufferSize;
    242 }
    243    
    244 llvm::Value * make_New(IDISA::IDISA_Builder * iBuilder, std::string kernel_name, std::vector<Value *> args);
    245 
    246     llvm::Value * make_DoBlock_Call(IDISA::IDISA_Builder * iBuilder, std::string kernel_name, std::vector<Value *> args);
    247     llvm::Value * make_FinalBlock_Call(IDISA::IDISA_Builder * iBuilder, std::string kernel_name, std::vector<Value *> args);
    248    
    249 } // end of namespace kernel
    250 
    251 #endif // KERNEL_H
     71#endif
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5052 r5063  
    1111#include <kernels/scanmatchgen.h>
    1212#include <kernels/s2p_kernel.h>
    13 #include <kernels/instance.h>
    1413
    1514#include <pablo/function.h>
    16 #include <pablo/pablo_compiler.h>
     15#include <pablo/pablo_kernel.h>
    1716#include <pablo/pablo_toolchain.h>
    1817
    1918#include <llvm/IR/Intrinsics.h>
     19#include "llvm/Support/SourceMgr.h"
     20#include "llvm/IRReader/IRReader.h"
     21#include "llvm/Linker/Linker.h"
     22
     23
    2024
    2125using namespace pablo;
     
    3135
    3236PipelineBuilder::~PipelineBuilder() {
    33     delete mS2PKernel;
    34     delete mICgrepKernel;
    35     delete mScanMatchKernel;
    36 }
    37 
    38 void PipelineBuilder::CreateKernels(PabloFunction * function, bool UTF_16, bool isNameExpression){
    39     mS2PKernel = new KernelBuilder(iBuilder, "s2p", codegen::SegmentSize);
    40     mICgrepKernel = new KernelBuilder(iBuilder, "icgrep", codegen::SegmentSize);
    41     mScanMatchKernel = new KernelBuilder(iBuilder, "scanMatch", codegen::SegmentSize);
    42     if (UTF_16) {
    43         generateS2P_16Kernel(mMod, iBuilder, mS2PKernel);
    44     }
    45     else {
    46         generateS2PKernel(mMod, iBuilder, mS2PKernel);
    47     }
    48     generateScanMatch(mMod, iBuilder, 64, mScanMatchKernel, isNameExpression);
    49     pablo_function_passes(function);
    50     PabloCompiler pablo_compiler(mMod, iBuilder);
    51     try {
    52         pablo_compiler.setKernel(mICgrepKernel);
    53         pablo_compiler.compile(function);
    54         delete function;
    55         releaseSlabAllocatorMemory();
    56     } catch (std::runtime_error e) {
    57         delete function;
    58         releaseSlabAllocatorMemory();
    59         std::cerr << "Runtime error: " << e.what() << std::endl;
    60         exit(1);
    61     }
    6237}
    6338
     
    6742}
    6843
    69 inline Value * Cal_Count(Instance * icGrepInstance, IDISA::IDISA_Builder * iBuilder) {
    70     Value * match = icGrepInstance->getOutputStream(0, 0);
    71     Value * matches = iBuilder->CreateLoad(match, false, "match");
     44inline Value * Cal_Count(Value * match_ptr, IDISA::IDISA_Builder * iBuilder) {
     45    Value * matches = iBuilder->CreateLoad(match_ptr, false, "match");
    7246    return generatePopcount(iBuilder, matches);
    7347}
    7448
    75 Function * PipelineBuilder::ExecuteKernels(bool CountOnly, bool UTF_16) {
     49Function * PipelineBuilder::ExecuteKernels(PabloFunction * function, bool isNameExpression, bool CountOnly, bool UTF_16) {
     50   
     51    s2pKernel  s2pk(iBuilder);
     52    scanMatchKernel scanMatchK(iBuilder, 64, false);
     53
     54    s2pk.generateKernel();
     55    scanMatchK.generateKernel();
     56   
     57    //std::unique_ptr<Module> s2pM = s2pk.createKernelModule();
     58    //std::unique_ptr<Module> scanMatchM = scanMatchK.createKernelModule();
     59   
     60    //s2pk.addKernelDeclarations(mMod);
     61    //scanMatchK.addKernelDeclarations(mMod);
     62
     63    pablo_function_passes(function);
     64    PabloKernel  icgrepK(iBuilder, "icgrep", function, {"matchedLineCount"});
     65    icgrepK.prepareKernel();
     66    icgrepK.generateKernel();
     67
     68    //std::unique_ptr<Module> icgrepM = icgrepK.createKernelModule();
     69    //icgrepK.addKernelDeclarations(mMod);
     70   
    7671    Type * const int64ty = iBuilder->getInt64Ty();
    7772    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    78     Type * const inputType = PointerType::get(ArrayType::get(StructType::get(mMod->getContext(), std::vector<Type *>({ArrayType::get(mBitBlockType, (UTF_16 ? 16 : 8))})), 1), 0);
     73    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, (UTF_16 ? 16 : 8)), 1), 0);
    7974    Type * const resultTy = CountOnly ? int64ty : iBuilder->getVoidTy();
    8075    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", resultTy, inputType, int64ty, int64ty, nullptr));
     
    9085
    9186    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
    92 
    93 
    9487    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    95     BasicBlock * segmentCondBlock = nullptr;
    96     BasicBlock * segmentBodyBlock = nullptr;
    97     const unsigned segmentSize = codegen::SegmentSize;
    98     if (segmentSize > 1) {
    99         segmentCondBlock = BasicBlock::Create(mMod->getContext(), "segmentCond", main, 0);
    100         segmentBodyBlock = BasicBlock::Create(mMod->getContext(), "segmentBody", main, 0);
    101     }
    10288    BasicBlock * fullCondBlock = BasicBlock::Create(mMod->getContext(), "fullCond", main, 0);
    10389    BasicBlock * fullBodyBlock = BasicBlock::Create(mMod->getContext(), "fullBody", main, 0);
    10490    BasicBlock * finalBlock = BasicBlock::Create(mMod->getContext(), "final", main, 0);
    105     BasicBlock * finalPartialBlock = BasicBlock::Create(mMod->getContext(), "partial", main, 0);
    106     BasicBlock * finalEmptyBlock = BasicBlock::Create(mMod->getContext(), "empty", main, 0);
    107     BasicBlock * exitBlock = BasicBlock::Create(mMod->getContext(), "exit", main, 0);
    10891
    109     Value * count = nullptr;
    110     if (CountOnly) {
    111         count = iBuilder->CreateAlloca(mBitBlockType, nullptr, "count");
    112         iBuilder->CreateStore(ConstantInt::getNullValue(mBitBlockType), count);
     92   
     93    const unsigned segmentSize = 1;// or codegen::SegmentSize
     94   
     95    StreamSetBuffer ByteStream(iBuilder, StreamSetType(1, (UTF_16 ? 16 : 8)), 0);
     96    StreamSetBuffer BasisBits(iBuilder, StreamSetType((UTF_16 ? 16 : 8), 1), segmentSize);
     97    StreamSetBuffer MatchResults(iBuilder, StreamSetType(2, 1), segmentSize);
     98   
     99    ByteStream.setStreamSetBuffer(inputStream);
     100    BasisBits.allocateBuffer();
     101    MatchResults.allocateBuffer();
     102
     103    Value * initialBufferSize = bufferSize;
     104    Value * initialBlockNo = iBuilder->getInt64(0);
     105    BasicBlock * initialBlock = entryBlock;
     106   
     107    Value * s2pInstance = s2pk.createInstance({});
     108    Value * icgrepInstance = icgrepK.createInstance({});
     109    Value * scanMatchInstance = nullptr;
     110    if (!CountOnly) {
     111        scanMatchInstance = scanMatchK.createInstance({iBuilder->CreateBitCast(inputStream, int8PtrTy), bufferSize, fileIdx});
    113112    }
    114 
    115     Instance * s2pInstance = mS2PKernel->instantiate(inputStream);
    116     Instance * icGrepInstance = mICgrepKernel->instantiate(s2pInstance->getOutputStreamBuffer());
    117     Instance * scanMatchInstance = nullptr;
     113    iBuilder->CreateBr(fullCondBlock);
    118114   
    119     if (!CountOnly) {
    120         scanMatchInstance = mScanMatchKernel->instantiate(icGrepInstance->getOutputStreamBuffer());
    121         scanMatchInstance->setInternalState("FileBuf", iBuilder->CreateBitCast(inputStream, int8PtrTy));
    122         scanMatchInstance->setInternalState("FileSize", bufferSize);
    123         scanMatchInstance->setInternalState("FileIdx", fileIdx);
    124     }
    125     Value * initialBufferSize = nullptr;
    126     BasicBlock * initialBlock = nullptr;
    127 
    128     if (segmentSize > 1) {
    129         iBuilder->CreateBr(segmentCondBlock);
    130         iBuilder->SetInsertPoint(segmentCondBlock);
    131         PHINode * remainingBytes = iBuilder->CreatePHI(int64ty, 2, "remainingBytes");
    132         remainingBytes->addIncoming(bufferSize, entryBlock);
    133         Constant * const step = ConstantInt::get(int64ty, mBlockSize * segmentSize * (UTF_16 ? 2 : 1));
    134         Value * segmentCondTest = iBuilder->CreateICmpULT(remainingBytes, step);
    135         iBuilder->CreateCondBr(segmentCondTest, fullCondBlock, segmentBodyBlock);
    136         iBuilder->SetInsertPoint(segmentBodyBlock);
    137         for (unsigned i = 0; i < segmentSize; ++i) {
    138             s2pInstance->CreateDoBlockCall();
    139         }
    140         for (unsigned i = 0; i < segmentSize; ++i) {
    141             Value * match = (icGrepInstance->getOutputStream(0, 0));
    142             icGrepInstance->CreateDoBlockCall();
    143             Value * temp = iBuilder->CreateLoad(match);
    144             Value * matches = iBuilder->CreateBitCast(temp, iBuilder->getIntNTy(mBlockSize));
    145             Value * popcount_for = generatePopcount(iBuilder, matches);
    146             if(CountOnly){
    147                 Value * temp_count = iBuilder->CreateLoad(count);
    148                 Value * prev_count = iBuilder->CreateBitCast(temp_count, iBuilder->getIntNTy(mBlockSize));
    149                 Value * add_for = iBuilder->CreateAdd(prev_count, popcount_for);
    150                 Value * add = iBuilder->CreateBitCast(add_for, mBitBlockType);
    151                 iBuilder->CreateStore(add, count);
    152             }
    153         }
    154         if (!CountOnly) {
    155             for (unsigned i = 0; i < segmentSize; ++i) {
    156                 scanMatchInstance->CreateDoBlockCall();
    157             }
    158         }
    159         remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, step), segmentBodyBlock);
    160         iBuilder->CreateBr(segmentCondBlock);
    161         initialBufferSize = remainingBytes;
    162         initialBlock = segmentCondBlock;
    163     } else {
    164         initialBufferSize = bufferSize;
    165         initialBlock = entryBlock;
    166         iBuilder->CreateBr(fullCondBlock);
    167     }
    168 
    169115    iBuilder->SetInsertPoint(fullCondBlock);
    170116    PHINode * remainingBytes = iBuilder->CreatePHI(int64ty, 2, "remainingBytes");
    171117    remainingBytes->addIncoming(initialBufferSize, initialBlock);
    172 
     118    PHINode * blockNo = iBuilder->CreatePHI(int64ty, 2, "blockNo");
     119    blockNo->addIncoming(initialBlockNo, initialBlock);
     120   
    173121    Constant * const step = ConstantInt::get(int64ty, mBlockSize * (UTF_16 ? 2 : 1));
    174122    Value * fullCondTest = iBuilder->CreateICmpULT(remainingBytes, step);
    175123    iBuilder->CreateCondBr(fullCondTest, finalBlock, fullBodyBlock);
    176124
     125    // Full Block Pipeline loop
    177126    iBuilder->SetInsertPoint(fullBodyBlock);
    178     s2pInstance->CreateDoBlockCall();
    179     icGrepInstance->CreateDoBlockCall();
    180     if (CountOnly) {
    181         Value * popcount = Cal_Count(icGrepInstance, iBuilder);
    182         Value * temp_count = iBuilder->CreateLoad(count);
    183         Value * add = iBuilder->CreateAdd(temp_count, popcount);
    184         iBuilder->CreateStore(add, count);
    185     } else {
    186         scanMatchInstance->CreateDoBlockCall();
     127   
     128    Value * byteStreamPtr = ByteStream.getBlockPointer(blockNo);
     129    Value * basisBitsPtr = BasisBits.getBlockPointer(blockNo);
     130    Value * matchResultsPtr = MatchResults.getBlockPointer(blockNo);
     131    s2pk.createDoBlockCall(s2pInstance, {byteStreamPtr, basisBitsPtr});
     132    icgrepK.createDoBlockCall(icgrepInstance, {basisBitsPtr, matchResultsPtr});
     133    if (!CountOnly) {
     134
     135        scanMatchK.createDoBlockCall(scanMatchInstance, {matchResultsPtr});
    187136    }
    188 
    189137    remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, step), fullBodyBlock);
     138    blockNo->addIncoming(iBuilder->CreateAdd(blockNo, iBuilder->getInt64(1)), fullBodyBlock);
    190139    iBuilder->CreateBr(fullCondBlock);
    191140
    192141    iBuilder->SetInsertPoint(finalBlock);
    193     Value * emptyBlockCond = iBuilder->CreateICmpEQ(remainingBytes, ConstantInt::get(int64ty, 0));
    194     iBuilder->CreateCondBr(emptyBlockCond, finalEmptyBlock, finalPartialBlock);
     142    byteStreamPtr = ByteStream.getBlockPointer(blockNo);
     143    basisBitsPtr = BasisBits.getBlockPointer(blockNo);
     144    matchResultsPtr = MatchResults.getBlockPointer(blockNo);
     145    s2pk.createFinalBlockCall(s2pInstance, remainingBytes, {byteStreamPtr, basisBitsPtr});
     146    icgrepK.createFinalBlockCall(icgrepInstance, remainingBytes, {basisBitsPtr, matchResultsPtr});
     147    if (CountOnly) {
     148        Value * matchCount = icgrepK.createGetAccumulatorCall(icgrepInstance, "matchedLineCount");
     149        iBuilder->CreateRet(matchCount);
     150    }
     151    else {
     152        scanMatchK.createFinalBlockCall(scanMatchInstance, remainingBytes, {matchResultsPtr});
     153        iBuilder->CreateRetVoid();
     154    }
     155   
     156    //Linker L(*mMod);
     157    //L.linkInModule(std::move(s2pM));
     158    //L.linkInModule(std::move(scanMatchM));
     159    //L.linkInModule(std::move(icgrepM));
    195160
    196161
    197     iBuilder->SetInsertPoint(finalPartialBlock);
    198     s2pInstance->CreateDoBlockCall();
    199     iBuilder->CreateBr(exitBlock);
    200 
    201     iBuilder->SetInsertPoint(finalEmptyBlock);
    202     s2pInstance->clearOutputStreamSet();
    203     iBuilder->CreateBr(exitBlock);
    204 
    205     iBuilder->SetInsertPoint(exitBlock);
    206 
    207     Value * remainingByte = iBuilder->CreateZExt(remainingBytes, iBuilder->getIntNTy(mBlockSize));
    208     Value * remainingUnit = iBuilder->CreateLShr(remainingByte, ConstantInt::get(iBuilder->getIntNTy(mBlockSize), 1));
    209     Value * EOFmark = iBuilder->CreateShl(ConstantInt::get(iBuilder->getIntNTy(mBlockSize), 1), UTF_16 ? remainingUnit : remainingByte);
    210     icGrepInstance->setInternalState("EOFmark", iBuilder->CreateBitCast(EOFmark, mBitBlockType));
    211 
    212     icGrepInstance->CreateDoBlockCall();
    213     if (CountOnly) {
    214         Value * popcount1 = Cal_Count(icGrepInstance, iBuilder);
    215         Value * temp_count1 = iBuilder->CreateLoad(count);
    216         Value * result = iBuilder->CreateAdd(temp_count1, popcount1);
    217         for (unsigned width = (mBlockSize / 64); width > 1; width /= 2) {
    218             std::vector<Constant *> mask(width / 2);
    219             for (unsigned i = 0; i < (width / 2); ++i) {
    220                 mask[i] = iBuilder->getInt32(i);
    221             }
    222             Value * const undef = UndefValue::get(VectorType::get(int64ty, width));
    223             Value * const lh = iBuilder->CreateShuffleVector(result, undef, ConstantVector::get(mask));
    224             for (unsigned i = 0; i < (width / 2); ++i) {
    225                 mask[i] = iBuilder->getInt32(i + (width / 2));
    226             }
    227             Value * const rh = iBuilder->CreateShuffleVector(result, undef, ConstantVector::get(mask));
    228             result = iBuilder->CreateAdd(lh, rh);
    229         }
    230         iBuilder->CreateRet(iBuilder->CreateExtractElement(result, iBuilder->getInt32(0)));
    231     } else {
    232         scanMatchInstance->CreateDoBlockCall();
    233         iBuilder->CreateRetVoid();
    234     }
    235162    return main;
    236163}
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.h

    r5046 r5063  
    77
    88#include <IDISA/idisa_builder.h>
    9 #include "kernel.h"
     9#include <kernels/kernel.h>
    1010
    1111namespace llvm {
     
    1818    class PabloFunction;
    1919    class PabloBlock;
     20    class PabloKernel;
    2021}
    2122
     
    3031    ~PipelineBuilder();
    3132
    32     void CreateKernels(pablo::PabloFunction * function, bool UTF_16, bool isNameExpression);
    33     llvm::Function * ExecuteKernels(bool CountOnly, bool UTF_16);
     33    llvm::Function * ExecuteKernels(pablo::PabloFunction * function, bool isNameExpression, bool CountOnly, bool UTF_16);
    3434
    3535private:
    3636    llvm::Module *                      mMod;
    3737    IDISA::IDISA_Builder *              iBuilder;
    38     KernelBuilder *                     mS2PKernel;
    39     KernelBuilder *                     mICgrepKernel;   
    40     KernelBuilder *                     mScanMatchKernel;
    4138    llvm::Type *                        mBitBlockType;
    4239    int                                 mBlockSize;
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r5051 r5063  
    66#include <kernels/kernel.h>
    77#include <IDISA/idisa_builder.h>
     8#include <llvm/Support/raw_ostream.h>
    89
    910namespace kernel {
     11using namespace llvm;
    1012
    1113const int PACK_LANES = 1;
     
    5355}
    5456
    55 void s2p(IDISA::IDISA_Builder * iBuilder, Value * input, Value * output[]) {
    56     Value * bit[8];
    57     for (unsigned i = 0; i < 8; i++) {
    58         bit[i] = iBuilder->CreateBlockAlignedLoad(input, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
     57/* Alternative transposition model, but small field width packs are problematic. */
     58#if 0
     59void s2p_ideal(IDISA::IDISA_Builder * iBuilder, Value * input[], Value * output[]) {
     60    Value * hi_nybble[4];
     61    Value * lo_nybble[4];
     62    for (unsigned i = 0; i<4; i++) {
     63        Value * s0 = input[2*i];
     64        Value * s1 = input[2*i+1];
     65        hi_nybble[i] = iBuilder->hsimd_packh(8, s0, s1);
     66        lo_nybble[i] = iBuilder->hsimd_packl(8, s0, s1);
    5967    }
    60     s2p(iBuilder, bit, output);
     68    Value * pair01[2];
     69    Value * pair23[2];
     70    Value * pair45[2];
     71    Value * pair67[2];
     72    for (unsigned i = 0; i<2; i++) {
     73        pair01[i] = iBuilder->hsimd_packh(4, hi_nybble[2*i], hi_nybble[2*i+1]);
     74        pair23[i] = iBuilder->hsimd_packl(4, hi_nybble[2*i], hi_nybble[2*i+1]);
     75        pair45[i] = iBuilder->hsimd_packh(4, lo_nybble[2*i], lo_nybble[2*i+1]);
     76        pair67[i] = iBuilder->hsimd_packl(4, lo_nybble[2*i], lo_nybble[2*i+1]);
     77    }
     78    output[0] = iBuilder->hsimd_packh(2, pair01[0], pair01[1]);
     79    output[1] = iBuilder->hsimd_packl(2, pair01[0], pair01[1]);
     80    output[2] = iBuilder->hsimd_packh(2, pair23[0], pair23[1]);
     81    output[3] = iBuilder->hsimd_packl(2, pair23[0], pair23[1]);
     82    output[4] = iBuilder->hsimd_packh(2, pair45[0], pair45[1]);
     83    output[5] = iBuilder->hsimd_packl(2, pair45[0], pair45[1]);
     84    output[6] = iBuilder->hsimd_packh(2, pair67[0], pair67[1]);
     85    output[7] = iBuilder->hsimd_packl(2, pair67[0], pair67[1]);
    6186}
     87#endif
     88   
     89   
     90#if 0
    6291
    63 void generateS2PKernel(Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
    64     kBuilder->addInputStream(8, "byte_pack");
    65     for(unsigned i = 0; i < 8; ++i) {
    66         kBuilder->addOutputStream(1);
    67     }
    68     kBuilder->prepareFunction();
    69     Value * output[8];
    70 
    71     Value * ptr = kBuilder->getInputStream(0);
    72     //iBuilder->CallPrintInt("ptr", iBuilder->CreatePtrToInt(ptr, iBuilder->getInt64Ty()));
    73     s2p(iBuilder, ptr, output);
    74     for (unsigned j = 0; j < 8; ++j) {
    75         //iBuilder->CallPrintRegister("bit" + std::to_string(j + 1), output[j]);
    76         iBuilder->CreateBlockAlignedStore(output[j], kBuilder->getOutputStream(j));
    77     }
    78     kBuilder->finalize();
    79 }
    8092
    8193void generateS2P_16Kernel(Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
     
    105117    kBuilder->finalize();
    106118}
    107        
    108 void generateS2P_idealKernel(Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
    109     kBuilder->addInputStream(8, "byte_pack");
    110     for(unsigned i = 0; i < 8; ++i) {
    111         kBuilder->addOutputStream(1);
    112     }
    113     kBuilder->prepareFunction();
    114     Value * input = kBuilder->getInputStream(0);
    115     Value * output[8];
    116     Value * hi_nybble[4];
    117     Value * lo_nybble[4];
    118     for (unsigned i = 0; i<4; i++) {
    119         Value * s0 = iBuilder->CreateBlockAlignedLoad(input, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i)});
    120         Value * s1 = iBuilder->CreateBlockAlignedLoad(input, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i + 1)});
    121         hi_nybble[i] = iBuilder->hsimd_packh(8, s0, s1);
    122         lo_nybble[i] = iBuilder->hsimd_packl(8, s0, s1);
    123     }
    124     Value * pair01[2];
    125     Value * pair23[2];
    126     Value * pair45[2];
    127     Value * pair67[2];
    128     for (unsigned i = 0; i<2; i++) {
    129         pair01[i] = iBuilder->hsimd_packh(4, hi_nybble[2*i], hi_nybble[2*i+1]);
    130         pair23[i] = iBuilder->hsimd_packl(4, hi_nybble[2*i], hi_nybble[2*i+1]);
    131         pair45[i] = iBuilder->hsimd_packh(4, lo_nybble[2*i], lo_nybble[2*i+1]);
    132         pair67[i] = iBuilder->hsimd_packl(4, lo_nybble[2*i], lo_nybble[2*i+1]);
    133     }
    134     output[0] = iBuilder->hsimd_packh(2, pair01[0], pair01[1]);
    135     output[1] = iBuilder->hsimd_packl(2, pair01[0], pair01[1]);
    136     output[2] = iBuilder->hsimd_packh(2, pair23[0], pair23[1]);
    137     output[3] = iBuilder->hsimd_packl(2, pair23[0], pair23[1]);
    138     output[4] = iBuilder->hsimd_packh(2, pair45[0], pair45[1]);
    139     output[5] = iBuilder->hsimd_packl(2, pair45[0], pair45[1]);
    140     output[6] = iBuilder->hsimd_packh(2, pair67[0], pair67[1]);
    141     output[7] = iBuilder->hsimd_packl(2, pair67[0], pair67[1]);
    142 
    143     s2p(iBuilder, kBuilder->getInputStream(0), output);
    144     for (unsigned j = 0; j < 8; ++j) {
    145         iBuilder->CreateBlockAlignedStore(output[j], kBuilder->getOutputStream(j));
    146     }
    147     kBuilder->finalize();
    148 }
    149119   
    150 std::unique_ptr<llvm::Module> s2pKernel::createKernelModule() {
    151     std::unique_ptr<llvm::Module> theModule = KernelInterface::createKernelModule();
     120#endif
    152121   
    153     /***********************
    154      WARNING iBuilder has a different module than theModule at this point.
    155     ***********************/
    156     Function * doBlockFunction = theModule.get()->getFunction(mKernelName + "_DoBlock");
    157    
    158     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    159    
    160     Value * byteStreamBlock_ptr = getParameter(doBlockFunction, "byteStream");
    161     Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");
    162     Value * s_bytepack[8];
    163     for (unsigned i = 0; i < 8; i++) {
    164         s_bytepack[i] = iBuilder->CreateBlockAlignedLoad(byteStreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i)});
    165     }
    166     Value * p_bitblock[8];
    167     s2p(iBuilder, s_bytepack, p_bitblock);
    168     for (unsigned j = 0; j < 8; ++j) {
    169         iBuilder->CreateBlockAlignedStore(p_bitblock[j], basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
    170     }
    171     iBuilder->CreateRetVoid();
    172 
     122void s2pKernel::generateFinalBlockMethod() {
    173123    /* Now prepare the s2p final block function:
    174124     assumption: if remaining bytes is greater than 0, it is safe to read a full block of bytes.
    175125     if remaining bytes is zero, no read should be performed (e.g. for mmapped buffer).
    176126     */
    177     Function * finalBlockFunction = theModule.get()->getFunction(mKernelName + "_FinalBlock");
     127    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     128    Module * m = iBuilder->getModule();
     129    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     130    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
    178131    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
    179 
     132   
    180133    Value * self = getParameter(finalBlockFunction, "self");
    181134    Value * remainingBytes = getParameter(finalBlockFunction, "remainingBytes");
    182     byteStreamBlock_ptr = getParameter(finalBlockFunction, "byteStream");
    183     basisBitsBlock_ptr = getParameter(finalBlockFunction, "basisBits");
     135    Value * byteStreamBlock_ptr = getParameter(finalBlockFunction, "byteStream");
     136    Value * basisBitsBlock_ptr = getParameter(finalBlockFunction, "basisBits");
    184137   
    185138    BasicBlock * finalPartialBlock = BasicBlock::Create(iBuilder->getContext(), "partial", finalBlockFunction, 0);
     
    200153    iBuilder->SetInsertPoint(exitBlock);
    201154    iBuilder->CreateRetVoid();
     155    iBuilder->restoreIP(savePoint);
     156}
    202157
    203     return theModule;
     158void s2pKernel::generateKernel() {
     159    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     160    if (mKernelStateType == nullptr) finalizeKernelStateType();
     161    KernelBuilder::generateKernel();
     162    generateFinalBlockMethod();
     163
     164    Module * m = iBuilder->getModule();
     165    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     166   
     167    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     168   
     169    Value * byteStreamBlock_ptr = getParameter(doBlockFunction, "byteStream");
     170    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");
     171    Value * s_bytepack[8];
     172    for (unsigned i = 0; i < 8; i++) {
     173        s_bytepack[i] = iBuilder->CreateBlockAlignedLoad(byteStreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i)});
     174    }
     175    Value * p_bitblock[8];
     176    s2p(iBuilder, s_bytepack, p_bitblock);
     177    for (unsigned j = 0; j < 8; ++j) {
     178        iBuilder->CreateBlockAlignedStore(p_bitblock[j], basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
     179    }
     180    iBuilder->CreateRetVoid();
     181    iBuilder->restoreIP(savePoint);
    204182}
    205183
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.h

    r5051 r5063  
    88#include "streamset.h"
    99#include "interface.h"
     10#include "kernel.h"
    1011
    1112namespace llvm { class Module; }
     
    1718class KernelBuilder;
    1819
    19 void generateS2PKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
    20 void generateS2P_16Kernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
    21 void generateS2P_idealKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
     20//void generateS2P_16Kernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
    2221
    2322
    2423
    25 class s2pKernel : public KernelInterface {
     24class s2pKernel : public KernelBuilder {
    2625public:
    2726    s2pKernel(IDISA::IDISA_Builder * iBuilder) :
    28     KernelInterface(iBuilder, "s2p",
    29                     {StreamSetBinding{StreamSetType(1, 8), "byteStream"}},
    30                     {StreamSetBinding{StreamSetType(8, 1), "basisBits"}},
    31                     {}, {}, {}) {}
     27    KernelBuilder(iBuilder, "s2p",
     28                  {StreamSetBinding{StreamSetType(1, 8), "byteStream"}},
     29                  {StreamSetBinding{StreamSetType(8, 1), "basisBits"}},
     30                  {}, {}, {}) {}
    3231   
    33     std::unique_ptr<llvm::Module> createKernelModule() override;
     32    void generateFinalBlockMethod();
     33    void generateKernel() override;
     34   
     35};
    3436
    35 };
     37   
     38
    3639}
    3740#endif
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5056 r5063  
    3939}
    4040
    41 Function * generateScanWordRoutine(Module * m, IDISA::IDISA_Builder * iBuilder, unsigned scanwordBitWidth, KernelBuilder * const kBuilder, bool isNameExpression) {
    42 
    43     Function * function = m->getFunction("scan_matches_in_scanword");
    44     if (LLVM_UNLIKELY(function != nullptr)) {
    45         return function;
    46     }
    47 
    48     LLVMContext & ctxt = m->getContext();
    49     Type * T = iBuilder->getIntNTy(scanwordBitWidth);
    50     Type * S = PointerType::get(iBuilder->getIntNTy(8), 0);
    51     Type * returnType = StructType::get(ctxt, std::vector<Type *>({T, T}));
    52     FunctionType * functionType = FunctionType::get(returnType, std::vector<Type *>({PointerType::get(kBuilder->getKernelStateType(), 0), T, T, T, T, T}), false);
    53 
    54     SmallVector<AttributeSet, 6> Attrs;
    55     Attrs.push_back(AttributeSet::get(ctxt, ~0U, std::vector<Attribute::AttrKind>({ Attribute::NoUnwind, Attribute::UWTable })));
    56     Attrs.push_back(AttributeSet::get(ctxt, 1, std::vector<Attribute::AttrKind>({})));
    57     Attrs.push_back(AttributeSet::get(ctxt, 2, std::vector<Attribute::AttrKind>({})));
    58     Attrs.push_back(AttributeSet::get(ctxt, 3, std::vector<Attribute::AttrKind>({})));
    59     Attrs.push_back(AttributeSet::get(ctxt, 4, std::vector<Attribute::AttrKind>({})));
    60     Attrs.push_back(AttributeSet::get(ctxt, 5, std::vector<Attribute::AttrKind>({})));
    61     AttributeSet AttrSet = AttributeSet::get(ctxt, Attrs);
    62 
    63     function = Function::Create(functionType, GlobalValue::ExternalLinkage, "scan_matches_in_scanword", m);
    64     function->setCallingConv(CallingConv::C);
    65     function->setAttributes(AttrSet);
    66     function->addFnAttr(llvm::Attribute::AlwaysInline);
    67 
    68     Function::arg_iterator args = function->arg_begin();
    69     Value * instance = &*(args++);
    70     instance->setName("this");
    71     Value * matches_input_parm = &*(args++);
    72     matches_input_parm->setName("matches");
    73     Value * record_breaks_input_parm = &*(args++);
    74     record_breaks_input_parm->setName("breaks");
    75     Value * scanwordPos = &*(args++);
    76     scanwordPos->setName("scanwordPos");
    77     Value * recordStart_input_parm = &*(args++);
    78     recordStart_input_parm->setName("pendingLineStart");
    79     Value * recordNum_input_parm = &*(args++);
    80     recordNum_input_parm->setName("lineNum");
    81 
    82     Constant * matchProcessor;
    83     if (isNameExpression) {
    84         matchProcessor = m->getOrInsertFunction("insert_codepoints", Type::getVoidTy(ctxt), T, T, T, S, nullptr);
    85     } else {
    86         matchProcessor = m->getOrInsertFunction("wrapped_report_match", Type::getVoidTy(ctxt), T, T, T, S, T, T, nullptr);
    87     }
    88     iBuilder->SetInsertPoint(BasicBlock::Create(ctxt, "entry", function,0));
    89 
    90     BasicBlock * entry_block = iBuilder->GetInsertBlock();
    91     BasicBlock * matches_test_block = BasicBlock::Create(ctxt, "matches_test_block", function, 0);
    92     BasicBlock * process_matches_loop_entry = BasicBlock::Create(ctxt, "process_matches_loop", function, 0);
    93     BasicBlock * prior_breaks_block = BasicBlock::Create(ctxt, "prior_breaks_block", function, 0);
    94     BasicBlock * loop_final_block = BasicBlock::Create(ctxt, "loop_final_block", function, 0);
    95     BasicBlock * matches_done_block = BasicBlock::Create(ctxt, "matches_done_block", function, 0);
    96     BasicBlock * remaining_breaks_block = BasicBlock::Create(ctxt, "remaining_breaks_block", function, 0);
    97     BasicBlock * return_block = BasicBlock::Create(ctxt, "return_block", function, 0);
    98 
    99 
    100     // The match scanner works with a loop involving four variables:
    101     // (a) the bit stream scanword of matches marking the ends of selected records,
    102     // (b) the bit stream scanword of record_breaks marking the ends of all records,
    103     // (c) the integer lastRecordNum indicating the number of records processed so far,
    104     // (d) the index lastRecordStart indicating the file position of the last record.
    105     // We set up a loop structure, in which a set of 4 phi nodes initialize these
    106     // variables from either the input to the scanner or the computed values within
    107     // the loop body.
    108 
    109 
    110     iBuilder->CreateBr(matches_test_block);
    111 
    112     // LOOP Test Block
    113     iBuilder->SetInsertPoint(matches_test_block);
    114     PHINode * matches_phi = iBuilder->CreatePHI(T, 2, "matches");
    115     PHINode * record_breaks_phi = iBuilder->CreatePHI(T, 2, "record_breaks");
    116     PHINode * recordNum_phi = iBuilder->CreatePHI(T, 2, "recordNum");
    117     PHINode * recordStart_phi = iBuilder->CreatePHI(T, 2, "recordStart");
    118     matches_phi->addIncoming(matches_input_parm, entry_block);
    119     record_breaks_phi->addIncoming(record_breaks_input_parm, entry_block);
    120     recordNum_phi->addIncoming(recordNum_input_parm, entry_block);
    121     recordStart_phi->addIncoming(recordStart_input_parm, entry_block);
    122     Value * have_matches_cond = iBuilder->CreateICmpNE(matches_phi, ConstantInt::get(T, 0));
    123     iBuilder->CreateCondBr(have_matches_cond, process_matches_loop_entry, matches_done_block);
    124 
    125     // LOOP BODY
    126     // The loop body is entered if we have more matches to process.
    127     iBuilder->SetInsertPoint(process_matches_loop_entry);
    128     Value * prior_breaks = iBuilder->CreateAnd(generateForwardZeroesMask(iBuilder, matches_phi), record_breaks_phi);
    129     // Within the loop we have a conditional block that is executed if there are any prior
    130     // record breaks.
    131     Value * prior_breaks_cond = iBuilder->CreateICmpNE(prior_breaks, ConstantInt::get(T, 0));
    132     iBuilder->CreateCondBr(prior_breaks_cond, prior_breaks_block, loop_final_block);
    133 
    134     // PRIOR_BREAKS_BLOCK
    135     // If there are prior breaks, we count them and compute the record start position.
    136     iBuilder->SetInsertPoint(prior_breaks_block);
    137     Value * matchRecordNum = iBuilder->CreateAdd(generatePopcount(iBuilder, prior_breaks), recordNum_phi);
    138     Value * reverseDistance = generateCountReverseZeroes(iBuilder, prior_breaks);
    139     Value * width = ConstantInt::get(T, scanwordBitWidth);
    140     Value * matchRecordStart = iBuilder->CreateAdd(scanwordPos, iBuilder->CreateSub(width, reverseDistance));
    141     iBuilder->CreateBr(loop_final_block);
    142 
    143     // LOOP FINAL BLOCK
    144     // The prior breaks, if any, have been counted.  Set up phi nodes for the recordNum
    145     // and recordStart depending on whether prior_breaks_block was conditionally executed.
    146     iBuilder->SetInsertPoint(loop_final_block);
    147     PHINode * matchRecordNum_phi = iBuilder->CreatePHI(T, 2, "matchRecordNum");
    148     PHINode * matchRecordStart_phi = iBuilder->CreatePHI(T, 2, "matchRecordStart");
    149     matchRecordNum_phi->addIncoming(recordNum_phi, process_matches_loop_entry);
    150     matchRecordNum_phi->addIncoming(matchRecordNum, prior_breaks_block);
    151     matchRecordStart_phi->addIncoming(recordStart_phi, process_matches_loop_entry);
    152     matchRecordStart_phi->addIncoming(matchRecordStart, prior_breaks_block);   
    153     Value * matchRecordEnd = iBuilder->CreateAdd(scanwordPos, generateCountForwardZeroes(iBuilder, matches_phi));
    154 
    155     Value * fileBuf = iBuilder->CreateLoad(kBuilder->getInternalStateInternal(instance, "FileBuf"));
    156     if (isNameExpression) {
    157         iBuilder->CreateCall(matchProcessor, std::vector<Value *>({matchRecordNum_phi, matchRecordStart_phi, matchRecordEnd, fileBuf}));
    158     } else {
    159         Value * fileSize = iBuilder->CreateLoad(kBuilder->getInternalStateInternal(instance, "FileSize"));
    160         Value * fileIdx = iBuilder->CreateLoad(kBuilder->getInternalStateInternal(instance, "FileIdx"));
    161         iBuilder->CreateCall(matchProcessor, std::vector<Value *>({matchRecordNum_phi, matchRecordStart_phi, matchRecordEnd, fileBuf, fileSize, fileIdx}));
    162     }
    163 
    164     Value * remaining_matches = generateResetLowestBit(iBuilder, matches_phi);
    165     Value * remaining_breaks = iBuilder->CreateXor(record_breaks_phi, prior_breaks);
    166     matches_phi->addIncoming(remaining_matches, loop_final_block);
    167     record_breaks_phi->addIncoming(remaining_breaks, loop_final_block);
    168     recordNum_phi->addIncoming(matchRecordNum_phi, loop_final_block);
    169     recordStart_phi->addIncoming(matchRecordStart_phi, loop_final_block);
    170     iBuilder->CreateBr(matches_test_block);
    171 
    172 
    173     // LOOP EXIT/MATCHES_DONE
    174     iBuilder->SetInsertPoint(matches_done_block);
    175     // When the matches are done, there may be additional record breaks remaining
    176     Value * more_breaks_cond = iBuilder->CreateICmpNE(record_breaks_phi, ConstantInt::get(T, 0));
    177     iBuilder->CreateCondBr(more_breaks_cond, remaining_breaks_block, return_block);
    178 
    179     // REMAINING_BREAKS_BLOCK: process remaining record breaks after all matches are processed
    180     iBuilder->SetInsertPoint(remaining_breaks_block);
    181     Value * break_count = generatePopcount(iBuilder, record_breaks_phi);
    182     Value * final_record_num = iBuilder->CreateAdd(recordNum_phi, break_count);
    183     Value * reverseZeroes = generateCountReverseZeroes(iBuilder, record_breaks_phi);
    184     Value * pendingLineStart = iBuilder->CreateAdd(scanwordPos, iBuilder->CreateSub(width, reverseZeroes));
    185     iBuilder->CreateBr(return_block);
    186 
    187     // RETURN block
    188     iBuilder->SetInsertPoint(return_block);
    189     PHINode * finalRecordCount_phi = iBuilder->CreatePHI(T, 2, "finalRecordCount");
    190     PHINode * finalRecordStart_phi = iBuilder->CreatePHI(T, 2, "finalRecordStart");
    191     finalRecordCount_phi->addIncoming(recordNum_phi, matches_done_block);
    192     finalRecordCount_phi->addIncoming(final_record_num, remaining_breaks_block);
    193     finalRecordStart_phi->addIncoming(recordStart_phi, matches_done_block);
    194     finalRecordStart_phi->addIncoming(pendingLineStart, remaining_breaks_block);
    195     Value * retVal = UndefValue::get(returnType);
    196     retVal = iBuilder->CreateInsertValue(retVal, finalRecordStart_phi, 0);
    197     retVal = iBuilder->CreateInsertValue(retVal, finalRecordCount_phi, 1);
    198     iBuilder->CreateRet(retVal);
    199 
    200     return function;
    201 }
    202 
    203 
    204 void generateScanMatch(Module * m, IDISA::IDISA_Builder * iBuilder, unsigned scanWordBitWidth, KernelBuilder * kBuilder, bool isNameExpression) {
    205    
    206     Type * T = iBuilder->getIntNTy(scanWordBitWidth);
    207     Type * S = PointerType::get(iBuilder->getIntNTy(8), 0);
    208 
    209     const unsigned fieldCount = iBuilder->getBitBlockWidth() / scanWordBitWidth;
    210     Type * scanwordVectorType =  VectorType::get(T, fieldCount);
    211 
    212     kBuilder->addInputStream(1, "matches");
    213     kBuilder->addInputStream(1, "breaks");
    214     //use index
    215     const unsigned lineStart = kBuilder->addInternalState(T, "LineStart");
    216     const unsigned lineNum = kBuilder->addInternalState(T, "LineNum");
    217     kBuilder->addInternalState(S, "FileBuf");
    218     kBuilder->addInternalState(T, "FileSize");
    219     kBuilder->addInternalState(T, "FileIdx");
    220     Function * function = kBuilder->prepareFunction();
    221 
    222 
    223     Function * scanWordFunction = generateScanWordRoutine(m, iBuilder, scanWordBitWidth, kBuilder, isNameExpression);
    224    
    225     iBuilder->SetInsertPoint(&function->getEntryBlock());
    226 
    227     Value * kernelStuctParam = kBuilder->getKernelState();
    228 
    229     Value * scanwordPos = iBuilder->CreateLoad(kBuilder->getInternalState("BlockNo"));
    230     scanwordPos = iBuilder->CreateMul(scanwordPos, ConstantInt::get(scanwordPos->getType(), iBuilder->getBitBlockWidth()));
    231    
    232     Value * recordStart = iBuilder->CreateBlockAlignedLoad(kBuilder->getInternalState(lineStart));
    233     Value * recordNum = iBuilder->CreateBlockAlignedLoad(kBuilder->getInternalState(lineNum));
    234     Value * matchWordVector = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(0)), scanwordVectorType);
    235     Value * breakWordVector = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(1)), scanwordVectorType);
    236     for(unsigned i = 0; i < fieldCount; ++i){
    237         Value * matchWord = iBuilder->CreateExtractElement(matchWordVector, ConstantInt::get(T, i));
    238         Value * recordBreaksWord = iBuilder->CreateExtractElement(breakWordVector, ConstantInt::get(T, i));
    239         Value * wordResult = iBuilder->CreateCall(scanWordFunction, {kernelStuctParam, matchWord, recordBreaksWord, scanwordPos, recordStart, recordNum});
    240         scanwordPos = iBuilder->CreateAdd(scanwordPos, ConstantInt::get(T, scanWordBitWidth));
    241         recordStart = iBuilder->CreateExtractValue(wordResult, std::vector<unsigned>({0}));
    242         recordNum = iBuilder->CreateExtractValue(wordResult, std::vector<unsigned>({1}));
    243     }
    244     kBuilder->setInternalState(lineStart, recordStart);
    245     kBuilder->setInternalState(lineNum, recordNum);
    246     kBuilder->finalize();
    247 
    248 }
    24941       
    250 std::unique_ptr<llvm::Module> scanMatchKernel::createKernelModule() {
    251     std::unique_ptr<llvm::Module> theModule = KernelInterface::createKernelModule();
    252    
    253     Function * scanWordFunction = generateScanWordRoutine(theModule.get());
     42void scanMatchKernel::generateKernel() {
     43    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     44    if (mKernelStateType == nullptr) finalizeKernelStateType();
     45    KernelBuilder::generateKernel();
     46    Module * m = iBuilder->getModule();
     47   
     48    Function * scanWordFunction = generateScanWordRoutine(m);
    25449    const unsigned fieldCount = iBuilder->getBitBlockWidth() / mScanwordBitWidth;
    25550    Type * T = iBuilder->getIntNTy(mScanwordBitWidth);
    25651    Type * scanwordVectorType =  VectorType::get(T, fieldCount);
    25752
    258     Function * doBlockFunction = theModule.get()->getFunction(mKernelName + "_DoBlock");
     53    Function * doBlockFunction = m->getFunction(mKernelName + "_DoBlock");
    25954
    26055    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     
    28479   
    28580    // scanMatch FinalBlock function simply dispatches to the DoBlock function
    286     Function * finalBlockFunction = theModule.get()->getFunction(mKernelName + "_FinalBlock");
    287     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
    288     Value * self = getParameter(finalBlockFunction, "self");
    289     Value * matchResults = getParameter(finalBlockFunction, "matchResults");
    290     iBuilder->CreateCall(doBlockFunction, {self, matchResults});
    291     iBuilder->CreateRetVoid();
    292     //
    293     return theModule;
     81    addTrivialFinalBlockMethod(m);
     82    iBuilder->restoreIP(savePoint);
    29483}
    29584
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.h

    r5055 r5063  
    77
    88#include "streamset.h"
    9 #include "interface.h"
     9#include "kernel.h"
    1010
    1111namespace llvm { class Module; class Function;}
     
    1414
    1515namespace kernel {
    16 
    17 class KernelBuilder;
    18 
    19 void generateScanMatch(llvm::Module * m, IDISA::IDISA_Builder * iBuilder, unsigned scanWordBitWidth, KernelBuilder * kBuilder, bool isNameExpression);
    20 
    2116   
    22    
    23 class scanMatchKernel : public KernelInterface {
     17class scanMatchKernel : public KernelBuilder {
    2418public:
    2519    scanMatchKernel(IDISA::IDISA_Builder * iBuilder, unsigned scanwordBitWidth, bool isNameExpression) :
    26     KernelInterface(iBuilder, "scanMatch",
     20    KernelBuilder(iBuilder, "scanMatch",
    2721                    {StreamSetBinding{StreamSetType(2, 1), "matchResults"}},
    2822                    {},
     
    3428    mIsNameExpression(isNameExpression) {}
    3529       
    36     std::unique_ptr<llvm::Module> createKernelModule() override;
     30    void generateKernel() override;
    3731
    3832private:
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r5058 r5063  
    2020
    2121/** ------------------------------------------------------------------------------------------------------------- *
    22  * @brief initialize
    23  ** ------------------------------------------------------------------------------------------------------------- */
    24 void CarryManager::initialize(PabloFunction * const function, kernel::KernelBuilder * const kBuilder) {
     22 * @brief initializeCarryData
     23 ** ------------------------------------------------------------------------------------------------------------- */
     24Type * CarryManager::initializeCarryData(PabloFunction * const function) {
    2525    mRootScope = function->getEntryBlock();
    2626    mCarryInfoVector.resize(mRootScope->enumerateScopes(0) + 1);
     
    3434    mTotalCarryDataBitBlocks = totalCarryDataSize;
    3535    ArrayType* cdArrayTy = ArrayType::get(mBitBlockType, mTotalCarryDataBitBlocks);
    36     mCdArrayIdx = kBuilder->addInternalState(cdArrayTy);
    37     if (mPabloCountCount > 0) {
    38         ArrayType* pcArrayTy = ArrayType::get(iBuilder->getIntNTy(64), mPabloCountCount);
    39         mPcArrayIdx = kBuilder->addInternalState(pcArrayTy);
    40     }
     36    return cdArrayTy;
     37}
     38
     39/** ------------------------------------------------------------------------------------------------------------- *
     40 * @brief initializeCodeGen
     41 ** ------------------------------------------------------------------------------------------------------------- */
     42void CarryManager::initializeCodeGen(PabloKernel * const kBuilder, Value * selfPtr) {
    4143    mKernelBuilder = kBuilder;
    42 }
    43 
    44 /** ------------------------------------------------------------------------------------------------------------- *
    45  * @brief reset
    46  ** ------------------------------------------------------------------------------------------------------------- */
    47 void CarryManager::reset() {
    48     Value * cdArrayPtr = mKernelBuilder->getInternalState(mCdArrayIdx);
     44    mSelf = selfPtr;
     45   
     46    Value * cdArrayPtr = iBuilder->CreateGEP(mSelf, {iBuilder->getInt64(0), mKernelBuilder->getScalarIndex("carries")});
     47#ifndef NDEBUG
     48    iBuilder->CallPrintInt("cdArrayPtr", iBuilder->CreatePtrToInt(cdArrayPtr, iBuilder->getInt64Ty()));
     49#endif
    4950    mCarryPackBasePtr = iBuilder->CreateBitCast(cdArrayPtr, PointerType::get(mCarryPackType, 0));
    5051    mCarryBitBlockPtr = iBuilder->CreateBitCast(cdArrayPtr, PointerType::get(mBitBlockType, 0));
    51     if (mPabloCountCount > 0) {
    52         Value * pcArrayPtr = mKernelBuilder->getInternalState(mPcArrayIdx);
    53         mPopcountBasePtr = iBuilder->CreateBitCast(pcArrayPtr, Type::getInt64PtrTy(iBuilder->getContext()));
    54     }
    5552    mCurrentScope = mRootScope;
    5653    mCurrentFrameIndex = 0;
     
    201198    const unsigned bufsize = mCarryInfo->longAdvanceBufferSize(shiftAmount);
    202199    Value * indexMask = iBuilder->getInt64(bufsize - 1);  // A mask to implement circular buffer indexing
    203     Value * blockIndex = iBuilder->CreateBlockAlignedLoad(mKernelBuilder->getBlockNo());
     200    Value * blockIndex = mKernelBuilder->getScalarField(mSelf, "BlockNo");
    204201    Value * loadIndex0 = iBuilder->CreateAdd(iBuilder->CreateAnd(iBuilder->CreateSub(blockIndex, iBuilder->getInt64(advanceEntries)), indexMask), advBaseIndex);
    205202    Value * storeIndex = iBuilder->CreateAdd(iBuilder->CreateAnd(blockIndex, indexMask), advBaseIndex);
     
    243240        storeCarryOut(carrySummaryIndex);
    244241    }
    245 }
    246 
    247 /** ------------------------------------------------------------------------------------------------------------- *
    248  * @brief popCount
    249  ** ------------------------------------------------------------------------------------------------------------- */
    250 Value * CarryManager::popCount(Value * to_count, unsigned globalIdx) {
    251     Value * countPtr = iBuilder->CreateGEP(mPopcountBasePtr, iBuilder->getInt64(globalIdx));
    252     Value * countSoFar = iBuilder->CreateAlignedLoad(countPtr, 8);
    253     Value * fieldCounts = iBuilder->simd_popcount(64, to_count);
    254     for (unsigned i = 0; i < mBitBlockWidth/64; ++i) {
    255         countSoFar = iBuilder->CreateAdd(countSoFar, iBuilder->mvmd_extract(64, fieldCounts, i));
    256     }
    257     iBuilder->CreateAlignedStore(countSoFar, countPtr, 8);
    258     return iBuilder->bitCast(iBuilder->CreateZExt(countSoFar, iBuilder->getIntNTy(mBitBlockWidth)));
    259242}
    260243
     
    407390
    408391    for (Statement * stmt : *blk) {
    409         if (Count * c = dyn_cast<Count>(stmt)) {
    410             c->setGlobalCountIndex(mPabloCountCount);
    411             mPabloCountCount++;
    412         } else if (If * ifStatement = dyn_cast<If>(stmt)) {
     392        if (If * ifStatement = dyn_cast<If>(stmt)) {
    413393            const unsigned ifCarryDataBits = enumerate(ifStatement->getBody(), ifDepth + 1, whileDepth);
    414394            CarryData * nestedBlockData = mCarryInfoVector[ifStatement->getBody()->getScopeIndex()];
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.h

    r4974 r5063  
    1010#include <llvm/IR/Module.h>
    1111#include <IDISA/idisa_builder.h>
     12#include <pablo/pablo_kernel.h>
    1213#include <pablo/codegenstate.h>
    1314#include <pablo/carry_data.h>
     
    3940    CarryManager(IDISA::IDISA_Builder * idb)
    4041    : iBuilder(idb)
     42    , mKernelBuilder(nullptr)
     43    , mSelf(nullptr)
    4144    , mBitBlockType(idb->getBitBlockType())
    4245    , mBitBlockWidth(idb->getBitBlockWidth())
     
    4750    , mCarryPackBasePtr(nullptr)
    4851    , mCarryBitBlockPtr(nullptr)
    49     , mPopcountBasePtr(nullptr)
    50     , mKernelBuilder(nullptr)
    51     , mPabloCountCount(0)
    5252    , mTotalCarryDataBitBlocks(0)
    5353    , mCarryDataAllocationSize(0)
     
    5959    ~CarryManager();
    6060   
    61     void initialize(PabloFunction * const function, kernel::KernelBuilder * const kBuilder);
     61    Type * initializeCarryData(PabloFunction * const function);
     62    void initializeCodeGen(PabloKernel * const kBuilder, Value * selfPtr);
    6263
    6364    void reset();
     
    9899    void ensureCarriesStoredRecursive();
    99100   
    100     Value * popCount(Value * to_count, unsigned globalIdx);
    101    
    102101    Value * declareCarryDataArray(Module * m);
    103102
     
    130129private:
    131130    IDISA::IDISA_Builder * const iBuilder;
     131    PabloKernel * mKernelBuilder;
     132    Value * mSelf;
    132133    Type * const mBitBlockType;
    133134    const unsigned mBitBlockWidth;
     
    139140    Type * mCarryPackType;
    140141    Value * mCarryBitBlockPtr;
    141     Value * mPopcountBasePtr;
    142     kernel::KernelBuilder * mKernelBuilder;
    143     unsigned mPabloCountCount; // Number of Pablo "Count" operations
    144142    unsigned mTotalCarryDataBitBlocks;
    145143    unsigned mCarryDataAllocationSize;
     
    152150    std::vector<Value *> mCarrySummary;
    153151    int mCdArrayIdx;
    154     int mPcArrayIdx;
    155152    int mFilePosIdx;
    156153};
  • icGREP/icgrep-devel/icgrep/pablo/codegenstate.cpp

    r5061 r5063  
    7676    return insertAtInsertionPoint(new Advance(expr, getInteger(shiftAmount), makeName(prefix, false)));
    7777}
     78
     79Count * PabloBlock::createCount(const std::string counterName, PabloAST * const expr)  {
     80    return insertAtInsertionPoint(new Count(expr, makeName(counterName, false)));
     81}
     82   
    7883
    7984PabloAST * PabloBlock::createLookahead(PabloAST * expr, PabloAST * shiftAmount) {
  • icGREP/icgrep-devel/icgrep/pablo/function.h

    r4876 r5063  
    55#include <pablo/pe_var.h>
    66#include <pablo/ps_assign.h>
     7#include <pablo/pe_count.h>
    78#include <pablo/symbol_generator.h>
    89
     
    139140    }
    140141
     142    void setResultCount(Count * value) {
     143        value->addUser(this);
     144    }
     145   
    141146    void setFunctionPtr(void * functionPtr) {
    142147        mFunctionPtr = functionPtr;
  • icGREP/icgrep-devel/icgrep/pablo/pabloAST.h

    r5042 r5063  
    3636    friend class PabloFunction;
    3737    friend class SymbolGenerator;
     38    friend class Count;
    3839public:
    3940
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r5045 r5063  
    1919#include <iostream>
    2020#include <hrtime.h>
     21#include <llvm/Support/Debug.h>
    2122
    2223
     
    2526#define DSSLI_FIELDWIDTH 64
    2627
    27 PabloCompiler::PabloCompiler(Module * m, IDISA::IDISA_Builder * b)
    28 : mMod(m)
     28PabloCompiler::PabloCompiler(IDISA::IDISA_Builder * b, PabloKernel * k, PabloFunction * const function)
     29: mMod(b->getModule())
    2930, iBuilder(b)
    3031, mBitBlockType(b->getBitBlockType())
    3132, mCarryManager(nullptr)
    32 , mPabloFunction(nullptr)
     33, mPabloFunction(function)
    3334, mPabloBlock(nullptr)
    34 , mKernelBuilder(nullptr)
     35, mKernelBuilder(k)
    3536, mWhileDepth(0)
    3637, mIfDepth(0)
     
    4041}
    4142
    42 void PabloCompiler::setKernel(kernel::KernelBuilder * kBuilder){
    43     mKernelBuilder = kBuilder;
    44 }
    45 
    46 llvm::Function * PabloCompiler::compile(PabloFunction * function) {
    47 
     43
     44Type * PabloCompiler::initializeCarryData() {
     45    mCarryManager = make_unique<CarryManager>(iBuilder);
     46    Type * carryDataType = mCarryManager->initializeCarryData(mPabloFunction);
     47    return carryDataType;
     48}
     49   
     50void PabloCompiler::compile(Function * doBlockFunction) {
     51    // Make sure that we generate code into the right module.
     52    mMod = iBuilder->getModule();
     53    mFunction = doBlockFunction;
    4854    #ifdef PRINT_TIMING_INFORMATION
    4955    const timestamp_t pablo_compilation_start = read_cycle_counter();
    5056    #endif
    51  
    52     Examine(function);
    53 
    54     mCarryManager = new CarryManager(iBuilder);
    55 
    56     GenerateKernel(function);
    57        
    58     delete mCarryManager;
    59     mCarryManager = nullptr;
     57
     58    Examine(mPabloFunction);
     59   
     60    //Generate Kernel//
     61    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     62    mSelf = mKernelBuilder->getParameter(doBlockFunction, "self");
     63    mCarryManager->initializeCodeGen(mKernelBuilder, mSelf);
     64     
     65    Value * inputSet_ptr = mKernelBuilder->getParameter(doBlockFunction, "inputs");
     66   
     67    Value * outputSet_ptr = nullptr;
     68    if (mPabloFunction->getNumOfResults() > 0) {
     69        outputSet_ptr = mKernelBuilder->getParameter(doBlockFunction, "outputs");
     70    }
     71    for (unsigned j = 0; j < mPabloFunction->getNumOfParameters(); ++j) {
     72        Value * inputVal = iBuilder->CreateGEP(inputSet_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
     73        //Value * inputVal = iBuilder->CreateBlockAlignedLoad(inputSet_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
     74        const Var * const var = mPabloFunction->getParameter(j);
     75        if (DebugOptionIsSet(DumpTrace)) {
     76            iBuilder->CallPrintRegister(var->getName()->to_string(), iBuilder->CreateBlockAlignedLoad(inputVal));
     77        }
     78        mMarkerMap.insert(std::make_pair(var, inputVal));
     79    }
     80   
     81    compileBlock(mPabloFunction->getEntryBlock());
     82   
     83    for (unsigned j = 0; j < mPabloFunction->getNumOfResults(); ++j) {
     84        const auto f = mMarkerMap.find(mPabloFunction->getResult(j));
     85        if (LLVM_UNLIKELY(f == mMarkerMap.end())) {
     86            throw std::runtime_error("PabloCompiler: result " + std::to_string(j) + " was not assigned a value!");
     87        }
     88        iBuilder->CreateBlockAlignedStore(f->second, outputSet_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
     89    }
     90    iBuilder->CreateRetVoid();
     91
    6092   
    6193    #ifdef PRINT_TIMING_INFORMATION
     
    6496    #endif
    6597
    66     return mFunction;
    67 }
    68 
    69 inline void PabloCompiler::GenerateKernel(PabloFunction * const function) {
    70  
    71     mPabloFunction = function;
    72 
    73     for (unsigned i = 0; i < function->getNumOfParameters(); ++i) {
    74         mKernelBuilder->addInputStream(1, function->getParameter(i)->getName()->to_string());
    75     }
    76     for (unsigned i = 0; i < function->getNumOfResults(); ++i) {
    77         mKernelBuilder->addOutputStream(1);
    78     }
    79 
    80     mCarryManager->initialize(function, mKernelBuilder);
    81    
    82     mKernelBuilder->addInternalState(mBitBlockType, "EOFmark");
    83    
    84     mFunction = mKernelBuilder->prepareFunction({mInputStreamOffset.begin(), mInputStreamOffset.end()});
    85 
    86     mCarryManager->reset();
    87 
    88     for (unsigned j = 0; j < function->getNumOfParameters(); ++j) {
    89         Value * inputVal = mKernelBuilder->getInputStream(j);
    90         const Var * const var = function->getParameter(j);
    91         if (DebugOptionIsSet(DumpTrace)) {
    92             iBuilder->CallPrintRegister(var->getName()->to_string(), iBuilder->CreateBlockAlignedLoad(inputVal));
    93         }
    94         mMarkerMap.insert(std::make_pair(var, inputVal));
    95     }
    96 
    97     compileBlock(function->getEntryBlock());
    98 
    99     for (unsigned j = 0; j < function->getNumOfResults(); ++j) {
    100         const auto f = mMarkerMap.find(function->getResult(j));
    101         if (LLVM_UNLIKELY(f == mMarkerMap.end())) {
    102             throw std::runtime_error("PabloCompiler: result " + std::to_string(j) + " was not assigned a value!");
    103         }
    104         iBuilder->CreateBlockAlignedStore(f->second, mKernelBuilder->getOutputStream(j));
    105     }
    106 
    107     mKernelBuilder->finalize();
    10898}
    10999
     
    364354        expr = iBuilder->simd_and(sum, iBuilder->simd_not(cc_expr));
    365355    } else if (const InFile * e = dyn_cast<InFile>(stmt)) {
    366         Value * EOFmark = iBuilder->CreateLoad(mKernelBuilder->getInternalState("EOFmark"));
     356        Value * EOFmark = mKernelBuilder->getScalarField(mSelf, "EOFmark");
    367357        Value * infileMask = iBuilder->simd_add(iBuilder->getBitBlockWidth(), EOFmark, iBuilder->allOnes());
    368358        expr = iBuilder->simd_and(compileExpression(e->getExpr()), infileMask);
    369359    } else if (const AtEOF * e = dyn_cast<AtEOF>(stmt)) {
    370         Value * EOFmark = iBuilder->CreateLoad(mKernelBuilder->getInternalState("EOFmark"));
     360        Value * EOFmark = mKernelBuilder->getScalarField(mSelf, "EOFmark");
    371361                expr = iBuilder->simd_and(compileExpression(e->getExpr()), EOFmark);
    372362    } else if (const Count * c = dyn_cast<Count>(stmt)) {
    373363        Value * const to_count = compileExpression(c->getExpr());
    374         expr = mCarryManager->popCount(to_count, c->getGlobalCountIndex());
     364        std::string counter = c->getName()->to_string();
     365        Value * countSoFar = mKernelBuilder->getScalarField(mSelf, counter);
     366        Value * fieldCounts = iBuilder->simd_popcount(64, to_count);
     367        for (unsigned i = 0; i < iBuilder->getBitBlockWidth()/64; ++i) {
     368            countSoFar = iBuilder->CreateAdd(countSoFar, iBuilder->mvmd_extract(64, fieldCounts, i));
     369        }
     370        mKernelBuilder->setScalarField(mSelf, counter, countSoFar);
     371        expr = iBuilder->bitCast(iBuilder->CreateZExt(countSoFar, iBuilder->getIntNTy(iBuilder->getBitBlockWidth())));
    375372    } else if (const Lookahead * l = dyn_cast<Lookahead>(stmt)) {
    376373        PabloAST * const var = l->getExpr();
     
    390387        const unsigned offset1 = ((l->getAmount() + iBuilder->getBitBlockWidth() - 1) / iBuilder->getBitBlockWidth());
    391388        const unsigned shift = (l->getAmount() % iBuilder->getBitBlockWidth());
    392         Value * const v0 = iBuilder->CreateBlockAlignedLoad(mKernelBuilder->getInputStream(index, offset0));
    393         Value * const v1 = iBuilder->CreateBlockAlignedLoad(mKernelBuilder->getInputStream(index, offset1));
     389        Value * const v0 = nullptr;//iBuilder->CreateBlockAlignedLoad(mKernelBuilder->getInputStream(index, offset0));
     390        Value * const v1 = nullptr;//iBuilder->CreateBlockAlignedLoad(mKernelBuilder->getInputStream(index, offset1));
    394391        if (LLVM_UNLIKELY((shift % 8) == 0)) { // Use a single whole-byte shift, if possible.
    395392            expr = iBuilder->mvmd_dslli(8, v1, v0, (shift / 8));
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.h

    r5000 r5063  
    1313#include <unordered_map>
    1414#include <pablo/carry_manager.h>
     15#include <pablo/pablo_kernel.h>
    1516#include <llvm/ADT/Twine.h>
    1617#include <llvm/IR/IRBuilder.h>
     
    4849    using LookaheadOffsetMap = std::unordered_map<const PabloAST *, IntSet>;
    4950public:
    50     PabloCompiler(Module * m, IDISA::IDISA_Builder * b);
    51 
    52     llvm::Function * compile(PabloFunction * function);
    53     void setKernel(kernel::KernelBuilder * kBuilder);
     51    PabloCompiler(IDISA::IDISA_Builder * b, PabloKernel * k, PabloFunction * function);
     52    Type * initializeCarryData();
     53    void compile(Function * doBlockFunction);
    5454
    5555private:
     
    7171    Type* const                         mBitBlockType;
    7272
    73     CarryManager *                      mCarryManager;
     73    std::unique_ptr<CarryManager>       mCarryManager;
    7474
    75     const PabloFunction *               mPabloFunction;
     75    PabloFunction *  const             mPabloFunction;
    7676    const PabloBlock *                  mPabloBlock;
    7777
    78     kernel::KernelBuilder *             mKernelBuilder;
     78    PabloKernel *                       mKernelBuilder;
     79    Value *                             mSelf;
    7980
    8081    unsigned                            mWhileDepth;
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp

    r5062 r5063  
    1818    KernelBuilder(builder, kernelName,
    1919                    {StreamSetBinding{StreamSetType(function->getNumOfParameters(), 1), "inputs"}},
    20                     {StreamSetBinding{StreamSetType(function->getNumOfResults(), 1), "outputs"}},
     20                    {},
    2121                    {},
    2222                    {},
    2323                    {ScalarBinding{builder->getBitBlockType(), "EOFmark"}}),
    2424    mPabloFunction(function) {
     25    unsigned output_streams = function->getNumOfResults();
     26    if (output_streams > 0) {
     27        mStreamSetOutputs = {StreamSetBinding{StreamSetType(output_streams, 1), "outputs"}};
     28    }
    2529    mScalarOutputs = accumBindings(accumulators);
    2630    pablo_compiler = new PabloCompiler(builder, this, function);
     
    3236    for (auto a : accum_names) {
    3337        vec.push_back(ScalarBinding{accum_t, a});
     38        addScalar(accum_t, a);
    3439    }
    3540    return vec;
     
    3742
    3843void PabloKernel::prepareKernel() {
    39     errs() << "PabloKernel::prepareKernel\n";
    4044    Type * carryDataType = pablo_compiler->initializeCarryData();
    4145    addScalar(carryDataType, "carries");
     
    4448
    4549void PabloKernel::generateKernel() {
     50    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
    4651    KernelBuilder::generateKernel();
    4752    Module * m = iBuilder->getModule();
    4853    addFinalBlockMethod(m);
    4954    pablo_compiler->compile(m->getFunction(mKernelName + doBlock_suffix));
     55    iBuilder->restoreIP(savePoint);
    5056}
    5157
    5258void PabloKernel::addFinalBlockMethod(Module * m) {
     59    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     60    Module * saveModule = iBuilder->getModule();
     61    iBuilder->setModule(m);
    5362    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    5463    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
     
    6978    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
    7079    iBuilder->CreateRetVoid();
     80    iBuilder->setModule(saveModule);
     81    iBuilder->restoreIP(savePoint);
    7182}
    7283
  • icGREP/icgrep-devel/icgrep/pablo/pe_count.h

    r5061 r5063  
    3030    : Statement(ClassTypeId::Count, {expr}, counter)
    3131    {
    32 
    3332    }
    3433private:
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5046 r5063  
    407407        match_follow = mPB.createNot(match_follow);
    408408    }
    409     mFunction.setResult(0, mPB.createAssign("matches", mPB.createAnd(match_follow, mLineBreak)));
     409    Assign * matches = mPB.createAssign("matches", mPB.createAnd(match_follow, mLineBreak));
     410    mFunction.setResultCount(mPB.createCount("matchedLineCount", matches));
     411    mFunction.setResult(0, matches);
    410412}
    411413
  • icGREP/icgrep-devel/icgrep/wc.cpp

    r5042 r5063  
    2626#include <cc/cc_compiler.h>
    2727#include <pablo/function.h>
     28#include <pablo/pablo_kernel.h>
    2829#include <IDISA/idisa_builder.h>
    2930#include <IDISA/idisa_target.h>
    30 #include <kernels/instance.h>
     31#include <kernels/interface.h>
    3132#include <kernels/kernel.h>
    3233#include <kernels/s2p_kernel.h>
     
    101102pablo::PabloFunction * wc_gen(Encoding encoding) {
    102103    //  input: 8 basis bit streams
    103     //  output: 3 count streams
    104    
    105     pablo::PabloFunction * function = pablo::PabloFunction::Create("wc", 8, 3);
     104    //  output: 3 counters
     105   
     106    pablo::PabloFunction * function = pablo::PabloFunction::Create("wc", 8, 0);
    106107    cc::CC_Compiler ccc(*function, encoding);
    107108   
     
    111112    if (CountLines) {
    112113        pablo::PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
    113         function->setResult(0, pBuilder.createAssign("lineCount", pBuilder.createCount(LF)));
    114     }
    115     else function->setResult(0, pBuilder.createAssign("lineCount", pBuilder.createZeroes()));
     114        function->setResultCount(pBuilder.createCount("lineCount", LF));
     115    }
    116116    if (CountWords) {
    117117        pablo::PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
     
    122122        //
    123123        pablo::PabloAST * wordStart = pBuilder.createInFile(pBuilder.createAnd(wordChar, WS_follow_or_start));
    124         function->setResult(1, pBuilder.createAssign("wordCount", pBuilder.createCount(wordStart)));
    125     }
    126     else function->setResult(1, pBuilder.createAssign("wordCount", pBuilder.createZeroes()));
     124        function->setResultCount(pBuilder.createCount("wordCount", wordStart));
     125    }
    127126    if (CountChars) {
    128127        //
     
    131130        //
    132131        pablo::PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
    133         function->setResult(2, pBuilder.createAssign("charCount", pBuilder.createCount(u8Begin)));
    134     }
    135     else function->setResult(2, pBuilder.createAssign("charCount", pBuilder.createZeroes()));
     132        function->setResultCount(pBuilder.createCount("charCount", u8Begin));
     133    }
    136134    return function;
    137135}
     
    146144    ~wcPipelineBuilder();
    147145   
    148     void CreateKernels(pablo::PabloFunction * function);
    149     llvm::Function * ExecuteKernels();
     146    llvm::Function * ExecuteKernels(pablo::PabloFunction * function);
    150147   
    151148private:
    152149    llvm::Module *                      mMod;
    153150    IDISA::IDISA_Builder *              iBuilder;
    154     KernelBuilder *                     mS2PKernel;
    155     KernelBuilder *                     mWC_Kernel;
    156151    llvm::Type *                        mBitBlockType;
    157152    int                                 mBlockSize;
     
    171166
    172167wcPipelineBuilder::~wcPipelineBuilder(){
    173     delete mS2PKernel;
    174     delete mWC_Kernel;
    175 }
    176 
    177 void wcPipelineBuilder::CreateKernels(PabloFunction * function){
    178     mS2PKernel = new KernelBuilder(iBuilder, "s2p", codegen::SegmentSize);
    179     mWC_Kernel = new KernelBuilder(iBuilder, "wc", codegen::SegmentSize);
    180    
    181     generateS2PKernel(mMod, iBuilder, mS2PKernel);
     168}
     169
     170
     171Function * wcPipelineBuilder::ExecuteKernels(PabloFunction * function) {
     172    s2pKernel  s2pk(iBuilder);
     173    s2pk.generateKernel();
    182174   
    183175    pablo_function_passes(function);
    184    
    185     PabloCompiler pablo_compiler(mMod, iBuilder);
    186     try {
    187         pablo_compiler.setKernel(mWC_Kernel);
    188         pablo_compiler.compile(function);
    189         delete function;
    190         releaseSlabAllocatorMemory();
    191     } catch (std::runtime_error e) {
    192         delete function;
    193         releaseSlabAllocatorMemory();
    194         std::cerr << "Runtime error: " << e.what() << std::endl;
    195         exit(1);
    196     }
    197    
    198 }
    199 
    200 
    201 
    202 
    203 Function * wcPipelineBuilder::ExecuteKernels() {
     176    PabloKernel  wck(iBuilder, "wc", function, {"lineCount", "wordCount", "charCount"});
     177    wck.prepareKernel();
     178    wck.generateKernel();
     179
    204180    Constant * record_counts_routine;
    205181    Type * const int64ty = iBuilder->getInt64Ty();
     
    223199    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    224200
    225     BasicBlock * segmentCondBlock = nullptr;
    226     BasicBlock * segmentBodyBlock = nullptr;
    227     const unsigned segmentSize = codegen::SegmentSize;
    228     if (segmentSize > 1) {
    229         segmentCondBlock = BasicBlock::Create(mMod->getContext(), "segmentCond", main, 0);
    230         segmentBodyBlock = BasicBlock::Create(mMod->getContext(), "segmentBody", main, 0);
    231     }
    232201    BasicBlock * fullCondBlock = BasicBlock::Create(mMod->getContext(), "fullCond", main, 0);
    233202    BasicBlock * fullBodyBlock = BasicBlock::Create(mMod->getContext(), "fullBody", main, 0);
    234203    BasicBlock * finalBlock = BasicBlock::Create(mMod->getContext(), "final", main, 0);
    235     BasicBlock * finalPartialBlock = BasicBlock::Create(mMod->getContext(), "partial", main, 0);
    236     BasicBlock * finalEmptyBlock = BasicBlock::Create(mMod->getContext(), "empty", main, 0);
    237     BasicBlock * endBlock = BasicBlock::Create(mMod->getContext(), "end", main, 0);
    238 
    239     Instance * s2pInstance = mS2PKernel->instantiate(inputStream);
    240     Instance * wcInstance = mWC_Kernel->instantiate(s2pInstance->getOutputStreamBuffer());
    241 
    242     Value * initialBufferSize = nullptr;
    243     BasicBlock * initialBlock = nullptr;
    244    
    245     if (segmentSize > 1) {
    246         iBuilder->CreateBr(segmentCondBlock);
    247         iBuilder->SetInsertPoint(segmentCondBlock);
    248         PHINode * remainingBytes = iBuilder->CreatePHI(int64ty, 2, "remainingBytes");
    249         remainingBytes->addIncoming(bufferSize, entryBlock);
    250         Constant * const step = ConstantInt::get(int64ty, mBlockSize * segmentSize);
    251         Value * segmentCondTest = iBuilder->CreateICmpULT(remainingBytes, step);
    252         iBuilder->CreateCondBr(segmentCondTest, fullCondBlock, segmentBodyBlock);
    253         iBuilder->SetInsertPoint(segmentBodyBlock);
    254         for (unsigned i = 0; i < segmentSize; ++i) {
    255             s2pInstance->CreateDoBlockCall();
    256         }
    257         for (unsigned i = 0; i < segmentSize; ++i) {
    258             wcInstance->CreateDoBlockCall();
    259         }
    260         remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, step), segmentBodyBlock);
    261         iBuilder->CreateBr(segmentCondBlock);
    262         initialBufferSize = remainingBytes;
    263         initialBlock = segmentCondBlock;
    264     } else {
    265         initialBufferSize = bufferSize;
    266         initialBlock = entryBlock;
    267         iBuilder->CreateBr(fullCondBlock);
    268     }
    269 
     204
     205    StreamSetBuffer ByteStream(iBuilder, StreamSetType(1, 8), 0);
     206    StreamSetBuffer BasisBits(iBuilder, StreamSetType(8, 1), 1);
     207    ByteStream.setStreamSetBuffer(inputStream);
     208    Value * basisBits = BasisBits.allocateBuffer();
     209
     210    Value * s2pInstance = s2pk.createInstance({});
     211    Value * wcInstance = wck.createInstance({});
     212   
     213    Value * initialBufferSize = bufferSize;
     214    BasicBlock * initialBlock = entryBlock;
     215    Value * initialBlockNo = iBuilder->getInt64(0);
     216
     217    iBuilder->CreateBr(fullCondBlock);
     218
     219   
    270220    iBuilder->SetInsertPoint(fullCondBlock);
    271221    PHINode * remainingBytes = iBuilder->CreatePHI(int64ty, 2, "remainingBytes");
    272222    remainingBytes->addIncoming(initialBufferSize, initialBlock);
     223    PHINode * blockNo = iBuilder->CreatePHI(int64ty, 2, "blockNo");
     224    blockNo->addIncoming(initialBlockNo, initialBlock);
    273225
    274226    Constant * const step = ConstantInt::get(int64ty, mBlockSize);
     
    278230    iBuilder->SetInsertPoint(fullBodyBlock);
    279231
    280     s2pInstance->CreateDoBlockCall();
    281     wcInstance->CreateDoBlockCall();
     232    s2pk.createDoBlockCall(s2pInstance, {ByteStream.getBlockPointer(blockNo), basisBits});
     233    wck.createDoBlockCall(wcInstance, {basisBits});
    282234
    283235    Value * diff = iBuilder->CreateSub(remainingBytes, step);
    284236
    285237    remainingBytes->addIncoming(diff, fullBodyBlock);
     238    blockNo->addIncoming(iBuilder->CreateAdd(blockNo, iBuilder->getInt64(1)), fullBodyBlock);
    286239    iBuilder->CreateBr(fullCondBlock);
    287240   
    288241    iBuilder->SetInsertPoint(finalBlock);
    289     Value * EOFmark = iBuilder->CreateShl(ConstantInt::get(iBuilder->getIntNTy(mBlockSize), 1), remainingBytes);
    290         wcInstance->setInternalState("EOFmark", iBuilder->CreateBitCast(EOFmark, mBitBlockType));
    291    
    292     Value * emptyBlockCond = iBuilder->CreateICmpEQ(remainingBytes, ConstantInt::get(int64ty, 0));
    293     iBuilder->CreateCondBr(emptyBlockCond, finalEmptyBlock, finalPartialBlock);
    294    
    295    
    296     iBuilder->SetInsertPoint(finalPartialBlock);
    297     s2pInstance->CreateDoBlockCall();
    298 
    299     iBuilder->CreateBr(endBlock);
    300    
    301     iBuilder->SetInsertPoint(finalEmptyBlock);
    302     s2pInstance->clearOutputStreamSet();
    303     iBuilder->CreateBr(endBlock);
    304    
    305     iBuilder->SetInsertPoint(endBlock);
    306 
    307     wcInstance->CreateDoBlockCall();
    308    
    309     Value * lineCount = iBuilder->CreateExtractElement(iBuilder->CreateBlockAlignedLoad(wcInstance->getOutputStream((int) 0)), iBuilder->getInt32(0));
    310     Value * wordCount = iBuilder->CreateExtractElement(iBuilder->CreateBlockAlignedLoad(wcInstance->getOutputStream(1)), iBuilder->getInt32(0));
    311     Value * charCount = iBuilder->CreateExtractElement(iBuilder->CreateBlockAlignedLoad(wcInstance->getOutputStream(2)), iBuilder->getInt32(0));
    312    
     242    s2pk.createFinalBlockCall(s2pInstance, remainingBytes, {ByteStream.getBlockPointer(blockNo), basisBits});
     243    wck.createFinalBlockCall(wcInstance, remainingBytes, {basisBits});
     244   
     245    Value * lineCount = wck.createGetAccumulatorCall(wcInstance, "lineCount");
     246    Value * wordCount = wck.createGetAccumulatorCall(wcInstance, "wordCount");
     247    Value * charCount = wck.createGetAccumulatorCall(wcInstance, "charCount");;
     248
    313249    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, bufferSize, fileIdx}));
    314250   
    315251    iBuilder->CreateRetVoid();
    316    
    317252    return main;
    318253}
     
    331266    Encoding encoding(Encoding::Type::UTF_8, 8);
    332267    pablo::PabloFunction * function = wc_gen(encoding);
    333     pipelineBuilder.CreateKernels(function);
    334     llvm::Function * main_IR = pipelineBuilder.ExecuteKernels();
     268    llvm::Function * main_IR = pipelineBuilder.ExecuteKernels(function);
    335269
    336270    wcEngine = JIT_to_ExecutionEngine(M);
Note: See TracChangeset for help on using the changeset viewer.