Changeset 4959


Ignore:
Timestamp:
Mar 7, 2016, 3:37:30 PM (4 years ago)
Author:
nmedfort
Message:

Initial modifications to Pablo Compiler and Kernel Builder to support circular buffers for Lookahead.

Location:
icGREP/icgrep-devel/icgrep
Files:
4 added
30 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_builder.h

    r4957 r4959  
    4343    Constant * simd_lomask(unsigned fw);
    4444       
     45    LoadInst * CreateBlockAlignedLoad(Value * const ptr);
     46    LoadInst * CreateBlockAlignedLoad(Value * const ptr, Value * const index);
     47    LoadInst * CreateBlockAlignedLoad(Value * const ptr, std::initializer_list<Value *> indicies);
     48    void CreateBlockAlignedStore(Value * const value, Value * const ptr);
     49    void CreateBlockAlignedStore(Value * const value, Value * const ptr, Value * const index);
     50    void CreateBlockAlignedStore(Value * const value, Value * const ptr, std::initializer_list<Value *> indicies);
     51
    4552    virtual Value * simd_add(unsigned fw, Value * a, Value * b);
    4653    virtual Value * simd_sub(unsigned fw, Value * a, Value * b);
     
    98105};
    99106
     107inline LoadInst * IDISA_Builder::CreateBlockAlignedLoad(Value * const ptr) {
     108    return CreateAlignedLoad(ptr, mBitBlockWidth / 8);
     109}
     110
     111inline LoadInst * IDISA_Builder::CreateBlockAlignedLoad(Value * const ptr, Value * const index) {
     112    return CreateBlockAlignedLoad(CreateGEP(ptr, index));
     113}
     114
     115inline LoadInst * IDISA_Builder::CreateBlockAlignedLoad(Value * const ptr, std::initializer_list<Value *> indicies) {
     116    return CreateBlockAlignedLoad(CreateGEP(ptr, indicies));
     117}
     118
     119inline void IDISA_Builder::CreateBlockAlignedStore(Value * const value, Value * const ptr) {
     120    CreateAlignedStore(value, ptr, mBitBlockWidth / 8);
     121}
     122
     123inline void IDISA_Builder::CreateBlockAlignedStore(Value * const value, Value * const ptr, Value * const index) {
     124    CreateBlockAlignedStore(value, CreateGEP(ptr, index));
     125}
     126
     127inline void IDISA_Builder::CreateBlockAlignedStore(Value * const value, Value * const ptr, std::initializer_list<Value *> indicies) {
     128    CreateBlockAlignedStore(value, CreateGEP(ptr, indicies));
     129}
     130
    100131}
    101132#endif // IDISA_BUILDER_H
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r4949 r4959  
    2020public:
    2121
    22     GrepEngine() {};
     22    GrepEngine() {}
    2323 
    2424    void grepCodeGen(std::string moduleName, re::RE * re_ast, bool isNameExpression = false);
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r4937 r4959  
    498498../buddy-2.4/src/bddtree.h
    499499../buddy-2.4/src/tree.cpp
     500pablo/pe_lookahead.h
     501pablo/symbol-table/length_group_generator.h
     502pablo/symbol-table/length_group_generator.cpp
     503kernels/pipeline.h
     504kernels/kernel.h
     505kernels/kernel.cpp
     506kernels/scanmatchgen.h
     507kernels/pipeline.cpp
     508kernels/scanmatchgen.cpp
     509kernels/s2p_gen.cpp
     510kernels/s2p_gen.h
     511cc/cc_compiler.cpp
     512cc/cc_compiler.h
     513CMakeFiles/3.2.2/CompilerIdC/CMakeCCompilerId.c
     514CMakeFiles/3.2.2/CompilerIdCXX/CMakeCXXCompilerId.cpp
     515CMakeFiles/feature_tests.c
     516CMakeFiles/feature_tests.cxx
     517IDISA/idisa_avx_builder.cpp
     518IDISA/idisa_avx_builder.h
     519IDISA/idisa_builder.cpp
     520IDISA/idisa_builder.h
     521IDISA/idisa_sse_builder.cpp
     522IDISA/idisa_sse_builder.h
     523include/simd-lib/idisa_cpp/idisa_avx2.cpp
     524include/simd-lib/idisa_cpp/idisa_sse2.cpp
     525include/simd-lib/bitblock.hpp
     526include/simd-lib/bitblock128.hpp
     527include/simd-lib/bitblock256.hpp
     528include/simd-lib/bitblock_iterator.hpp
     529include/simd-lib/buffer.hpp
     530include/simd-lib/builtins.hpp
     531include/simd-lib/carryQ.hpp
     532include/simd-lib/config.hpp
     533include/simd-lib/idisa.hpp
     534include/simd-lib/idisa128.hpp
     535include/simd-lib/idisa256.hpp
     536include/simd-lib/pabloSupport.hpp
     537include/simd-lib/s2p.hpp
     538include/simd-lib/transpose.hpp
     539kernels/kernel.cpp
     540kernels/kernel.h
     541kernels/pipeline.cpp
     542kernels/pipeline.h
     543kernels/s2p_kernel.cpp
     544kernels/s2p_kernel.h
     545kernels/scanmatchgen.cpp
     546kernels/scanmatchgen.h
     547pablo/analysis/pabloverifier.cpp
     548pablo/analysis/pabloverifier.hpp
     549pablo/optimizers/booleanreassociationpass.cpp
     550pablo/optimizers/booleanreassociationpass.h
     551pablo/optimizers/codemotionpass.cpp
     552pablo/optimizers/codemotionpass.h
     553pablo/optimizers/distributivepass.cpp
     554pablo/optimizers/distributivepass.h
     555pablo/optimizers/graph-facade.hpp
     556pablo/optimizers/pablo_automultiplexing.cpp
     557pablo/optimizers/pablo_automultiplexing.hpp
     558pablo/optimizers/pablo_bddminimization.cpp
     559pablo/optimizers/pablo_bddminimization.h
     560pablo/optimizers/pablo_simplifier.cpp
     561pablo/optimizers/pablo_simplifier.hpp
     562pablo/optimizers/schedulingprepass.cpp
     563pablo/optimizers/schedulingprepass.h
     564pablo/passes/factorizedfg.cpp
     565pablo/passes/factorizedfg.h
     566pablo/passes/flattenassociativedfg.cpp
     567pablo/passes/flattenassociativedfg.h
     568pablo/symbol-table/length_group_generator.cpp
     569pablo/symbol-table/length_group_generator.h
     570pablo/builder.cpp
     571pablo/builder.hpp
     572pablo/carry_data.cpp
     573pablo/carry_data.h
     574pablo/carry_manager.cpp
     575pablo/carry_manager.h
     576pablo/codegenstate.cpp
     577pablo/codegenstate.h
     578pablo/expression_map.hpp
     579pablo/function.cpp
     580pablo/function.h
     581pablo/pablo_compiler.cpp
     582pablo/pablo_compiler.h
     583pablo/pabloAST.cpp
     584pablo/pabloAST.h
     585pablo/pe_advance.h
     586pablo/pe_and.h
     587pablo/pe_call.h
     588pablo/pe_count.h
     589pablo/pe_integer.h
     590pablo/pe_lookahead.h
     591pablo/pe_matchstar.h
     592pablo/pe_next.h
     593pablo/pe_not.h
     594pablo/pe_ones.h
     595pablo/pe_or.h
     596pablo/pe_scanthru.h
     597pablo/pe_sel.h
     598pablo/pe_setithbit.h
     599pablo/pe_string.h
     600pablo/pe_var.h
     601pablo/pe_xor.h
     602pablo/pe_zeroes.h
     603pablo/printer_pablos.cpp
     604pablo/printer_pablos.h
     605pablo/ps_assign.h
     606pablo/ps_if.cpp
     607pablo/ps_if.h
     608pablo/ps_while.cpp
     609pablo/ps_while.h
     610pablo/symbol_generator.cpp
     611pablo/symbol_generator.h
     612re/parsefailure.cpp
     613re/parsefailure.h
     614re/printer_re.cpp
     615re/printer_re.h
     616re/re_alt.h
     617re/re_analysis.cpp
     618re/re_analysis.h
     619re/re_any.h
     620re/re_assertion.h
     621re/re_cc.cpp
     622re/re_cc.h
     623re/re_compiler.cpp
     624re/re_compiler.h
     625re/re_diff.cpp
     626re/re_diff.h
     627re/re_end.h
     628re/re_grapheme_boundary.hpp
     629re/re_intersect.cpp
     630re/re_intersect.h
     631re/re_memoizer.hpp
     632re/re_name.h
     633re/re_nullable.cpp
     634re/re_nullable.h
     635re/re_parser.cpp
     636re/re_parser.h
     637re/re_re.cpp
     638re/re_re.h
     639re/re_rep.cpp
     640re/re_rep.h
     641re/re_seq.h
     642re/re_simplifier.cpp
     643re/re_simplifier.h
     644re/re_start.h
     645UCD/Blocks.h
     646UCD/CaseFolding_txt.cpp
     647UCD/CaseFolding_txt.h
     648UCD/DerivedAge.h
     649UCD/DerivedBidiClass.h
     650UCD/DerivedBinaryProperties.h
     651UCD/DerivedCombiningClass.h
     652UCD/DerivedCoreProperties.h
     653UCD/DerivedDecompositionType.h
     654UCD/DerivedGeneralCategory.h
     655UCD/DerivedJoiningGroup.h
     656UCD/DerivedJoiningType.h
     657UCD/DerivedNumericType.h
     658UCD/EastAsianWidth.h
     659UCD/GraphemeBreakProperty.h
     660UCD/HangulSyllableType.h
     661UCD/LineBreak.h
     662UCD/precompiled_properties.cpp
     663UCD/precompiled_properties.h
     664UCD/PropertyAliases.h
     665UCD/PropertyObjects.cpp
     666UCD/PropertyObjects.h
     667UCD/PropertyObjectTable.h
     668UCD/PropertyValueAliases.h
     669UCD/PropList.h
     670UCD/resolve_properties.cpp
     671UCD/resolve_properties.h
     672UCD/ScriptExtensions.h
     673UCD/Scripts.h
     674UCD/SentenceBreakProperty.h
     675UCD/ucd_compiler.cpp
     676UCD/ucd_compiler.hpp
     677UCD/unicode_set.cpp
     678UCD/unicode_set.h
     679UCD/WordBreakProperty.h
     680do_grep.cpp
     681do_grep.h
     682generate_predefined_ucd_functions.cpp
     683hrtime.h
     684icgrep.cpp
     685ispc.cpp
     686papi_helper.hpp
     687slab_allocator.h
     688toolchain.cpp
     689toolchain.h
     690utf8_encoder.cpp
     691utf8_encoder.h
     692utf_encoding.h
     693kernels/lookaheadextractor.h
     694kernels/lookaheadextractor.cpp
     695kernels/symboltablepipeline.h
     696kernels/symboltablepipeline.cpp
     697symboltable.cpp
     698grep_engine.h
     699grep_engine.cpp
  • icGREP/icgrep-devel/icgrep/icgrep-devel.includes

    r4922 r4959  
    1515kernels
    1616IDISA
     17pablo/symbol-table
     18include/simd-lib/idisa_cpp
     19CMakeFiles/3.2.2/CompilerIdC
     20include/simd-lib
     21CMakeFiles
     22CMakeFiles/3.2.2/CompilerIdCXX
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r4945 r4959  
    55
    66#include "kernel.h"
    7 #include <iostream>
     7#include <pablo/function.h>
     8#include <IDISA/idisa_builder.h>
     9
     10using namespace llvm;
     11using namespace pablo;
     12
     13inline bool isPowerOfTwo(const unsigned x) {
     14    return (x != 0) && (x & (x - 1)) == 0;
     15}
    816
    917// sets name & sets internal state to the kernel superclass state
     
    1220, iBuilder(b)
    1321, mKernelName(name)
    14 , mPredifinedStates(2)
    1522, mBitBlockType(b->getBitBlockType())
    16 , mBlockSize(b->getBitBlockWidth()){
    17     mStates = std::vector<Type *>(4, b->getIntNTy(64));
    18     mSegmentBlocks = 1;
    19     mBufferSize = mSegmentBlocks * mBlockSize;
    20 }
    21 
    22 KernelBuilder::~KernelBuilder(){
    23 }
    24 
    25 int KernelBuilder::extendKernelInternalStateType(Type * t){
    26     int idx = mStates.size();
    27     mStates.push_back(t);
     23, mBlockSize(b->getBitBlockWidth())
     24, mBlocksPerSegment(1)
     25, mCircularBufferModulo(1)
     26, mSegmentIndex(0)
     27, mStartIndex(0) {
     28    addInternalStateType(b->getInt64Ty());
     29    addInternalStateType(b->getInt64Ty());
     30    addInternalStateType(b->getInt64Ty());
     31    addInternalStateType(b->getInt64Ty());
     32}
     33
     34unsigned KernelBuilder::addInternalStateType(Type * type){
     35    unsigned idx = mStates.size();
     36    mStates.push_back(type);
    2837    return idx;
    2938}
    30 void KernelBuilder::addKernelOutputStream(int fw){
    31     if (fw == 1){
     39void KernelBuilder::addOutputStream(const unsigned fields){
     40    if (fields == 1){
    3241        mOutputStreams.push_back(mBitBlockType);
    3342    }
    3443    else {
    35         mOutputStreams.push_back(ArrayType::get(mBitBlockType, fw));
    36     }
    37 
    38 }
    39 void KernelBuilder::addKernelOutputAccum(Type * t){
     44        mOutputStreams.push_back(ArrayType::get(mBitBlockType, fields));
     45    }
     46
     47}
     48void KernelBuilder::addOutputAccum(Type * t){
    4049    mOutputAccums.push_back(t);
    4150
    4251}
    43 void KernelBuilder::addKernelInputStream(int fw, std::string name = ""){
    44     if (name=="")
     52void KernelBuilder::addInputStream(const unsigned fields, std::string name){
     53    if (name.empty())
    4554        mInputStreamNames.push_back(mKernelName + "_inputstream_" + std::to_string(mInputStreams.size()));
    4655    else
    4756        mInputStreamNames.push_back(name);
    4857
    49     if (fw == 1){
     58    if (fields == 1){
    5059        mInputStreams.push_back(mBitBlockType);
    51     }
    52     else {
    53         mInputStreams.push_back(ArrayType::get(mBitBlockType, fw));
    54     }
    55 }
    56 void KernelBuilder::addKernelInputScalar(Type * t, std::string name = ""){
    57     if (name=="")
     60    } else {
     61        mInputStreams.push_back(ArrayType::get(mBitBlockType, fields));
     62    }
     63}
     64void KernelBuilder::addInputScalar(Type * t, std::string name){
     65    if (name.empty())
    5866        mInputScalarNames.push_back(mKernelName + "_inputscalar_" + std::to_string(mInputScalars.size()));
    5967    else
     
    6371}
    6472
    65 void KernelBuilder::PrepareDoBlockFunction(){
    66     mInputStreamType = PointerType::get(ArrayType::get(StructType::get(mMod->getContext(), mInputStreams), mSegmentBlocks), 0);
     73/** ------------------------------------------------------------------------------------------------------------- *
     74 * @brief prepareFunction
     75 ** ------------------------------------------------------------------------------------------------------------- */
     76Function * KernelBuilder::prepareFunction() {   
     77    if (mCircularBufferModulo > 1) {
     78        mStartIndex = addInternalStateType(iBuilder->getInt32Ty());
     79    }
     80    const unsigned capacity = mBlocksPerSegment + mCircularBufferModulo - 1;
     81    mInputStreamType = PointerType::get(ArrayType::get(StructType::get(mMod->getContext(), mInputStreams), capacity), 0);
    6782    mInputScalarType = PointerType::get(StructType::get(mMod->getContext(), mInputScalars), 0);
    68     Type * outputStreamType = ArrayType::get(StructType::get(mMod->getContext(), mOutputStreams), mSegmentBlocks);
     83    Type * outputStreamType = ArrayType::get(StructType::get(mMod->getContext(), mOutputStreams), capacity);
    6984    Type * outputAccumType = StructType::get(mMod->getContext(), mOutputAccums);
    7085    Type * stateType = StructType::create(mMod->getContext(), mStates, mKernelName);
    71     mKernelStructType = StructType::create(mMod->getContext(),std::vector<Type *>({stateType, outputStreamType, outputAccumType}), "KernelStruct_"+mKernelName);
    72 }
    73 
    74 struct Inputs KernelBuilder::openDoBlock(){
    75     // FunctionType * functionType = FunctionType::get(Type::getVoidTy(mMod->getContext()),
    76     //     std::vector<Type *>({PointerType::get(mKernelStructType, 0), mInputStreamType, mInputScalarType}), false);
    77        
    78     FunctionType * functionType = FunctionType::get(Type::getVoidTy(mMod->getContext()),
    79         std::vector<Type *>({PointerType::get(mKernelStructType, 0), mInputStreamType}), false);
    80 
    81     mDoBlockFunction = Function::Create(functionType, GlobalValue::ExternalLinkage, mKernelName + "_DoBlock", mMod);
    82     mDoBlockFunction->setCallingConv(CallingConv::C);
    83    
    84     Function::arg_iterator args = mDoBlockFunction->arg_begin();
    85     mKernelStructParam = args++;
    86     mKernelStructParam->setName("this");
    87     Value* input_stream_param = args++;
    88     input_stream_param->setName("input_stream");
    89     // Value* input_scalar_param = args++;
    90     // input_scalar_param->setName("input_scalar");
    91 
    92     iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mDoBlockFunction,0));
    93 
    94     struct Inputs inputs;
    95     for(int j = 0; j<mSegmentBlocks; j++){
    96         for(int i = 0; i<mInputStreams.size(); i++){
    97             Value* indices[] = {iBuilder->getInt64(0), iBuilder->getInt32(j), iBuilder->getInt32(i)};
    98             Value * gep = iBuilder->CreateGEP(input_stream_param, indices);
    99             Type * t = gep->getType()->getPointerElementType();
    100             if (t != mBitBlockType) {
    101                 int arraySize = t->getArrayNumElements();
    102                 inputs.streams.resize(mSegmentBlocks, std::vector<valptr>(arraySize));
    103                 for (int k=0; k<arraySize; k++){
    104                     Value * gep_array_elem = iBuilder->CreateGEP(gep, {iBuilder->getInt32(0), iBuilder->getInt32(k)});
    105                     inputs.streams[j][k] = iBuilder->CreateAlignedLoad(gep_array_elem, mBlockSize/8, false, mInputStreamNames.at(i));
    106                 }
    107             }
    108             else{
    109                 inputs.streams.resize(mSegmentBlocks, std::vector<valptr>(mInputStreams.size()));
    110                 inputs.streams[j][i] = iBuilder->CreateAlignedLoad(gep, mBlockSize/8, false, mInputStreamNames.at(i));
    111             }
    112            
     86    mKernelStructType = StructType::create(mMod->getContext(),std::vector<Type *>({stateType, outputStreamType, outputAccumType}), "KernelStruct_"+ mKernelName);
     87
     88    FunctionType * functionType = FunctionType::get(Type::getVoidTy(mMod->getContext()),
     89        std::vector<Type *>({PointerType::get(mKernelStructType, 0), mInputStreamType}), false);
     90
     91    mFunction = Function::Create(functionType, GlobalValue::ExternalLinkage, mKernelName + "_DoBlock", mMod);
     92    mFunction->setCallingConv(CallingConv::C);
     93
     94    Function::arg_iterator args = mFunction->arg_begin();
     95    mKernelParam = args++;
     96    mKernelParam->setName("this");
     97
     98    mInputParam = args++;
     99    mInputParam->setName("input_stream");
     100
     101    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mFunction, 0));
     102
     103    mSegmentIndex = 0;
     104
     105    return mFunction;
     106}
     107
     108/** ------------------------------------------------------------------------------------------------------------- *
     109 * @brief finalize
     110 ** ------------------------------------------------------------------------------------------------------------- */
     111void KernelBuilder::finalize() {
     112    Type * const int64Ty = iBuilder->getInt64Ty();
     113
     114    // Finish the actual function
     115    if (mCircularBufferModulo > 1) {
     116        Value * startIdx = getInternalState(mStartIndex);
     117        Value * value = iBuilder->CreateAdd(iBuilder->CreateBlockAlignedLoad(startIdx), iBuilder->getInt32(1));
     118        iBuilder->CreateBlockAlignedStore(value, startIdx);
     119    }
     120    iBuilder->CreateRetVoid();
     121
     122
     123    // Generate the zero initializer
     124    Function * initializer = cast<Function>(mMod->getOrInsertFunction(mKernelName + "_Init", Type::getVoidTy(mMod->getContext()), PointerType::get(mKernelStructType, 0), nullptr));
     125    initializer->setCallingConv(CallingConv::C);
     126    Function::arg_iterator args = initializer->arg_begin();
     127
     128    mKernelParam = args++;
     129    mKernelParam->setName("this");
     130
     131    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", initializer, 0));
     132
     133    for (unsigned i = 0; i < mStates.size(); ++i) {
     134        Value * const gep = getInternalState(i);
     135        Type * const type = gep->getType();
     136        if (type->isIntegerTy() || type->isArrayTy() || type->isVectorTy()) {
     137            setInternalState(i, Constant::getNullValue(type));
     138        } else {
     139            Value * gep_next = iBuilder->CreateGEP(gep, iBuilder->getInt32(1));
     140            Value * get_int = iBuilder->CreatePtrToInt(gep, int64Ty);
     141            Value * get_next_int = iBuilder->CreatePtrToInt(gep_next, int64Ty);
     142            Value * state_size = iBuilder->CreateSub(get_next_int, get_int);
     143            iBuilder->CreateMemSet(gep, iBuilder->getInt8(0), state_size, 4);
    113144        }
    114145    }
    115146
    116     // inputs.scalars.resize(mInputScalars.size());
    117     // for(int i = 0; i<mInputScalars.size(); i++){
    118     //     Value* indices[] = {iBuilder->getInt64(0), iBuilder->getInt32(i)};
    119     //     Value * gep = iBuilder->CreateGEP(input_scalar_param, indices);
    120     //     inputs.scalars[i] = iBuilder->CreateAlignedLoad(gep, mBlockSize/8, false, mInputScalarNames.at(i));
    121     // }
    122 
    123     return inputs;
    124 }
    125 
    126 void KernelBuilder::closeDoBlock(struct Outputs result){
    127    
    128 
    129     for(int j=0; j<mSegmentBlocks; j++){
    130         for(int i = 0; i<mOutputStreams.size(); i++){   
    131             Value* indices[] = {iBuilder->getInt64(0), iBuilder->getInt32(1), iBuilder->getInt32(j), iBuilder->getInt32(i)};
    132             Value* gep = iBuilder->CreateGEP(mKernelStructParam, indices);
    133             iBuilder->CreateAlignedStore(result.streams[j][i], gep, mBlockSize/8, false);
    134         }
    135     }
    136 
    137     for(int i = 0; i<mOutputAccums.size(); i++){   
    138         Value* indices[] = {iBuilder->getInt64(0), iBuilder->getInt32(2), iBuilder->getInt32(i)};
    139         Value* gep = iBuilder->CreateGEP(mKernelStructParam, indices);
    140         iBuilder->CreateAlignedStore(result.accums[i], gep, mBlockSize/8, false);
    141     }
    142 
    143147    iBuilder->CreateRetVoid();
    144 }
    145 
    146 void KernelBuilder::changeKernelInternalState(Value * kernelStruct, int idx, Value * stateValue){
    147     Value* indices[] = {iBuilder->getInt64(0), iBuilder->getInt32(0), iBuilder->getInt32(idx)};
    148     Value* gep = iBuilder->CreateGEP(kernelStruct, indices);
    149     iBuilder->CreateAlignedStore(stateValue, gep, mBlockSize/8, false);
    150 }
    151 
    152 Value * KernelBuilder::getKernelInternalState(Value * kernelStruct, int idx){
    153     Value* indices[] = {iBuilder->getInt64(0), iBuilder->getInt32(0), iBuilder->getInt32(idx)};
    154     Value* gep = iBuilder->CreateGEP(kernelStruct, indices);
    155     return iBuilder->CreateAlignedLoad(gep, mBlockSize/8, false, "state"+std::to_string(idx));
    156 }
    157 
    158 Value * KernelBuilder::getKernelInternalStatePtr(Value * kernelStruct, int idx){
    159     Value* indices[] = {iBuilder->getInt64(0), iBuilder->getInt32(0), iBuilder->getInt32(idx)};
    160     Value* gep = iBuilder->CreateGEP(kernelStruct, indices);
    161     return gep;
    162 }
    163 
    164 void KernelBuilder::finalizeMethods(){
    165     Type * T = iBuilder->getIntNTy(64);
    166 
    167     Constant* c = mMod->getOrInsertFunction(mKernelName+"_Init", Type::getVoidTy(mMod->getContext()), PointerType::get(mKernelStructType, 0), NULL);
    168     Function* mInitFunction = cast<Function>(c);
    169     mInitFunction->setCallingConv(CallingConv::C);
    170     Function::arg_iterator args = mInitFunction->arg_begin();
    171 
    172     Value* this_param = args++;
    173     this_param->setName("this");
    174 
    175     int i = mPredifinedStates;
    176     iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mInitFunction, 0));
    177     Value * gep = iBuilder->CreateGEP(this_param, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i++) }));
    178     iBuilder->CreateStore(iBuilder->getInt64(0), gep);  //FileBasePos
    179     gep = iBuilder->CreateGEP(this_param, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i++) }));
    180     iBuilder->CreateStore(iBuilder->getInt64(0), gep);  //AvailableBlocks
    181 
    182     while(i < mStates.size()){
    183         gep = iBuilder->CreateGEP(this_param, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i++) }));
    184         Value * gep_next = iBuilder->CreateGEP(gep, std::vector<Value *>({iBuilder->getInt32(1)}));
    185         Value * get_int = iBuilder->CreatePtrToInt(gep, T);
    186         Value * get_next_int = iBuilder->CreatePtrToInt(gep_next, T);
    187         Value * state_size = iBuilder->CreateSub(get_next_int, get_int);
    188         iBuilder->CreateMemSet(gep, iBuilder->getInt8(0), state_size, 4);
    189     }
    190 
    191     iBuilder->CreateRetVoid();
    192 
    193     c = mMod->getOrInsertFunction(mKernelName+"_Create_Default", Type::getVoidTy(mMod->getContext()), PointerType::get(mKernelStructType, 0), T, T, NULL);
    194     mConstructor = cast<Function>(c);
     148
     149    // and then the constructor
     150    mConstructor = cast<Function>(mMod->getOrInsertFunction(mKernelName+"_Create_Default", Type::getVoidTy(mMod->getContext()), PointerType::get(mKernelStructType, 0), int64Ty, int64Ty, nullptr));
    195151    mConstructor->setCallingConv(CallingConv::C);
    196152    args = mConstructor->arg_begin();
    197153
    198     this_param = args++;
    199     this_param->setName("this");
     154    mKernelParam = args++;
     155    mKernelParam->setName("this");
     156
    200157    Value* block_size_param = args++;
    201158    block_size_param->setName("block_size");
    202159    Value* seg_size_param = args++;
    203160    seg_size_param->setName("seg_size");
    204  
    205161    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mConstructor, 0));
    206     gep = iBuilder->CreateGEP(this_param, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(0) }));
    207     iBuilder->CreateStore(block_size_param, gep);   
    208     gep = iBuilder->CreateGEP(this_param, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(1) }));
    209     iBuilder->CreateStore(seg_size_param, gep);
    210 
    211     iBuilder->CreateCall(mInitFunction, this_param);
     162    iBuilder->CreateStore(block_size_param, getInternalState(0));
     163    iBuilder->CreateStore(seg_size_param, getInternalState(1));
     164    iBuilder->CreateCall(initializer, mKernelParam);
    212165    iBuilder->CreateRetVoid();
    213 
    214 }
    215 
    216 Value * KernelBuilder::generateKernelInstance(){
    217 
    218     mKernelStruct = iBuilder->CreateAlloca(mKernelStructType);
    219     iBuilder->CreateCall3(mConstructor, mKernelStruct,
    220         ConstantInt::get(iBuilder->getIntNTy(64), mBlockSize),
    221         ConstantInt::get(iBuilder->getIntNTy(64), mBufferSize));
     166}
     167
     168/** ------------------------------------------------------------------------------------------------------------- *
     169 * @brief generateKernelInstance
     170 ** ------------------------------------------------------------------------------------------------------------- */
     171Value * KernelBuilder::generateKernelInstance() {
     172    mKernelStruct = iBuilder->CreateAlloca(mKernelStructType);
     173    iBuilder->CreateCall3(mConstructor, mKernelStruct,
     174        ConstantInt::get(iBuilder->getIntNTy(64), mBlockSize),
     175        ConstantInt::get(iBuilder->getIntNTy(64), (mBlocksPerSegment + mCircularBufferModulo) * mBlockSize));
    222176    return mKernelStruct;
    223177
    224178}
     179
     180Value * KernelBuilder::getInputStream(const unsigned index, const unsigned streamOffset) {
     181    Value * const indices[] = {iBuilder->getInt32(0), getOffset(streamOffset), iBuilder->getInt32(index)};
     182    return iBuilder->CreateGEP(mInputParam, indices);
     183}
     184
     185Value * KernelBuilder::getKernelState(const unsigned index, const unsigned streamOffset) {
     186    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(0), getOffset(streamOffset), iBuilder->getInt32(index)};
     187    return iBuilder->CreateGEP(mKernelParam, indices);
     188}
     189
     190Value * KernelBuilder::getOutputStream(const unsigned index, const unsigned streamOffset) {
     191    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(1), getOffset(streamOffset), iBuilder->getInt32(index)};
     192    return iBuilder->CreateGEP(mKernelParam, indices);
     193}
     194
     195Value * KernelBuilder::getOutputScalar(const unsigned index, const unsigned streamOffset) {
     196    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(2), getOffset(streamOffset), iBuilder->getInt32(index)};
     197    return iBuilder->CreateGEP(mKernelParam, indices);
     198}
     199
     200Value * KernelBuilder::getInternalState(const unsigned index){
     201    Value* indices[] = {iBuilder->getInt64(0), iBuilder->getInt32(0), iBuilder->getInt32(index)};
     202    return iBuilder->CreateGEP(mKernelParam, indices);
     203}
     204
     205void KernelBuilder::setInternalState(const unsigned index, Value * const value) {
     206    iBuilder->CreateBlockAlignedStore(value, getInternalState(index));
     207}
     208
    225209void KernelBuilder::generateInitCall(){
    226210    iBuilder->CreateCall(mInitFunction, mKernelStruct);
    227211}
    228212
    229 // void KernelBuilder::generateDoBlockCall(Value * inputStreams, Value * inputScalars){
    230 //     iBuilder->CreateCall3(mDoBlockFunction, mKernelStruct, inputStreams, inputScalars);
    231 // }
    232213void KernelBuilder::generateDoBlockCall(Value * inputStreams){
    233     iBuilder->CreateCall2(mDoBlockFunction, mKernelStruct, inputStreams);
    234 }
    235 
    236 int KernelBuilder::getSegmentBlocks(){
    237     return mSegmentBlocks;
    238 }
    239 
    240 Function * KernelBuilder::getDoBlockFunction(){
    241     return mDoBlockFunction;
    242 }
    243 
    244 Type * KernelBuilder::getKernelStructType(){
    245     return mKernelStructType;
    246 }
    247 
    248 Value * KernelBuilder::getKernelStructParam(){
    249     return mKernelStructParam;
    250 }
    251 
     214    iBuilder->CreateCall2(mFunction, mKernelStruct, inputStreams);
     215}
     216
     217/** ------------------------------------------------------------------------------------------------------------- *
     218 * @brief getOffset
     219 *
     220 * Compute the index of the given offset value.
     221 ** ------------------------------------------------------------------------------------------------------------- */
     222Value * KernelBuilder::getOffset(const unsigned offset) {
     223    Value * index = iBuilder->getInt32(mSegmentIndex + offset);
     224    if (mStartIndex) {
     225        index = iBuilder->CreateAdd(iBuilder->CreateBlockAlignedLoad(getInternalState(mStartIndex)), index);
     226        const unsigned capacity = (mBlocksPerSegment + mCircularBufferModulo);
     227        if (isPowerOfTwo(capacity)) {
     228            index = iBuilder->CreateAnd(index, ConstantInt::get(index->getType(), capacity - 1));
     229        } else {
     230            index = iBuilder->CreateURem(index, ConstantInt::get(index->getType(), capacity));
     231        }
     232        // TODO: generate branch / phi node when it's sufficiently unlikely that we'll wrap around.
     233    }
     234    return index;
     235}
     236
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r4945 r4959  
    66 */
    77
    8 
    9 #include <IDISA/idisa_builder.h>
    10 #include <llvm/IR/Function.h>
    11 #include <llvm/IR/Module.h>
     8#include <string>
     9#include <vector>
    1210
    1311namespace llvm {
     
    2422}
    2523
    26 using namespace llvm;
     24namespace pablo {
     25    class PabloAST;
     26    class PabloFunction;
     27}
    2728
    28 typedef Value* valptr;
     29namespace IDISA {
     30    class IDISA_Builder;
     31}
    2932
    30 struct Inputs {
    31     std::vector<std::vector<valptr>> streams;
    32     std::vector<valptr> scalars;
     // Collects the input/output stream, scalar and internal-state types of a kernel and exposes the LLVM
     // objects (function, struct type, struct parameter) associated with it.
     // NOTE(review): summary is based on the member names and on the callers visible in this changeset;
     // the implementation (kernel.cpp) is not shown here — confirm against it.
     33class KernelBuilder {
     34
     35public:
     36    // sets name & sets internal state to the kernel superclass state
     37    KernelBuilder(std::string name, llvm::Module * m, IDISA::IDISA_Builder * b);
     38
            // Returns an unsigned slot index; callers in this changeset pass that index back to
            // getInternalState / setInternalState.
     39    unsigned addInternalStateType(llvm::Type * type);
     40    void addOutputStream(const unsigned fields);
     41    void addOutputAccum(llvm::Type * t);
     42    void addInputStream(const unsigned fields, std::string name);
     43    void addInputScalar(llvm::Type * t, std::string name);
     44
            // Callers in this changeset invoke this after declaring all streams/state and before emitting code.
     45    llvm::Function * prepareFunction();
     46
            // increment() is defined inline below and pre-increments mSegmentIndex.
     47    void increment();
     48    void incrementCircularBuffer();
     49
     50    llvm::Value * getInputStream(const unsigned index, const unsigned streamOffset = 0);
     51    llvm::Value * getKernelState(const unsigned index, const unsigned streamOffset = 0);
     52    llvm::Value * getOutputStream(const unsigned index, const unsigned streamOffset = 0);
     53    llvm::Value * getOutputScalar(const unsigned index, const unsigned streamOffset = 0);
     54
     55    void finalize();
     56
     57    llvm::Value * generateKernelInstance();
     58        void generateInitCall();
     59    void generateDoBlockCall(llvm::Value * inputStreams);
     60
            // Trivial accessors; their inline definitions follow the class.
     61    unsigned getSegmentBlocks() const;
     62    llvm::Function * getDoBlockFunction() const;
     63    llvm::Type * getKernelStructType() const;
     64    llvm::Value * getKernelStructParam() const;
     65
            // Store their argument into mCircularBufferModulo and mBlocksPerSegment respectively.
     66    void setCircularBufferSize(const unsigned blocks);
     67    void setBlocksPerSegment(const unsigned blocks);
     68
     69    void setInternalState(const unsigned index, llvm::Value * const value);
            // The returned value is used as a pointer operand of aligned loads by callers in this changeset.
     70    llvm::Value * getInternalState(const unsigned index);
     71
     72protected:
     73
            // Computes the index for (mSegmentIndex + offset), wrapped when mStartIndex is non-zero.
     74    llvm::Value * getOffset(const unsigned offset);
     75
     76private:
     77    llvm::Module *                      mMod;
     78    IDISA::IDISA_Builder *              iBuilder;
     79    std::string                                                 mKernelName;
     80    llvm::Type *                        mBitBlockType;
     81    std::vector<llvm::Type *>                   mStates;
     82    std::vector<llvm::Type *>           mInputStreams;
     83    std::vector<llvm::Type *>           mOutputStreams;
     84    std::vector<llvm::Type *>           mInputScalars;
     85    std::vector<llvm::Type *>           mOutputAccums;
     86    std::vector<std::string>            mInputStreamNames;
     87    std::vector<std::string>            mInputScalarNames;
     88    llvm::Function*                                     mConstructor;
     89    llvm::Function*                                             mInitFunction;
     90    llvm::Function*                                             mFunction;          // returned by getDoBlockFunction()
     91    unsigned                            mBlockSize;
     92    unsigned                            mBlocksPerSegment;      // returned by getSegmentBlocks()
     93    unsigned                            mCircularBufferModulo;  // added to mBlocksPerSegment to form the wrap capacity in getOffset()
     94    llvm::Type *                        mKernelStructType;
     95    llvm::Type *                        mInputStreamType;
     96    llvm::Type *                        mInputScalarType;
     97    llvm::Value *                       mInputParam;
     98    llvm::Value *                       mKernelStruct;
     99    llvm::Value *                       mKernelParam;           // returned by getKernelStructParam()
     100    unsigned                            mSegmentIndex;          // advanced by increment()
     101    unsigned                            mStartIndex;            // zero disables wrapping in getOffset()
    33102};
    34103
    35 struct Outputs {
    36     std::vector<valptr *> streams;
    37     std::vector<valptr> accums;
    38 };
     // Returns the current value of mBlocksPerSegment.
     104inline unsigned KernelBuilder::getSegmentBlocks() const {
     105    return mBlocksPerSegment;
     106}
    39107
    40 class KernelBuilder{
    41 public:
    42     // sets name & sets internal state to the kernel superclass state
    43         KernelBuilder(std::string name, Module * m, IDISA::IDISA_Builder * b);
    44         ~KernelBuilder();
     // Returns mFunction, the LLVM function held by this builder.
     108inline llvm::Function * KernelBuilder::getDoBlockFunction() const {
     109    return mFunction;
     110}
    45111
    46         int extendKernelInternalStateType(Type * t);
    47         void addKernelOutputStream(int fw);
    48         void addKernelOutputAccum(Type * t);
    49         void addKernelInputStream(int fw, std::string name);
    50         void addKernelInputScalar(Type * t, std::string name);
    51     void PrepareDoBlockFunction();
    52         struct Inputs openDoBlock();
    53         void closeDoBlock(struct Outputs);
    54         void finalizeMethods();
    55         Value * generateKernelInstance();
    56         void generateInitCall();
    57         // void generateDoBlockCall(Value * inputStreams, Value * inputScalars);
    58     void generateDoBlockCall(Value * inputStreams);
    59     int getSegmentBlocks();
    60     Function * getDoBlockFunction();
    61     Type * getKernelStructType();
    62     Value * getKernelStructParam();
     // Returns mKernelStructType; callers in this changeset wrap it in a pointer type before use.
     112inline llvm::Type * KernelBuilder::getKernelStructType() const{
     113    return mKernelStructType;
     114}
    63115
    64     void changeKernelInternalState(Value * kernelStruct, int idx, Value * stateValue);
    65     Value * getKernelInternalState(Value * kernelStruct, int idx);
    66     Value * getKernelInternalStatePtr(Value * kernelStruct, int idx);
     // Returns mKernelParam.
     116inline llvm::Value * KernelBuilder::getKernelStructParam() const {
     117    return mKernelParam;
     118}
    67119
    68 private:
    69         Module *                            mMod;
    70     IDISA::IDISA_Builder *              iBuilder;
    71     std::string                                                 mKernelName;
    72     int                                 mPredifinedStates;
    73     Type*                               mBitBlockType;
    74     std::vector<Type *>                                 mStates;
    75     std::vector<Type *>                 mInputStreams;
    76     std::vector<Type *>                 mOutputStreams;
    77     std::vector<Type *>                 mInputScalars;
    78     std::vector<Type *>                 mOutputAccums;
    79     std::vector<std::string>            mInputStreamNames;
    80     std::vector<std::string>            mInputScalarNames;
    81     Function*                                                   mConstructor;
    82     Function*                                                   mInitFunction;
    83     Function*                                                   mDoBlockFunction;
    84     int                                 mBufferSize;
    85     int                                 mBlockSize;
    86     int                                 mSegmentBlocks;
    87     Type *                              mKernelStructType;
    88     Type *                              mInputStreamType;   
    89     Type *                              mInputScalarType;
    90     Value*                              mKernelStruct;
    91     Value*                              mKernelStructParam;
    92 };
     // Stores `blocks` in mCircularBufferModulo; getOffset() adds this to mBlocksPerSegment to obtain the
     // capacity used for wrapping indices.
     120inline void KernelBuilder::setCircularBufferSize(const unsigned blocks) {
     121    mCircularBufferModulo = blocks;
     122}
     123
     // Stores `blocks` in mBlocksPerSegment, the value reported by getSegmentBlocks().
     124inline void KernelBuilder::setBlocksPerSegment(const unsigned blocks) {
     125    mBlocksPerSegment = blocks;
     126}
     127
     // Advances mSegmentIndex by one; getOffset() folds mSegmentIndex into the constant part of each index.
     128inline void KernelBuilder::increment() {
     129    ++mSegmentIndex;
     130}
    93131
    94132#endif // KERNEL_H
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r4945 r4959  
    1414#include <pablo/pablo_compiler.h>
    1515
     16using namespace pablo;
    1617
    1718PipelineBuilder::PipelineBuilder(Module * m, IDISA::IDISA_Builder * b)
     
    2526
    2627}
     28
    // Frees the three KernelBuilder objects that CreateKernels allocates with `new`.
    // NOTE(review): this is only safe if the three pointers are null-initialised when CreateKernels was
    // never called; the constructor body is elided in this view — confirm.
    2729PipelineBuilder::~PipelineBuilder(){
     30    delete mS2PKernel;
     31    delete mICgrepKernel;
     32    delete mScanMatchKernel;
    2833}
    2934
    30 void PipelineBuilder::CreateKernels(pablo::PabloFunction * function, bool isNameExpression){
     35void PipelineBuilder::CreateKernels(PabloFunction * function, bool isNameExpression){
    3136    mS2PKernel = new KernelBuilder("s2p", mMod, iBuilder);
    3237    mICgrepKernel = new KernelBuilder("icgrep", mMod, iBuilder);
    3338    mScanMatchKernel = new KernelBuilder("scanMatch", mMod, iBuilder);
    34 
    3539
    3640    generateS2PKernel(mMod, iBuilder, mS2PKernel);
     
    3842
    3943    pablo_function_passes(function);
    40          
    41     pablo::PabloCompiler pablo_compiler(mMod, iBuilder);
     44
     45    PabloCompiler pablo_compiler(mMod, iBuilder);
    4246    try {
    4347        pablo_compiler.setKernel(mICgrepKernel);
     
    5155        exit(1);
    5256    }
    53 
    5457}
    5558
     
    5760    Type * T = iBuilder->getIntNTy(64);   
    5861    Type * S = PointerType::get(iBuilder->getIntNTy(8), 0);
    59     Type * inputType = PointerType::get(ArrayType::get(StructType::get(mMod->getContext(), std::vector<Type *>({ArrayType::get(mBitBlockType, 8)})), 1), 0);
    60  
    61     Constant* c = mMod->getOrInsertFunction("Main", Type::getVoidTy(mMod->getContext()), inputType, T, S, T, NULL);
    62     Function* mMainFunction = cast<Function>(c);
    63     mMainFunction->setCallingConv(CallingConv::C);
    64     Function::arg_iterator args = mMainFunction->arg_begin();
    65 
     62    Type * inputType = PointerType::get(ArrayType::get(StructType::get(mMod->getContext(), std::vector<Type *>({ArrayType::get(mBitBlockType, 8)})), 1), 0);
     63    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", Type::getVoidTy(mMod->getContext()), inputType, T, S, T, nullptr));
     64    main->setCallingConv(CallingConv::C);
     65    Function::arg_iterator args = main->arg_begin();
    6666
    6767    Value* input_param = args++;
     
    7474    finalLineUnterminated_param->setName("finalLineUnterminated");
    7575
    76     iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mMainFunction,0));
    77 
     76    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
    7877
    7978    BasicBlock * entry_block = iBuilder->GetInsertBlock();
    80     BasicBlock * pipeline_test_block = BasicBlock::Create(mMod->getContext(), "pipeline_test_block", mMainFunction, 0);
    81     BasicBlock * pipeline_do_block = BasicBlock::Create(mMod->getContext(), "pipeline_do_block", mMainFunction, 0);
    82     BasicBlock * pipeline_final_block = BasicBlock::Create(mMod->getContext(), "pipeline_final_block", mMainFunction, 0);
    83     BasicBlock * pipeline_partial_block = BasicBlock::Create(mMod->getContext(), "pipeline_partial_block", mMainFunction, 0);
    84     BasicBlock * pipeline_empty_block = BasicBlock::Create(mMod->getContext(), "pipeline_empty_block", mMainFunction, 0);
    85     BasicBlock * pipeline_end_block = BasicBlock::Create(mMod->getContext(), "pipeline_end_block", mMainFunction, 0);   
    86     BasicBlock * pipeline_Unterminated_block = BasicBlock::Create(mMod->getContext(), "pipeline_Unterminated_block", mMainFunction, 0);
    87     BasicBlock * pipeline_return_block = BasicBlock::Create(mMod->getContext(), "pipeline_return_block", mMainFunction, 0);
     79    BasicBlock * pipeline_test_block = BasicBlock::Create(mMod->getContext(), "pipeline_test_block", main, 0);
     80    BasicBlock * pipeline_do_block = BasicBlock::Create(mMod->getContext(), "pipeline_do_block", main, 0);
     81    BasicBlock * pipeline_final_block = BasicBlock::Create(mMod->getContext(), "pipeline_final_block", main, 0);
     82    BasicBlock * pipeline_partial_block = BasicBlock::Create(mMod->getContext(), "pipeline_partial_block", main, 0);
     83    BasicBlock * pipeline_empty_block = BasicBlock::Create(mMod->getContext(), "pipeline_empty_block", main, 0);
     84    BasicBlock * pipeline_end_block = BasicBlock::Create(mMod->getContext(), "pipeline_end_block", main, 0);
     85    BasicBlock * pipeline_Unterminated_block = BasicBlock::Create(mMod->getContext(), "pipeline_Unterminated_block", main, 0);
     86    BasicBlock * pipeline_return_block = BasicBlock::Create(mMod->getContext(), "pipeline_return_block", main, 0);
    8887
    8988    Value * s2pKernelStruct = mS2PKernel->generateKernelInstance();
     
    9190    Value * scanMatchKernelStruct = mScanMatchKernel->generateKernelInstance();
    9291
     92    Value * gep = iBuilder->CreateGEP(scanMatchKernelStruct, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(mFileBufIdx)});
    9393    Value* filebuf = iBuilder->CreateBitCast(input_param, S);
    94     mScanMatchKernel->changeKernelInternalState(scanMatchKernelStruct, mFileBufIdx, filebuf);
    95     mScanMatchKernel->changeKernelInternalState(scanMatchKernelStruct, mFileSizeIdx, buffersize_param);
    96     mScanMatchKernel->changeKernelInternalState(scanMatchKernelStruct, mFileNameIdx, filename_param);
     94    iBuilder->CreateStore(filebuf, gep);
     95    gep = iBuilder->CreateGEP(scanMatchKernelStruct, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(mFileSizeIdx)});
     96    iBuilder->CreateStore(buffersize_param, gep);
     97    gep = iBuilder->CreateGEP(scanMatchKernelStruct, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(mFileNameIdx)});
     98    iBuilder->CreateStore(filename_param, gep);
    9799
    98100    Value * basis_bits = iBuilder->CreateGEP(s2pKernelStruct, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
     
    112114    iBuilder->SetInsertPoint(pipeline_do_block);
    113115
    114     Value * gep = iBuilder->CreateGEP(input_param, {blkNo_phi});
     116    gep = iBuilder->CreateGEP(input_param, {blkNo_phi});
    115117    Value * update_blkNo = iBuilder->CreateAdd(blkNo_phi, iBuilder->getInt64(1));
    116118    blkNo_phi->addIncoming(update_blkNo, pipeline_do_block);
     
    169171
    170172}
    171 
    172 
    173 
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.h

    r4939 r4959  
    2525}
    2626
    27 namespace pablo { class PabloFunction; }
     27namespace pablo { class PabloFunction; class PabloBlock; }
    2828
    2929using namespace llvm;
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r4957 r4959  
    11/*
    2  *  Copyright (c) 2015 International Characters.
     2 *  Copyright (c) 2016 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 */
    5 #include "kernel.h"
    65#include "s2p_kernel.h"
    7 #include <iostream>
     6#include <kernels/kernel.h>
     7#include <IDISA/idisa_builder.h>
    88
    99const int PACK_LANES = 1;
     
    2626}
    2727
    28 void s2p(IDISA::IDISA_Builder * iBuilder, std::vector<Value*> s, Value* p[]) {
     28inline void s2p(IDISA::IDISA_Builder * iBuilder, Value * input, Value * output[]) {
    2929    Value * bit00224466[4];
    3030    Value * bit11335577[4];
    3131    for (unsigned i = 0; i<4; i++) {
    32         s2p_step(iBuilder, s[2*i], s[2*i+1], iBuilder->simd_himask(2), 1, bit00224466[i], bit11335577[i]);
     32        Value * s0 = iBuilder->CreateBlockAlignedLoad(input, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i)});
     33        Value * s1 = iBuilder->CreateBlockAlignedLoad(input, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i + 1)});
     34        s2p_step(iBuilder, s0, s1, iBuilder->simd_himask(2), 1, bit00224466[i], bit11335577[i]);
    3335    }
    3436    Value * bit00004444[2];
     
    4244                 iBuilder->simd_himask(4), 2, bit11115555[j], bit33337777[j]);
    4345    }
    44     s2p_step(iBuilder, bit00004444[0], bit00004444[1], iBuilder->simd_himask(8), 4, p[0], p[4]);
    45     s2p_step(iBuilder, bit11115555[0], bit11115555[1], iBuilder->simd_himask(8), 4, p[1], p[5]);
    46     s2p_step(iBuilder, bit22226666[0], bit22226666[1], iBuilder->simd_himask(8), 4, p[2], p[6]);
    47     s2p_step(iBuilder, bit33337777[0], bit33337777[1], iBuilder->simd_himask(8), 4, p[3], p[7]);
     46    s2p_step(iBuilder, bit00004444[0], bit00004444[1], iBuilder->simd_himask(8), 4, output[0], output[4]);
     47    s2p_step(iBuilder, bit11115555[0], bit11115555[1], iBuilder->simd_himask(8), 4, output[1], output[5]);
     48    s2p_step(iBuilder, bit22226666[0], bit22226666[1], iBuilder->simd_himask(8), 4, output[2], output[6]);
     49    s2p_step(iBuilder, bit33337777[0], bit33337777[1], iBuilder->simd_himask(8), 4, output[3], output[7]);
    4850}
    4951
    50 
    51 void generateS2PKernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder){
    52 
    53     kBuilder->addKernelInputStream(8, "byte_pack");
    54     for(int i=0; i<8; i++)
    55         kBuilder->addKernelOutputStream(1);
    56 
    57     int segBlocks = kBuilder->getSegmentBlocks();
    58 
    59     kBuilder->PrepareDoBlockFunction();   
    60     struct Inputs inputs = kBuilder->openDoBlock();
    61     struct Outputs outputs;
    62 
    63     valptr basis_bit[segBlocks][8];
    64     for(int i=0; i<segBlocks; i++){
    65         s2p(iBuilder, inputs.streams[i], basis_bit[i]);
    66         outputs.streams.push_back(basis_bit[i]);
     // Describes and emits the s2p kernel through kBuilder.
     // Declares one 8-field input stream named "byte_pack" and eight 1-field output streams. Then, for every
     // block in the segment, it runs s2p() on input stream 0 and writes the eight resulting values to output
     // streams 0..7 with block-aligned stores. The llvm::Module parameter is unnamed and unused.
     52void generateS2PKernel(Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
     53    kBuilder->addInputStream(8, "byte_pack");
     54    for(unsigned i = 0; i < 8; ++i) {
     55        kBuilder->addOutputStream(1);
    6756    }
    68 
    69     kBuilder->closeDoBlock(outputs);
    70 
    71     kBuilder->finalizeMethods();
    72 
     57    kBuilder->prepareFunction();
     58    for(unsigned i = 0; i < kBuilder->getSegmentBlocks(); ++i){
     59        Value * output[8];
     60        s2p(iBuilder, kBuilder->getInputStream(0), output);
     61        for (unsigned j = 0; j < 8; ++j) {
     62            iBuilder->CreateBlockAlignedStore(output[j], kBuilder->getOutputStream(j));
     63        }
                // Advance the builder's segment index before the next block is processed.
     64        kBuilder->increment();
     65    }
     66    kBuilder->finalize();
    7367}
    7468
     
    8377
    8478
     79
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.h

    r4939 r4959  
    11/*
    2  *  Copyright (c) 2015 International Characters.
     2 *  Copyright (c) 2016 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 */
    5 
    6 
    75#ifndef S2P_KERNEL_H
    86#define S2P_KERNEL_H
    97
    10 #include <stdio.h>
    11 #include <IDISA/idisa_builder.h>
    12 #include <llvm/IR/Module.h>
     8class KernelBuilder;
    139
    14 void generateS2PKernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
     10namespace llvm { class Module; }
     11
     12namespace IDISA { class IDISA_Builder; }
     13
     14
     15void generateS2PKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
    1516
    1617#endif
    17 
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r4946 r4959  
    77#include "scanmatchgen.h"
    88#include <llvm/IR/Intrinsics.h>
     9#include <IDISA/idisa_builder.h>
     10#include <llvm/Support/raw_os_ostream.h>
     11
     12using namespace llvm;
    913
    1014Value * generateForwardZeroesMask(IDISA::IDISA_Builder * iBuilder, Value * bits) {
     
    3236    return iBuilder->CreateAnd(bits_minus1, bits);
    3337}
    34        
    35        
    36 void generateScanWordRoutine(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder, int scanwordBitWidth, bool isNameExpression) {
     38
     39Function * generateScanWordRoutine(Module * m, IDISA::IDISA_Builder * iBuilder, unsigned scanwordBitWidth, Type * kernelStuctType, bool isNameExpression) {
     40
     41    Function * function = m->getFunction("scan_matches_in_scanword");
     42    if (LLVM_UNLIKELY(function != nullptr)) {
     43        return function;
     44    }
     45
    3746    LLVMContext & ctxt = m->getContext();
    38     Type * T = iBuilder->getIntNTy(scanwordBitWidth);   
     47    Type * T = iBuilder->getIntNTy(scanwordBitWidth);
    3948    Type * S = PointerType::get(iBuilder->getIntNTy(8), 0);
    4049    Type * returnType = StructType::get(ctxt, std::vector<Type *>({T, T}));
    41     Type * kernelStuctType = PointerType::get(kBuilder->getKernelStructType(), 0);
    4250    FunctionType * functionType = FunctionType::get(returnType, std::vector<Type *>({kernelStuctType, T, T, T, T, T}), false);
    43     Function * sFunction;
    44        
     51
    4552    SmallVector<AttributeSet, 6> Attrs;
    4653    Attrs.push_back(AttributeSet::get(ctxt, ~0U, std::vector<Attribute::AttrKind>({ Attribute::NoUnwind, Attribute::UWTable })));
     
    5158    Attrs.push_back(AttributeSet::get(ctxt, 5, std::vector<Attribute::AttrKind>({})));
    5259    AttributeSet AttrSet = AttributeSet::get(ctxt, Attrs);
    53    
    54     sFunction = Function::Create(functionType, GlobalValue::ExternalLinkage, "scan_matches_in_scanword", m);
    55     sFunction->setCallingConv(CallingConv::C);
    56     sFunction->setAttributes(AttrSet);
    57     sFunction->addFnAttr(llvm::Attribute::AlwaysInline);
    58        
    59     Function::arg_iterator args = sFunction->arg_begin();
     60
     61    function = Function::Create(functionType, GlobalValue::ExternalLinkage, "scan_matches_in_scanword", m);
     62    function->setCallingConv(CallingConv::C);
     63    function->setAttributes(AttrSet);
     64    function->addFnAttr(llvm::Attribute::AlwaysInline);
     65
     66    Function::arg_iterator args = function->arg_begin();
    6067    Value * this_input_parm = args++;
    6168    this_input_parm->setName("this");
     
    7582    else
    7683        matchProcessor = m->getOrInsertFunction("wrapped_report_match", Type::getVoidTy(ctxt), T, T, T, S, T, S, nullptr);
    77    
    78     iBuilder->SetInsertPoint(BasicBlock::Create(ctxt, "entry", sFunction,0));
     84
     85    iBuilder->SetInsertPoint(BasicBlock::Create(ctxt, "entry", function,0));
    7986
    8087    BasicBlock * entry_block = iBuilder->GetInsertBlock();
    81     BasicBlock * matches_test_block = BasicBlock::Create(ctxt, "matches_test_block", sFunction, 0);
    82     BasicBlock * process_matches_loop_entry = BasicBlock::Create(ctxt, "process_matches_loop", sFunction, 0);
    83     BasicBlock * prior_breaks_block = BasicBlock::Create(ctxt, "prior_breaks_block", sFunction, 0);
    84     BasicBlock * loop_final_block = BasicBlock::Create(ctxt, "loop_final_block", sFunction, 0);
    85     BasicBlock * matches_done_block = BasicBlock::Create(ctxt, "matches_done_block", sFunction, 0);
    86     BasicBlock * remaining_breaks_block = BasicBlock::Create(ctxt, "remaining_breaks_block", sFunction, 0);
    87     BasicBlock * return_block = BasicBlock::Create(ctxt, "return_block", sFunction, 0);
    88        
    89        
     88    BasicBlock * matches_test_block = BasicBlock::Create(ctxt, "matches_test_block", function, 0);
     89    BasicBlock * process_matches_loop_entry = BasicBlock::Create(ctxt, "process_matches_loop", function, 0);
     90    BasicBlock * prior_breaks_block = BasicBlock::Create(ctxt, "prior_breaks_block", function, 0);
     91    BasicBlock * loop_final_block = BasicBlock::Create(ctxt, "loop_final_block", function, 0);
     92    BasicBlock * matches_done_block = BasicBlock::Create(ctxt, "matches_done_block", function, 0);
     93    BasicBlock * remaining_breaks_block = BasicBlock::Create(ctxt, "remaining_breaks_block", function, 0);
     94    BasicBlock * return_block = BasicBlock::Create(ctxt, "return_block", function, 0);
     95
     96
    9097    // The match scanner works with a loop involving four variables:
    9198    // (a) the bit stream scanword of matches marking the ends of selected records,
    9299    // (b) the bit stream scanword of record_breaks marking the ends of all records,
    93     // (c) the integer lastRecordNum indicating the number of records processed so far, 
     100    // (c) the integer lastRecordNum indicating the number of records processed so far,
    94101    // (d) the index lastRecordStart indicating the file position of the last record.
    95102    // We set up a loop structure, in which a set of 4 phi nodes initialize these
     
    97104    // the loop body.
    98105
    99    
     106
    100107    iBuilder->CreateBr(matches_test_block);
    101108
    102     // LOOP Test Block 
     109    // LOOP Test Block
    103110    iBuilder->SetInsertPoint(matches_test_block);
    104111    PHINode * matches_phi = iBuilder->CreatePHI(T, 2, "matches");
     
    112119    Value * have_matches_cond = iBuilder->CreateICmpNE(matches_phi, ConstantInt::get(T, 0));
    113120    iBuilder->CreateCondBr(have_matches_cond, process_matches_loop_entry, matches_done_block);
    114    
     121
    115122    // LOOP BODY
    116123    // The loop body is entered if we have more matches to process.
    117124    iBuilder->SetInsertPoint(process_matches_loop_entry);
    118125    Value * prior_breaks = iBuilder->CreateAnd(generateForwardZeroesMask(iBuilder, matches_phi), record_breaks_phi);
    119     // Within the loop we have a conditional block that is executed if there are any prior 
     126    // Within the loop we have a conditional block that is executed if there are any prior
    120127    // record breaks.
    121128    Value * prior_breaks_cond = iBuilder->CreateICmpNE(prior_breaks, ConstantInt::get(T, 0));
     
    130137    Value * matchRecordStart = iBuilder->CreateAdd(scanwordPos, iBuilder->CreateSub(width, reverseDistance));
    131138    iBuilder->CreateBr(loop_final_block);
    132    
     139
    133140    // LOOP FINAL BLOCK
    134141    // The prior breaks, if any have been counted.  Set up phi nodes for the recordNum
     
    143150    Value * matchRecordEnd = iBuilder->CreateAdd(scanwordPos, generateCountForwardZeroes(iBuilder, matches_phi));
    144151
    145     Value* filebufptr = kBuilder->getKernelInternalState(this_input_parm, 7);
    146     Value* filesize = kBuilder->getKernelInternalState(this_input_parm, 8);
    147     Value* filenameptr = kBuilder->getKernelInternalState(this_input_parm, 9);
     152    Value* filebuf_gep = iBuilder->CreateGEP(this_input_parm, {iBuilder->getInt64(0), iBuilder->getInt32(0), iBuilder->getInt32(7)});
     153    Value* filebufptr = iBuilder->CreateLoad(filebuf_gep, "filebuf");
     154
     155    Value* filesize_gep = iBuilder->CreateGEP(this_input_parm, {iBuilder->getInt64(0), iBuilder->getInt32(0), iBuilder->getInt32(8)});
     156    Value* filesize = iBuilder->CreateLoad(filesize_gep, "filensize");
     157
     158    Value* filename_gep = iBuilder->CreateGEP(this_input_parm, {iBuilder->getInt64(0), iBuilder->getInt32(0), iBuilder->getInt32(9)});
     159    Value* filenameptr = iBuilder->CreateLoad(filename_gep, "filename");
    148160
    149161    if(isNameExpression)
     
    158170    recordStart_phi->addIncoming(matchRecordStart_phi, loop_final_block);
    159171    iBuilder->CreateBr(matches_test_block);
    160    
    161    
     172
     173
    162174    // LOOP EXIT/MATCHES_DONE
    163175    iBuilder->SetInsertPoint(matches_done_block);
     
    165177    Value * more_breaks_cond = iBuilder->CreateICmpNE(record_breaks_phi, ConstantInt::get(T, 0));
    166178    iBuilder->CreateCondBr(more_breaks_cond, remaining_breaks_block, return_block);
    167    
     179
    168180    // REMAINING_BREAKS_BLOCK: process remaining record breaks after all matches are processed
    169181    iBuilder->SetInsertPoint(remaining_breaks_block);
     
    173185    Value * pendingLineStart = iBuilder->CreateAdd(scanwordPos, iBuilder->CreateSub(width, reverseZeroes));
    174186    iBuilder->CreateBr(return_block);
    175    
     187
    176188    // RETURN block
    177189    iBuilder->SetInsertPoint(return_block);
     
    186198    retVal = iBuilder->CreateInsertValue(retVal, finalRecordCount_phi, 1);
    187199    iBuilder->CreateRet(retVal);
    188    
    189 }
    190 
    191 
    192 void generateScanMatch(Module * m, IDISA::IDISA_Builder * iBuilder, int scanwordBitWidth, KernelBuilder * kBuilder, bool isNameExpression){
    193    
    194    
    195     Type * T = iBuilder->getIntNTy(scanwordBitWidth);
     200
     201    return function;
     202}
     203
     204
     // Describes and emits the scan-match kernel through kBuilder.
     //
     // Declares two 1-field input streams ("matches", "breaks") and six internal state slots of types
     // T, T, T, S, T, S, where T is an integer of scanWordBitWidth bits and S is a pointer to i8. After
     // prepareFunction(), it obtains the "scan_matches_in_scanword" routine and emits code at the kernel
     // function's entry block. That code loads the position / record-start / record-number state, calls the
     // routine once per scan word for every block of the segment, and stores the three values back.
     //
     // m                  module that receives (or already holds) the scan word routine
     // iBuilder           IR builder used to emit all instructions
     // scanWordBitWidth   width in bits of one scan word
     // kBuilder           kernel builder being populated
     // isNameExpression   forwarded unchanged to generateScanWordRoutine
     205void generateScanMatch(Module * m, IDISA::IDISA_Builder * iBuilder, unsigned scanWordBitWidth, KernelBuilder * kBuilder, bool isNameExpression){
     206
     207
     208    Type * T = iBuilder->getIntNTy(scanWordBitWidth);
    196209    Type * S = PointerType::get(iBuilder->getIntNTy(8), 0);
    197     int fieldCount = iBuilder->getBitBlockWidth()/scanwordBitWidth;
            // Number of scan words contained in one bit block.
     210    const unsigned fieldCount = iBuilder->getBitBlockWidth() / scanWordBitWidth;
    198211    Type * scanwordVectorType =  VectorType::get(T, fieldCount);
    199212
    200     kBuilder->addKernelInputStream(1, "matches");
    201     kBuilder->addKernelInputStream(1, "breaks");
     213    kBuilder->addInputStream(1, "matches");
     214    kBuilder->addInputStream(1, "breaks");
    202215    //use index
    203     int blockPosIdx = kBuilder->extendKernelInternalStateType(T);
    204     int lineStartIdx = kBuilder->extendKernelInternalStateType(T);
    205     int lineNumIdx = kBuilder->extendKernelInternalStateType(T);
    206     kBuilder->extendKernelInternalStateType(S);
    207     kBuilder->extendKernelInternalStateType(T);
    208     kBuilder->extendKernelInternalStateType(S);
    209 
    210     int segBlocks = kBuilder->getSegmentBlocks();
    211 
    212     kBuilder->PrepareDoBlockFunction();
    213 
    214     generateScanWordRoutine(m, iBuilder, kBuilder, scanwordBitWidth, isNameExpression);
    215 
    216     struct Inputs inputs = kBuilder->openDoBlock();
    217     struct Outputs outputs;   
     216    unsigned blockPosIdx = kBuilder->addInternalStateType(T);
     217    unsigned lineStartIdx = kBuilder->addInternalStateType(T);
     218    unsigned lineNumIdx = kBuilder->addInternalStateType(T);
            // The indices of the next three slots are not kept here.
            // NOTE(review): presumably these are the file buffer, file size and file name slots that the
            // pipeline fills in and the scan word routine reads — confirm.
     219    kBuilder->addInternalStateType(S);
     220    kBuilder->addInternalStateType(T);
     221    kBuilder->addInternalStateType(S);
     222
     223    Function * function = kBuilder->prepareFunction();
     224
     225    Type * kernelStuctType = PointerType::get(kBuilder->getKernelStructType(), 0);
     226
     227    Function * scanWordFunction = generateScanWordRoutine(m, iBuilder, scanWordBitWidth, kernelStuctType, isNameExpression);
     228
            // generateScanWordRoutine moves the builder's insertion point when it creates the routine, so
            // point it back at the kernel function before emitting the kernel body.
     229    iBuilder->SetInsertPoint(&function->getEntryBlock());
     230
    218231    Value * kernelStuctParam = kBuilder->getKernelStructParam();
    219    
    220     Value * scanwordPos = kBuilder->getKernelInternalState(kernelStuctParam, blockPosIdx);
    221     Value * recordStart = kBuilder->getKernelInternalState(kernelStuctParam, lineStartIdx);
    222     Value * recordNum = kBuilder->getKernelInternalState(kernelStuctParam, lineNumIdx);
     232
     233    Value * scanwordPos = iBuilder->CreateBlockAlignedLoad(kBuilder->getInternalState(blockPosIdx));
     234    Value * recordStart = iBuilder->CreateBlockAlignedLoad(kBuilder->getInternalState(lineStartIdx));
     235    Value * recordNum = iBuilder->CreateBlockAlignedLoad(kBuilder->getInternalState(lineNumIdx));
    223236    Value * wordResult = nullptr;
    224237
    225     Function * wordScanFcn = m->getFunction("scan_matches_in_scanword");
    226     for(int j=0; j<segBlocks; j++){
    227         Value * matchWordVector = iBuilder->CreateBitCast(inputs.streams[j][0], scanwordVectorType);
    228         Value * breakWordVector = iBuilder->CreateBitCast(inputs.streams[j][1], scanwordVectorType);
    229         for(int i=0; i<segBlocks*iBuilder->getBitBlockWidth()/scanwordBitWidth; i++){
    230 
     238    const unsigned segmentBlocks = kBuilder->getSegmentBlocks();
     239    const unsigned scanWordBlocks =  segmentBlocks * fieldCount;
     240
     241    for(unsigned j = 0; j < segmentBlocks; ++j) {
     242
     243        Value * matchWordVector = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(0)), scanwordVectorType);
     244        Value * breakWordVector = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(1)), scanwordVectorType);
     245
                // NOTE(review): scanWordBlocks is segmentBlocks * fieldCount, but matchWordVector and
                // breakWordVector have only fieldCount elements. For segmentBlocks > 1 the extract index i
                // runs past the vector width and scanwordPos is advanced too many times per block. The
                // bound looks like it should be fieldCount — confirm.
     246        for(unsigned i = 0; i < scanWordBlocks; ++i){
    231247            Value * matchWord = iBuilder->CreateExtractElement(matchWordVector, ConstantInt::get(T, i));
    232248            Value * recordBreaksWord = iBuilder->CreateExtractElement(breakWordVector, ConstantInt::get(T, i));
    233             wordResult = iBuilder->CreateCall(wordScanFcn, std::vector<Value *>({kernelStuctParam, matchWord, recordBreaksWord, scanwordPos, recordStart, recordNum}));
    234             scanwordPos = iBuilder->CreateAdd(scanwordPos, ConstantInt::get(T, scanwordBitWidth));
     249            wordResult = iBuilder->CreateCall(scanWordFunction, std::vector<Value *>({kernelStuctParam, matchWord, recordBreaksWord, scanwordPos, recordStart, recordNum}));
     250            scanwordPos = iBuilder->CreateAdd(scanwordPos, ConstantInt::get(T, scanWordBitWidth));
                    // The routine returns a two-field struct: field 0 feeds recordStart, field 1 feeds recordNum.
    235251            recordStart = iBuilder->CreateExtractValue(wordResult, std::vector<unsigned>({0}));
    236252            recordNum = iBuilder->CreateExtractValue(wordResult, std::vector<unsigned>({1}));
    237253        }
     254        kBuilder->increment();
    238255    }
    239256
    240     kBuilder->changeKernelInternalState(kernelStuctParam, blockPosIdx, scanwordPos);
    241     kBuilder->changeKernelInternalState(kernelStuctParam, lineStartIdx, recordStart);
    242     kBuilder->changeKernelInternalState(kernelStuctParam, lineNumIdx, recordNum);
    243 
    244     kBuilder->closeDoBlock(outputs);
    245 
    246     kBuilder->finalizeMethods();
    247 }
    248 
    249 
    250 
            // Write the updated position, record start and record number back to their state slots.
     257    kBuilder->setInternalState(blockPosIdx, scanwordPos);
     258    kBuilder->setInternalState(lineStartIdx, recordStart);
     259    kBuilder->setInternalState(lineNumIdx, recordNum);
     260
     261    kBuilder->finalize();
     262}
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.h

    r4939 r4959  
    1 #ifndef SCANMATCHGEN_H
    2 #define SCANMATCHGEN_H
    31/*
    42 *  Copyright (c) 2016 International Characters.
    53 *  This software is licensed to the public under the Open Software License 3.0.
    64 */
     5#ifndef SCANMATCHGEN_H
     6#define SCANMATCHGEN_H
    77
     8class KernelBuilder;
    89
    9 #include <IDISA/idisa_builder.h>
    10 #include <llvm/IR/Function.h>
    11 #include <llvm/IR/Module.h>
    12        
    13 void generateScanBitBlockRoutine(Module * m, IDISA::IDISA_Builder * iBuilder, int segBitWidth, Type * kernelStuctType, bool isNameExpression);
     10namespace llvm { class Module; }
    1411
    15 void generateScanMatch(Module * m, IDISA::IDISA_Builder * iBuilder, int segBitWidth, KernelBuilder * kBuilder, bool isNameExpression);
     12namespace IDISA { class IDISA_Builder; }
     13
     14void generateScanMatch(llvm::Module * m, IDISA::IDISA_Builder * iBuilder, unsigned scanWordBitWidth, KernelBuilder * kBuilder, bool isNameExpression);
    1615
    1716#endif // SCANMATCHGEN_H
  • icGREP/icgrep-devel/icgrep/pablo/analysis/pabloverifier.cpp

    r4919 r4959  
    218218 * @brief verifyProgramStructure
    219219 ** ------------------------------------------------------------------------------------------------------------- */
    220 void verifyProgramStructure(const PabloBlock * block) {
     220void verifyProgramStructure(const PabloBlock * block, unsigned & nestingDepth) {
    221221    const Statement * prev = nullptr;
    222222    for (const Statement * stmt : *block) {
     
    279279                }
    280280            }
    281             verifyProgramStructure(nested);
    282         }
    283     }
     281            ++nestingDepth;
     282            verifyProgramStructure(nested, nestingDepth);
     283            --nestingDepth;
     284        }
     285    }   
    284286}
    285287
    286288inline void verifyProgramStructure(const PabloFunction & function) {
    287     verifyProgramStructure(function.getEntryBlock());
     289    unsigned nestingDepth = 0;
     290    verifyProgramStructure(function.getEntryBlock(), nestingDepth);
     291    if (LLVM_UNLIKELY(nestingDepth != 0)) {
     292        // This error cannot actually occur with the current AST structure, but that could change
     293        // in the future. This test is left in as a reminder to check for it.
     294        throw std::runtime_error("PabloVerifier: unbalanced If or While nesting depth.");
     295    }
    288296}
    289297
  • icGREP/icgrep-devel/icgrep/pablo/builder.cpp

    r4722 r4959  
    8282}
    8383
     84PabloAST * PabloBuilder::createLookahead(PabloAST * expr, PabloAST * shiftAmount) {
     85    MAKE_BINARY(createLookahead, PabloAST::ClassTypeId::Lookahead, expr, shiftAmount);
     86    return result;
     87}
     88
     89PabloAST * PabloBuilder::createLookahead(PabloAST * expr, PabloAST * shiftAmount, const std::string prefix) {
     90    MAKE_BINARY(createLookahead, PabloAST::ClassTypeId::Lookahead, expr, shiftAmount, prefix);
     91    return result;
     92}
     93
    8494PabloAST * PabloBuilder::createMod64Advance(PabloAST * expr, PabloAST * shiftAmount) {
    8595    MAKE_BINARY(createMod64Advance, PabloAST::ClassTypeId::Mod64Advance, expr, shiftAmount);
     
    92102}
    93103
     104PabloAST * PabloBuilder::createMod64Lookahead(PabloAST * expr, PabloAST * shiftAmount) {
     105    MAKE_BINARY(createMod64Lookahead, PabloAST::ClassTypeId::Mod64Lookahead, expr, shiftAmount);
     106    return result;
     107}
     108
     109PabloAST * PabloBuilder::createMod64Lookahead(PabloAST * expr, PabloAST * shiftAmount, const std::string prefix) {
     110    MAKE_BINARY(createMod64Lookahead, PabloAST::ClassTypeId::Mod64Lookahead, expr, shiftAmount, prefix);
     111    return result;
     112}
     113
    94114PabloAST * PabloBuilder::createNot(PabloAST * expr) {
    95115    MAKE_UNARY(createNot, PabloAST::ClassTypeId::Not, expr);
  • icGREP/icgrep-devel/icgrep/pablo/builder.hpp

    r4870 r4959  
    8686
    8787    PabloAST * createAdvance(PabloAST * expr, PabloAST * shiftAmount, const std::string prefix);
     88
     89    inline PabloAST * createLookahead(PabloAST * expr, const Integer::Type shiftAmount) {
     90        if (shiftAmount == 0) {
     91            return expr;
     92        }
     93        return createLookahead(expr, mPb->getInteger(shiftAmount));
     94    }
     95
     96    PabloAST * createLookahead(PabloAST * expr, PabloAST * shiftAmount);
     97
     98    inline PabloAST * createLookahead(PabloAST * expr, const Integer::Type shiftAmount, const std::string prefix) {
     99        if (shiftAmount == 0) {
     100            return expr;
     101        }
     102        return createLookahead(expr, mPb->getInteger(shiftAmount), prefix);
     103    }
     104
     105    PabloAST * createLookahead(PabloAST * expr, PabloAST * shiftAmount, const std::string prefix);
    88106
    89107    inline Next * createNext(Assign * assign, PabloAST * expr) {
     
    134152    }
    135153
     154    PabloAST * createMod64Advance(PabloAST * expr, PabloAST * shiftAmount, const std::string prefix);
     155
    136156    inline PabloAST * createMod64Advance(PabloAST * expr, const Integer::Type shiftAmount, const std::string prefix) {
    137157        if (shiftAmount == 0) {
     
    141161    }
    142162
    143     PabloAST * createMod64Advance(PabloAST * expr, PabloAST * shiftAmount, const std::string prefix);
     163    PabloAST * createMod64Lookahead(PabloAST * expr, PabloAST * shiftAmount);
     164
     165    inline PabloAST * createMod64Lookahead(PabloAST * expr, const Integer::Type shiftAmount) {
     166        if (shiftAmount == 0) {
     167            return expr;
     168        }
     169        return createMod64Lookahead(expr, mPb->getInteger(shiftAmount));
     170    }
     171
     172    PabloAST * createMod64Lookahead(PabloAST * expr, PabloAST * shiftAmount, const std::string prefix);
     173
     174    inline PabloAST * createMod64Lookahead(PabloAST * expr, const Integer::Type shiftAmount, const std::string prefix) {
     175        if (shiftAmount == 0) {
     176            return expr;
     177        }
     178        return createMod64Lookahead(expr, mPb->getInteger(shiftAmount), prefix);
     179    }
    144180
    145181    PabloAST * createMod64MatchStar(PabloAST * marker, PabloAST * charclass);
  • icGREP/icgrep-devel/icgrep/pablo/carry_data.cpp

    r4942 r4959  
    1515    for (Statement * stmt : *theScope) {
    1616        if (Advance * adv = dyn_cast<Advance>(stmt)) {
    17             unsigned shift_amount = adv->getAdvanceAmount();
     17            unsigned shift_amount = adv->getAmount();
    1818            if (shift_amount == 1) {
    19                 adv->setLocalAdvanceIndex(unitAdvance.entries);
     19                adv->setLocalIndex(unitAdvance.entries);
    2020                unitAdvance.entries++;               
    2121            }
     
    2929                        shortAdvance.allocatedBits = alignCeiling(shortAdvance.allocatedBits, mPackSize);
    3030                    }
    31                     adv->setLocalAdvanceIndex(shortAdvance.allocatedBits);
     31                    adv->setLocalIndex(shortAdvance.allocatedBits);
    3232                }
    3333                else {
    34                     adv->setLocalAdvanceIndex(shortAdvance.entries);
     34                    adv->setLocalIndex(shortAdvance.entries);
    3535                }
    3636                shortAdvance.entries++;
     
    3838            }
    3939            else {
    40                 adv->setLocalAdvanceIndex(longAdvance.allocatedBitBlocks);
     40                adv->setLocalIndex(longAdvance.allocatedBitBlocks);
    4141                longAdvance.entries++;
    4242                longAdvance.allocatedBitBlocks += longAdvanceBufferSize(shift_amount);
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r4954 r4959  
    2020
    2121/** ------------------------------------------------------------------------------------------------------------- *
    22  * @brief doScopeCount
    23  ** ------------------------------------------------------------------------------------------------------------- */
    24 static unsigned doScopeCount(const PabloBlock * const pb) {
    25     unsigned count = 1;
    26     for (const Statement * stmt : *pb) {
    27         if (LLVM_UNLIKELY(isa<If>(stmt))) {
    28             count += doScopeCount(cast<If>(stmt)->getBody());
    29         } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
    30             count += doScopeCount(cast<While>(stmt)->getBody());
    31         }
    32     }
    33     return count;
    34 }
    35 
    36 /** ------------------------------------------------------------------------------------------------------------- *
    3722 * @brief initialize
    3823 ** ------------------------------------------------------------------------------------------------------------- */
    39 void CarryManager::initialize(PabloBlock * pb, KernelBuilder * kBuilder) {
    40     mRootScope = pb;
    41     mCarryInfoVector.resize(doScopeCount(pb));
     24void CarryManager::initialize(PabloFunction * const function, KernelBuilder * const kBuilder) {
     25    mRootScope = function->getEntryBlock();
     26    mCarryInfoVector.resize(mRootScope->enumerateScopes(0) + 1);
    4227    mCarryPackType = mBitBlockType;
    43 
    44     const unsigned totalCarryDataSize = enumerate(pb, 0, 0);
    45 
     28    const unsigned totalCarryDataSize = std::max<unsigned>(enumerate(mRootScope, 0, 0), 1);
    4629    mCarryPackPtr.resize(totalCarryDataSize, nullptr);
    4730    mCarryInPack.resize(totalCarryDataSize, nullptr);
    4831    mCarryOutPack.resize(totalCarryDataSize, nullptr);
    49 
    5032    mTotalCarryDataBitBlocks = totalCarryDataSize;
    51    
    5233    ArrayType* cdArrayTy = ArrayType::get(mBitBlockType, mTotalCarryDataBitBlocks);
    53     mCdArrayIdx = kBuilder->extendKernelInternalStateType(cdArrayTy);
    54    
     34    mCdArrayIdx = kBuilder->addInternalStateType(cdArrayTy);
    5535    if (mPabloCountCount > 0) {
    5636        ArrayType* pcArrayTy = ArrayType::get(iBuilder->getIntNTy(64), mPabloCountCount);
    57         mPcArrayIdx = kBuilder->extendKernelInternalStateType(pcArrayTy);
    58     }
    59  
     37        mPcArrayIdx = kBuilder->addInternalStateType(pcArrayTy);
     38    }
    6039    mCurrentScope = mRootScope;
    6140    mCurrentFrameIndex = 0;
     
    6443}
    6544
     45/** ------------------------------------------------------------------------------------------------------------- *
     46 * @brief initialize_setPtrs
     47 ** ------------------------------------------------------------------------------------------------------------- */
    6648void CarryManager::initialize_setPtrs(KernelBuilder * kBuilder) {
    67 
    68     Value * kernelStuctParam = kBuilder->getKernelStructParam();
    69     Value * cdArrayPtr = kBuilder->getKernelInternalStatePtr(kernelStuctParam, mCdArrayIdx);
    70  
     49    Value * cdArrayPtr = kBuilder->getInternalState(mCdArrayIdx);
    7150    mCarryPackBasePtr = iBuilder->CreateBitCast(cdArrayPtr, PointerType::get(mCarryPackType, 0));
    72     mCarryBitBlockPtr = iBuilder->CreateBitCast(cdArrayPtr, PointerType::get(mBitBlockType, 0));   
    73    
     51    mCarryBitBlockPtr = iBuilder->CreateBitCast(cdArrayPtr, PointerType::get(mBitBlockType, 0));
    7452    if (mPabloCountCount > 0) {
    75         Value * pcArrayPtr = kBuilder->getKernelInternalStatePtr(kernelStuctParam, mPcArrayIdx);
     53        Value * pcArrayPtr = kBuilder->getInternalState(mPcArrayIdx);
    7654        mPopcountBasePtr = iBuilder->CreateBitCast(pcArrayPtr, Type::getInt64PtrTy(iBuilder->getContext()));
    7755    }
    78  
    79     mBlockNo = iBuilder->CreateUDiv(kBuilder->getKernelInternalState(kernelStuctParam, mFilePosIdx), iBuilder->getInt64(mBitBlockWidth));
     56    setBlockNo(kBuilder);
    8057    mCurrentScope = mRootScope;
    8158    mCurrentFrameIndex = 0;
     
    8461}
    8562
    86 void CarryManager::set_BlockNo(KernelBuilder * kBuilder){
    87     Value * kernelStuctParam = kBuilder->getKernelStructParam();
    88     mBlockNo = iBuilder->CreateUDiv(kBuilder->getKernelInternalState(kernelStuctParam, mFilePosIdx), iBuilder->getInt64(mBitBlockWidth));
     63/** ------------------------------------------------------------------------------------------------------------- *
     64 * @brief setBlockNo
     65 ** ------------------------------------------------------------------------------------------------------------- */
     66void CarryManager::setBlockNo(KernelBuilder * kBuilder) {
     67    mBlockNo = iBuilder->CreateUDiv(iBuilder->CreateBlockAlignedLoad(kBuilder->getInternalState(mFilePosIdx)), iBuilder->getInt64(mBitBlockWidth));
    8968}
    9069
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.h

    r4941 r4959  
    4848    , mCarryBitBlockPtr(nullptr)
    4949    , mPopcountBasePtr(nullptr)
    50     , mBlockNoPtr(nullptr)
    5150    , mBlockNo(nullptr)
    5251    , mPabloCountCount(0)
     
    6059    ~CarryManager();
    6160   
    62     void initialize(PabloBlock * blk, KernelBuilder * kBuilder);
     61    void initialize(PabloFunction * const function, KernelBuilder * const kBuilder);
    6362
    64     void initialize_setPtrs(KernelBuilder * kBuilder);
     63    void initialize_setPtrs(KernelBuilder * const kBuilder);
    6564
    66     void set_BlockNo(KernelBuilder * kBuilder);
     65    void setBlockNo(KernelBuilder * kBuilder);
     66    Value * getBlockNo() const;
    6767   
    6868    unsigned enumerate(PabloBlock * blk, unsigned ifDepth, unsigned whileDepth);
    69        
    70     Value * getBlockNoPtr() const;
    71    
     69         
    7270    /* Entering and leaving scopes. */
    7371   
     
    145143    Value * mCarryBitBlockPtr;
    146144    Value * mPopcountBasePtr;
    147     Value * mBlockNoPtr;
    148145    Value * mBlockNo;
    149146    unsigned mPabloCountCount; // Number of Pablo "Count" operations
     
    166163}
    167164
    168 inline Value * CarryManager::getBlockNoPtr() const {
    169     return mBlockNoPtr;
     165inline Value * CarryManager::getBlockNo() const {
     166    return mBlockNo;
    170167}
    171168
  • icGREP/icgrep-devel/icgrep/pablo/codegenstate.cpp

    r4896 r4959  
    4444/// UNARY CREATE FUNCTIONS
    4545
    46 Assign * PabloBlock::createAssign(const std::string && prefix, PabloAST * expr)  {
     46Assign * PabloBlock::createAssign(const std::string && prefix, PabloAST * const expr)  {
     47    assert ("Assign expression cannot be null!" && expr);
    4748    return insertAtInsertionPoint(new Assign(expr, makeName(prefix, false)));
    4849}
     
    7273    if (isa<Zeroes>(expr) || shiftAmount == 0) {
    7374        return renameNonNamedNode(expr, std::move(prefix));
    74     }   
     75    }
    7576    return insertAtInsertionPoint(new Advance(expr, getInteger(shiftAmount), makeName(prefix, false)));
     77}
     78
     79PabloAST * PabloBlock::createLookahead(PabloAST * expr, PabloAST * shiftAmount) {
     80    if (isa<Zeroes>(expr) || cast<Integer>(shiftAmount)->value() == 0) {
     81        return expr;
     82    }
     83    return insertAtInsertionPoint(new Lookahead(expr, shiftAmount, makeName("lookahead")));
     84}
     85
     86PabloAST * PabloBlock::createLookahead(PabloAST * expr, PabloAST * shiftAmount, const std::string prefix) {
     87    if (isa<Zeroes>(expr) || cast<Integer>(shiftAmount)->value() == 0) {
     88        return expr;
     89    }
     90    return insertAtInsertionPoint(new Lookahead(expr, shiftAmount, makeName(prefix, false)));
     91}
     92
     93PabloAST * PabloBlock::createLookahead(PabloAST * expr, const Integer::Type shiftAmount) {
     94    if (isa<Zeroes>(expr) || shiftAmount == 0) {
     95        return expr;
     96    }
     97    return insertAtInsertionPoint(new Lookahead(expr, getInteger(shiftAmount), makeName("lookahead")));
     98}
     99
     100PabloAST * PabloBlock::createLookahead(PabloAST * expr, const Integer::Type shiftAmount, const std::string prefix) {
     101    if (isa<Zeroes>(expr) || shiftAmount == 0) {
     102        return renameNonNamedNode(expr, std::move(prefix));
     103    }
     104    return insertAtInsertionPoint(new Lookahead(expr, getInteger(shiftAmount), makeName(prefix, false)));
    76105}
    77106
     
    141170    }   
    142171    return insertAtInsertionPoint(new Mod64Advance(expr, getInteger(shiftAmount), makeName(prefix, false)));
     172}
     173
     174PabloAST * PabloBlock::createMod64Lookahead(PabloAST * expr, PabloAST * shiftAmount) {
     175    if (isa<Zeroes>(expr) || cast<Integer>(shiftAmount)->value() == 0) {
     176        return expr;
     177    }
     178    return insertAtInsertionPoint(new Mod64Lookahead(expr, shiftAmount, makeName("advance")));
     179}
     180
     181PabloAST * PabloBlock::createMod64Lookahead(PabloAST * expr, PabloAST * shiftAmount, const std::string prefix) {
     182    if (isa<Zeroes>(expr) || cast<Integer>(shiftAmount)->value() == 0) {
     183        return expr;
     184    }
     185    return insertAtInsertionPoint(new Mod64Lookahead(expr, shiftAmount, makeName(prefix, false)));
     186}
     187
     188PabloAST * PabloBlock::createMod64Lookahead(PabloAST * expr, const Integer::Type shiftAmount) {
     189    if (isa<Zeroes>(expr) || shiftAmount == 0) {
     190        return expr;
     191    }
     192    return insertAtInsertionPoint(new Mod64Lookahead(expr, getInteger(shiftAmount), makeName("advance")));
     193}
     194
     195PabloAST * PabloBlock::createMod64Lookahead(PabloAST * expr, const Integer::Type shiftAmount, const std::string prefix) {
     196    if (isa<Zeroes>(expr) || shiftAmount == 0) {
     197        return renameNonNamedNode(expr, std::move(prefix));
     198    }
     199    return insertAtInsertionPoint(new Mod64Lookahead(expr, getInteger(shiftAmount), makeName(prefix, false)));
    143200}
    144201
  • icGREP/icgrep-devel/icgrep/pablo/codegenstate.h

    r4927 r4959  
    1111#include <pablo/symbol_generator.h>
    1212#include <pablo/pe_advance.h>
     13#include <pablo/pe_lookahead.h>
    1314#include <pablo/pe_and.h>
    1415#include <pablo/pe_call.h>
     
    7172    PabloAST * createAdvance(PabloAST * expr, PabloAST * shiftAmount, const std::string prefix);
    7273
     74    PabloAST * createLookahead(PabloAST * expr, const Integer::Type shiftAmount);
     75
     76    PabloAST * createLookahead(PabloAST * expr, PabloAST * shiftAmount);
     77
     78    PabloAST * createLookahead(PabloAST * expr, const Integer::Type shiftAmount, const std::string prefix);
     79
     80    PabloAST * createLookahead(PabloAST * expr, PabloAST * shiftAmount, const std::string prefix);
     81
    7382    static inline Zeroes * createZeroes() {
    7483        return &mZeroes;
     
    93102    }
    94103
    95     Assign * createAssign(const std::string && prefix, PabloAST * expr);
     104    Assign * createAssign(const std::string && prefix, PabloAST * const expr);
    96105
    97106    inline Var * createVar(const std::string name) {
     
    192201
    193202    PabloAST * createMod64Advance(PabloAST * expr, PabloAST * shiftAmount, const std::string prefix);
     203
     204    PabloAST * createMod64Lookahead(PabloAST * expr, const Integer::Type shiftAmount);
     205
     206    PabloAST * createMod64Lookahead(PabloAST * expr, PabloAST * shiftAmount);
     207
     208    PabloAST * createMod64Lookahead(PabloAST * expr, const Integer::Type shiftAmount, const std::string prefix);
     209
     210    PabloAST * createMod64Lookahead(PabloAST * expr, PabloAST * shiftAmount, const std::string prefix);
    194211
    195212    PabloAST * createMod64MatchStar(PabloAST * marker, PabloAST * charclass);
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_automultiplexing.cpp

    r4942 r4959  
    166166bool MultiplexingPass::optimize(PabloFunction & function, const bool independent) {
    167167
     168    if (LLVM_UNLIKELY(Samples < 1)) {
     169        return false;
     170    }
     171
     172
    168173    LOG("Seed:                    " << Seed);
    169174
     
    395400        for (unsigned j = 0; j < i; ++j) {
    396401            if (G(i, j)) {
    397                 add_edge(j, i, mConstraintGraph);
     402                add_edge(j, i, true, mConstraintGraph);
    398403            }
    399404        }
    400405        for (unsigned j = i + 1; j < advances; ++j) {
    401406            if (G(i, j)) {
    402                 add_edge(j, i, mConstraintGraph);
     407                add_edge(j, i, true, mConstraintGraph);
    403408            }
    404409        }
     
    607612
    608613    BDD Ak = bdd_ithvar(mVariables++);
    609     const BDD Nk = bdd_addref(bdd_not(Ak));
     614    const BDD Nk = bdd_addref(bdd_not(Ak));   
    610615    for (unsigned i = 0; i != k; ++i) {
    611616        if (unconstrained[i]) {
     
    627632            }
    628633        }
    629         add_edge(i, k, mConstraintGraph);
     634        add_edge(i, k, false, mConstraintGraph);
    630635    }
    631636    // To minimize the number of BDD computations, we store the negated variable instead of negating it each time.
     
    639644inline bool MultiplexingPass::independent(const ConstraintVertex i, const ConstraintVertex j) const {
    640645    assert (i < num_vertices(mConstraintGraph) && j < num_vertices(mConstraintGraph));
    641     return (mConstraintGraph.get_edge(i, j) == 0);
     646    return mConstraintGraph.get_edge(i, j).first == false;
    642647}
    643648
     
    719724    mCandidateGraph = CandidateGraph(num_vertices(mConstraintGraph));
    720725
    721     for (unsigned iteration = 0; iteration < Samples; ++iteration) {
     726    for (unsigned r = Samples; r; --r) {
    722727
    723728        // Push all source nodes into the (initial) independent set S
     
    768773    }
    769774
     775    #ifdef PRINT_DEBUG_OUTPUT
     776    const auto n = num_vertices(mConstraintGraph);
     777    const auto m = num_vertices(mCandidateGraph);
     778    unsigned sets = 0;
     779    for (auto i = n; i < m; ++i) {
     780        if (degree(i, mCandidateGraph) > 0) {
     781            ++sets;
     782        }
     783    }
     784    LOG("Unique Candidate Sets:    " << (sets));
     785    #endif
     786
    770787    return num_vertices(mCandidateGraph) > num_vertices(mConstraintGraph);
    771788}
     
    912929    const size_t n = num_vertices(mCandidateGraph) - m;
    913930
    914     degree_t remaining[n];
    915     vertex_t chosen_set[m];
    916 
    917     for (unsigned i = 0; i != n; ++i) {
    918         remaining[i] = degree(i + m, mCandidateGraph);
    919     }
    920     for (unsigned i = 0; i != m; ++i) {
    921         chosen_set[i] = 0;
    922     }
     931    std::vector<bool> chosen(n, false);
    923932
    924933    for (;;) {
    925934
    926935        // Choose the set with the greatest number of vertices not already included in some other set.
    927         vertex_t k = 0;
     936        vertex_t u = 0;
    928937        degree_t w = 0;
    929938        for (vertex_t i = 0; i != n; ++i) {
    930             degree_t r = remaining[i];
    931             if (r > 2) { // if this set has at least 3 elements.
     939            if (chosen[i]) continue;
     940            const auto t = i + m;
     941            degree_t r = degree(t, mCandidateGraph);
     942            if (LLVM_LIKELY(r >= 3)) { // if this set has at least 3 elements.
    932943                r *= r;
    933944                AdjIterator begin, end;
    934                 std::tie(begin, end) = adjacent_vertices(i + m, mCandidateGraph);
     945                std::tie(begin, end) = adjacent_vertices(t, mCandidateGraph);
    935946                for (auto ei = begin; ei != end; ++ei) {
    936947                    for (auto ej = ei; ++ej != end; ) {
     
    941952                }
    942953                if (w < r) {
    943                     k = i;
     954                    u = t;
    944955                    w = r;
    945956                }
     957            } else if (r) {
     958                clear_vertex(t, mCandidateGraph);
    946959            }
    947960        }
    948961
    949962        // Multiplexing requires 3 or more elements; if no set contains at least 3, abort.
    950         if (w == 0) {
     963        if (LLVM_UNLIKELY(w == 0)) {
    951964            break;
    952965        }
    953966
    954         for (const auto u : make_iterator_range(adjacent_vertices(k + m, mCandidateGraph))) {
    955             if (chosen_set[u] == 0) {
    956                 chosen_set[u] = (k + m);
    957                 for (const auto v : make_iterator_range(adjacent_vertices(u, mCandidateGraph))) {
    958                     assert (v >= m);
    959                     remaining[v - m]--;
    960                 }
    961             }
    962         }
    963 
    964         assert (remaining[k] == 0);
     967        chosen[u - m] = true;
    965968
    966969        // If this contains 2^n elements for any n, discard the member that is most likely to be added
    967970        // to some future set.
    968         if (LLVM_UNLIKELY(is_power_of_2(w))) {
    969             vertex_t j = 0;
     971        if (LLVM_UNLIKELY(is_power_of_2(degree(u, mCandidateGraph)))) {
     972            vertex_t x = 0;
    970973            degree_t w = 0;
    971             for (vertex_t i = 0; i != m; ++i) {
    972                 if (chosen_set[i] == (k + m)) {
    973                     degree_t r = 1;
    974                     for (const auto u : make_iterator_range(adjacent_vertices(i, mCandidateGraph))) {
    975                         // strongly prefer adding weight to unvisited sets that have more remaining vertices
    976                         r += std::pow(remaining[u - m], 2);
    977                     }
    978                     if (w < r) {
    979                         j = i;
    980                         w = r;
    981                     }
    982                 }
    983             }
    984             assert (w > 0);
    985             chosen_set[j] = 0;
    986             for (const auto u : make_iterator_range(adjacent_vertices(j, mCandidateGraph))) {
    987                 assert (u >= m);
    988                 remaining[u - m]++;
    989             }
    990         }
    991 
    992         // If Samples > 1 then our candidate sets were generated by more than one traversal through the constraint graph.
    993         // Sets generated by differing traversals may generate a cycle in the AST if multiplex even when they are not
    994         // multiplexed together. For example,
    995 
    996         // Assume we're multiplexing set {A,B,C} and {D,E,F} and that no constraint exists between any nodes in
    997         // either set. If A is dependent on D and E is dependent on B, multiplexing both sets would result in a cycle
    998         // in the AST. To fix this, we'd have to remove A, D, B or E.
    999 
    1000         // This cannot occur with only one traversal (or between sets generated by the same traversal) because of the
    1001         // DAG traversal strategy used in "generateCandidateSets".
    1002 
    1003 
    1004     }
    1005 
    1006     for (unsigned i = 0; i != m; ++i) {
    1007         AdjIterator ei, ei_end;
    1008         std::tie(ei, ei_end) = adjacent_vertices(i, mCandidateGraph);
    1009         for (auto next = ei; ei != ei_end; ei = next) {
    1010             ++next;
    1011             if (*ei != chosen_set[i]) {
    1012                 remove_edge(i, *ei, mCandidateGraph);
    1013             }
     974            for (const auto v : make_iterator_range(adjacent_vertices(u, mCandidateGraph))) {
     975                if (degree(v, mCandidateGraph) > w) {
     976                    x = v;
     977                    w = degree(v, mCandidateGraph);
     978                }
     979            }
     980            remove_edge(u, x, mCandidateGraph);
     981        }
     982
     983        AdjIterator begin, end;
     984        std::tie(begin, end) = adjacent_vertices(u, mCandidateGraph);
     985        for (auto vi = begin; vi != end; ) {
     986            const auto v = *vi++;
     987            clear_vertex(v, mCandidateGraph);
     988            add_edge(v, u, mCandidateGraph);
     989        }
     990
     991        if (Samples > 1) {
     992            removePotentialCycles(u);
    1014993        }
    1015994    }
     
    10601039
    10611040
     1041}
     1042
     1043/** ------------------------------------------------------------------------------------------------------------- *
     1044 * @brief removePotentialCycles
     1045 *
     1046 * If Samples > 1, our candidate sets were generated by more than one traversal through the constraint DAG.
     1047 * Multiplexing disjoint sets generated by differing traversals can induce a cycle in the AST. For example,
     1048 * suppose we have sets {A,B} and {C,D}, where A is dependent on C and D is dependent on B; multiplexing both will result in a cycle.
     1049 *
     1050 * Eliminating all potential cycles will likely lead to the removal of many candidate sets. Instead we "fix"
     1051 * the candidate sets after the selection of a particular candidate set.
     1052 ** ------------------------------------------------------------------------------------------------------------- */
     1053void MultiplexingPass::removePotentialCycles(const CandidateGraph::vertex_descriptor i) {
     1054
     1055    using AdjIterator = graph_traits<CandidateGraph>::adjacency_iterator;
     1056
     1057    const auto m = num_vertices(mConstraintGraph);
     1058    const auto n = num_vertices(mCandidateGraph);
     1059
     1060    // Suppose we construct a graph G that indicates whether selecting candidate set V will induce a cycle, given
     1061    // that we've already chosen candidate set U. This can occur here only because some elements of V are dependent
     1062    // on U and vice versa.
     1063
     1064    // We want a minimum-weight feedback arc set of G; however, we also know any edge will either have
     1065    // ... (NOTE(review): this sentence is unfinished in the original comment -- TODO complete it.)
     1066
     1067    for (auto j = m; j < n; ++j) {
     1068        if (LLVM_UNLIKELY(i == j)) continue;
     1069        AdjIterator begin, end;
     1070        std::tie(begin, end) = adjacent_vertices(j, mCandidateGraph);
     1071        for (auto ui = begin; ui != end; )  {
     1072            const auto u = *ui++;
     1073            unsigned outgoing = 0;
     1074            unsigned incoming = 0;
     1075            for (auto v : make_iterator_range(adjacent_vertices(i, mCandidateGraph)))  {
     1076                if (dependent(u, v)) {
     1077                    ++outgoing;
     1078                } else if (dependent(v, u)) {
     1079                    ++incoming;
     1080                }
     1081            }
     1082            if (LLVM_UNLIKELY(outgoing > 0 && incoming > 0)) {
     1083                remove_edge(j, u, mCandidateGraph);
     1084            }
     1085        }
     1086    }
     1087}
     1088
     1089/** ------------------------------------------------------------------------------------------------------------- *
     1090 * @brief dependent
     1091 ** ------------------------------------------------------------------------------------------------------------- */
     1092inline bool MultiplexingPass::dependent(const ConstraintVertex i, const ConstraintVertex j) const {
     1093    const auto e = mConstraintGraph.get_edge(i, j);
     1094    return (e.second && e.first);
    10621095}
    10631096
     
    12671300                    work = 2;
    12681301                    break;
    1269 //                case TypeId::Not:
     1302                case TypeId::Not:
    12701303                case TypeId::Assign:
    12711304                case TypeId::Next:
     
    13661399            bool ready = true;
    13671400            const auto v = target(ei, G);
     1401            assert (rank[v] != 0);
    13681402            for (auto ej : make_iterator_range(in_edges(v, G))) {
    13691403                if (rank[source(ej, G)] != 0) {
     
    13731407            }
    13741408            if (ready) {
    1375                 assert (rank[v] != 0);
    13761409                readySet.insert(std::lower_bound(readySet.begin(), readySet.end(), v, by_nonincreasing_rank), v);
    13771410                assert (std::is_sorted(readySet.cbegin(), readySet.cend(), by_nonincreasing_rank));
     
    16061639
    16071640    #ifndef NDEBUG
    1608     std::vector<typename Graph::vertex_descriptor> nothing;
     1641    std::vector<Vertex> nothing;
    16091642    topological_sort(G, std::back_inserter(nothing));
    16101643    #endif
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_automultiplexing.hpp

    r4937 r4959  
    2525    using CharacterizationMap = llvm::DenseMap<const PabloAST *, BDD>;
    2626
    27     using ConstraintGraph = boost::adjacency_matrix<boost::directedS>;
     27    using ConstraintGraph = boost::adjacency_matrix<boost::directedS, boost::no_property, bool>;
    2828    using ConstraintVertex = ConstraintGraph::vertex_descriptor;
    2929    using Constraints = std::vector<ConstraintVertex>;
     
    4242
    4343    using AdvanceVector = std::vector<Advance *>;
    44     using AdvanceDepth = std::vector<int>;
     44    using AdvanceRank = std::vector<int>;
    4545    using AdvanceVariable = std::vector<BDD>;
    4646
     
    7878    void selectMultiplexSetsWorkingSet();
    7979
     80    void removePotentialCycles(const CandidateGraph::vertex_descriptor u);
     81    bool dependent(const ConstraintVertex i, const ConstraintVertex j) const;
     82
    8083    void eliminateSubsetConstraints();
    8184    void doTransitiveReductionOfSubsetGraph();
     
    109112    ConstraintGraph             mConstraintGraph;   
    110113    AdvanceVector               mAdvance;
    111     AdvanceDepth                mAdvanceRank;
     114    AdvanceRank                 mAdvanceRank;
    112115    AdvanceVariable             mAdvanceNegatedVariable;
    113116    SubsetGraph                 mSubsetGraph;
    114117    CliqueGraph                 mUsageGraph;
    115     CandidateGraph           mCandidateGraph;
     118    CandidateGraph              mCandidateGraph;
    116119};
    117120
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_simplifier.cpp

    r4937 r4959  
    525525
    526526/** ------------------------------------------------------------------------------------------------------------- *
     527 * @brief unused
     528 ** ------------------------------------------------------------------------------------------------------------- */
     529inline static bool unused(const Statement * const stmt) {
     530    if (LLVM_UNLIKELY(stmt->getNumUses() == 0)) {
     531        // TODO: prototypes ought to state whether they have side effects.
     532        if (LLVM_UNLIKELY(isa<Call>(stmt) && cast<Call>(stmt)->getPrototype()->getNumOfResults() == 0)) {
     533            return false;
     534        }
     535        return true;
     536    }
     537    return false;
     538}
     539
     540/** ------------------------------------------------------------------------------------------------------------- *
    527541 * @brief deadCodeElimination
    528542 ** ------------------------------------------------------------------------------------------------------------- */
     
    530544    Statement * stmt = block->front();
    531545    while (stmt) {
    532         if (isa<If>(stmt)) {
     546        if (LLVM_UNLIKELY(isa<If>(stmt))) {
    533547            deadCodeElimination(cast<If>(stmt)->getBody());
    534         } else if (isa<While>(stmt)) {
     548        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
    535549            deadCodeElimination(cast<While>(stmt)->getBody());
    536         } else if (stmt->getNumUses() == 0){
     550        } else if (LLVM_UNLIKELY(unused(stmt))){
    537551            stmt = stmt->eraseFromParent(true);
    538552            continue;
     
    562576                if (LLVM_UNLIKELY(op->getNumUses() == 1)) {
    563577                    adv->setOperand(0, op->getOperand(0));
    564                     adv->setOperand(1, block->getInteger(adv->getAdvanceAmount() + op->getAdvanceAmount()));
     578                    adv->setOperand(1, block->getInteger(adv->getAmount() + op->getAmount()));
    565579                    op->eraseFromParent(false);
    566580                }
     
    573587                if (LLVM_UNLIKELY(op->getNumUses() == 1)) {
    574588                    block->setInsertPoint(scanThru->getPrevNode());
    575                     PabloAST * expr = block->createAdvance(op->getOperand(0), op->getAdvanceAmount() - 1);
     589                    PabloAST * expr = block->createAdvance(op->getOperand(0), op->getAmount() - 1);
    576590                    scanThru->setOperand(0, expr);
    577591                    scanThru->setOperand(1, block->createOr(scanThru->getOperand(1), expr));
  • icGREP/icgrep-devel/icgrep/pablo/pabloAST.h

    r4922 r4959  
    6666        , Advance
    6767        , ScanThru
     68        , Lookahead
    6869        , MatchStar
    6970        // Mod 64 approximate stream operations
    7071        , Mod64Advance
    7172        , Mod64ScanThru
     73        , Mod64Lookahead
    7274        , Mod64MatchStar
    7375        // Statistics operations
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r4945 r4959  
    7171, mBitBlockType(b->getBitBlockType())
    7272, mCarryManager(nullptr)
    73 , mInputType(nullptr)
     73, mPabloFunction(nullptr)
     74, mPabloBlock(nullptr)
    7475, mKBuilder(nullptr)
    7576, mWhileDepth(0)
    7677, mIfDepth(0)
    7778, mFunction(nullptr)
    78 , mInputAddressPtr(nullptr)
    79 , mOutputAddressPtr(nullptr)
    8079, mMaxWhileDepth(0)
    8180, mFilePosIdx(2) {
     
    8382}
    8483
    85 PabloCompiler::~PabloCompiler() {
    86 }
    87  
    8884void PabloCompiler::setKernel(KernelBuilder * kBuilder){
    8985    mKBuilder = kBuilder;   
     
    9692    #endif
    9793 
    98     PabloBlock * const mainScope = function->getEntryBlock();
    99 
    100     mainScope->enumerateScopes(0);
    101    
    10294    Examine(*function);
    10395
    10496    mCarryManager = new CarryManager(iBuilder);
    10597
    106     GenerateKernel(mainScope, function);
     98    GenerateKernel(function);
    10799       
    108100    delete mCarryManager;
     
    113105    std::cerr << "PABLO COMPILATION TIME: " << (pablo_compilation_end - pablo_compilation_start) << std::endl;
    114106    #endif
    115 
    116107
    117108    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
     
    127118
    128119    #ifndef NDEBUG
    129     raw_os_ostream err(std::cerr);
    130     verifyModule(*mMod, &err);
     120    verifyModule(*mMod, &errs());
    131121    #endif
    132122
     
    134124}
    135125
    136 inline void PabloCompiler::GenerateKernel(PabloBlock * mainScope, PabloFunction * function) {
     126inline void PabloCompiler::GenerateKernel(PabloFunction * const function) {
    137127 
    138     for(int i=0; i<8; i++){
    139         mKBuilder->addKernelInputStream(1, "basis_bits");
    140     }
    141     mKBuilder->addKernelOutputStream(1);
    142     mKBuilder->addKernelOutputStream(1);
    143 
    144     mCarryManager->initialize(mainScope, mKBuilder);
    145  
    146     int segBlocks = mKBuilder->getSegmentBlocks();
    147     mKBuilder->PrepareDoBlockFunction();
    148     struct Inputs inputs = mKBuilder->openDoBlock();
    149     struct Outputs outputs;
     128    mPabloFunction = function;
     129
     130    for (unsigned i = 0; i < function->getNumOfParameters(); ++i) {
     131        mKBuilder->addInputStream(1, function->getParameter(i)->getName()->to_string());
     132    }
     133    for (unsigned i = 0; i < function->getNumOfResults(); ++i) {
     134        mKBuilder->addOutputStream(1);
     135    }
     136
     137    mCarryManager->initialize(function, mKBuilder);
     138
     139    mKBuilder->prepareFunction();
     140
    150141    mFunction = mKBuilder->getDoBlockFunction();
    151     Value * kernelStuctParam = mKBuilder->getKernelStructParam();
    152142
    153143    mCarryManager->initialize_setPtrs(mKBuilder);
    154144
    155     valptr results[segBlocks][2];
    156     for(int j=0; j<segBlocks; j++){     
    157         for(int i=0; i<inputs.streams[j].size(); i++){
    158             mMarkerMap[function->getParameter(i)] = inputs.streams[j][i];
    159         }
    160 
    161         compileBlock(mainScope);
    162 
    163         Value * filePos = iBuilder->CreateAdd(mKBuilder->getKernelInternalState(kernelStuctParam, mFilePosIdx), iBuilder->getInt64(iBuilder->getBitBlockWidth()));
    164         mKBuilder->changeKernelInternalState(kernelStuctParam, mFilePosIdx, filePos);
    165 
    166         mCarryManager->set_BlockNo(mKBuilder);
    167 
    168         results[j][0] = mMarkerMap[function->getResult(0)];
    169         results[j][1] = mMarkerMap[function->getResult(1)];
    170         outputs.streams.push_back(results[j]);
     145    for(unsigned i = 0; i < mKBuilder->getSegmentBlocks(); i++){
     146
     147        for (unsigned j = 0; j < function->getNumOfParameters(); ++j) {
     148            mMarkerMap.insert(std::make_pair(function->getParameter(j), mKBuilder->getInputStream(j)));
     149        }
     150
     151        compileBlock(function->getEntryBlock());
     152
     153        Value * filePos = mKBuilder->getInternalState(mFilePosIdx);
     154        filePos = iBuilder->CreateBlockAlignedLoad(filePos);
     155        filePos = iBuilder->CreateAdd(filePos, iBuilder->getInt64(iBuilder->getBitBlockWidth()));
     156        mKBuilder->setInternalState(mFilePosIdx, filePos);
     157
     158        mCarryManager->setBlockNo(mKBuilder);
     159
     160        for (unsigned j = 0; j < function->getNumOfResults(); ++j) {
     161            const auto f = mMarkerMap.find(function->getResult(j));
     162            Value * result = nullptr;
     163            if (LLVM_UNLIKELY(f == mMarkerMap.end())) {
     164                result = iBuilder->allZeroes();
     165            } else {
     166                result = f->second;
     167            }
     168            iBuilder->CreateBlockAlignedStore(result, mKBuilder->getOutputStream(j));
     169        }
     170
     171        mMarkerMap.clear();
     172
     173        mKBuilder->increment();
    171174    }   
    172175
    173     mKBuilder->closeDoBlock(outputs);
    174     mKBuilder->finalizeMethods();
    175 }
    176 
    177 inline void PabloCompiler::GenerateFunction(PabloFunction & function) {
    178     mInputType = PointerType::get(StructType::get(mMod->getContext(), std::vector<Type *>(function.getNumOfParameters(), mBitBlockType)), 0);
    179     Type * outputType = PointerType::get(StructType::get(mMod->getContext(), std::vector<Type *>(function.getNumOfResults(), mBitBlockType)), 0);
    180     FunctionType * functionType = FunctionType::get(Type::getVoidTy(mMod->getContext()), std::vector<Type *>({mInputType, outputType}), false);
    181 
    182 
    183     //Starts on process_block
    184     SmallVector<AttributeSet, 3> Attrs;
    185     Attrs.push_back(AttributeSet::get(mMod->getContext(), ~0U, std::vector<Attribute::AttrKind>({ Attribute::NoUnwind, Attribute::UWTable })));
    186     Attrs.push_back(AttributeSet::get(mMod->getContext(), 1U, std::vector<Attribute::AttrKind>({ Attribute::ReadOnly, Attribute::NoCapture })));
    187     Attrs.push_back(AttributeSet::get(mMod->getContext(), 2U, std::vector<Attribute::AttrKind>({ Attribute::ReadNone, Attribute::NoCapture })));
    188     AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
    189 
    190     // Create the function that will be generated.
    191     mFunction = Function::Create(functionType, GlobalValue::ExternalLinkage, function.getName()->value(), mMod);
    192     mFunction->setCallingConv(CallingConv::C);
    193     mFunction->setAttributes(AttrSet);
    194 
    195     Function::arg_iterator args = mFunction->arg_begin();
    196     mInputAddressPtr = args++;
    197     mInputAddressPtr->setName("input");
    198     mOutputAddressPtr = args++;
    199     mOutputAddressPtr->setName("output");
     176    mKBuilder->finalize();
    200177}
    201178
     
    205182    mMaxWhileDepth = 0;
    206183    Examine(function.getEntryBlock());
    207     if (LLVM_UNLIKELY(mWhileDepth != 0 || mIfDepth != 0)) {
    208         throw std::runtime_error("Malformed Pablo AST: Unbalanced If or While nesting depth!");
    209     }
    210184}
    211185
     
    213187void PabloCompiler::Examine(PabloBlock * block) {
    214188    for (Statement * stmt : *block) {
    215         if (If * ifStatement = dyn_cast<If>(stmt)) {
    216             Examine(ifStatement->getBody());
    217         }
    218         else if (While * whileStatement = dyn_cast<While>(stmt)) {
     189        if (LLVM_UNLIKELY(isa<If>(stmt))) {
     190            Examine(cast<If>(stmt)->getBody());
     191        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
    219192            mMaxWhileDepth = std::max(mMaxWhileDepth, ++mWhileDepth);
    220             Examine(whileStatement->getBody());
     193            Examine(cast<While>(stmt)->getBody());
    221194            --mWhileDepth;
    222195        }
     
    368341    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
    369342        expr = compileExpression(assign->getExpression());
    370     }
    371     else if (const Next * next = dyn_cast<const Next>(stmt)) {
     343    } else if (const Next * next = dyn_cast<const Next>(stmt)) {
    372344        expr = compileExpression(next->getExpr());
    373     }
    374     else if (const If * ifStatement = dyn_cast<const If>(stmt)) {
     345    } else if (const If * ifStatement = dyn_cast<const If>(stmt)) {
    375346        compileIf(ifStatement);
    376347        return;
    377     }
    378     else if (const While * whileStatement = dyn_cast<const While>(stmt)) {
     348    } else if (const While * whileStatement = dyn_cast<const While>(stmt)) {
    379349        compileWhile(whileStatement);
    380350        return;
    381     }
    382     else if (const Call* call = dyn_cast<Call>(stmt)) {
    383         // Call the callee once and store the result in the marker map.
    384         if (mMarkerMap.count(call)) {
    385             return;
    386         }
    387 
    388         const Prototype * proto = call->getPrototype();
    389         const String * callee = proto->getName();
    390 
    391         Type * inputType = StructType::get(mMod->getContext(), std::vector<Type *>{proto->getNumOfParameters(), mBitBlockType});
    392         Type * outputType = StructType::get(mMod->getContext(), std::vector<Type *>{proto->getNumOfResults(), mBitBlockType});
    393         FunctionType * functionType = FunctionType::get(Type::getVoidTy(mMod->getContext()), std::vector<Type *>{PointerType::get(inputType, 0), PointerType::get(outputType, 0)}, false);
    394 
    395         //Starts on process_block
    396         SmallVector<AttributeSet, 3> Attrs;
    397         Attrs.push_back(AttributeSet::get(mMod->getContext(), 1U, std::vector<Attribute::AttrKind>({ Attribute::ReadOnly, Attribute::NoCapture })));
    398         Attrs.push_back(AttributeSet::get(mMod->getContext(), 2U, std::vector<Attribute::AttrKind>({ Attribute::ReadNone, Attribute::NoCapture })));
    399         AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
    400 
    401         Function * externalFunction = cast<Function>(mMod->getOrInsertFunction(callee->value(), functionType, AttrSet));
    402         if (LLVM_UNLIKELY(externalFunction == nullptr)) {
    403             throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
    404         }
    405         externalFunction->setCallingConv(llvm::CallingConv::C);
    406 
    407 
    408         AllocaInst * outputStruct = iBuilder->CreateAlloca(outputType);
    409         iBuilder->CreateCall2(externalFunction, mInputAddressPtr, outputStruct);
    410         Value * outputPtr = iBuilder->CreateGEP(outputStruct, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(0) }));
    411         expr = iBuilder->CreateAlignedLoad(outputPtr, iBuilder->getBitBlockWidth() / 8, false);
    412     }
    413     else if (const And * pablo_and = dyn_cast<And>(stmt)) {
     351//    } else if (const Call* call = dyn_cast<Call>(stmt)) {
     352//        // Call the callee once and store the result in the marker map.
     353//        if (LLVM_UNLIKELY(mMarkerMap.count(call) == 0)) {
     354//            return;
     355//        }
     356
     357//        const Prototype * proto = call->getPrototype();
     358//        const String * callee = proto->getName();
     359
     360//        Type * inputType = StructType::get(mMod->getContext(), std::vector<Type *>{proto->getNumOfParameters(), mBitBlockType});
     361//        Type * outputType = StructType::get(mMod->getContext(), std::vector<Type *>{proto->getNumOfResults(), mBitBlockType});
     362//        FunctionType * functionType = FunctionType::get(Type::getVoidTy(mMod->getContext()), std::vector<Type *>{PointerType::get(inputType, 0), PointerType::get(outputType, 0)}, false);
     363
     364//        //Starts on process_block
     365//        SmallVector<AttributeSet, 3> Attrs;
     366//        Attrs.push_back(AttributeSet::get(mMod->getContext(), 1U, std::vector<Attribute::AttrKind>({ Attribute::ReadOnly, Attribute::NoCapture })));
     367//        Attrs.push_back(AttributeSet::get(mMod->getContext(), 2U, std::vector<Attribute::AttrKind>({ Attribute::ReadNone, Attribute::NoCapture })));
     368//        AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
     369
     370//        Function * externalFunction = cast<Function>(mMod->getOrInsertFunction(callee->value(), functionType, AttrSet));
     371//        if (LLVM_UNLIKELY(externalFunction == nullptr)) {
     372//            throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
     373//        }
     374//        externalFunction->setCallingConv(llvm::CallingConv::C);
     375
     376//        AllocaInst * outputStruct = iBuilder->CreateAlloca(outputType);
     377//        iBuilder->CreateCall2(externalFunction, mInputAddressPtr, outputStruct);
     378//        Value * outputPtr = iBuilder->CreateGEP(outputStruct, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(0) }));
     379
     380//        expr = iBuilder->CreateAlignedLoad(outputPtr, iBuilder->getBitBlockWidth() / 8, false);
     381    } else if (const And * pablo_and = dyn_cast<And>(stmt)) {
    414382        expr = iBuilder->simd_and(compileExpression(pablo_and->getOperand(0)), compileExpression(pablo_and->getOperand(1)));
    415     }
    416     else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
     383    } else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
    417384        expr = iBuilder->simd_or(compileExpression(pablo_or->getOperand(0)), compileExpression(pablo_or->getOperand(1)));
    418     }
    419     else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
     385    } else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
    420386        expr = iBuilder->simd_xor(compileExpression(pablo_xor->getOperand(0)), compileExpression(pablo_xor->getOperand(1)));
    421     }
    422     else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
     387    } else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
    423388        Value* ifMask = compileExpression(sel->getCondition());
    424389        Value* ifTrue = iBuilder->simd_and(ifMask, compileExpression(sel->getTrueExpr()));
    425390        Value* ifFalse = iBuilder->simd_and(iBuilder->simd_not(ifMask), compileExpression(sel->getFalseExpr()));
    426391        expr = iBuilder->simd_or(ifTrue, ifFalse);
    427     }
    428     else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
     392    } else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
    429393        expr = iBuilder->simd_not(compileExpression(pablo_not->getExpr()));
    430     }
    431     else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
     394    } else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
    432395        Value * const strm_value = compileExpression(adv->getExpr());
    433         expr = mCarryManager->advanceCarryInCarryOut(adv->getLocalAdvanceIndex(), adv->getAdvanceAmount(), strm_value);
    434     }
    435     else if (const Mod64Advance * adv = dyn_cast<Mod64Advance>(stmt)) {
     396        expr = mCarryManager->advanceCarryInCarryOut(adv->getLocalIndex(), adv->getAmount(), strm_value);
     397    } else if (const Mod64Advance * adv = dyn_cast<Mod64Advance>(stmt)) {
    436398        Value * const strm_value = compileExpression(adv->getExpr());
    437         expr = iBuilder->simd_slli(64, strm_value, adv->getAdvanceAmount());
    438     }
    439     else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt)) {
     399        expr = iBuilder->simd_slli(64, strm_value, adv->getAmount());
     400    } else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt)) {
    440401        Value * const marker = compileExpression(mstar->getMarker());
    441402        Value * const cc = compileExpression(mstar->getCharClass());
     
    443404        Value * const sum = mCarryManager->addCarryInCarryOut(mstar->getLocalCarryIndex(), marker_and_cc, cc);
    444405        expr = iBuilder->simd_or(iBuilder->simd_xor(sum, cc), marker);
    445     }
    446     else if (const Mod64MatchStar * mstar = dyn_cast<Mod64MatchStar>(stmt)) {
     406    } else if (const Mod64MatchStar * mstar = dyn_cast<Mod64MatchStar>(stmt)) {
    447407        Value * const marker = compileExpression(mstar->getMarker());
    448408        Value * const cc = compileExpression(mstar->getCharClass());
     
    450410        Value * const sum = iBuilder->simd_add(64, marker_and_cc, cc);
    451411        expr = iBuilder->simd_or(iBuilder->simd_xor(sum, cc), marker);
    452     }
    453     else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt)) {
     412    } else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt)) {
    454413        Value * const  marker_expr = compileExpression(sthru->getScanFrom());
    455414        Value * const  cc_expr = compileExpression(sthru->getScanThru());
    456415        Value * const  sum = mCarryManager->addCarryInCarryOut(sthru->getLocalCarryIndex(), marker_expr, cc_expr);
    457416        expr = iBuilder->simd_and(sum, iBuilder->simd_not(cc_expr));
    458     }
    459     else if (const Mod64ScanThru * sthru = dyn_cast<Mod64ScanThru>(stmt)) {
     417    } else if (const Mod64ScanThru * sthru = dyn_cast<Mod64ScanThru>(stmt)) {
    460418        Value * const marker_expr = compileExpression(sthru->getScanFrom());
    461419        Value * const cc_expr = compileExpression(sthru->getScanThru());
    462420        Value * const sum = iBuilder->simd_add(64, marker_expr, cc_expr);
    463421        expr = iBuilder->simd_and(sum, iBuilder->simd_not(cc_expr));
    464     }
    465     else if (const Count * c = dyn_cast<Count>(stmt)) {
     422    } else if (const Count * c = dyn_cast<Count>(stmt)) {
    466423        Value * const to_count = compileExpression(c->getExpr());
    467424        expr = mCarryManager->popCount(to_count, c->getGlobalCountIndex());
     425    } else if (const Lookahead * l = dyn_cast<Lookahead>(stmt)) {
     426        PabloAST * const var = l->getExpr();
     427        if (LLVM_UNLIKELY(!isa<Var>(var))) {
     428            throw std::runtime_error("Lookahead input type must be a Var object");
     429        }
     430        Value * index = nullptr;
     431        for (unsigned i = 0; i < mPabloFunction->getNumOfParameters(); ++i) {
     432            if (mPabloFunction->getParameter(i) == var) {
     433                index = iBuilder->getInt32(i);
     434                break;
     435            }
     436        }
     437        if (LLVM_UNLIKELY(index == nullptr)) {
     438            throw std::runtime_error("Lookahead has an illegal Var operand");
     439        }
     440        Type * const streamType = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
     441        const unsigned offset = l->getAmount() / iBuilder->getBitBlockWidth();
     442        const unsigned shift = (l->getAmount() % iBuilder->getBitBlockWidth());
     443        Value * const b0 = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(mKBuilder->getInputStream(offset), index), streamType);
     444        Value * const b1 = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(mKBuilder->getInputStream(offset + 1), index), streamType);
     445        Value * result = iBuilder->CreateOr(iBuilder->CreateLShr(b0, shift), iBuilder->CreateShl(b1, iBuilder->getBitBlockWidth() - shift), "lookahead");
     446        expr = iBuilder->CreateBitCast(result, iBuilder->getBitBlockType());
    468447    } else {
    469448        std::string tmp;
     
    482461
    483462Value * PabloCompiler::compileExpression(const PabloAST * expr) {
    484     if (isa<Ones>(expr)) {
     463    if (LLVM_UNLIKELY(isa<Ones>(expr))) {
    485464        return iBuilder->allOnes();
    486     }
    487     else if (isa<Zeroes>(expr)) {
     465    } else if (LLVM_UNLIKELY(isa<Zeroes>(expr))) {
    488466        return iBuilder->allZeroes();
    489467    }
     
    497475        throw std::runtime_error(str.str());
    498476    }
    499     return f->second;
    500 }
    501 
    502 void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
    503     if (LLVM_UNLIKELY(marker == nullptr)) {
    504         throw std::runtime_error("Cannot set result " + std::to_string(index) + " to Null");
    505     }
    506     if (LLVM_UNLIKELY(marker->getType()->isPointerTy())) {
    507         marker = iBuilder->CreateAlignedLoad(marker, iBuilder->getBitBlockWidth()/8, false);
    508     }
    509     Value* indices[] = {iBuilder->getInt64(0), iBuilder->getInt32(index)};
    510     Value* gep = iBuilder->CreateGEP(mOutputAddressPtr, indices);
    511     if (marker->getType() != mBitBlockType) {
    512         marker = iBuilder->CreateBitCast(marker, mBitBlockType);
    513     }
    514     iBuilder->CreateAlignedStore(marker, gep, iBuilder->getBitBlockWidth()/8, false);
    515 }
    516 
    517 }
     477    Value * result = f->second;
     478    if (LLVM_UNLIKELY(isa<Var>(expr))) {
     479        assert (isa<GetElementPtrInst>(result));
     480        result = iBuilder->CreateBlockAlignedLoad(result);
     481    }
     482    return result;
     483}
     484
     485}
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.h

    r4951 r4959  
    88#define PABLO_COMPILER_H
    99
    10 
     10//Pablo Expressions
    1111#include <string>
    12 #include <list>
    1312#include <vector>
    14 #include <map>
    15 #include <algorithm>
    1613#include <unordered_map>
    17 #include <pablo/pe_string.h>
    1814#include <pablo/carry_manager.h>
    1915#include <llvm/ADT/Twine.h>
     
    3733namespace pablo {
    3834
    39 using namespace llvm;
    40 
    4135class PabloAST;
    4236class PabloBlock;
     
    5145class PabloCompiler {
    5246
    53     typedef std::unordered_map<const pablo::PabloAST *, Value *>   ASTToValueMap;
     47    using MarkerMap = std::unordered_map<const PabloAST *, Value *>;
    5448
    5549public:
    5650    PabloCompiler(Module * m, IDISA::IDISA_Builder * b);
    57     ~PabloCompiler();
    58     Function * compile(pablo::PabloFunction * function);   
     51
     52    llvm::Function * compile(PabloFunction * function);
    5953    void setKernel(KernelBuilder * kBuilder);
    60    
     54
    6155private:
    62     void GenerateFunction(PabloFunction & function);
     56
    6357    void Examine(PabloFunction & function);
    6458    void Examine(PabloBlock * block);
    65 
    66     void SetOutputValue(Value * marker, const unsigned index);
    6759
    6860    void compileBlock(PabloBlock * block);
     
    7062    void compileIf(const If * ifStmt);
    7163    void compileWhile(const While * whileStmt);
    72     Value* compileExpression(const PabloAST * expr);
    73     void GenerateKernel(PabloBlock * block, PabloFunction * function);
     64    Value * compileExpression(const PabloAST * expr);
     65    void GenerateKernel(PabloFunction * const function);
    7466
    75     ASTToValueMap                       mMarkerMap;
     67    MarkerMap                           mMarkerMap;
    7668
    7769    Module *                            mMod;
     
    8173    CarryManager *                      mCarryManager;
    8274
    83     PointerType*                        mInputType;
    84 
     75    PabloFunction *                     mPabloFunction;
    8576    PabloBlock *                        mPabloBlock;
    8677
    8778    KernelBuilder *                     mKBuilder;
    88    
     79
    8980    unsigned                            mWhileDepth;
    9081    unsigned                            mIfDepth;
    9182
    92     Function *                          mFunction;
    93     Value *                             mInputAddressPtr;
    94     Value *                             mOutputAddressPtr;
     83    llvm::Function *                    mFunction;
    9584
    9685    unsigned                            mMaxWhileDepth;
    9786    int                                 mFilePosIdx;
    98 
    9987};
    10088
  • icGREP/icgrep-devel/icgrep/pablo/pe_advance.h

    r4717 r4959  
    2828        return getOperand(0);
    2929    }
    30     inline Integer::Type getAdvanceAmount() const {
     30    inline Integer::Type getAmount() const {
    3131        return cast<Integer>(getOperand(1))->value();
    3232    }
    33     inline void setLocalAdvanceIndex(const unsigned idx) {
     33    inline void setLocalIndex(const unsigned idx) {
    3434        localAdvanceIndex = idx;
    3535    }
    36     inline unsigned getLocalAdvanceIndex() const {
     36    inline unsigned getLocalIndex() const {
    3737        return localAdvanceIndex;
    3838    }
     
    6161        return getOperand(0);
    6262    }
    63     inline Integer::Type getAdvanceAmount() const {
     63    inline Integer::Type getAmount() const {
    6464        return cast<Integer>(getOperand(1))->value();
    6565    }
  • icGREP/icgrep-devel/icgrep/pablo/printer_pablos.cpp

    r4919 r4959  
    4141            out << ":\n";
    4242            print(ifNode->getBody(), out, true, indent + BlockIndenting);
    43             out.indent(indent);
    44             out << "Else:\n";
    45             print_vars(ifNode->getDefined(), out, indent + BlockIndenting);
     43            if (ifNode->getDefined().size() > 0) {
     44                out.indent(indent);
     45                out << "Else:\n";
     46                print_vars(ifNode->getDefined(), out, indent + BlockIndenting);
     47            }
    4648        }
    4749    } else if (const While * whileNode = dyn_cast<const While>(stmt)) {
     
    5355        }
    5456    } else if (const Call * call = dyn_cast<const Call>(stmt)) {
    55         out << " = " << call->getCallee() << "()";
     57        if (call->getPrototype()->getNumOfResults() > 0) {
     58            out << " = ";
     59        }
     60        out << call->getCallee() << "(";
     61        for (unsigned i = 0; i != call->getNumOperands(); ++i) {
     62            print(call->getOperand(i), out);
     63        }
     64        out << ")";
    5665    } else if (const And * andNode = dyn_cast<const And>(stmt)) {
    5766        out << andNode->getName() << " = (";
     
    9099        out << adv->getName() << " = pablo.Advance(";
    91100        print(adv->getExpr(), out);
    92         out << ", " << std::to_string(adv->getAdvanceAmount()) << ")";
     101        out << ", " << std::to_string(adv->getAmount()) << ")";
     102    } else if (const Lookahead * adv = dyn_cast<const Lookahead>(stmt)) {
     103        out << adv->getName() << " = pablo.Lookahead(";
     104        print(adv->getExpr(), out);
     105        out << ", " << std::to_string(adv->getAmount()) << ")";
    93106    } else if (const MatchStar * mstar = dyn_cast<const MatchStar>(stmt)) {
    94107        out << mstar->getName() << " = pablo.MatchStar(";
     
    106119        out << adv->getName() << " = pablo.Mod64Advance(";
    107120        print(adv->getExpr(), out);
    108         out << ", " << std::to_string(adv->getAdvanceAmount()) << ")";
     121        out << ", " << std::to_string(adv->getAmount()) << ")";
     122    } else if (const Mod64Lookahead * adv = dyn_cast<const Mod64Lookahead>(stmt)) {
     123        out << adv->getName() << " = pablo.Mod64Lookahead(";
     124        print(adv->getExpr(), out);
     125        out << ", " << std::to_string(adv->getAmount()) << ")";
    109126    } else if (const Mod64MatchStar * mstar = dyn_cast<const Mod64MatchStar>(stmt)) {
    110127        out << mstar->getName() << " = pablo.Mod64MatchStar(";
  • icGREP/icgrep-devel/icgrep/slab_allocator.h

    r4588 r4959  
    55
    66using LLVMAllocator = llvm::BumpPtrAllocator;
    7 
    8 namespace {
    9 
    10 class __BumpPtrAllocatorProxy {
    11 public:
    12     template <typename T>
    13     static inline T * Allocate(const size_t n) {
    14         return static_cast<T*>(mAllocator.Allocate(n * sizeof(T), sizeof(void*)));
    15     }
    16     template <typename T>
    17     static inline void Deallocate(const T * pointer) {
    18         mAllocator.Deallocate(pointer);
    19     }
    20     static inline void Reset() {
    21         mAllocator.Reset();
    22     }
    23     static LLVMAllocator & get_allocator() {
    24         return mAllocator;
    25     }
    26 private:
    27     static LLVMAllocator mAllocator;
    28 };
    29 
    30 LLVMAllocator __BumpPtrAllocatorProxy::mAllocator;
    31 
    32 }
    337
    348template <typename T>
     
    5024
    5125    inline pointer allocate(size_type n, const_pointer = nullptr) noexcept {
    52         return mAllocator.Allocate<T>(n);
     26        return static_cast<T*>(mAllocator.Allocate(n * sizeof(T), sizeof(void*)));
    5327    }
    5428
    5529    inline void deallocate(pointer p, size_type = 0) noexcept {
    56         mAllocator.Deallocate<T>(p);
     30        mAllocator.Deallocate(p);
    5731    }
    5832
     
    6236
    6337    inline LLVMAllocator & get_allocator() {
    64         return mAllocator.get_allocator();
     38        return mAllocator;
     39    }
     40
     41    void Reset() {
     42        mAllocator.Reset();
    6543    }
    6644
     
    7351    inline ~SlabAllocator() { }
    7452private:
    75     __BumpPtrAllocatorProxy mAllocator;
     53    static LLVMAllocator mAllocator;
    7654};
    7755
     56template <typename T> LLVMAllocator SlabAllocator<T>::mAllocator;
     57
    7858inline void releaseSlabAllocatorMemory() {
    79     __BumpPtrAllocatorProxy::Reset();
     59    SlabAllocator<void *> T;
     60    T.Reset();
    8061}
    8162
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r4954 r4959  
    7878static cl::opt<bool> PrintUTF8REs("print-utf8-REs", cl::init(false), cl::desc("print out UTF-8 REs"), cl::cat(cRegexOutputOptions));
    7979static cl::opt<bool> PrintSimplifiedREs("print-simplified-REs", cl::init(false), cl::desc("print out final simplified REs"), cl::cat(cRegexOutputOptions));
    80 static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options",
    81                                             "These options control printing of intermediate Pablo code.");
     80
     81static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options", "These options control printing of intermediate Pablo code.");
    8282
    8383static cl::opt<bool> PrintOptimizedREcode("print-pablo", cl::init(false), cl::desc("print final optimized Pablo code"), cl::cat(dPabloDumpOptions));
    8484static cl::opt<bool> PrintCompiledCCcode("print-CC-pablo", cl::init(false), cl::desc("print Pablo output from character class compiler"), cl::cat(dPabloDumpOptions));
    8585static cl::opt<bool> PrintCompiledREcode("print-RE-pablo", cl::init(false), cl::desc("print Pablo output from the regular expression compiler"), cl::cat(dPabloDumpOptions));
     86static cl::opt<std::string> PabloOutputFilename("print-pablo-output", cl::init(""), cl::desc("output Pablo filename"), cl::cat(dPabloDumpOptions));
     87
     88static cl::OptionCategory cMachineCodeOptimization("Machine Code Optimizations", "These options control back-end compilier optimization levels.");
     89
     90
     91static cl::opt<char> OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O0')"),
     92                              cl::cat(cMachineCodeOptimization), cl::Prefix, cl::ZeroOrMore, cl::init('0'));
    8693
    8794static cl::OptionCategory cPabloOptimizationsOptions("Pablo Optimizations", "These options control Pablo optimization passes.");
    8895
    89 static cl::opt<bool> DisablePabloCSE("disable-CSE", cl::init(false),
    90                                      cl::desc("Disable Pablo common subexpression elimination/dead code elimination"),
     96static cl::opt<bool> DisableSimplification("disable-simplification", cl::init(false),
     97                                     cl::desc("Disable Pablo Simplification pass (not recommended)"),
    9198                                     cl::cat(cPabloOptimizationsOptions));
     99
    92100static cl::opt<bool> PabloSinkingPass("sinking", cl::init(false),
    93101                                      cl::desc("Moves all instructions into the innermost legal If-scope so that they are only executed when needed."),
     
    104112                                         cl::desc("coalesce associative functions prior to optimization passes."),
    105113                                         cl::cat(cPabloOptimizationsOptions));
     114
    106115static cl::opt<bool> EnablePreDistribution("pre-dist", cl::init(false),
    107                                          cl::desc("apply distribution law optimization."),
     116                                         cl::desc("apply distribution law optimization prior to multiplexing."),
    108117                                         cl::cat(cPabloOptimizationsOptions));
     118
    109119static cl::opt<bool> EnablePostDistribution("post-dist", cl::init(false),
    110                                          cl::desc("apply distribution law optimization."),
     120                                         cl::desc("apply distribution law optimization after multiplexing."),
     121                                         cl::cat(cPabloOptimizationsOptions));
     122
     123static cl::opt<bool> EnablePrePassScheduling("pre-pass-scheduling", cl::init(false),
     124                                         cl::desc("apply pre-pass scheduling prior to LLVM IR generation."),
    111125                                         cl::cat(cPabloOptimizationsOptions));
    112126#endif
     
    162176}
    163177
     178#ifdef PRINT_TIMING_INFORMATION
     179#define READ_CYCLE_COUNTER(name) name = read_cycle_counter();
     180#else
     181#define READ_CYCLE_COUNTER(name)
     182#endif
     183
     184#ifdef PRINT_TIMING_INFORMATION
     185unsigned COUNT_STATEMENTS(const PabloFunction * const entry) {
     186    std::stack<const Statement *> scope;
     187    unsigned statements = 0;
     188    // Count every statement reachable from the entry block, including the If/While statements themselves and everything nested in their bodies.
     189    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
     190        while ( stmt ) {
     191            ++statements;
     192            if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     193                // Set the next statement to be the first statement of the inner scope and push the
     194                // next statement of the current statement into the scope stack.
     195                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
     196                scope.push(stmt->getNextNode());
     197                stmt = nested->front();
     198                assert (stmt);
     199                continue;
     200            }
     201            stmt = stmt->getNextNode();
     202        }
     203        if (scope.empty()) {
     204            break;
     205        }
     206        stmt = scope.top();
     207        scope.pop();
     208    }
     209    return statements;
     210}
     211
     212unsigned COUNT_ADVANCES(const PabloFunction * const entry) {
     213
     214    std::stack<const Statement *> scope;
     215    unsigned advances = 0;
     216
     217    // Count the Advance statements reachable from the entry block, descending into the bodies of nested If/While statements.
     218    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
     219        while ( stmt ) {
     220            if (isa<Advance>(stmt)) {
     221                ++advances;
     222            }
     223            else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     224                // Set the next statement to be the first statement of the inner scope and push the
     225                // next statement of the current statement into the scope stack.
     226                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
     227                scope.push(stmt->getNextNode());
     228                stmt = nested->front();
     229                assert (stmt);
     230                continue;
     231            }
     232            stmt = stmt->getNextNode();
     233        }
     234        if (scope.empty()) {
     235            break;
     236        }
     237        stmt = scope.top();
     238        scope.pop();
     239    }
     240    return advances;
     241}
     242
     243using DistributionMap = boost::container::flat_map<unsigned, unsigned>;
     244
     245DistributionMap SUMMARIZE_VARIADIC_DISTRIBUTION(const PabloFunction * const entry) {
     246    std::stack<const Statement *> scope;
     247    DistributionMap distribution;
     248    // Build a histogram keyed by operand count: for each Variadic statement (including those nested in If/While bodies), bump the tally for its getNumOperands().
     249    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
     250        while ( stmt ) {
     251            if (isa<Variadic>(stmt)) {
     252                auto f = distribution.find(stmt->getNumOperands());
     253                if (f == distribution.end()) {
     254                    distribution.emplace(stmt->getNumOperands(), 1);
     255                } else {
     256                    f->second += 1;
     257                }
     258            }
     259            else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     260                // Set the next statement to be the first statement of the inner scope and push the
     261                // next statement of the current statement into the scope stack.
     262                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
     263                scope.push(stmt->getNextNode());
     264                stmt = nested->front();
     265                assert (stmt);
     266                continue;
     267            }
     268            stmt = stmt->getNextNode();
     269        }
     270        if (scope.empty()) {
     271            break;
     272        }
     273        stmt = scope.top();
     274        scope.pop();
     275    }
     276    return distribution;
     277}
     278#endif
     279
    164280void pablo_function_passes(PabloFunction * function) {
    165281    // Scan through the pablo code and perform DCE and CSE
    166     if (!DisablePabloCSE) {
     282
     283#ifdef PRINT_TIMING_INFORMATION
     284    timestamp_t simplification_start = 0, simplification_end = 0;
     285    timestamp_t coalescing_start = 0, coalescing_end = 0;
     286    timestamp_t sinking_start = 0, sinking_end = 0;
     287    timestamp_t pre_distribution_start = 0, pre_distribution_end = 0;
     288    timestamp_t multiplexing_start = 0, multiplexing_end = 0;
     289    timestamp_t post_distribution_start = 0, post_distribution_end = 0;
     290    timestamp_t lowering_start = 0, lowering_end = 0;
     291    timestamp_t scheduling_start = 0, scheduling_end = 0;
     292    DistributionMap distribution;
     293    const timestamp_t optimization_start = read_cycle_counter();
     294#endif
     295    if (!DisableSimplification) {
     296        READ_CYCLE_COUNTER(simplification_start);
    167297        Simplifier::optimize(*function);
     298        READ_CYCLE_COUNTER(simplification_end);
    168299    }
    169300#ifdef ENABLE_MULTIPLEXING
    170     if (EnableLowering || EnablePreDistribution || EnablePostDistribution || EnableMultiplexing) {
    171         FlattenAssociativeDFG::transform(*function);
     301    if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
     302        READ_CYCLE_COUNTER(coalescing_start);
     303        CanonicalizeDFG::transform(*function);
     304        READ_CYCLE_COUNTER(coalescing_end);
     305    }
     306    if (EnablePreDistribution) {
     307        READ_CYCLE_COUNTER(pre_distribution_start);
     308        DistributivePass::optimize(*function);
     309        READ_CYCLE_COUNTER(pre_distribution_end);
     310    }
     311    if (EnableMultiplexing) {
     312        READ_CYCLE_COUNTER(multiplexing_start);
     313        MultiplexingPass::optimize(*function);
     314        READ_CYCLE_COUNTER(multiplexing_end);
     315        if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
     316            CanonicalizeDFG::transform(*function);
     317        }
     318    }
     319    if (EnablePostDistribution) {
     320        READ_CYCLE_COUNTER(post_distribution_start);
     321        DistributivePass::optimize(*function);
     322        READ_CYCLE_COUNTER(post_distribution_end);
    172323    }
    173324#endif
    174325    if (PabloSinkingPass) {
     326        READ_CYCLE_COUNTER(sinking_start);
    175327        CodeMotionPass::optimize(*function);
    176     }
    177 #ifdef ENABLE_MULTIPLEXING   
    178     if (EnablePreDistribution) {
    179         DistributivePass::optimize(*function);
    180     }
    181     if (EnableMultiplexing) {
    182         MultiplexingPass::optimize(*function);
    183     }
    184     if (EnablePostDistribution) {
    185         DistributivePass::optimize(*function);
    186     }
    187     SchedulingPrePass::optimize(*function);
     328        READ_CYCLE_COUNTER(sinking_end);
     329    }
     330#ifdef ENABLE_MULTIPLEXING
    188331    if (PrintUnloweredCode) {
    189332        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
     
    192335        PabloPrinter::print(*function, cerr);
    193336    }
    194     if (EnableLowering || EnablePreDistribution || EnablePostDistribution || EnableMultiplexing) {
     337    #ifdef PRINT_TIMING_INFORMATION
     338    distribution = SUMMARIZE_VARIADIC_DISTRIBUTION(function);
     339    #endif
     340    if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
     341        READ_CYCLE_COUNTER(lowering_start);
    195342        FactorizeDFG::transform(*function);
    196     }
     343        READ_CYCLE_COUNTER(lowering_end);
     344    }
     345    if (EnablePrePassScheduling) {
     346        READ_CYCLE_COUNTER(scheduling_start);
     347        SchedulingPrePass::optimize(*function);
     348        READ_CYCLE_COUNTER(scheduling_end);
     349    }
     350#endif
     351#ifdef PRINT_TIMING_INFORMATION
     352    const timestamp_t optimization_end = read_cycle_counter();
    197353#endif
    198354    if (PrintOptimizedREcode) {
    199         PabloVerifier::verify(*function, "post-optimization");
    200         //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
    201         llvm::raw_os_ostream cerr(std::cerr);
    202         cerr << "Final Pablo AST:\n";
    203         PabloPrinter::print(*function, cerr);
    204     }
     355        if (PabloOutputFilename.empty()) {
     356            //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
     357            llvm::raw_os_ostream cerr(std::cerr);
     358            cerr << "Final Pablo AST:\n";
     359            PabloPrinter::print(*function, cerr);
     360        } else {
     361            std::error_code error;
     362            llvm::raw_fd_ostream out(PabloOutputFilename, error, sys::fs::OpenFlags::F_None);
     363            PabloPrinter::print(*function, out);
     364        }
     365    }
     366#ifdef PRINT_TIMING_INFORMATION
     367    std::cerr << "PABLO OPTIMIZATION TIME: " << (optimization_end - optimization_start) << std::endl;
     368    std::cerr << "  SIMPLIFICATION TIME: " << (simplification_end - simplification_start) << std::endl;
     369    std::cerr << "  COALESCING TIME: " << (coalescing_end - coalescing_start) << std::endl;
     370    std::cerr << "  SINKING TIME: " << (sinking_end - sinking_start) << std::endl;
     371    std::cerr << "  PRE-DISTRIBUTION TIME: " << (pre_distribution_end - pre_distribution_start) << std::endl;
     372    std::cerr << "  MULTIPLEXING TIME: " << (multiplexing_end - multiplexing_start) << std::endl;
     373    std::cerr << "  MULTIPLEXING SEED: " << MultiplexingPass::SEED << std::endl;
     374    std::cerr << "  MULTIPLEXING NODES USED: " << MultiplexingPass::NODES_USED << std::endl;
     375    std::cerr << "  MULTIPLEXING NODES ALLOCATED: " << MultiplexingPass::NODES_ALLOCATED << std::endl;
     376    std::cerr << "  LOWERING TIME: " << (lowering_end - lowering_start) << std::endl;
     377    std::cerr << "  POST-DISTRIBUTION TIME: " << (post_distribution_end - post_distribution_start) << std::endl;
     378    std::cerr << "  SCHEDULING TIME: " << (scheduling_end - scheduling_start) << std::endl;
     379    std::cerr << "PABLO STATEMENTS: " << COUNT_STATEMENTS(function) << std::endl;
     380    std::cerr << "PABLO ADVANCES: " << COUNT_ADVANCES(function) << std::endl;
     381    std::cerr << "PRE-LOWERING VARIADIC DISTRIBUTION: ";
     382    bool join = false;
     383    for (auto dist : distribution) {
     384        if (join) {
     385            std::cerr << ';';
     386        }
     387        std::cerr << dist.first << '|' << dist.second;
     388        join = true;
     389    }
     390    std::cerr << std::endl;
     391#endif
    205392}
    206393
     
    242429    builder.setErrorStr(&errMessage);
    243430    builder.setMCPU(sys::getHostCPUName());
    244     builder.setOptLevel(CodeGenOpt::Level::None);
     431    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
     432    switch (OptLevel) {
     433        case '0': optLevel = CodeGenOpt::None; break;
     434        case '1': optLevel = CodeGenOpt::Less; break;
     435        case '2': optLevel = CodeGenOpt::Default; break;
     436        case '3': optLevel = CodeGenOpt::Aggressive; break;
     437        default: errs() << OptLevel << " is an invalid optimization level.\n";
     438    }
     439    builder.setOptLevel(optLevel);
    245440
    246441    if (!DisableAVX2 && (strncmp(lGetSystemISA(), "avx2", 4) == 0)) {
Note: See TracChangeset for help on using the changeset viewer.