Changeset 4288 for icGREP


Ignore:
Timestamp:
Nov 2, 2014, 5:06:13 PM (5 years ago)
Author:
cameron
Message:

Separate Advance Queue from Carry Queue

Location:
icGREP/icgrep-devel/icgrep
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r4287 r4288  
    6767using namespace std;
    6868
    69 typedef void (*process_block_fcn)(const Basis_bits &basis_bits, BitBlock carry_q[], Output &output);
     69typedef void (*process_block_fcn)(const Basis_bits &basis_bits, BitBlock carry_q[], BitBlock advance_q[], Output &output);
    7070
    7171
    7272#define USE_MMAP
    7373#ifndef USE_MMAP
    74 void do_process(FILE *infile, FILE *outfile, int count_only_option, int carry_count, process_block_fcn process_block);
    75 #endif
    76 #ifdef USE_MMAP
    77 void do_process(char * infile_buffer, size_t infile_size, FILE *outfile, int count_only_option, int carry_count, process_block_fcn process_block);
     74void do_process(FILE *infile, FILE *outfile, int count_only_option, int carry_count, int advance_count, process_block_fcn process_block);
     75#endif
     76#ifdef USE_MMAP
     77void do_process(char * infile_buffer, size_t infile_size, FILE *outfile, int count_only_option, int carry_count, int advance_count, process_block_fcn process_block);
    7878#endif
    7979
     
    247247    if (llvm_codegen.process_block_fptr != 0)
    248248    {
    249         void (*FP)(const Basis_bits &basis_bits, BitBlock carry_q[], Output &output) = (void (*)(const Basis_bits &basis_bits, BitBlock carry_q[], Output &output))(void*)llvm_codegen.process_block_fptr;
    250 #ifndef USE_MMAP
    251         do_process(infile, outfile, count_only_option, llvm_codegen.carry_q_size, FP);
    252 #endif
    253 #ifdef USE_MMAP
    254         do_process(infile_buffer, infile_sb.st_size, outfile, count_only_option, llvm_codegen.carry_q_size, FP);
     249        void (*FP)(const Basis_bits &basis_bits, BitBlock carry_q[], BitBlock advance_q[], Output &output) = (void (*)(const Basis_bits &basis_bits, BitBlock carry_q[], BitBlock advance_q[], Output &output))(void*)llvm_codegen.process_block_fptr;
     250#ifndef USE_MMAP
     251        do_process(infile, outfile, count_only_option, llvm_codegen.carry_q_size, llvm_codegen.advance_q_size, FP);
     252#endif
     253#ifdef USE_MMAP
     254        do_process(infile_buffer, infile_sb.st_size, outfile, count_only_option, llvm_codegen.carry_q_size, llvm_codegen.advance_q_size, FP);
    255255#endif
    256256    }
     
    315315
    316316#ifndef USE_MMAP
    317 void do_process(FILE *infile, FILE *outfile, int count_only_option, int carry_count, process_block_fcn process_block) {
    318 #endif
    319 #ifdef USE_MMAP
    320 void do_process(char * infile_buffer, size_t infile_size, FILE *outfile, int count_only_option, int carry_count, process_block_fcn process_block) {
     317void do_process(FILE *infile, FILE *outfile, int count_only_option, int carry_count, int advance_count, process_block_fcn process_block) {
     318#endif
     319#ifdef USE_MMAP
     320void do_process(char * infile_buffer, size_t infile_size, FILE *outfile, int count_only_option, int carry_count, int advance_count, process_block_fcn process_block) {
    321321#endif
    322322
     
    325325    BitBlock match_vector;
    326326    BitBlock carry_q[carry_count];
     327    BitBlock advance_q[advance_count];
    327328    int match_count=0;
    328329    int blk = 0;
     
    342343    match_vector = simd<1>::constant<0>();
    343344    memset (carry_q, 0, sizeof(BitBlock) * carry_count);
     345    memset (advance_q, 0, sizeof(BitBlock) * advance_count);
    344346
    345347    char * buffer_ptr;
     
    380382            s2p_do_block((BytePack *) &infile_buffer[block_base], basis_bits);
    381383#endif
    382             process_block(basis_bits, carry_q, output);
     384            process_block(basis_bits, carry_q, advance_q, output);
    383385
    384386            LF_scanner.load_block(output.LF, blk);
     
    406408            copy_back_pos = LF_scanner.get_final_pos() + 1;
    407409            memset (carry_q, 0, sizeof(BitBlock) * carry_count);
     410            memset (advance_q, 0, sizeof(BitBlock) * advance_count);
    408411        }
    409412        else {
     
    461464        s2p_do_block((BytePack *) &infile_buffer[block_base], basis_bits);
    462465#endif
    463         process_block(basis_bits, carry_q, output);
     466        process_block(basis_bits, carry_q, advance_q, output);
    464467
    465468        LF_scanner.load_block(output.LF, blk);
     
    498501     s2p_do_final_block((BytePack *) &infile_buffer[block_base], basis_bits, EOF_mask);
    499502#endif
    500     process_block(basis_bits, carry_q, output);
     503    process_block(basis_bits, carry_q, advance_q, output);
    501504
    502505    if (count_only_option)
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r4284 r4288  
    123123, mNestingDepth(0)
    124124, mCarryQueueSize(0)
     125, mAdvanceQueueIdx(0)
     126, mAdvanceQueuePtr(nullptr)
     127, mAdvanceQueueSize(0)
    125128, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
    126129, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
     
    179182    mMaxNestingDepth = 0;
    180183    mCarryQueueSize = 0;
     184    mAdvanceQueueSize = 0;
    181185    Examine(pb.statements());
    182186    mCarryQueueVector.resize(mCarryQueueSize);
    183 
     187    mAdvanceQueueVector.resize(mAdvanceQueueSize);
    184188    std::string errMessage;
    185189    EngineBuilder builder(mMod);
     
    202206    mCarryQueuePtr = args++;
    203207    mCarryQueuePtr->setName("carry_q");
     208    mAdvanceQueuePtr = args++;
     209    mAdvanceQueuePtr->setName("advance_q");
    204210    mOutputAddrPtr = args++;
    205211    mOutputAddrPtr->setName("output");
    206212
    207     //Create the carry queue.
     213    //Create the carry and advance queues.
    208214    mCarryQueueIdx = 0;
     215    mAdvanceQueueIdx = 0;
    209216    mNestingDepth = 0;
    210217    mMaxNestingDepth = 0;
     
    220227        mMarkerMap.insert(std::make_pair(name, basisBit));
    221228    }
    222 
     229   
    223230    //Generate the IR instructions for the function.
    224231    compileStatements(pb.statements());
    225232
    226233    assert (mCarryQueueIdx == mCarryQueueSize);
     234    assert (mAdvanceQueueIdx == mAdvanceQueueSize);
    227235    assert (mNestingDepth == 0);
    228236    //Terminate the block
     
    262270    //Return the required size of the carry queue and a pointer to the process_block function.
    263271    retVal.carry_q_size = mCarryQueueSize;
     272    retVal.advance_q_size = mAdvanceQueueSize;
    264273    retVal.process_block_fptr = mExecutionEngine->getPointerToFunction(mFunction);
    265274
     
    288297    //The carry q array.
    289298    //A pointer to the BitBlock vector.
     299    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
     300        // Advance q array
    290301    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
    291302
     
    362373
    363374    //Starts on process_block
    364     SmallVector<AttributeSet, 4> Attrs;
     375    SmallVector<AttributeSet, 5> Attrs;
    365376    AttributeSet PAS;
    366377    {
     
    381392        B.addAttribute(Attribute::NoCapture);
    382393        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
     394    }
     395    Attrs.push_back(PAS);
     396    {
     397        AttrBuilder B;
     398        B.addAttribute(Attribute::NoCapture);
     399        PAS = AttributeSet::get(mMod->getContext(), 4U, B);
    383400    }
    384401    Attrs.push_back(PAS);
     
    413430        else if (If * ifStatement = dyn_cast<If>(stmt)) {
    414431            const auto preIfCarryCount = mCarryQueueSize;
     432            const auto preIfAdvanceCount = mAdvanceQueueSize;
    415433            Examine(ifStatement->getCondition());
    416434            mMaxNestingDepth = std::max(mMaxNestingDepth, ++mNestingDepth);
     
    418436            --mNestingDepth;
    419437            ifStatement->setInclusiveCarryCount(mCarryQueueSize - preIfCarryCount);
     438            ifStatement->setInclusiveAdvanceCount(mAdvanceQueueSize - preIfAdvanceCount);
    420439        }
    421440        else if (While * whileStatement = dyn_cast<While>(stmt)) {
    422441            const auto preWhileCarryCount = mCarryQueueSize;
     442            const auto preWhileAdvanceCount = mAdvanceQueueSize;
    423443            Examine(whileStatement->getCondition());
    424444            mMaxNestingDepth = std::max(mMaxNestingDepth, ++mNestingDepth);
     
    426446            --mNestingDepth;
    427447            whileStatement->setInclusiveCarryCount(mCarryQueueSize - preWhileCarryCount);
     448            whileStatement->setInclusiveAdvanceCount(mAdvanceQueueSize - preWhileAdvanceCount);
    428449        }
    429450    }
     
    452473    }
    453474    else if (Advance * adv = dyn_cast<Advance>(expr)) {
    454         ++mCarryQueueSize;
     475        ++mAdvanceQueueSize;
    455476        Examine(adv->getExpr());
    456477    }
     
    550571
    551572        int if_start_idx = mCarryQueueIdx;
     573        int if_start_idx_advance = mAdvanceQueueIdx;
    552574
    553575        Value* if_test_value = compileExpression(ifstmt->getCondition());
     
    564586
    565587        int if_end_idx = mCarryQueueIdx;
     588        int if_end_idx_advance = mAdvanceQueueIdx;
    566589        if (if_start_idx < if_end_idx + 1) {
    567590            // Have at least two internal carries.   Accumulate and store.
     
    576599            }
    577600            genCarryOutStore(if_carry_accum_value, if_accum_idx);
     601
     602        }
     603        if (if_start_idx_advance < if_end_idx_advance + 1) {
     604            // Have at least two internal advances.   Accumulate and store.
     605            int if_accum_idx = mAdvanceQueueIdx++;
     606
     607            Value* if_advance_accum_value = genAdvanceInLoad(if_start_idx_advance);
     608
     609            for (int c = if_start_idx_advance+1; c < if_end_idx_advance; c++)
     610            {
     611                Value* advance_q_value = genAdvanceInLoad(c);
     612                if_advance_accum_value = bIfBody.CreateOr(advance_q_value, if_advance_accum_value);
     613            }
     614            genAdvanceOutStore(if_advance_accum_value, if_accum_idx);
    578615
    579616        }
     
    588625            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_carries);
    589626        }
     627        if (if_start_idx_advance < if_end_idx_advance) {
     628            // Have at least one internal advance.
     629            int if_accum_idx = mAdvanceQueueIdx - 1;
     630            Value* last_if_pending_advances = genAdvanceInLoad(if_accum_idx);
     631            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_advances);
     632        }
    590633        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
    591634
     
    598641    {
    599642        const auto baseCarryQueueIdx = mCarryQueueIdx;
     643        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
    600644        if (mNestingDepth == 0) {
    601645            for (auto i = 0; i != whileStatement->getInclusiveCarryCount(); ++i) {
    602646                genCarryInLoad(baseCarryQueueIdx + i);
     647            }
     648            for (auto i = 0; i != whileStatement->getInclusiveAdvanceCount(); ++i) {
     649                genAdvanceInLoad(baseAdvanceQueueIdx + i);
    603650            }
    604651        }       
     
    624671        // but works for now.
    625672        mCarryQueueIdx = baseCarryQueueIdx;
     673        mAdvanceQueueIdx = baseAdvanceQueueIdx;
    626674
    627675        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
     
    637685        IRBuilder<> bCond(whileCondBlock);
    638686        // generate phi nodes for any carry propagating instruction
    639         std::vector<PHINode*> phiNodes(whileStatement->getInclusiveCarryCount() + nextNodes.size());
     687        int whileCarryCount = whileStatement->getInclusiveCarryCount();
     688        int whileAdvanceCount = whileStatement->getInclusiveAdvanceCount();
     689        std::vector<PHINode*> phiNodes(whileCarryCount + whileAdvanceCount + nextNodes.size());
    640690        unsigned index = 0;
    641         for (index = 0; index != whileStatement->getInclusiveCarryCount(); ++index) {
     691        for (index = 0; index != whileCarryCount; ++index) {
    642692            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
    643693            phi->addIncoming(mCarryQueueVector[baseCarryQueueIdx + index], mBasicBlock);
    644694            mCarryQueueVector[baseCarryQueueIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
    645695            phiNodes[index] = phi;
     696        }
     697        for (int i = 0; i != whileAdvanceCount; ++i) {
     698            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
     699            phi->addIncoming(mAdvanceQueueVector[baseAdvanceQueueIdx + i], mBasicBlock);
     700            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = mZeroInitializer; // (use phi for multi-carry mode.)
     701            phiNodes[index++] = phi;
    646702        }
    647703        // and for any Next nodes in the loop body
     
    669725            mCarryQueueVector[baseCarryQueueIdx + index] = phi;
    670726        }
     727        for (int i = 0; i != whileAdvanceCount; ++i) {
     728            Value * advOut = bWhileBody.CreateOr(phiNodes[index], mAdvanceQueueVector[baseAdvanceQueueIdx + i]);
     729            PHINode * phi = phiNodes[index++];
     730            phi->addIncoming(advOut, mBasicBlock);
     731            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = phi;
     732        }
    671733        // and for any Next nodes in the loop body
    672734        for (const Next * n : nextNodes) {
     
    683745        mBasicBlock = whileEndBlock;   
    684746        if (--mNestingDepth == 0) {
    685             for (index = 0; index != whileStatement->getInclusiveCarryCount(); ++index) {
     747            for (index = 0; index != whileCarryCount; ++index) {
    686748                genCarryOutStore(phiNodes[index], baseCarryQueueIdx + index);
     749            }
     750            for (index = 0; index != whileAdvanceCount; ++index) {
     751                genAdvanceOutStore(phiNodes[whileCarryCount + index], baseAdvanceQueueIdx + index);
    687752            }
    688753        }
     
    845910}
    846911
     912Value* PabloCompiler::genAdvanceInLoad(const unsigned index) {   
     913    assert (index < mAdvanceQueueVector.size());
     914    if (mNestingDepth == 0) {
     915        IRBuilder<> b(mBasicBlock);
     916        mAdvanceQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
     917    }
     918    return mAdvanceQueueVector[index];
     919}
     920
     921void PabloCompiler::genAdvanceOutStore(Value* advanceOut, const unsigned index ) {
     922    assert (advanceOut);
     923    assert (index < mAdvanceQueueVector.size());
     924    if (mNestingDepth == 0) {       
     925        IRBuilder<> b(mBasicBlock);
     926        b.CreateAlignedStore(advanceOut, b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
     927    }
     928    mAdvanceQueueVector[index] = advanceOut;
     929}
     930
    847931inline Value* PabloCompiler::genBitBlockAny(Value* test) {
    848932    IRBuilder<> b(mBasicBlock);
     
    872956    IRBuilder<> b(mBasicBlock);
    873957#if (BLOCK_SIZE == 128)
    874     const auto carryIdx = mCarryQueueIdx++;
     958    const auto advanceIdx = mAdvanceQueueIdx++;
    875959    if (shift_amount == 1) {
    876         Value* carryq_value = genCarryInLoad(carryIdx);
     960        Value* advanceq_value = genAdvanceInLoad(advanceIdx);
    877961        Value* srli_1_value = b.CreateLShr(strm_value, 63);
    878962        Value* packed_shuffle;
    879963        Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
    880964        Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
    881         packed_shuffle = b.CreateShuffleVector(carryq_value, srli_1_value, const_packed_1);
     965        packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
    882966
    883967        Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
     
    887971        Value* result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
    888972
    889         Value* carry_out = genShiftHighbitToLow(strm_value, "carry_out");
     973        Value* advance_out = genShiftHighbitToLow(strm_value, "advance_out");
    890974        //CarryQ - carry out:
    891         genCarryOutStore(carry_out, carryIdx);
     975        genAdvanceOutStore(advance_out, advanceIdx);
    892976           
    893977        return result_value;
     
    896980        // This is the preferred logic, but is too slow for the general case.   
    897981        // We need to speed up our custom LLVM for this code.
    898         Value* carryq_longint = b.CreateBitCast(genCarryInLoad(carryIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
     982        Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(advanceIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
    899983        Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
    900         Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), carryq_longint, "advance");
    901     Value* result_value = b.CreateBitCast(adv_longint, mBitBlockType);
    902     Value* carry_out = b.CreateBitCast(b.CreateLShr(strm_longint, BLOCK_SIZE - shift_amount, "advance_out"), mBitBlockType);
    903         //CarryQ - carry out:
    904         genCarryOutStore(carry_out, carryIdx);
     984        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), advanceq_longint, "advance");
     985        Value* result_value = b.CreateBitCast(adv_longint, mBitBlockType);
     986        Value* advance_out = b.CreateBitCast(b.CreateLShr(strm_longint, BLOCK_SIZE - shift_amount, "advance_out"), mBitBlockType);
     987        genAdvanceOutStore(advance_out, advanceIdx);
    905988           
    906989        return result_value;
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.h

    r4276 r4288  
    4040{
    4141    int carry_q_size;
     42    int advance_q_size;
    4243    void *process_block_fptr;
    4344};
     
    7172    Value* genCarryInLoad(const unsigned index);
    7273    void   genCarryOutStore(Value* carryOut, const unsigned index);
     74    Value* genAdvanceInLoad(const unsigned index);
     75    void   genAdvanceOutStore(Value* advanceOut, const unsigned index);
    7376    Value* genAddWithCarry(Value* e1, Value* e2);
    7477    Value* genAdvanceWithCarry(Value* e1, int shift_amount);
     
    8588    StringToValueMap                    mMarkerMap;
    8689    CarryQueueVector                    mCarryQueueVector;
     90    CarryQueueVector                    mAdvanceQueueVector;
    8791
    8892    const std::vector<Var *> &          mBasisBits;
     
    99103    unsigned                            mNestingDepth;
    100104    unsigned                            mCarryQueueSize;
     105
     106    unsigned                            mAdvanceQueueIdx;
     107    Value*                              mAdvanceQueuePtr;
     108    unsigned                            mAdvanceQueueSize;
    101109
    102110    ConstantAggregateZero* const        mZeroInitializer;
  • icGREP/icgrep-devel/icgrep/pablo/ps_if.h

    r4284 r4288  
    4949        return mCarryCount;
    5050    }
     51    inline void setInclusiveAdvanceCount(const unsigned count) {
     52        mAdvanceCount = count;
     53    }
     54    inline unsigned getInclusiveAdvanceCount() const {
     55        return mAdvanceCount;
     56    }
    5157protected:
    5258    If(PabloAST * expr, StatementList && body, StatementList * parent)
     
    5561    , mBody(std::move(body))
    5662    , mCarryCount(0)
     63    , mAdvanceCount(0)
    5764    {
    5865        for (Statement * s : mBody) {
     
    6471    StatementList       mBody;
    6572    unsigned            mCarryCount;
     73    unsigned            mAdvanceCount;
    6674};
    6775
  • icGREP/icgrep-devel/icgrep/pablo/ps_while.h

    r4284 r4288  
    4949        return mCarryCount;
    5050    }
     51    inline void setInclusiveAdvanceCount(const unsigned count) {
     52        mAdvanceCount = count;
     53    }
     54    inline unsigned getInclusiveAdvanceCount() const {
     55        return mAdvanceCount;
     56    }
    5157protected:
    5258    While(PabloAST * expr, StatementList && body, StatementList * parent)
     
    5561    , mBody(std::move(body))
    5662    , mCarryCount(0)
     63    , mAdvanceCount(0)
    5764    {
    5865        for (Statement * s : mBody) {
     
    6471    StatementList       mBody;
    6572    unsigned            mCarryCount;
     73    unsigned            mAdvanceCount;
    6674};
    6775
Note: See TracChangeset for help on using the changeset viewer.