Changeset 4890


Ignore:
Timestamp:
Dec 9, 2015, 4:59:02 PM (3 years ago)
Author:
nmedfort
Message:

Continued work on multiplexing pass.

Location:
icGREP/icgrep-devel/icgrep
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.cpp

    r4820 r4890  
    179179            /*
    180180              If the hi_bit of n is not set, then whenever the corresponding bit
    181               is set in the target, the target will certaily be >=.  Oterwise,
     181              is set in the target, the target will certaily be >=.  Otherwise,
    182182              the value of GE_range(N-1), lo_range) is required.
    183183            */
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r4885 r4890  
    490490pablo/passes/factorizedfg.h
    491491pablo/passes/factorizedfg.cpp
     492ispc.cpp
  • icGREP/icgrep-devel/icgrep/pablo/codegenstate.cpp

    r4886 r4890  
    293293And * PabloBlock::createAnd(const unsigned reserved) {
    294294    return insertAtInsertionPoint(new And(reserved, makeName("and_")));
     295}
     296
     297And * PabloBlock::createAnd(const unsigned reserved, const std::string prefix) {
     298    return insertAtInsertionPoint(new And(reserved, makeName(prefix, false)));
    295299}
    296300
     
    385389}
    386390
     391Or * PabloBlock::createOr(const unsigned reserved, const std::string prefix) {
     392    return insertAtInsertionPoint(new Or(reserved, makeName(prefix, false)));
     393}
     394
    387395PabloAST * PabloBlock::createXor(PabloAST * expr1, PabloAST * expr2) {
    388396    assert (expr1 && expr2);
     
    429437Xor * PabloBlock::createXor(const unsigned reserved) {
    430438    return insertAtInsertionPoint(new Xor(reserved, makeName("xor_")));
     439}
     440
     441Xor * PabloBlock::createXor(const unsigned reserved, const std::string prefix) {
     442    return insertAtInsertionPoint(new Xor(reserved, makeName(prefix, false)));
    431443}
    432444
     
    452464        return createXor(condition, falseExpr);
    453465    } else if (isa<Not>(falseExpr) && equals(trueExpr, cast<Not>(falseExpr)->getOperand(0))){
    454         return createXor(condition, falseExpr);
     466        return createXor(condition, trueExpr);
    455467    }
    456468    return insertAtInsertionPoint(new Sel(condition, trueExpr, falseExpr, makeName("sel")));
     
    481493    }
    482494    else if (isa<Not>(falseExpr) && equals(trueExpr, cast<Not>(falseExpr)->getOperand(0))){
    483         return createXor(condition, falseExpr, prefix);
     495        return createXor(condition, trueExpr, prefix);
    484496    }
    485497    return insertAtInsertionPoint(new Sel(condition, trueExpr, falseExpr, makeName(prefix, false)));
  • icGREP/icgrep-devel/icgrep/pablo/codegenstate.h

    r4886 r4890  
    109109    And * createAnd(const unsigned reserved);
    110110
     111    And * createAnd(const unsigned reserved, const std::string prefix);
     112
    111113    And * createAnd(std::vector<PabloAST *>::iterator begin, std::vector<PabloAST *>::iterator end) {
    112114        return insertAtInsertionPoint(new And(begin, end, makeName("and_")));
     
    127129    Or * createOr(const unsigned reserved);
    128130
     131    Or * createOr(const unsigned reserved, const std::string prefix);
     132
    129133    PabloAST * createXor(PabloAST * expr1, PabloAST * expr2);
    130134
     
    136140
    137141    Xor * createXor(const unsigned reserved);
     142
     143    Xor * createXor(const unsigned reserved, const std::string prefix);
    138144
    139145    PabloAST * createMatchStar(PabloAST * marker, PabloAST * charclass);
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_automultiplexing.cpp

    r4888 r4890  
    1515#include <bdd.h>
    1616
     17/// TODO: Investigate why ./icgrep -c -multiplexing-window-size=13,14...,20 "^\p{l}$" causes segfault in BuDDy.
     18
    1719using namespace llvm;
    1820using namespace boost;
     
    106108//    const auto seed = rd();
    107109    const auto seed = 83234827342;
    108     RNG rng(seed);
    109110
    110111    LOG("Seed:                    " << seed);
    111112
    112     MultiplexingPass mp(limit, maxSelections, windowSize);
     113    MultiplexingPass mp(seed, limit, maxSelections, windowSize);
    113114    RECORD_TIMESTAMP(start_initialize);
    114115    const unsigned advances = mp.initialize(function, independent);
     
    127128    RECORD_TIMESTAMP(end_characterize);
    128129
    129     LOG("Characterize:            " << (end_characterize - start_characterize));
    130 
    131     LOG("Nodes in BDD:            " << bdd_getnodenum() << " of " << bdd_getallocnum());
     130    LOG("Characterize:             " << (end_characterize - start_characterize));
     131
     132    LOG("Nodes in BDD:             " << bdd_getnodenum() << " of " << bdd_getallocnum());
    132133
    133134    RECORD_TIMESTAMP(start_shutdown);
    134135    bdd_done();
    135136    RECORD_TIMESTAMP(end_shutdown);
    136     LOG("Shutdown:                " << (end_shutdown - start_shutdown));
     137    LOG("Shutdown:                 " << (end_shutdown - start_shutdown));
    137138
    138139    RECORD_TIMESTAMP(start_create_multiplex_graph);
    139     const bool multiplex = mp.generateCandidateSets(rng);
     140    const bool multiplex = mp.generateCandidateSets();
    140141    RECORD_TIMESTAMP(end_create_multiplex_graph);
    141     LOG("GenerateCandidateSets:   " << (end_create_multiplex_graph - start_create_multiplex_graph));
     142    LOG("GenerateCandidateSets:    " << (end_create_multiplex_graph - start_create_multiplex_graph));
    142143
    143144    if (multiplex) {
    144145
     146        RECORD_TIMESTAMP(start_usage_weighting);
     147        mp.generateUsageWeightingGraph();
     148        RECORD_TIMESTAMP(end_usage_weighting);
     149        LOG("GenerateUsageWeighting:   " << (end_usage_weighting - start_usage_weighting));
     150
    145151        RECORD_TIMESTAMP(start_select_multiplex_sets);
    146         mp.selectMultiplexSets(rng);
     152        mp.selectMultiplexSets();
    147153        RECORD_TIMESTAMP(end_select_multiplex_sets);
    148         LOG("SelectMultiplexSets:     " << (end_select_multiplex_sets - start_select_multiplex_sets));
     154        LOG("SelectMultiplexSets:      " << (end_select_multiplex_sets - start_select_multiplex_sets));
    149155
    150156        RECORD_TIMESTAMP(start_subset_constraints);
    151157        mp.eliminateSubsetConstraints();
    152158        RECORD_TIMESTAMP(end_subset_constraints);
    153         LOG("ApplySubsetConstraints:  " << (end_subset_constraints - start_subset_constraints));
     159        LOG("ApplySubsetConstraints:   " << (end_subset_constraints - start_subset_constraints));
    154160
    155161        RECORD_TIMESTAMP(start_select_independent_sets);
    156162        mp.multiplexSelectedIndependentSets(function);
    157163        RECORD_TIMESTAMP(end_select_independent_sets);
    158         LOG("SelectedIndependentSets: " << (end_select_independent_sets - start_select_independent_sets));
    159 
     164        LOG("SelectedIndependentSets:  " << (end_select_independent_sets - start_select_independent_sets));
     165
     166        RECORD_TIMESTAMP(start_topological_sort);
    160167        MultiplexingPass::topologicalSort(function);
     168        RECORD_TIMESTAMP(end_topological_sort);
     169        LOG("TopologicalSort:          " << (end_topological_sort - start_topological_sort));
     170
    161171        #ifndef NDEBUG
    162172        PabloVerifier::verify(function, "post-multiplexing");
     
    165175
    166176    LOG_NUMBER_OF_ADVANCES(function.getEntryBlock());
     177
    167178    return multiplex;
    168179}
     
    310321    assert (k == advances);
    311322
    312     // Compute the base constraint graph as the union of TRANSPOSE(G) without any reflective loops and the set of edges
    313     // marking which advances are in differing scope blocks.
    314 
     323    // Initialize the base constraint graph by effectively transposing G and removing reflective loops
    315324    mConstraintGraph = ConstraintGraph(advances);
    316325    for (unsigned i = 0; i != advances; ++i) {
     
    332341 * @brief initializeAdvanceDepth
    333342 ** ------------------------------------------------------------------------------------------------------------- */
    334 inline void MultiplexingPass::initializeAdvanceDepth(PabloBlock * const block, const unsigned advances) {
     343inline void MultiplexingPass::initializeAdvanceDepth(PabloBlock * const entryBlock, const unsigned advances) {
    335344
    336345    std::stack<Statement *> scope;
    337346    unsigned k = 0;
    338347    int maxDepth = 0;
    339     const Advance * advance[advances];
     348    const PabloBlock * advanceScope[advances];
     349    mAdvance.resize(advances, nullptr);
    340350    mAdvanceDepth.resize(advances, 0);
    341     for (Statement * stmt = block->front(); ; ) {
     351    mAdvanceNegatedVariable.reserve(advances);
     352    for (Statement * stmt = entryBlock->front(); ; ) {
    342353        while ( stmt ) {
    343354            if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     
    349360            } else if (LLVM_UNLIKELY(isa<Advance>(stmt))) {
    350361                int depth = 0;
    351                 advance[k] = cast<Advance>(stmt);
     362                mAdvance[k] = cast<Advance>(stmt);
     363                advanceScope[k] = cast<Advance>(stmt)->getParent();
    352364                for (unsigned i = 0; i != k; ++i) {
    353                     if (edge(i, k, mConstraintGraph).second || (advance[i]->getParent() != advance[k]->getParent())) {
     365                    if (edge(i, k, mConstraintGraph).second || (advanceScope[i] != advanceScope[k])) {
    354366                        depth = std::max<int>(depth, mAdvanceDepth[i]);
    355367                    }
     
    382394            characterize(cast<If>(stmt)->getBody());
    383395        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
    384             const auto & variants = cast<While>(stmt)->getVariants();
    385             std::vector<BDD> assignments(variants.size());
    386             for (unsigned i = 0; i != variants.size(); ++i) {
    387                 assignments[i] = get(variants[i]->getInitial());
    388             }
    389396            characterize(cast<While>(stmt)->getBody());
    390             for (unsigned i = 0; i != variants.size(); ++i) {
    391                 BDD & var = get(variants[i]->getInitial());
    392                 var = bdd_addref(bdd_or(var, assignments[i]));
     397            for (const Next * var : cast<While>(stmt)->getVariants()) {
     398                BDD & assign = get(var->getInitial());
     399                assign = bdd_addref(bdd_or(assign, get(var)));
    393400            }
    394401        } else {
     
    413420 ** ------------------------------------------------------------------------------------------------------------- */
    414421inline BDD MultiplexingPass::characterize(Statement * const stmt) {
     422    assert (stmt->getNumOperands() > 0);
    415423    BDD bdd = get(stmt->getOperand(0));
    416424    switch (stmt->getClassTypeId()) {
     
    457465 ** ------------------------------------------------------------------------------------------------------------- */
    458466inline BDD MultiplexingPass::characterize(Advance * const adv, const BDD Ik) {
    459     const auto k = mAdvanceAttributes.size();
     467    const auto k = mAdvanceNegatedVariable.size();
     468    assert (mAdvance[k] == adv);
    460469    std::vector<bool> unconstrained(k , false);
    461470    for (unsigned i = 0; i != k; ++i) {
     471
    462472        // Are we interested in testing these streams to see whether they are mutually exclusive?
    463473        if (exceedsWindowSize(i, k)) continue;
     474
    464475        // Have we already proven that they are unconstrained by their subset relationship?
    465476        if (unconstrained[i]) continue;
     477
    466478        // If these Advances are mutually exclusive, in the same scope, transitively independent, and shift their
    467479        // values by the same amount, we can safely multiplex them. Otherwise mark the constraint in the graph.
    468         const Advance * const ithAdv = std::get<0>(mAdvanceAttributes[i]);
     480        const Advance * const ithAdv = mAdvance[i];
    469481        if ((mTestConstrainedAdvances || independent(i, k)) && (ithAdv->getOperand(1) == adv->getOperand(1))) {
    470482            const BDD Ii = get(ithAdv->getOperand(0));
     
    506518    }
    507519
    508     BDD Ck = bdd_ithvar(mVariables++);
    509     const BDD Nk = bdd_addref(bdd_not(Ck));
     520    BDD Ak = bdd_ithvar(mVariables++);
     521    const BDD Nk = bdd_addref(bdd_not(Ak));
    510522    for (unsigned i = 0; i != k; ++i) {
    511523        if (unconstrained[i]) {
    512             const Advance * const ithAdv = std::get<0>(mAdvanceAttributes[i]);
    513             const BDD Ni = std::get<1>(mAdvanceAttributes[i]);
    514 
    515             BDD & Ci = get(ithAdv);
    516             Ci = bdd_addref(bdd_and(Ci, Nk));
    517             Ck = bdd_addref(bdd_and(Ck, Ni));
    518             if ((!mTestConstrainedAdvances || independent(i, k)) && (adv->getParent() == ithAdv->getParent())) {
     524            // Note: this algorithm deems two streams are mutually exclusive if and only if the conjuntion of their BDDs results
     525            // in a contradiction. To generate a contradiction when comparing Advances, the BDD of each Advance is represented by
     526            // the conjunction of variable representing that Advance and the negation of all variables for the Advances in which
     527            // the inputs are deemed to be mutually exclusive. For example, if the input of the i-th Advance is mutually exclusive
     528            // with the input of the j-th and k-th Advance, the BDD of the i-th Advance is Ai ∧ ¬Aj ∧ ¬Ak.
     529            const Advance * const ithAdv = mAdvance[i];
     530            const BDD Ni = mAdvanceNegatedVariable[i];
     531            BDD & Ai = get(ithAdv);
     532            Ai = bdd_addref(bdd_and(Ai, Nk));
     533            Ak = bdd_addref(bdd_and(Ak, Ni));
     534            if (independent(i, k) && (adv->getParent() == ithAdv->getParent())) {
    519535                continue;
    520536            }
     
    522538        add_edge(i, k, mConstraintGraph);
    523539    }
    524     mAdvanceAttributes.emplace_back(adv, Nk);
    525     return Ck;
     540    // To minimize the number of BDD computations, store the negated variable instead of negating it each time.
     541    mAdvanceNegatedVariable.emplace_back(Nk);
     542    return Ak;
    526543}
    527544
     
    543560
    544561/** ------------------------------------------------------------------------------------------------------------- *
     562 * @brief is_power_of_2
     563 * @param n an integer
     564 ** ------------------------------------------------------------------------------------------------------------- */
     565static inline bool is_power_of_2(const size_t n) {
     566    return ((n & (n - 1)) == 0);
     567}
     568
     569/** ------------------------------------------------------------------------------------------------------------- *
     570 * @brief generateUsageWeightingGraph
     571 *
     572 * Prior to generating our candidate sets, scan through the constraint graph and generate a clique in the usage
     573 * weighting graph each set of constraint-free Advance nodes that have the same users. We may be able optimize
     574 * the demultiplexing calculations using range expressions.
     575 *
     576 * Note: it'd be preferable to contract vertices in the constraint graph prior to scanning through it but that
     577 * leaves us with a more difficult problem. Namely, an Advance node may belong to more than one clique and we
     578 * want to avoid generating a multiplexing set whose size is 2^n for any n ∈ â„€* but don't want to needlessly
     579 * limit the size of any clique. Look into this further later.
     580 ** ------------------------------------------------------------------------------------------------------------- */
     581void MultiplexingPass::generateUsageWeightingGraph() {
     582    const auto n = num_vertices(mConstraintGraph); assert (n > 2);
     583    // Let G be the complement of the constraint graph ∪ the subset graph restricted to only the edges corresponding
     584    // to pairs of Advances with the same users.
     585    CliqueGraph G(n);
     586    for (unsigned i = 0; i != (n - 1); ++i) {
     587        const Advance * const advI = mAdvance[i];
     588        for (unsigned j = i + 1; j != n; ++j) {
     589            const Advance * const advJ = mAdvance[j];
     590            if (LLVM_UNLIKELY(advI->getNumUsers() == advJ->getNumUsers()) && independent(i, j)) {
     591                if (LLVM_UNLIKELY(std::equal(advI->user_begin(), advI->user_end(), advJ->user_begin()))) {
     592                    // INVESTIGATE: we should be able to ignore subset relations if these are going to become a
     593                    // range expression. Look into making a proof for it once the range expression calculation
     594                    // is finished.
     595                    if (!(edge(i, j, mSubsetGraph).second || edge(j, i, mSubsetGraph).second)) {
     596                        add_edge(i, j, G);
     597                    }
     598                }
     599            }
     600        }
     601    }
     602    if (num_edges(G) > 0) {
     603        const CliqueSets S = findMaximalCliques(G);
     604        for (unsigned i = 0; i != n; ++i) {
     605            clear_vertex(i, G);
     606        }
     607        for (const std::vector<CliqueGraph::vertex_descriptor> & C : S) {
     608            const unsigned m = C.size(); assert (m > 1);
     609            for (unsigned i = 1; i != m; ++i) {
     610                for (unsigned j = 0; j != i; ++j) {
     611                    add_edge(C[j], C[i], G);
     612                }
     613            }
     614        }
     615    }
     616    mUsageWeightingGraph = G;
     617}
     618
     619/** ------------------------------------------------------------------------------------------------------------- *
    545620 * @brief generateMultiplexSets
    546  * @param RNG random number generator
    547  ** ------------------------------------------------------------------------------------------------------------- */
    548 bool MultiplexingPass::generateCandidateSets(RNG & rng) {
    549 
    550     using degree_t = graph_traits<ConstraintGraph>::degree_size_type;
     621 ** ------------------------------------------------------------------------------------------------------------- */
     622bool MultiplexingPass::generateCandidateSets() {
    551623
    552624    // What if we generated a "constraint free" graph instead? By taking each connected component of it
     
    555627
    556628    VertexVector S;
    557     std::vector<degree_t> D(num_vertices(mConstraintGraph));
     629    std::vector<ConstraintGraph::degree_size_type> D(num_vertices(mConstraintGraph));
    558630    S.reserve(15);
    559631
    560632    mMultiplexSetGraph = MultiplexSetGraph(num_vertices(mConstraintGraph));
    561633
    562     // Push all source nodes into the new independent set N
     634    // Push all source nodes into the (initial) independent set S
    563635    for (auto v : make_iterator_range(vertices(mConstraintGraph))) {
    564636        const auto d = in_degree(v, mConstraintGraph);
     
    575647    do {
    576648
    577         addCandidateSet(S, rng);
     649        addCandidateSet(S);
    578650
    579651        bool noNewElements = true;
     
    581653            assert (S.size() > 0);
    582654            // Randomly choose a vertex in S and discard it.
    583             const auto i = S.begin() + IntDistribution(0, S.size() - 1)(rng);
     655            const auto i = S.begin() + IntDistribution(0, S.size() - 1)(mRNG);
    584656            assert (i != S.end());
    585657            const ConstraintVertex u = *i;
     
    604676/** ------------------------------------------------------------------------------------------------------------- *
    605677 * @brief choose
    606  ** ------------------------------------------------------------------------------------------------------------- */
    607 inline unsigned long choose(const unsigned n, const unsigned k) {
     678 *
     679 * Compute n choose k
     680 ** ------------------------------------------------------------------------------------------------------------- */
     681__attribute__ ((const)) inline unsigned long choose(const unsigned n, const unsigned k) {
    608682    if (n < k)
    609683        return 0;
     
    632706    unsigned long x = (choose(n, k) - 1) - m;
    633707    for (unsigned i = 0; i != k; ++i) {
    634         while (choose(--a, b) > x);
    635         x = x - choose(a, b);
     708        unsigned long y = 0;
     709        while ((y = choose(--a, b)) > x);
     710        x = x - y;
    636711        b = b - 1;
    637712        element[i] = (n - 1) - a;
     
    643718 * @param S an independent set
    644719 ** ------------------------------------------------------------------------------------------------------------- */
    645 inline void MultiplexingPass::addCandidateSet(const VertexVector & S, RNG & rng) {
     720inline void MultiplexingPass::addCandidateSet(const VertexVector & S) {
    646721    if (S.size() >= 3) {
    647722        if (S.size() <= mMultiplexingSetSizeLimit) {
     
    663738            } else { // take m random samples
    664739                for (unsigned i = 0; i != mMaxMultiplexingSetSelections; ++i) {
    665                     select(S.size(), mMultiplexingSetSizeLimit, rng() % max, element);
     740                    select(S.size(), mMultiplexingSetSizeLimit, mRNG() % max, element);
    666741                    const auto u = add_vertex(mMultiplexSetGraph);
    667742                    for (unsigned j = 0; j != mMultiplexingSetSizeLimit; ++j) {
     
    675750
    676751/** ------------------------------------------------------------------------------------------------------------- *
    677  * @brief is_power_of_2
    678  * @param n an integer
    679  ** ------------------------------------------------------------------------------------------------------------- */
    680 static inline bool is_power_of_2(const size_t n) {
    681     return ((n & (n - 1)) == 0) ;
    682 }
    683 
    684 /** ------------------------------------------------------------------------------------------------------------- *
    685752 * @brief log2_plus_one
    686753 ** ------------------------------------------------------------------------------------------------------------- */
    687754static inline size_t log2_plus_one(const size_t n) {
    688     return std::log2<size_t>(n) + 1; // use a faster builtin function for this?
     755    return std::log2<size_t>(n) + 1;
    689756}
    690757
    691758/** ------------------------------------------------------------------------------------------------------------- *
    692759 * @brief selectMultiplexSets
    693  * @param RNG random number generator
    694760 *
    695761 * This algorithm is simply computes a greedy set cover. We want an exact max-weight set cover but can generate new
    696762 * sets by taking a subset of any existing set. With a few modifications, the greedy approach seems to work well
    697  * enough but can be trivially shown to produce a suboptimal solution if there are three (or more) sets in which
    698  * two, labelled A and B, are disjoint and the third larger set, C, that consists of elements of A and B.
    699  ** ------------------------------------------------------------------------------------------------------------- */
    700 void MultiplexingPass::selectMultiplexSets(RNG &) {
    701 
    702     using InEdgeIterator = graph_traits<MultiplexSetGraph>::in_edge_iterator;
     763 * enough but can be shown to produce a suboptimal solution if there are three candidate sets labelled A, B and C,
     764 * in which A ∩ B = âˆ
     765, |A| ≀ |B| < |C| < (|A| + |B|), and C ∩ (A ∪ B) = C.
     766 ** ------------------------------------------------------------------------------------------------------------- */
     767void MultiplexingPass::selectMultiplexSets() {
     768
     769    using SetIterator = graph_traits<MultiplexSetGraph>::in_edge_iterator;
     770    using ElementIterator = graph_traits<MultiplexSetGraph>::out_edge_iterator;
    703771    using degree_t = MultiplexSetGraph::degree_size_type;
    704772    using vertex_t = MultiplexSetGraph::vertex_descriptor;
     
    707775    const size_t n = num_vertices(mMultiplexSetGraph) - m;
    708776
    709     std::vector<degree_t> remaining(n, 0);
    710     std::vector<vertex_t> chosen_set(m, 0);
     777    degree_t remaining[n];
     778    vertex_t chosen_set[m];
    711779
    712780    for (unsigned i = 0; i != n; ++i) {
    713781        remaining[i] = out_degree(i + m, mMultiplexSetGraph);
     782    }
     783    for (unsigned i = 0; i != m; ++i) {
     784        chosen_set[i] = 0;
    714785    }
    715786
     
    720791        degree_t w = 0;
    721792        for (vertex_t i = 0; i != n; ++i) {
    722             const degree_t r = remaining[i];
    723             if (w < r) {
    724                 k = i;
    725                 w = r;
    726             }
    727         }
    728 
    729         // Multiplexing requires at least 3 elements; if the best set contains fewer than 3, abort.
    730         if (w < 3) {
     793            degree_t r = remaining[i];
     794            if (r > 2) { // if this set has at least 3 elements.
     795                r *= r;
     796                ElementIterator begin, end;
     797                std::tie(begin, end) = out_edges(i + m, mMultiplexSetGraph);
     798                for (auto ei = begin; ei != end; ++ei) {
     799                    for (auto ej = ei; ++ej != end; ) {
     800                        if (edge(target(*ei, mMultiplexSetGraph), target(*ej, mMultiplexSetGraph), mUsageWeightingGraph).second) {
     801                            ++r;
     802                        }
     803                    }
     804                }
     805                if (w < r) {
     806                    k = i;
     807                    w = r;
     808                }
     809            }
     810        }
     811
     812        // Multiplexing requires 3 or more elements; if no set contains at least 3, abort.
     813        if (w == 0) {
    731814            break;
    732815        }
     
    746829        // If this contains 2^n elements for any n, discard the member that is most likely to be added
    747830        // to some future set.
    748         if (is_power_of_2(w)) {
     831        if (LLVM_UNLIKELY(is_power_of_2(w))) {
    749832            vertex_t j = 0;
    750833            degree_t w = 0;
     
    771854
    772855    for (unsigned i = 0; i != m; ++i) {
    773         InEdgeIterator ei, ei_end;
     856        SetIterator ei, ei_end;
    774857        std::tie(ei, ei_end) = in_edges(i, mMultiplexSetGraph);
    775858        for (auto next = ei; ei != ei_end; ei = next) {
     
    795878 ** ------------------------------------------------------------------------------------------------------------- */
    796879void MultiplexingPass::eliminateSubsetConstraints() {
    797 
    798     using SubsetEdgeIterator = graph_traits<SubsetGraph>::edge_iterator;
    799 
    800880    // If Ai ⊂ Aj then the subset graph will contain the arc (i, j). Remove all arcs corresponding to vertices
    801881    // that are not elements of the same multiplexing set.
     
    827907        // Afterwards modify the AST to ensure that multiplexing algorithm can ignore any subset constraints
    828908        for (auto e : make_iterator_range(edges(mSubsetGraph))) {
    829             Advance * adv1 = std::get<0>(mAdvanceAttributes[source(e, mSubsetGraph)]);
    830             Advance * adv2 = std::get<0>(mAdvanceAttributes[target(e, mSubsetGraph)]);
     909            Advance * const adv1 = mAdvance[source(e, mSubsetGraph)];
     910            Advance * const adv2 = mAdvance[target(e, mSubsetGraph)];
    831911            assert (adv1->getParent() == adv2->getParent());
    832912            PabloBlock * const pb = adv1->getParent();
     
    844924 ** ------------------------------------------------------------------------------------------------------------- */
    845925void MultiplexingPass::multiplexSelectedIndependentSets(PabloFunction &) {
    846 
    847     const unsigned first_set = num_vertices(mConstraintGraph);
    848     const unsigned last_set = num_vertices(mMultiplexSetGraph);
    849 
    850     // Preallocate the structures based on the size of the largest multiplex set
    851     size_t max_n = 3;
    852     for (unsigned idx = first_set; idx != last_set; ++idx) {
    853         max_n = std::max<unsigned>(max_n, out_degree(idx, mMultiplexSetGraph));
    854     }
    855 
    856     circular_buffer<PabloAST *> Q(max_n);
    857 
    858     // When entering thus function, the multiplex set graph M is a DAG with edges denoting the set
    859     // relationships of our independent sets.
    860 
    861     for (unsigned idx = first_set; idx != last_set; ++idx) {
     926    const auto first_set = num_vertices(mConstraintGraph);
     927    const auto last_set = num_vertices(mMultiplexSetGraph);
     928    for (auto idx = first_set; idx != last_set; ++idx) {
    862929        const size_t n = out_degree(idx, mMultiplexSetGraph);
    863930        if (n) {
     
    865932            Advance * input[n];
    866933            Advance * muxed[m];
    867 
     934            // The multiplex set graph is a DAG with edges denoting the set relationships of our independent sets.
    868935            unsigned i = 0;
    869936            for (const auto e : make_iterator_range(out_edges(idx, mMultiplexSetGraph))) {
    870                 input[i++] = std::get<0>(mAdvanceAttributes[target(e, mMultiplexSetGraph)]);
    871             }
    872 
     937                input[i++] = mAdvance[target(e, mMultiplexSetGraph)];
     938            }
    873939            Advance * const adv = input[0];
    874940            PabloBlock * const block = adv->getParent(); assert (block);
    875             PabloBuilder builder(block);
    876             block->setInsertPoint(block->back());
    877 
     941            block->setInsertPoint(nullptr);
    878942            /// Perform n-to-m Multiplexing
    879943            for (size_t j = 0; j != m; ++j) {
    880 
    881944                std::ostringstream prefix;
    882945                prefix << "mux" << n << "to" << m << '.' << (j + 1);
     946                Or * muxing = block->createOr(n);
    883947                for (size_t i = 0; i != n; ++i) {
    884948                    if (((i + 1) & (1UL << j)) != 0) {
    885949                        assert (input[i]->getParent() == block);
    886                         Q.push_back(input[i]->getOperand(0));
    887                     }
    888                 }
    889 
    890                 while (Q.size() > 1) {
    891                     PabloAST * a1 = Q.front(); Q.pop_front(); assert (a1);
    892                     PabloAST * a2 = Q.front(); Q.pop_front(); assert (a2);
    893                     assert (!Q.full());
    894                     Q.push_back(builder.createOr(a2, a1, "muxing"));
    895                 }
    896 
    897                 PabloAST * mux = Q.front(); Q.pop_front(); assert (mux);
    898                 // The only way this did not return an Advance statement would be if either the mux or shift amount
    899                 // is zero. Since these cases would have been eliminated earlier, we are safe to cast here.
    900                 muxed[j] = cast<Advance>(builder.createAdvance(mux, adv->getOperand(1), prefix.str()));
    901             }
    902 
     950                        muxing->addOperand(input[i]->getOperand(0));
     951                    }
     952                }
     953                muxed[j] = cast<Advance>(block->createAdvance(muxing, adv->getOperand(1), prefix.str()));
     954            }
    903955            /// Perform m-to-n Demultiplexing
    904956            for (size_t i = 0; i != n; ++i) {
    905 
    906                 // Construct the demuxed values and replaces all the users of the original advances with them.
     957                // Construct the demuxed values and replaces all the users of the original advances with them.               
     958                PabloAST * demuxing[m];
    907959                for (size_t j = 0; j != m; ++j) {
     960                    demuxing[j] = muxed[j];
    908961                    if (((i + 1) & (1UL << j)) == 0) {
    909                         Q.push_back(muxed[j]);
    910                     }
    911                 }
    912 
    913                 if (LLVM_LIKELY(Q.size() > 0)) {
    914                     while (Q.size() > 1) {
    915                         PabloAST * a1 = Q.front(); Q.pop_front(); assert (a1);
    916                         PabloAST * a2 = Q.front(); Q.pop_front(); assert (a2);
    917                         assert (!Q.full());
    918                         Q.push_back(builder.createOr(a2, a1, "demuxing"));
    919                     }
    920                     assert (Q.size() == 1);
    921                     PabloAST * neg = Q.front(); Q.pop_front(); assert (neg);
    922                     Q.push_back(builder.createNot(neg, "demuxing"));
    923                 }
    924 
    925                 for (unsigned j = 0; j != m; ++j) {
    926                     if (((i + 1) & (1ULL << j)) != 0) {
    927                         assert (!Q.full());
    928                         Q.push_back(muxed[j]);
    929                     }
    930                 }
    931 
    932                 while (Q.size() > 1) {
    933                     PabloAST * a1 = Q.front(); Q.pop_front(); assert (a1);
    934                     PabloAST * a2 = Q.front(); Q.pop_front(); assert (a2);
    935                     assert (!Q.full());
    936                     Q.push_back(builder.createAnd(a1, a2, "demuxing"));
    937                 }
    938 
    939                 PabloAST * demuxed = Q.front(); Q.pop_front(); assert (demuxed);
     962                        demuxing[j] = block->createNot(demuxing[j]);
     963                    }
     964                }
     965                And * demuxed = block->createAnd(m);
     966                for (size_t j = 0; j != m; ++j) {
     967                    demuxed->addOperand(demuxing[j]);
     968                }
    940969                input[i]->replaceWith(demuxed, true, true);
    941970            }
     
    945974
    946975/** ------------------------------------------------------------------------------------------------------------- *
     976 * @brief OrderingVerifier
     977 ** ------------------------------------------------------------------------------------------------------------- */
     978struct OrderingVerifier {
     979    OrderingVerifier() : mParent(nullptr) {}
     980    OrderingVerifier(const OrderingVerifier & parent) : mParent(&parent) {}
     981    bool count(const PabloAST * expr) const {
     982        if (mSet.count(expr)) {
     983            return true;
     984        } else if (mParent) {
     985            return mParent->count(expr);
     986        }
     987        return false;
     988    }
     989    void insert(const PabloAST * expr) {
     990        mSet.insert(expr);
     991    }
     992private:
     993    const OrderingVerifier * const mParent;
     994    std::unordered_set<const PabloAST *> mSet;
     995};
     996
     997/** ------------------------------------------------------------------------------------------------------------- *
    947998 * @brief topologicalSort
    948  *
    949  * After transforming the IR, we need to run this in order to always have a valid program. Each multiplex set
    950  * contains vertices corresponding to an Advance in the IR. While we know each Advance within a set is independent
    951  * w.r.t. the transitive closure of their dependencies in the IR, the position of each Advance's dependencies and
    952  * users within the IR isn't taken into consideration. Thus while there must be a valid ordering for the program,
    953  * it's not necessarily the original ordering.
    954999 ** ------------------------------------------------------------------------------------------------------------- */
    9551000void MultiplexingPass::topologicalSort(PabloFunction & function) {
    956     // Note: not a real topological sort. I expect the original order to be very close to the resulting one.
    957     std::unordered_set<const PabloAST *> encountered;
    958     std::stack<Statement *> scope;
    959     for (Statement * stmt = function.getEntryBlock()->front(); ; ) { restart:
    960         while ( stmt ) {
    961             for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
    962                 PabloAST * const op = stmt->getOperand(i);
    963                 if (LLVM_LIKELY(isa<Statement>(op))) {
    964                     if (LLVM_UNLIKELY(encountered.count(op) == 0)) {
    965                         if (LLVM_UNLIKELY(isa<While>(stmt) && isa<Next>(op))) {
    966                             if (encountered.count(cast<Next>(op)->getInitial()) != 0) {
    967                                 continue;
     1001    OrderingVerifier set;
     1002    set.insert(PabloBlock::createZeroes());
     1003    set.insert(PabloBlock::createOnes());
     1004    for (unsigned i = 0; i != function.getNumOfParameters(); ++i) {
     1005        set.insert(function.getParameter(i));
     1006    }
     1007    topologicalSort(function.getEntryBlock(), set);
     1008}
     1009
     1010/** ------------------------------------------------------------------------------------------------------------- *
     1011 * @brief topologicalSort
     1012 ** ------------------------------------------------------------------------------------------------------------- */
     1013void MultiplexingPass::topologicalSort(PabloBlock * block, OrderingVerifier & parent) {
     1014    OrderingVerifier encountered(parent);
     1015    for (Statement * stmt = block->front(); stmt; ) {
     1016        if (LLVM_UNLIKELY(isa<If>(stmt))) {
     1017            topologicalSort(cast<If>(stmt)->getBody(), encountered);
     1018            for (Assign * def : cast<If>(stmt)->getDefined()) {
     1019                encountered.insert(def);
     1020            }
     1021        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
     1022            topologicalSort(cast<While>(stmt)->getBody(), encountered);
     1023            for (Next * var : cast<While>(stmt)->getVariants()) {
     1024                encountered.insert(var);
     1025            }
     1026        }
     1027        bool unmodified = true;
     1028        for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
     1029            PabloAST * const op = stmt->getOperand(i);
     1030            if (LLVM_UNLIKELY(encountered.count(op) == 0 && isa<Statement>(op))) {
     1031                Statement * const next = stmt->getNextNode();
     1032                Statement * pos = cast<Statement>(op);
     1033                if (cast<Statement>(op)->getParent() != block) {
     1034                    // If we haven't already encountered the Assign or Next node, it must come from a If or
     1035                    // While node that we haven't processed yet. Scan ahead and try to locate it.
     1036                    pos = nullptr;
     1037                    if (isa<Assign>(pos)) {
     1038                        for (pos = cast<Statement>(op); pos; pos = pos->getNextNode()) {
     1039                            if (LLVM_UNLIKELY(isa<If>(pos))) {
     1040                                const auto & defs = cast<If>(pos)->getDefined();
     1041                                if (LLVM_LIKELY(std::find(defs.begin(), defs.end(), op) != defs.end())) {
     1042                                    break;
     1043                                }
    9681044                            }
    9691045                        }
    970                         Statement * const next = stmt->getNextNode();
    971                         stmt->insertAfter(cast<Statement>(op));
    972                         stmt = next;
    973                         goto restart;
    974                     }
    975                 }
    976             }
    977             if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
    978                 // Set the next statement to be the first statement of the inner scope and push the
    979                 // next statement of the current statement into the scope stack.
    980                 const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
    981                 scope.push(stmt->getNextNode());
    982                 stmt = nested->front();
    983                 continue;
    984             }
     1046                    } else if (isa<Next>(pos)) {
     1047                        for (pos = cast<Statement>(op); pos; pos = pos->getNextNode()) {
     1048                            if (LLVM_UNLIKELY(isa<While>(pos))) {
     1049                                const auto & vars = cast<While>(pos)->getVariants();
     1050                                if (LLVM_LIKELY(std::find(vars.begin(), vars.end(), op) != vars.end())) {
     1051                                    break;
     1052                                }
     1053                            }
     1054                        }
     1055                    }
     1056                    if (LLVM_UNLIKELY(pos == nullptr)) {
     1057                        throw std::runtime_error("Unexpected error: MultiplexingPass failed to topologically sort the function!");
     1058                    }
     1059                }
     1060                stmt->insertAfter(pos);
     1061                stmt = next;
     1062                unmodified = false;
     1063                break;
     1064            }
     1065        }
     1066        if (unmodified) {
    9851067            encountered.insert(stmt);
    9861068            stmt = stmt->getNextNode();
    9871069        }
    988         if (scope.empty()) {
    989             break;
    990         }
    991         stmt = scope.top();
    992         scope.pop();
    9931070    }
    9941071}
     
    10251102}
    10261103
     1104
     1105/** ------------------------------------------------------------------------------------------------------------- *
     1106 * @brief findMaximalCliques
     1107 *
     1108 * Adaptation of the Bron-Kerbosch algorithm.
     1109 ** ------------------------------------------------------------------------------------------------------------- */
     1110inline MultiplexingPass::CliqueSets MultiplexingPass::findMaximalCliques(const CliqueGraph & G) {
     1111    CliqueSets S;
     1112    const auto n = num_vertices(G);
     1113    std::vector<CliqueGraph::vertex_descriptor> ordering;
     1114    ordering.reserve(n);
     1115    for (auto u : make_iterator_range(vertices(G))) {
     1116        if (degree(u, G)) {
     1117            ordering.push_back(u);
     1118        }
     1119    }
     1120    CliqueSet R;
     1121    CliqueSet P(ordering.begin(), ordering.end());   
     1122    CliqueSet X;
     1123    X.reserve(ordering.size());
     1124    // compute a degeneracy ordering of G
     1125    std::sort(ordering.begin(), ordering.end(), [&G](const CliqueGraph::vertex_descriptor i, const CliqueGraph::vertex_descriptor j){ return degree(i, G) < degree(j, G); });
     1126    for (auto v : ordering) {
     1127        R.insert(v);
     1128        CliqueSet PN, XN;
     1129        for (const auto u : make_iterator_range(adjacent_vertices(v, G))) {
     1130            if (P.count(u)) PN.insert(u);
     1131            if (X.count(u)) XN.insert(u);
     1132        }
     1133        findMaximalCliques(G, R, std::move(PN), std::move(XN), S); // ({v}, P ∩ N(v), X ∩ N(v))
     1134        R.clear();
     1135        P.erase(v);
     1136        X.insert(v);
     1137    }
     1138    return S;
     1139}
     1140
     1141/** ------------------------------------------------------------------------------------------------------------- *
     1142 * @brief findMaximalCliques
     1143 ** ------------------------------------------------------------------------------------------------------------- */
     1144void MultiplexingPass::findMaximalCliques(const CliqueGraph & G, CliqueSet & R, CliqueSet && P, CliqueSet && X, CliqueSets & S) {
     1145    if (LLVM_UNLIKELY(P.empty() && X.empty())) { // Report R as a maximal clique
     1146        S.emplace(R.begin(), R.end());
     1147    } else {
     1148        // choose the pivot vertex u in P ∪ X as the vertex with the highest number of neighbors in P (Tomita et al. 2006.)
     1149        CliqueSet N;
     1150        CliqueGraph::degree_size_type size = 0;
     1151        for (const CliqueGraph::vertex_descriptor u : P) {
     1152            if (degree(u, G) >= size) {
     1153                CliqueGraph::degree_size_type neighbours = 0;
     1154                for (const CliqueGraph::vertex_descriptor v : make_iterator_range(adjacent_vertices(u, G))) {
     1155                    neighbours += P.count(v);
     1156                }
     1157                if (size <= neighbours) {
     1158                    if (size < neighbours) {
     1159                        size = neighbours;
     1160                        N.clear();
     1161                    }
     1162                    N.insert(u);
     1163                }
     1164            }
     1165        }
     1166        for (const CliqueGraph::vertex_descriptor u : X) {
     1167            if (degree(u, G) >= size) {
     1168                CliqueGraph::degree_size_type neighbours = 0;
     1169                for (const CliqueGraph::vertex_descriptor v : make_iterator_range(adjacent_vertices(u, G))) {
     1170                    neighbours += P.count(v);
     1171                }
     1172                if (size <= neighbours) {
     1173                    if (size < neighbours) {
     1174                        size = neighbours;
     1175                        N.clear();
     1176                    }
     1177                    N.insert(u);
     1178                }
     1179            }
     1180        }
     1181        const CliqueGraph::vertex_descriptor u = *(N.nth(IntDistribution(0, N.size() - 1)(mRNG)));
     1182        // for each vertex v in P \ N(u):
     1183        for (auto v = P.begin(); v != P.end(); v = P.erase(v)) {
     1184            if (edge(u, *v, G).second) continue;
     1185            const bool added = R.insert(*v).second;
     1186            CliqueSet PN, XN;
     1187            for (const CliqueGraph::vertex_descriptor u : make_iterator_range(adjacent_vertices(*v, G))) {
     1188                if (P.count(u)) PN.insert(u);
     1189                if (X.count(u)) XN.insert(u);
     1190            }
     1191            findMaximalCliques(G, R, std::move(PN), std::move(XN), S); // (R ∪ {v}, P ∩ N(v), X ∩ N(v))
     1192            if (LLVM_LIKELY(added)) R.erase(*v);
     1193            X.insert(*v);
     1194        }
     1195    }
     1196}
     1197
    10271198/** ------------------------------------------------------------------------------------------------------------- *
    10281199 * @brief get
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_automultiplexing.hpp

    r4888 r4890  
    44#include <pablo/codegenstate.h>
    55#include <slab_allocator.h>
    6 #include <queue>
    76#include <boost/graph/adjacency_list.hpp>
    87#include <boost/graph/adjacency_matrix.hpp>
     8#include <boost/container/flat_set.hpp>
    99#include <boost/container/flat_map.hpp>
    1010#include <boost/numeric/ublas/matrix.hpp>
     
    1919class PabloBuilder;
    2020class PabloFunction;
     21struct OrderingVerifier;
    2122
    2223class MultiplexingPass {
     
    2930    using MultiplexSetGraph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::bidirectionalS>;
    3031    using SubsetGraph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::bidirectionalS>;
     32    using SubsetEdgeIterator = boost::graph_traits<SubsetGraph>::edge_iterator;
     33    using CliqueGraph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::undirectedS>;
     34    using CliqueSet = boost::container::flat_set<CliqueGraph::vertex_descriptor>;
     35    using CliqueSets = boost::container::flat_set<std::vector<CliqueGraph::vertex_descriptor>>;
     36
     37    using AdvanceVector = std::vector<Advance *>;
    3138    using AdvanceDepth = std::vector<int>;
    32     using AdvanceAttributes = std::vector<std::pair<Advance *, BDD>>; // the Advance pointer and its respective base BDD variable
     39    using AdvanceVariable = std::vector<BDD>;
    3340    using VertexVector = std::vector<ConstraintVertex>;
    34     using ScopeMap = boost::container::flat_map<const PabloBlock *, Statement *>;
    3541
    3642public:
    37     static bool optimize(PabloFunction & function, const unsigned limit = std::numeric_limits<unsigned>::max(), const unsigned maxSelections = 100, const unsigned windowSize = 10, const bool independent = false);
     43
     44    static bool optimize(PabloFunction & function, const unsigned limit = std::numeric_limits<unsigned>::max(), const unsigned maxSelections = 100, const unsigned windowSize = 1, const bool independent = false);
     45
    3846protected:
     47
    3948    unsigned initialize(PabloFunction & function, const bool independent);
    4049    void initializeBaseConstraintGraph(PabloBlock * const block, const unsigned statements, const unsigned advances);
     
    4655    bool independent(const ConstraintVertex i, const ConstraintVertex j) const;
    4756    bool exceedsWindowSize(const ConstraintVertex i, const ConstraintVertex j) const;
    48     bool generateCandidateSets(RNG & rng);
    4957
    50     void addCandidateSet(const VertexVector & S, RNG & rng);
    51     void selectMultiplexSets(RNG & rng);
     58    void generateUsageWeightingGraph();
     59    CliqueSets findMaximalCliques(const CliqueGraph & G);
     60    void findMaximalCliques(const CliqueGraph & G, CliqueSet & R, CliqueSet && P, CliqueSet && X, CliqueSets & S);
     61
     62    bool generateCandidateSets();
     63    void addCandidateSet(const VertexVector & S);
     64    void selectMultiplexSets();
     65
     66    void eliminateSubsetConstraints();
    5267    void doTransitiveReductionOfSubsetGraph();
    53     void eliminateSubsetConstraints();
     68
    5469    void multiplexSelectedIndependentSets(PabloFunction & function);
     70
    5571    static void topologicalSort(PabloFunction & function);
     72    static void topologicalSort(PabloBlock * block, OrderingVerifier & parent);
     73
    5674    BDD & get(const PabloAST * const expr);
    5775
    58     inline MultiplexingPass(const unsigned limit, const unsigned maxSelections, const unsigned windowSize)
     76    inline MultiplexingPass(const RNG::result_type seed, const unsigned limit, const unsigned maxSelections, const unsigned windowSize)
    5977    : mMultiplexingSetSizeLimit(limit)
    6078    , mMaxMultiplexingSetSelections(maxSelections)
     
    6280    , mTestConstrainedAdvances(true)
    6381    , mVariables(0)
     82    , mRNG(seed)
    6483    , mConstraintGraph(0)
     84    , mAdvance(0, nullptr)
    6585    , mAdvanceDepth(0, 0)
     86    , mAdvanceNegatedVariable(0, 0)
    6687    {
    6788
     
    7495    const bool                  mTestConstrainedAdvances;
    7596    unsigned                    mVariables;
     97    RNG                         mRNG;
    7698    CharacterizationMap         mCharacterization;
    77     ConstraintGraph             mConstraintGraph;
     99    ConstraintGraph             mConstraintGraph;   
     100    AdvanceVector               mAdvance;
    78101    AdvanceDepth                mAdvanceDepth;
     102    AdvanceVariable             mAdvanceNegatedVariable;
    79103    SubsetGraph                 mSubsetGraph;
    80     AdvanceAttributes           mAdvanceAttributes;
     104    CliqueGraph                 mUsageWeightingGraph;
    81105    MultiplexSetGraph           mMultiplexSetGraph;
    82     ScopeMap                    mResolvedScopes;
    83106};
    84107
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_simplifier.cpp

    r4886 r4890  
    141141    if (isReassociative(stmt)) {
    142142        return fold(cast<Variadic>(stmt), block);
     143    } else if (isa<Not>(stmt)) {
     144        PabloAST * value = stmt->getOperand(0);
     145        if (LLVM_UNLIKELY(isa<Not>(value))) {
     146            return cast<Not>(value)->getOperand(0);
     147        } else if (LLVM_UNLIKELY(isa<Zeroes>(value))) {
     148            return block->createOnes();
     149        }  else if (LLVM_UNLIKELY(isa<Ones>(value))) {
     150            return block->createZeroes();
     151        }
     152    } else if (isa<Advance>(stmt)) {
     153        if (LLVM_UNLIKELY(isa<Zeroes>(stmt->getOperand(0)))) {
     154            return block->createZeroes();
     155        }
    143156    } else {
    144157        for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
    145158            if (LLVM_UNLIKELY(isa<Zeroes>(stmt->getOperand(i)))) {
    146159                switch (stmt->getClassTypeId()) {
    147                     case PabloAST::ClassTypeId::Advance:
    148                         return block->createZeroes();
    149                     case PabloAST::ClassTypeId::Not:
    150                         return block->createOnes();
    151160                    case PabloAST::ClassTypeId::Sel:
    152161                        block->setInsertPoint(stmt->getPrevNode());
     
    164173                block->setInsertPoint(stmt->getPrevNode());
    165174                switch (stmt->getClassTypeId()) {
    166                     case PabloAST::ClassTypeId::Not:
    167                         return block->createZeroes();
    168175                    case PabloAST::ClassTypeId::Sel:
    169176                        block->setInsertPoint(stmt->getPrevNode());
     
    186193                }
    187194            }
    188         }
    189         return nullptr;
    190     }
     195        }       
     196    }
     197    return nullptr;
    191198}
    192199
  • icGREP/icgrep-devel/icgrep/pablo/passes/factorizedfg.cpp

    r4886 r4890  
    44#include <pablo/analysis/pabloverifier.hpp>
    55#include <pablo/optimizers/pablo_simplifier.hpp>
    6 #include <pablo/optimizers/codemotionpass.h>
     6#include <pablo/passes/flattenassociativedfg.h>
    77#include <boost/container/flat_set.hpp>
    88#include <set>
     
    239239
    240240/** ------------------------------------------------------------------------------------------------------------- *
    241  * @brief factorize
    242  ** ------------------------------------------------------------------------------------------------------------- */
    243 inline void FactorizeDFG::factorize(Variadic * var) {
     241 * @brief Common Subexpression Elimination
     242 ** ------------------------------------------------------------------------------------------------------------- */
     243inline void FactorizeDFG::CSE(Variadic * var) {
    244244    while (var->getNumOperands() > 2) {
    245         unsigned numOperands = var->getNumOperands();
    246245        BicliqueSet bicliques = enumerateBicliques(var);
    247246        if (bicliques.empty()) {
     
    268267            cast<Variadic>(user)->addOperand(factored);
    269268        }
    270         assert (var->getNumOperands() < numOperands);
    271     }
    272 }
    273 
    274 /** ------------------------------------------------------------------------------------------------------------- *
    275  * @brief factorize
    276  ** ------------------------------------------------------------------------------------------------------------- */
    277 void FactorizeDFG::factorize(PabloBlock * const block) {
     269    }
     270}
     271
     272/** ------------------------------------------------------------------------------------------------------------- *
     273 * @brief Common Subexpression Elimination
     274 ** ------------------------------------------------------------------------------------------------------------- */
     275void FactorizeDFG::CSE(PabloBlock * const block) {
    278276    Statement * stmt = block->front();
    279277    while (stmt) {
    280278        if (isa<If>(stmt) || isa<While>(stmt)) {           
    281             factorize(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody());
     279            CSE(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody());
    282280        } else if (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt)) {
    283             factorize(cast<Variadic>(stmt));
     281            CSE(cast<Variadic>(stmt));
    284282        }
    285283        stmt = stmt->getNextNode();
     
    288286
    289287/** ------------------------------------------------------------------------------------------------------------- *
    290  * @brief initialize
     288 * @brief preScanDFG
    291289 ** ------------------------------------------------------------------------------------------------------------- */
    292290void FactorizeDFG::initialize(const PabloBlock * const block, const unsigned depth) {
     
    303301
    304302/** ------------------------------------------------------------------------------------------------------------- *
    305  * @brief initialize
    306  ** ------------------------------------------------------------------------------------------------------------- */
    307 inline void FactorizeDFG::initialize(const PabloFunction & function) {
     303 * @brief preScanDFG
     304 ** ------------------------------------------------------------------------------------------------------------- */
     305inline void FactorizeDFG::initialize(const PabloFunction &function) {
    308306    mScopeDepth.emplace(function.getEntryBlock(), 0);
    309307    initialize(function.getEntryBlock(), 1);
     
    316314    FactorizeDFG ldfg;
    317315    ldfg.initialize(function);
    318     ldfg.factorize(function.getEntryBlock());
     316    ldfg.CSE(function.getEntryBlock());
    319317    #ifndef NDEBUG
    320318    PabloVerifier::verify(function, "post-factorize");
  • icGREP/icgrep-devel/icgrep/pablo/passes/factorizedfg.h

    r4886 r4890  
    1010class PabloBlock;
    1111class Variadic;
     12class Not;
    1213class Statement;
    1314class PabloAST;
     
    2122    void initialize(const PabloFunction & function);
    2223    void initialize(const PabloBlock * const block, const unsigned depth);
    23     void factorize(PabloBlock * const block);
    24     void factorize(Variadic * const var);
     24    void CSE(PabloBlock * const block);
     25    void CSE(Variadic * const var);
    2526    PabloBlock * chooseInsertionScope(const VertexSet & users);
    2627    void findInsertionPoint(const VertexSet & operands, PabloBlock * const scope);
  • icGREP/icgrep-devel/icgrep/pablo/passes/flattenassociativedfg.h

    r4887 r4890  
    1313class FlattenAssociativeDFG {
    1414    friend class DistributivePass;
     15    friend class FactorizeDFG;
    1516public:
    1617    static void transform(PabloFunction & function);
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r4889 r4890  
    7979
    8080#ifdef ENABLE_MULTIPLEXING
    81 static cl::opt<bool> EnableMultiplexing("multiplexing", cl::init(true),
     81static cl::opt<bool> EnableMultiplexing("multiplexing", cl::init(false),
    8282                                        cl::desc("combine Advances whose inputs are mutual exclusive into the fewest number of advances possible (expensive)."),
    8383                                        cl::cat(cPabloOptimizationsOptions));
     
    8989                                        cl::desc("maximum number of selections from any partial candidate multiplexing set."),
    9090                                        cl::cat(cPabloOptimizationsOptions));
    91 static cl::opt<unsigned> MultiplexingWindowSize("multiplexing-window-size", cl::init(100),
     91static cl::opt<unsigned> MultiplexingWindowSize("multiplexing-window-size", cl::init(1),
    9292                                        cl::desc("maximum depth difference for computing mutual exclusion of Advance nodes."),
    9393                                        cl::cat(cPabloOptimizationsOptions));
     
    152152    }
    153153#ifdef ENABLE_MULTIPLEXING
    154     if (EnableLowering || EnableMultiplexing || EnableDistribution) {
     154    if (EnableLowering || EnableDistribution || EnableMultiplexing) {
    155155        FlattenAssociativeDFG::transform(*function);
    156156    }
     
    160160    }
    161161#ifdef ENABLE_MULTIPLEXING   
     162    if (EnableDistribution) {
     163        DistributivePass::optimize(*function);
     164    }
    162165    if (EnableMultiplexing) {
    163166        MultiplexingPass::optimize(*function, MultiplexingSetLimit, MultiplexingSelectionLimit, MultiplexingWindowSize);
    164167    }
    165     if (EnableDistribution) {
    166         DistributivePass::optimize(*function);
    167     }
    168     if (EnableLowering || EnableMultiplexing || EnableDistribution) {
     168    if (EnableLowering || EnableDistribution || EnableMultiplexing) {
    169169        FactorizeDFG::transform(*function);
    170170    }
    171171#endif
    172172    if (PrintOptimizedREcode) {
     173        PabloVerifier::verify(*function, "post-optimization");
    173174        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
    174175        llvm::raw_os_ostream cerr(std::cerr);
Note: See TracChangeset for help on using the changeset viewer.