Ignore:
Timestamp:
Jan 21, 2016, 5:15:33 PM (4 years ago)
Author:
nmedfort
Message:

Work on lowering + some timing and papi information that will be cleaned up later.

Location:
icGREP/icgrep-devel/icgrep/pablo/optimizers
Files:
10 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/booleanreassociationpass.cpp

    r4899 r4919  
    2222
    2323using TypeId = PabloAST::ClassTypeId;
    24 using Graph = BooleanReassociationPass::Graph;
    25 using Vertex = Graph::vertex_descriptor;
     24using DependencyGraph = BooleanReassociationPass::Graph;
     25using Vertex = DependencyGraph::vertex_descriptor;
    2626using VertexData = BooleanReassociationPass::VertexData;
    2727using SinkMap = BooleanReassociationPass::Map;
    2828using DistributionGraph = adjacency_list<hash_setS, vecS, bidirectionalS, Vertex>;
    29 using DistributionMap = flat_map<Graph::vertex_descriptor, DistributionGraph::vertex_descriptor>;
     29using DistributionMap = flat_map<DependencyGraph::vertex_descriptor, DistributionGraph::vertex_descriptor>;
    3030using VertexSet = std::vector<Vertex>;
    3131using VertexSets = std::vector<VertexSet>;
     
    3939 ** ------------------------------------------------------------------------------------------------------------- */
    4040template<typename Iterator>
    41 inline Graph::edge_descriptor first(const std::pair<Iterator, Iterator> & range) {
     41inline DependencyGraph::edge_descriptor first(const std::pair<Iterator, Iterator> & range) {
    4242    assert (range.first != range.second);
    4343    return *range.first;
     
    4545
    4646#ifndef NDEBUG
    47 static bool no_path(const Vertex u, const Vertex v, const Graph & G) {
     47static bool no_path(const Vertex u, const Vertex v, const DependencyGraph & G) {
    4848    if (u == v) return false;
    4949    flat_set<Vertex> V;
     
    7070#endif
    7171
    72 inline void add_edge(PabloAST * expr, const Vertex u, const Vertex v, Graph & G) {
     72inline void add_edge(PabloAST * expr, const Vertex u, const Vertex v, DependencyGraph & G) {
    7373    assert (no_path(v, u, G));
    7474    // Make sure each edge is unique
     
    821821 * there are not enough vertices in the biclique to make distribution profitable, eliminate the clique.
    822822 ** ------------------------------------------------------------------------------------------------------------- */
    823 static BicliqueSet && removeUnhelpfulBicliques(BicliqueSet && cliques, const Graph & G, DistributionGraph & H) {
     823static BicliqueSet && removeUnhelpfulBicliques(BicliqueSet && cliques, const DependencyGraph & G, DistributionGraph & H) {
    824824    for (auto ci = cliques.begin(); ci != cliques.end(); ) {
    825825        const auto cardinalityA = std::get<0>(*ci).size();
     
    844844 * @brief safeDistributionSets
    845845 ** ------------------------------------------------------------------------------------------------------------- */
    846 static DistributionSets safeDistributionSets(const Graph & G, DistributionGraph & H) {
     846static DistributionSets safeDistributionSets(const DependencyGraph & G, DistributionGraph & H) {
    847847
    848848    VertexSet sinks;
     
    868868 * @brief generateDistributionGraph
    869869 ** ------------------------------------------------------------------------------------------------------------- */
    870 void generateDistributionGraph(const Graph & G, DistributionGraph & H) {
     870void generateDistributionGraph(const DependencyGraph & G, DistributionGraph & H) {
    871871    DistributionMap M;
    872872    for (const Vertex u : make_iterator_range(vertices(G))) {
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/codemotionpass.cpp

    r4896 r4919  
    1313 ** ------------------------------------------------------------------------------------------------------------- */
    1414bool CodeMotionPass::optimize(PabloFunction & function) {
    15     CodeMotionPass::process(function.getEntryBlock());
     15    CodeMotionPass::global(function.getEntryBlock());
    1616    #ifndef NDEBUG
    1717    PabloVerifier::verify(function, "post-code-motion");
     
    2323 * @brief process
    2424 ** ------------------------------------------------------------------------------------------------------------- */
    25 void CodeMotionPass::process(PabloBlock * const block) {
     25void CodeMotionPass::global(PabloBlock * const block) {
    2626    sink(block);
    2727    for (Statement * stmt : *block) {
    2828        if (isa<If>(stmt)) {
    29             process(cast<If>(stmt)->getBody());
     29            global(cast<If>(stmt)->getBody());
    3030        } else if (isa<While>(stmt)) {
    31             process(cast<While>(stmt)->getBody());
     31            global(cast<While>(stmt)->getBody());
    3232            // TODO: if we analyzed the probability of this loop being executed once, twice, or many times, we could
    3333            // determine whether hoisting will helpful or harmful to the expected run time.
     
    132132                // must be the LCA of our original scopes.
    133133                while (scope1 != scope2) {
     134                    assert (scope1 && scope2);
    134135                    scope1 = scope1->getParent();
    135136                    scope2 = scope2->getParent();
    136137                }
    137                 assert (scope1 && scope2);
     138                assert (scope1);
    138139                // But if the LCA is the current block, we can't sink the statement.
    139140                if (scope1 == block) {
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/codemotionpass.h

    r4896 r4919  
    3131    static bool optimize(PabloFunction & function);
    3232protected:
    33     static void process(PabloBlock * const block);
     33    static void global(PabloBlock * const block);
    3434    static bool isAcceptableTarget(Statement *stmt, ScopeSet & scopeSet, const PabloBlock * const block);
    3535    static void sink(PabloBlock * const block);
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/distributivepass.cpp

    r4899 r4919  
    1414namespace pablo {
    1515
    16 using Graph = adjacency_list<hash_setS, vecS, bidirectionalS, PabloAST *>;
    17 using Vertex = Graph::vertex_descriptor;
     16using DependencyGraph = adjacency_list<hash_setS, vecS, bidirectionalS, PabloAST *>;
     17using Vertex = DependencyGraph::vertex_descriptor;
    1818using SinkMap = flat_map<PabloAST *, Vertex>;
    1919using VertexSet = std::vector<Vertex>;
     
    2828 * @brief getVertex
    2929 ** ------------------------------------------------------------------------------------------------------------- */
    30 static inline Vertex getVertex(PabloAST * value, Graph & G, SinkMap & M) {
     30static inline Vertex getVertex(PabloAST * value, DependencyGraph & G, SinkMap & M) {
    3131    const auto f = M.find(value);
    3232    if (f != M.end()) {
     
    4343 * Generate a graph G describing the potential applications of the distributive law for the given block.
    4444 ** ------------------------------------------------------------------------------------------------------------- */
    45 VertexSet generateDistributionGraph(PabloBlock * block, Graph & G) {
     45VertexSet generateDistributionGraph(PabloBlock * block, DependencyGraph & G) {
    4646    SinkMap M;
    4747    VertexSet distSet;
     
    126126 ** ------------------------------------------------------------------------------------------------------------- */
    127127template <unsigned side>
    128 inline static BicliqueSet && independentCliqueSets(BicliqueSet && cliques, const unsigned minimum) {
     128inline static BicliqueSet && independentCliqueSets(BicliqueSet && bicliques, const unsigned minimum) {
    129129    using IndependentSetGraph = adjacency_list<hash_setS, vecS, undirectedS, unsigned>;
    130130
    131     const auto l = cliques.size();
     131    const auto l = bicliques.size();
    132132    IndependentSetGraph I(l);
    133133
    134     // Initialize our weights
     134    // Initialize our weights and determine the constraints
    135135    for (unsigned i = 0; i != l; ++i) {
    136         I[i] = std::pow(std::get<side>(cliques[i]).size(), 2);
    137     }
    138 
    139     // Determine our constraints
    140     for (unsigned i = 0; i != l; ++i) {
     136        I[i] = std::pow(std::get<side>(bicliques[i]).size(), 2);
    141137        for (unsigned j = i + 1; j != l; ++j) {
    142             if (intersects(std::get<side>(cliques[i]), std::get<side>(cliques[j]))) {
     138            if (intersects(std::get<side>(bicliques[i]), std::get<side>(bicliques[j]))) {
    143139                add_edge(i, j, I);
    144140            }
    145141        }
    146142    }
     143
     144//    // Initialize our weights and determine the constraints
     145//    for (auto i = bicliques.begin(); i != bicliques.end(); ++i) {
     146//        I[std::distance(bicliques.begin(), i)] = std::pow(std::get<side>(*i).size(), 2);
     147//        for (auto j = i; ++j != bicliques.end(); ) {
     148//            if (intersects(i->second, j->second) && intersects(i->first, j->first)) {
     149//                add_edge(std::distance(bicliques.begin(), i), std::distance(bicliques.begin(), j), I);
     150//            }
     151//        }
     152//    }
    147153
    148154    // Use the greedy algorithm to choose our independent set
     
    167173    // Sort the selected list and then remove the unselected cliques
    168174    std::sort(selected.begin(), selected.end(), std::greater<Vertex>());
    169     auto end = cliques.end();
     175    auto end = bicliques.end();
    170176    for (const unsigned offset : selected) {
    171         end = cliques.erase(cliques.begin() + offset + 1, end) - 1;
    172     }
    173     cliques.erase(cliques.begin(), end);
    174 
    175     return std::move(cliques);
     177        end = bicliques.erase(bicliques.begin() + offset + 1, end) - 1;
     178    }
     179    bicliques.erase(bicliques.begin(), end);
     180
     181    return std::move(bicliques);
    176182}
    177183
     
    183189 * bipartition A and their adjacencies to be in B.
    184190  ** ------------------------------------------------------------------------------------------------------------- */
    185 static BicliqueSet enumerateBicliques(const Graph & G, const VertexSet & A) {
     191static BicliqueSet enumerateBicliques(const DependencyGraph & G, const VertexSet & A) {
    186192    using IntersectionSets = std::set<VertexSet>;
    187193
     
    265271 * there are not enough vertices in the biclique to make distribution profitable, eliminate the clique.
    266272 ** ------------------------------------------------------------------------------------------------------------- */
    267 static BicliqueSet && removeUnhelpfulBicliques(BicliqueSet && cliques, Graph & G) {
     273static BicliqueSet && removeUnhelpfulBicliques(BicliqueSet && cliques, DependencyGraph & G) {
    268274    for (auto ci = cliques.begin(); ci != cliques.end(); ) {
    269275        const auto cardinalityA = std::get<0>(*ci).size();
     
    288294 * @brief safeDistributionSets
    289295 ** ------------------------------------------------------------------------------------------------------------- */
    290 static DistributionSets safeDistributionSets(Graph & G, const VertexSet & distSet) {
     296static DistributionSets safeDistributionSets(DependencyGraph & G, const VertexSet & distSet) {
    291297    DistributionSets T;
    292298    BicliqueSet lowerSet = independentCliqueSets<1>(removeUnhelpfulBicliques(enumerateBicliques(G, distSet), G), 1);
     
    309315        // FlattenAssociativeDFG::coalesce(block, false);
    310316
    311         Graph G;
     317        DependencyGraph G;
    312318
    313319        const VertexSet distSet = generateDistributionGraph(block, G);
     
    350356                outerOp->addOperand(G[u]);
    351357            }
    352             FlattenAssociativeDFG::coalesce(outerOp);
     358            CoalesceDFG::coalesce(outerOp);
    353359
    354360            for (const Vertex u : sources) {
     
    359365            }
    360366            innerOp->addOperand(outerOp);
    361             FlattenAssociativeDFG::coalesce(innerOp);
     367            CoalesceDFG::coalesce(innerOp);
    362368
    363369            for (const Vertex u : sinks) {
    364                 Variadic * const resultOp = cast<Variadic>(G[u]);
    365                 resultOp->addOperand(innerOp);
    366                 FlattenAssociativeDFG::coalesce(resultOp);
     370                cast<Variadic>(G[u])->addOperand(innerOp);
    367371            }
    368372        }
     
    391395    #endif
    392396    Simplifier::optimize(function);
    393 //    FlattenAssociativeDFG::deMorgansReduction(function.getEntryBlock());
    394 //    #ifndef NDEBUG
    395 //    PabloVerifier::verify(function, "post-demorgans-reduction");
    396 //    #endif
    397 //    Simplifier::optimize(function);
    398 }
    399 
    400 
    401 }
     397}
     398
     399
     400}
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_automultiplexing.cpp

    r4899 r4919  
    1111#include <pablo/analysis/pabloverifier.hpp>
    1212#include <pablo/optimizers/pablo_simplifier.hpp>
     13#include <pablo/builder.hpp>
    1314#include <stack>
    1415#include <queue>
     
    2324using namespace boost::numeric::ublas;
    2425
    25 #define PRINT_DEBUG_OUTPUT
     26// #define PRINT_DEBUG_OUTPUT
    2627
    2728#if !defined(NDEBUG) && !defined(PRINT_DEBUG_OUTPUT)
     
    102103namespace pablo {
    103104
     105#ifdef PRINT_TIMING_INFORMATION
     106MultiplexingPass::seed_t MultiplexingPass::SEED = 0;
     107unsigned MultiplexingPass::NODES_ALLOCATED = 0;
     108unsigned MultiplexingPass::NODES_USED = 0;
     109#endif
     110
    104111using TypeId = PabloAST::ClassTypeId;
    105112
    106113bool MultiplexingPass::optimize(PabloFunction & function, const unsigned limit, const unsigned maxSelections, const unsigned windowSize, const bool independent) {
    107114
    108 //    std::random_device rd;
    109 //    const auto seed = rd();
    110     const auto seed = 83234827342;
     115    std::random_device rd;
     116    const RNG::result_type seed = rd();
     117    // const RNG::result_type seed = 83234827342;
    111118
    112119    LOG("Seed:                    " << seed);
     120
     121    #ifdef PRINT_TIMING_INFORMATION
     122    MultiplexingPass::SEED = seed;
     123    MultiplexingPass::NODES_ALLOCATED = 0;
     124    MultiplexingPass::NODES_USED = 0;
     125    #endif
    113126
    114127    MultiplexingPass mp(seed, limit, maxSelections, windowSize);
     
    132145
    133146    LOG("Nodes in BDD:             " << bdd_getnodenum() << " of " << bdd_getallocnum());
     147
     148    #ifdef PRINT_TIMING_INFORMATION
     149    MultiplexingPass::NODES_ALLOCATED = bdd_getallocnum();
     150    MultiplexingPass::NODES_USED = bdd_getnodenum();
     151    #endif
    134152
    135153    RECORD_TIMESTAMP(start_shutdown);
     
    491509.
    492510                for (auto e : make_iterator_range(in_edges(i, mSubsetGraph))) {
    493                     unconstrained[source(e, mSubsetGraph)] = true;
     511                    const auto j = source(e, mSubsetGraph);
     512                    if (mSubsetImplicationsAdhereToWindowingSizeConstraint && exceedsWindowSize(j, k)) {
     513                        continue;
     514                    }
     515                    unconstrained[j] = true;
    494516                }
    495517                unconstrained[i] = true;
     
    503525                    const auto j = source(e, mSubsetGraph);
    504526                    add_edge(j, k, mSubsetGraph);
     527                    if (mSubsetImplicationsAdhereToWindowingSizeConstraint && exceedsWindowSize(j, k)) {
     528                        continue;
     529                    }
    505530                    unconstrained[j] = true;
    506531                }
     
    513538                    const auto j = target(e, mSubsetGraph);
    514539                    add_edge(k, j, mSubsetGraph);
     540                    if (mSubsetImplicationsAdhereToWindowingSizeConstraint && exceedsWindowSize(j, k)) {
     541                        continue;
     542                    }
    515543                    unconstrained[j] = true;
    516544                }
     
    527555            // Note: this algorithm deems two streams are mutually exclusive if and only if the conjuntion of their BDDs results
    528556            // in a contradiction. To generate a contradiction when comparing Advances, the BDD of each Advance is represented by
    529             // the conjunction of variable representing that Advance and the negation of all variables for the Advances in which
    530             // the inputs are deemed to be mutually exclusive. For example, if the input of the i-th Advance is mutually exclusive
     557            // the conjunction of variable representing the k-th Advance and the negation of all variables for the Advances whose
     558            // inputs are mutually exclusive with the k-th input. For example, if the input of the i-th Advance is mutually exclusive
    531559            // with the input of the j-th and k-th Advance, the BDD of the i-th Advance is Ai ∧ ¬Aj ∧ ¬Ak.
    532560            const Advance * const ithAdv = mAdvance[i];
     
    943971            Advance * const adv = input[0];
    944972            PabloBlock * const block = adv->getParent(); assert (block);
    945             block->setInsertPoint(nullptr);           
    946             /// Perform n-to-m Multiplexing
     973            block->setInsertPoint(nullptr);
     974            circular_buffer<PabloAST *> Q(n);
     975            PabloBuilder builder(block);
     976            /// Perform n-to-m Multiplexing           
    947977            for (size_t j = 0; j != m; ++j) {
    948978                std::ostringstream prefix;
    949979                prefix << "mux" << n << "to" << m << '.' << (j + 1);
    950                 Or * muxing = block->createOr(n);
     980//                Or * muxing = block->createOr(n);
     981//                for (size_t i = 0; i != n; ++i) {
     982//                    if (((i + 1) & (1UL << j)) != 0) {
     983//                        assert (input[i]->getParent() == block);
     984//                        muxing->addOperand(input[i]->getOperand(0));
     985//                    }
     986//                }
    951987                for (size_t i = 0; i != n; ++i) {
    952988                    if (((i + 1) & (1UL << j)) != 0) {
    953                         assert (input[i]->getParent() == block);
    954                         muxing->addOperand(input[i]->getOperand(0));
    955                     }
    956                 }
    957                 muxed[j] = block->createAdvance(muxing, adv->getOperand(1), prefix.str());
    958                 muxed_n[j] = block->createNot(muxed[j]);
    959             }
    960             /// Perform m-to-n Demultiplexing
     989                        Q.push_back(input[i]->getOperand(0));
     990                    }
     991                }
     992                while (Q.size() > 1) {
     993                    PabloAST * a = Q.front(); Q.pop_front();
     994                    PabloAST * b = Q.front(); Q.pop_front();
     995                    Q.push_back(builder.createOr(a, b));
     996                }
     997                PabloAST * muxing =  Q.front(); Q.clear();
     998                muxed[j] = builder.createAdvance(muxing, adv->getOperand(1), prefix.str());
     999                muxed_n[j] = builder.createNot(muxed[j]);
     1000            }
     1001            /// Perform m-to-n Demultiplexing           
    9611002            for (size_t i = 0; i != n; ++i) {
    962                 // Construct the demuxed values and replaces all the users of the original advances with them.               
    963                 And * demuxed = block->createAnd(m);
     1003                // Construct the demuxed values and replaces all the users of the original advances with them.
     1004                assert (Q.empty());
    9641005                for (size_t j = 0; j != m; ++j) {
    965                     demuxed->addOperand((((i + 1) & (1UL << j)) != 0) ? muxed[j] : muxed_n[j]);
    966                 }
     1006                    Q.push_back((((i + 1) & (1UL << j)) != 0) ? muxed[j] : muxed_n[j]);
     1007                }
     1008                while (Q.size() > 1) {
     1009                    PabloAST * a = Q.front(); Q.pop_front();
     1010                    PabloAST * b = Q.front(); Q.pop_front();
     1011                    Q.push_back(builder.createAnd(a, b));
     1012                }
     1013                PabloAST * demuxed =  Q.front(); Q.clear();
     1014//                And * demuxed = block->createAnd(m);
     1015//                for (size_t j = 0; j != m; ++j) {
     1016//                    demuxed->addOperand((((i + 1) & (1UL << j)) != 0) ? muxed[j] : muxed_n[j]);
     1017//                }
    9671018                input[i]->replaceWith(demuxed, true, true);
    9681019            }
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_automultiplexing.hpp

    r4896 r4919  
    4444
    4545    static bool optimize(PabloFunction & function, const unsigned limit = std::numeric_limits<unsigned>::max(), const unsigned maxSelections = 100, const unsigned windowSize = 1, const bool independent = false);
    46 
     46    #ifdef PRINT_TIMING_INFORMATION
     47    using seed_t = RNG::result_type;
     48    static seed_t SEED;
     49    static unsigned NODES_ALLOCATED;
     50    static unsigned NODES_USED;
     51    #endif
    4752protected:
    4853
     
    8186    , mWindowSize(windowSize)
    8287    , mTestConstrainedAdvances(true)
     88    , mSubsetImplicationsAdhereToWindowingSizeConstraint(false)
    8389    , mVariables(0)
    8490    , mRNG(seed)
     
    96102    const unsigned              mWindowSize;
    97103    const bool                  mTestConstrainedAdvances;
     104    const bool                  mSubsetImplicationsAdhereToWindowingSizeConstraint;
    98105    unsigned                    mVariables;
    99106    RNG                         mRNG;
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_simplifier.cpp

    r4899 r4919  
    55#include <pablo/printer_pablos.h>
    66#include <pablo/analysis/pabloverifier.hpp>
    7 #ifdef USE_BOOST
    87#include <boost/container/flat_set.hpp>
    9 #else
    10 #include <unordered_set>
    11 #endif
    128
    139namespace pablo {
    1410
    15 #ifdef USE_BOOST
    1611template <typename Type>
    1712using SmallSet = boost::container::flat_set<Type>;
    18 #else
    19 template <typename Type>
    20 using SmallSet = std::unordered_set<Type>;
    21 #endif
    2213
    2314/** ------------------------------------------------------------------------------------------------------------- *
     
    2516 ** ------------------------------------------------------------------------------------------------------------- */
    2617bool Simplifier::optimize(PabloFunction & function) {
     18    // negationsShouldImmediatelySucceedTheirLiteral(function.getEntryBlock());
     19    #ifndef NDEBUG
     20    PabloVerifier::verify(function, "post-negation-must-immediately-succeed-their-literal");
     21    #endif
    2722    eliminateRedundantCode(function.getEntryBlock());
    2823    #ifndef NDEBUG
     
    476471}
    477472
    478 }
     473/** ------------------------------------------------------------------------------------------------------------- *
     474 * @brief negationsShouldImmediatelySucceedTheirLiteral
     475 *
     476 * Assuming that most negations are actually replaced by an ANDC operations or that we only need a literal or its
     477 * negation, by pushing all negations up to immediately succeed the literal we should generate equally efficient
     478 * code whilst simplifying analysis.
     479 *
     480 * TODO: this theory needs evaluation.
     481 ** ------------------------------------------------------------------------------------------------------------- */
     482void Simplifier::negationsShouldImmediatelySucceedTheirLiteral(PabloBlock * const block) {
     483    Statement * stmt = block->front();
     484    while (stmt) {
     485        Statement * next = stmt->getNextNode();
     486        if (isa<If>(stmt)) {
     487            negationsShouldImmediatelySucceedTheirLiteral(cast<If>(stmt)->getBody());
     488        } else if (isa<While>(stmt)) {
     489            negationsShouldImmediatelySucceedTheirLiteral(cast<While>(stmt)->getBody());
     490        } else if (isa<Not>(stmt)) {
     491            PabloAST * op = cast<Not>(stmt)->getExpr();
     492            if (LLVM_LIKELY(isa<Statement>(op))) {
     493                PabloBlock * scope = cast<Statement>(op)->getParent();
     494                // If the operand is not in a nested scope, we can move it.
     495                for (;;) {
     496                    if (LLVM_UNLIKELY(scope == nullptr)) {
     497                        stmt->insertAfter(cast<Statement>(op));
     498                        break;
     499                    }
     500                    scope = scope->getParent();
     501                    if (LLVM_UNLIKELY(scope == block)) {
     502                        break;
     503                    }
     504                }
     505            }
     506        }
     507        stmt = next;
     508    }
     509}
     510
     511}
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_simplifier.hpp

    r4886 r4919  
    1515    Simplifier() = default;
    1616private:
     17    static void negationsShouldImmediatelySucceedTheirLiteral(PabloBlock * const block);
    1718    static void eliminateRedundantCode(PabloBlock * const block, ExpressionTable * predecessor = nullptr);
    1819    static PabloAST * fold(Variadic * const var, PabloBlock * const block);
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/schedulingprepass.cpp

    r4899 r4919  
    33#include <boost/circular_buffer.hpp>
    44#include <boost/container/flat_set.hpp>
     5#include <boost/container/flat_map.hpp>
    56#include <pablo/analysis/pabloverifier.hpp>
     7#include <boost/graph/adjacency_list.hpp>
     8#include <unordered_map>
    69
    710#include <pablo/printer_pablos.h>
     
    1316namespace pablo {
    1417
     18using DependencyGraph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::bidirectionalS, std::vector<PabloAST *>>;
     19using Vertex = DependencyGraph::vertex_descriptor;
     20using Map = std::unordered_map<const PabloAST *, Vertex>;
     21using ReadyPair = std::pair<unsigned, Vertex>;
     22using ReadySet = std::vector<ReadyPair>;
     23
     24using weight_t = unsigned;
     25using TypeId = PabloAST::ClassTypeId;
     26using LiveSet = flat_set<Vertex>;
     27
     28void schedule(PabloBlock * const block);
     29
     30/** ------------------------------------------------------------------------------------------------------------- *
     31 * @brief printGraph
     32 ** ------------------------------------------------------------------------------------------------------------- */
     33template <typename DependencyGraph>
     34static void printGraph(const DependencyGraph & G, const std::vector<unsigned> & ordering, const std::string name, raw_ostream & out) {
     35    out << "*******************************************\n";
     36    out << "digraph " << name << " {\n";
     37    for (auto u : make_iterator_range(vertices(G))) {
     38        if (G[u].empty()) {
     39            continue;
     40        }
     41        out << "v" << u << " [label=\"" << ordering[u] << " : ";
     42        bool newLine = false;
     43        for (PabloAST * expr : G[u]) {
     44            if (newLine) {
     45                out << '\n';
     46            }
     47            newLine = true;
     48            if (isa<Statement>(expr)) {
     49                PabloPrinter::print(cast<Statement>(expr), out);
     50            } else {
     51                PabloPrinter::print(expr, out);
     52            }
     53        }
     54        out << "\"];\n";
     55    }
     56    for (auto e : make_iterator_range(edges(G))) {
     57        out << "v" << source(e, G) << " -> v" << target(e, G);
     58        out << ";\n";
     59    }
     60
     61    out << "}\n\n";
     62    out.flush();
     63}
     64
    1565/** ------------------------------------------------------------------------------------------------------------- *
    1666 * @brief resolveNestedUsages
    1767 ** ------------------------------------------------------------------------------------------------------------- */
    18 void SchedulingPrePass::resolveNestedUsages(Statement * const root, Statement * const stmt, Graph & G, Map & M, PabloBlock * const block) {
     68static void resolveNestedUsages(Statement * const root, Statement * const stmt, DependencyGraph & G, Map & M, PabloBlock * const block) {
    1969    for (PabloAST * use : stmt->users()) {
    2070        if (LLVM_LIKELY(isa<Statement>(use))) {
     
    2373                for (PabloBlock * prior = scope->getParent(); prior; scope = prior, prior = prior->getParent()) {
    2474                    if (prior == block) {
    25                         auto s = mResolvedScopes.find(scope);
    26                         assert (s != mResolvedScopes.end());
    27                         assert (s->second);
    28                         auto v = M.find(s->second);
     75                        assert (scope->getBranch());
     76                        auto v = M.find(scope->getBranch());
    2977                        assert (v != M.end());
    3078                        auto u = M.find(root);
     
    4189}
    4290
    43 ///** ------------------------------------------------------------------------------------------------------------- *
    44 // * @brief printGraph
    45 // ** ------------------------------------------------------------------------------------------------------------- */
    46 //template <typename Graph>
    47 //static void printGraph(const Graph & G, const std::vector<unsigned> & ordering, const std::string name, raw_ostream & out) {
    48 //    out << "*******************************************\n";
    49 //    out << "digraph " << name << " {\n";
    50 //    for (auto u : make_iterator_range(vertices(G))) {
    51 //        out << "v" << u << " [label=\"" << ordering[u] << " : ";
    52 //        PabloPrinter::print(G[u], out);
    53 //        out << "\"];\n";
    54 //    }
    55 //    for (auto e : make_iterator_range(edges(G))) {
    56 //        out << "v" << source(e, G) << " -> v" << target(e, G);
    57 //        out << ";\n";
    58 //    }
    59 
    60 //    out << "}\n\n";
    61 //    out.flush();
    62 //}
    63 
    64 /** ------------------------------------------------------------------------------------------------------------- *
    65  * @brief schedule
    66  ** ------------------------------------------------------------------------------------------------------------- */
    67 void SchedulingPrePass::schedule(PabloBlock * const block) {
    68     Graph G;
     91/** ------------------------------------------------------------------------------------------------------------- *
     92 * @brief resolveNestedUsages
     93 ** ------------------------------------------------------------------------------------------------------------- */
     94static void computeDependencyGraph(DependencyGraph & G, PabloBlock * const block) {
    6995    Map M;
    70 
    7196    // Construct a use-def graph G representing the current scope block
    7297    for (Statement * stmt : *block) {
    73         if (isa<If>(stmt) || isa<While>(stmt)) {
    74             PabloBlock * body = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
    75             mResolvedScopes.emplace(body, stmt);
    76             schedule(body);
    77         }
    78         const auto u = add_vertex(stmt, G);
     98        if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     99            schedule(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody());
     100        }
     101        const auto u = add_vertex({stmt}, G);
    79102        M.insert(std::make_pair(stmt, u));
    80103        for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
     
    94117                        // Was this instruction computed by a nested block?
    95118                        if (prior == block) {
    96                             auto s = mResolvedScopes.find(scope);
    97                             assert (s != mResolvedScopes.end());
    98                             assert (s->second);
    99                             auto v = M.find(s->second);
     119                            assert (scope->getBranch());
     120                            auto v = M.find(scope->getBranch());
    100121                            assert (v != M.end());
    101122                            if (v->second != u) {
     
    109130        }
    110131    }
    111     // Do a second pass to ensure that we've accounted for any nested usage of a statement
     132    // Do a second pass to ensure that we've accounted for any nested usage of an If or While statement
    112133    for (Statement * stmt : *block) {
    113134        if (LLVM_UNLIKELY(isa<If>(stmt))) {
     
    123144        }
    124145    }
    125 
     146    // Contract the graph
     147    for (;;) {
     148        bool done = true;
     149        for (const Vertex u : make_iterator_range(vertices(G))) {
     150            if (out_degree(u, G) == 1) {
     151                const Vertex v = target(*(out_edges(u, G).first), G);
     152                G[v].insert(G[v].begin(), G[u].begin(), G[u].end());
     153                for (auto e : make_iterator_range(in_edges(u, G))) {
     154                    add_edge(source(e, G), v, G);
     155                }
     156                G[u].clear();
     157                clear_vertex(u, G);
     158                done = false;
     159            }
     160        }
     161        if (done) {
     162            break;
     163        }
     164    }
     165
     166
     167}
     168
     169/** ------------------------------------------------------------------------------------------------------------- *
     170 * @brief computeGraphOrdering
     171 ** ------------------------------------------------------------------------------------------------------------- */
     172std::vector<weight_t> computeGraphOrdering(const DependencyGraph & G) {
    126173    // Determine the bottom-up ordering of G
    127     std::vector<unsigned> ordering(num_vertices(G));
     174    std::vector<weight_t> ordering(num_vertices(G));
    128175    circular_buffer<Vertex> Q(num_vertices(G));
    129176    for (const Vertex u : make_iterator_range(vertices(G))) {
    130         if (out_degree(u, G) == 0) {
     177        if (out_degree(u, G) == 0 && G[u].size() > 0) {
    131178            ordering[u] = 1;
    132179            Q.push_back(u);
    133180        }
    134181    }
    135 
    136182    while (!Q.empty()) {
    137183        const Vertex u = Q.front();
     
    141187            const Vertex v = source(ei, G);
    142188            if (ordering[v] == 0) {
    143                 unsigned weight = 0;
     189                weight_t weight = 0;
    144190                bool ready = true;
    145191                for (const auto ej : make_iterator_range(out_edges(v, G))) {
    146192                    const Vertex w = target(ej, G);
    147                     const unsigned t = ordering[w];
     193                    const weight_t t = ordering[w];
    148194                    if (t == 0) {
    149195                        ready = false;
     
    155201                    assert (weight < std::numeric_limits<unsigned>::max());
    156202                    assert (weight > 0);
    157                     ordering[v] = (weight + 1);
     203                    ordering[v] = weight + 1;
    158204                    Q.push_back(v);
    159205                }
     
    161207        }
    162208    }
     209    return ordering;
     210}
     211
     212/** ------------------------------------------------------------------------------------------------------------- *
     213 * @brief schedule
     214 ** ------------------------------------------------------------------------------------------------------------- */
     215void schedule(PabloBlock * const block) {
     216    DependencyGraph G;
     217    computeDependencyGraph(G, block);
     218    std::vector<weight_t> ordering = computeGraphOrdering(G);
     219
     220//    raw_os_ostream out(std::cerr);
     221//    printGraph(G, ordering, "G", out);
    163222
    164223    // Compute the initial ready set
    165224    ReadySet readySet;
    166225    for (const Vertex u : make_iterator_range(vertices(G))) {
    167         if (in_degree(u, G) == 0) {
     226        if (in_degree(u, G) == 0 && G[u].size() > 0) {
    168227            readySet.emplace_back(ordering[u], u);
    169228        }
     
    180239    block->setInsertPoint(nullptr);
    181240
    182 
    183     // Rewrite the AST using the bottom-up ordering
     241    // Rewrite the AST
    184242    while (!readySet.empty()) {
    185 
    186         // Scan through the ready set to determine which one 'kills' the greatest number of inputs
    187         // NOTE: the readySet is kept in sorted "distance to sink" order; thus those closest to a
    188         // sink will be evaluated first.
    189         double best = 0;
     243        DependencyGraph::degree_size_type killed = 0;
    190244        auto chosen = readySet.begin();
    191245        for (auto ri = readySet.begin(); ri != readySet.end(); ++ri) {
    192             const Vertex u = std::get<1>(*ri);
    193             const PabloAST * const expr = G[u];
    194             if (expr && (isa<Assign>(expr) || isa<Next>(expr))) {
     246            DependencyGraph::degree_size_type kills = 0;
     247            for (auto e : make_iterator_range(in_edges(ri->first, G))) {
     248                if (out_degree(source(e, G), G) == 1) {
     249                    ++kills;
     250                }
     251            }
     252            if (kills > killed) {
    195253                chosen = ri;
    196                 break;
    197             }
    198             double progress = 0;
    199             for (auto ei : make_iterator_range(in_edges(u, G))) {
    200                 const auto v = source(ei, G);
    201                 const auto totalUsesOfIthOperand = out_degree(v, G);
    202                 if (LLVM_UNLIKELY(totalUsesOfIthOperand == 0)) {
    203                     progress += 1.0;
    204                 } else {
    205                     unsigned unscheduledUsesOfIthOperand = 0;
    206                     for (auto ej : make_iterator_range(out_edges(v, G))) {
    207                         if (ordering[target(ej, G)]) { // if this edge leads to an unscheduled statement
    208                             ++unscheduledUsesOfIthOperand;
    209                         }
    210                     }
    211                     progress += std::pow((double)(totalUsesOfIthOperand - unscheduledUsesOfIthOperand + 1) / (double)(totalUsesOfIthOperand), 2);
    212                 }
    213             }
    214             if (progress > best) {
    215                 chosen = ri;
    216                 best = progress;
     254                killed = kills;
    217255            }
    218256        }
     
    221259        std::tie(weight, u) = *chosen;
    222260        readySet.erase(chosen);
     261        clear_in_edges(u, G);
    223262
    224263        assert ("Error: SchedulingPrePass is attempting to reschedule a statement!" && (weight > 0));
    225264
    226265        // insert the statement then mark it as written ...
    227         block->insert(cast<Statement>(G[u]));
     266        for (PabloAST * expr : G[u]) {
     267            block->insert(cast<Statement>(expr));
     268        }
     269
    228270        ordering[u] = 0;
    229271        // Now check whether any new instructions are ready
     
    245287        }
    246288    }
     289
    247290}
    248291
     
    250293 * @brief schedule
    251294 ** ------------------------------------------------------------------------------------------------------------- */
    252 void SchedulingPrePass::schedule(PabloFunction & function) {
    253     mResolvedScopes.emplace(function.getEntryBlock(), nullptr);
     295void schedule(PabloFunction & function) {
    254296    schedule(function.getEntryBlock());
    255297}
     
    259301 ** ------------------------------------------------------------------------------------------------------------- */
    260302bool SchedulingPrePass::optimize(PabloFunction & function) {
    261     SchedulingPrePass pp;
    262     pp.schedule(function);
     303    schedule(function);
    263304    #ifndef NDEBUG
    264305    PabloVerifier::verify(function, "post-scheduling-prepass");
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/schedulingprepass.h

    r4896 r4919  
    11#ifndef SCHEDULINGPREPASS_H
    22#define SCHEDULINGPREPASS_H
    3 
    4 #include <boost/container/flat_map.hpp>
    5 #include <boost/graph/adjacency_list.hpp>
    6 #include <unordered_map>
    73
    84namespace pablo {
    95
    106class PabloFunction;
    11 class PabloBlock;
    12 class Statement;
    13 class PabloAST;
    147
    158class SchedulingPrePass {
    169public:
    17     using Graph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::bidirectionalS, PabloAST *>;
    18     using Vertex = Graph::vertex_descriptor;
    19     using Map = std::unordered_map<const PabloAST *, Vertex>;
    20     using ScopeMap = boost::container::flat_map<const PabloBlock *, Statement *>;
    21     using ReadyPair = std::pair<unsigned, Vertex>;
    22     using ReadySet = std::vector<ReadyPair>;
    23 public:
    2410    static bool optimize(PabloFunction & function);
    2511protected:
    26     void schedule(PabloFunction & function);
    27     void schedule(PabloBlock * const block);
    28     void resolveNestedUsages(Statement * const root, Statement * const stmt, Graph & G, Map & M, PabloBlock * const block);
    2912    SchedulingPrePass() = default;
    30 private:
    31     ScopeMap mResolvedScopes;
    3213};
    33 
    3414
    3515}
    3616
    37 
    3817#endif // SCHEDULINGPREPASS_H
Note: See TracChangeset for help on using the changeset viewer.