Ignore:
Timestamp:
Jul 8, 2015, 2:59:03 PM (4 years ago)
Author:
nmedfort
Message:

Partial roll back of Trie structure. Seemed to introduce the potential of generating a cycle in the AST. More analysis is needed here before mixing together multiple Advance DAG traversals.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_automultiplexing.cpp

    r4648 r4650  
    11#include "pablo_automultiplexing.hpp"
    2 #include <pablo/codegenstate.h>
     2#include <include/simd-lib/builtins.hpp>
     3#include <pablo/builder.hpp>
     4#include <pablo/printer_pablos.h>
     5#include <boost/container/flat_set.hpp>
     6#include <boost/numeric/ublas/matrix.hpp>
     7#include <boost/circular_buffer.hpp>
     8#include <boost/range/iterator_range.hpp>
    39#include <llvm/ADT/BitVector.h>
     10#include <cudd.h>
     11#include <util.h>
    412#include <stack>
    513#include <queue>
    614#include <unordered_set>
    7 #include <boost/container/flat_set.hpp>
    8 #include <boost/numeric/ublas/matrix.hpp>
    9 #include <boost/circular_buffer.hpp>
    10 #include <include/simd-lib/builtins.hpp>
    11 #include <pablo/builder.hpp>
    12 #include <boost/range/iterator_range.hpp>
    13 #include <pablo/printer_pablos.h>
    14 #include <cudd.h>
    15 #include <util.h>
    1615
    1716using namespace llvm;
     
    2019using namespace boost::numeric::ublas;
    2120
    22 #define PRINT_DEBUG_OUTPUT
     21// #define PRINT_DEBUG_OUTPUT
    2322
    2423#if !defined(NDEBUG) && !defined(PRINT_DEBUG_OUTPUT)
     
    101100bool AutoMultiplexing::optimize(const std::vector<Var *> & input, PabloBlock & entry) {
    102101
    103     // std::random_device rd;
    104     const auto seed = 2938639837; // rd();
     102    std::random_device rd;
     103    const auto seed = rd();
    105104    RNG rng(seed);
    106105
     
    123122
    124123    RECORD_TIMESTAMP(start_create_multiplex_graph);
    125     const bool multiplex = am.generateMultiplexSets(rng, 1);
     124    const bool multiplex = am.generateCandidateSets(rng);
    126125    RECORD_TIMESTAMP(end_create_multiplex_graph);
    127126    LOG("GenerateMultiplexSets:   " << (end_create_multiplex_graph - start_create_multiplex_graph));
     
    145144
    146145        RECORD_TIMESTAMP(start_topological_sort);
    147         // am.topologicalSort(entry);
     146        am.topologicalSort(entry);
    148147        RECORD_TIMESTAMP(end_topological_sort);
    149148        LOG("TopologicalSort:         " << (end_topological_sort - start_topological_sort));
     
    616615
    617616inline void AutoMultiplexing::Shutdown() {
    618     #ifdef PRINT_DEBUG_OUTPUT
    619     Cudd_PrintInfo(mManager, stderr);
    620     #endif
     617//    #ifdef PRINT_DEBUG_OUTPUT
     618//    Cudd_PrintInfo(mManager, stderr);
     619//    #endif
    621620    Cudd_Quit(mManager);
    622621}
     
    626625 * @param RNG random number generator
    627626 ** ------------------------------------------------------------------------------------------------------------- */
    628 bool AutoMultiplexing::generateMultiplexSets(RNG & rng, unsigned k) {
     627bool AutoMultiplexing::generateCandidateSets(RNG & rng) {
    629628
    630629    using degree_t = graph_traits<ConstraintGraph>::degree_size_type;
     
    634633    // have the same problem here but decomposed into subgraphs.
    635634
    636     VertexVector M;
     635    VertexVector S;
    637636    std::vector<degree_t> D(num_vertices(mConstraintGraph));
    638     M.reserve(15);
    639 
    640     Trie T;
    641     std::vector<ConstraintVertex> roots;
    642 
    643     while (k) {
    644 
    645         // Push all source nodes into the new independent set N
    646         for (auto v : make_iterator_range(vertices(mConstraintGraph))) {
    647             const auto d = in_degree(v, mConstraintGraph);
    648             D[v] = d;
    649             if (d == 0) {
    650                 M.push_back(v);
    651             }
    652         }
    653 
    654         std::sort(M.begin(), M.end());
    655 
    656         auto remainingVerticies = num_vertices(mConstraintGraph) - M.size();
    657 
    658         for (;;) {
    659 
    660             addCandidateSet(M, T, roots);
    661 
    662             if (remainingVerticies == 0) {
    663                 break;
    664             }
    665 
    666             auto entries = M.size();
    667 
    668             do {
    669                 // Randomly choose a vertex in S and discard it.
    670                 assert (!M.empty());
    671                 const auto i = M.begin() + IntDistribution(0, entries - 1)(rng);
    672                 const ConstraintVertex u = *i;
    673                 M.erase(i);
    674                 --remainingVerticies;
    675                 --entries;
    676                 for (auto e : make_iterator_range(out_edges(u, mConstraintGraph))) {
    677                     const ConstraintVertex v = target(e, mConstraintGraph);
    678                     if ((--D[v]) == 0) {
    679                         M.push_back(v);
    680                     }
    681                 }
    682             }
    683             while ((M.size() == entries) && remainingVerticies != 0);
    684             // sort our new entries then merge the sorted lists
    685             std::sort(M.begin() + entries, M.end());
    686             std::inplace_merge(M.begin(), M.begin() + entries, M.end());
    687             assert (std::is_sorted(M.begin(), M.end()));
    688         }
    689         M.clear();
    690 
    691         std::cerr << "T" << k << "=" << num_vertices(T) << std::endl;
    692 
    693         if (--k == 0) {
    694             break;
    695         }       
    696     }
    697 
    698     // At this point T is a forest in which the set of any vertices from a source to a sink represents a
    699     // unique candidate set. Add all of them to the multiplex set graph.
    700 
    701     if (num_vertices(T) == 0) {
    702         return false;
    703     }
     637    S.reserve(15);
    704638
    705639    mMultiplexSetGraph = MultiplexSetGraph(num_vertices(mConstraintGraph));
    706     for (auto t : roots) {
    707         writeCandidateSet(t, T, M);
    708     }
    709     return true;
     640
     641    // Push all source nodes into the new independent set N
     642    for (auto v : make_iterator_range(vertices(mConstraintGraph))) {
     643        const auto d = in_degree(v, mConstraintGraph);
     644        D[v] = d;
     645        if (d == 0) {
     646            S.push_back(v);
     647        }
     648    }
     649
     650    auto remainingVerticies = num_vertices(mConstraintGraph) - S.size();
     651
     652    do {
     653
     654        addCandidateSet(S);
     655
     656        bool noNewElements = true;
     657        do {
     658            // Randomly choose a vertex in S and discard it.
     659            const auto i = S.begin() + IntDistribution(0, S.size() - 1)(rng);
     660            const ConstraintVertex u = *i;
     661            S.erase(i);
     662            --remainingVerticies;
     663
     664            for (auto e : make_iterator_range(out_edges(u, mConstraintGraph))) {
     665                const ConstraintVertex v = target(e, mConstraintGraph);
     666                if ((--D[v]) == 0) {
     667                    S.push_back(v);
     668                    noNewElements = false;
     669                }
     670            }
     671        }
     672        while (noNewElements && remainingVerticies);
     673    }
     674    while (remainingVerticies);
     675
     676    return num_vertices(mMultiplexSetGraph) > num_vertices(mConstraintGraph);
    710677}
    711678
    712679/** ------------------------------------------------------------------------------------------------------------- *
    713680 * @brief addCandidateSet
    714  * @param M an independent set
     681 * @param S an independent set
    715682 * @param T the trie in which to encode this new set into
    716683 * @param roots the roots (source nodes) for each tree in T
    717684 ** ------------------------------------------------------------------------------------------------------------- */
    718 inline void AutoMultiplexing::addCandidateSet(const VertexVector & M, Trie & T, VertexVector & roots) const {
    719     if (M.size() >= 3) {
    720         auto mi = M.cbegin();
    721         ConstraintVertex u = 0;
    722         for (ConstraintVertex s : roots) {
    723             if (T[s] == *mi) {
    724                 u = s;
    725                 while (++mi != M.cend()) {
    726                     bool not_found = true;
    727                     for (const auto e : make_iterator_range(out_edges(u, T))) {
    728                         const auto v = target(e, T);
    729                         if (T[v] == *mi) {
    730                             u = v;
    731                             not_found = false;
    732                             break;
    733                         }
    734                     }
    735                     if (not_found) {
    736                         break;
    737                     }
    738                 }
    739                 break;
    740             }
    741         }
    742         if (mi == M.cbegin()) {
    743             u = add_vertex(*mi++, T);
    744             roots.push_back(u);
    745         }
    746         for (; mi != M.cend(); ++mi) {
    747             const auto v = add_vertex(*mi, T);
    748             add_edge(u, v, T);
    749             u = v;
    750         }
    751     }
    752 }
    753 
    754 /** ------------------------------------------------------------------------------------------------------------- *
    755  * @brief writeCandidateSet
    756  * @param M an independent set
    757  * @param T the trie in which to encode this new set into
    758  * @param roots the roots (source nodes) for each tree in T
    759  ** ------------------------------------------------------------------------------------------------------------- */
    760 void AutoMultiplexing::writeCandidateSet(const Trie::vertex_descriptor t, const Trie & T, VertexVector & S) {
    761     S.push_back(T[t]);
    762     if (out_degree(t, T) == 0) {
     685inline void AutoMultiplexing::addCandidateSet(const VertexVector & S) {
     686    if (S.size() >= 3) {
    763687        const auto u = add_vertex(mMultiplexSetGraph);
    764688        for (const auto v : S) {
     
    766690        }
    767691    }
    768     else {
    769         for (const auto e : make_iterator_range(out_edges(t, T))) {
    770             writeCandidateSet(target(e, T), T, S);
    771         }
    772     }
    773     assert (S.back() == T[t]);
    774     S.pop_back();
    775 }
    776 
     692}
    777693
    778694/** ------------------------------------------------------------------------------------------------------------- *
     
    797713 * This algorithm is simply computes a greedy set cover. We want an exact max-weight set cover but can generate new
    798714 * sets by taking a subset of any existing set. With a few modifications, the greedy approach seems to work well
    799  * enough but more analysis is needed.
     715 * enough but can be trivially shown to produce a suboptimal solution if there are three (or more) sets in which
     716 * two, labelled A and B, are disjoint and the third larger set, C, that consists of elements of A and B.
    800717 ** ------------------------------------------------------------------------------------------------------------- */
    801718void AutoMultiplexing::selectMultiplexSets(RNG &) {
     
    10941011            }
    10951012
    1096 
    10971013            /// Perform m-to-n Demultiplexing
    10981014            // Now construct the demuxed values and replaces all the users of the original advances with them.
     
    11481064 ** ------------------------------------------------------------------------------------------------------------- */
    11491065void AutoMultiplexing::topologicalSort(PabloBlock & entry) const {
    1150     // Note: not a real topological sort. I expect the original order to be very close to the resulting one.
     1066    // Note: not a real topological sort. I expect the original graph to be close to the resulting one. Thus cost
     1067    // of constructing a graph in which to perform the sort takes longer than potentially moving an instruction
     1068    // multiple times.
    11511069    std::unordered_set<const PabloAST *> encountered;
    11521070    std::stack<Statement *> scope;
Note: See TracChangeset for help on using the changeset viewer.