 Timestamp:
 Feb 21, 2016, 1:05:57 PM (4 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

icGREP/icgrepdevel/icgrep/pablo/optimizers/pablo_automultiplexing.cpp
r4927 r4937 16 16 #include <unordered_set> 17 17 #include <bdd.h> 18 #include <functional> 19 20 #include <llvm/Support/CommandLine.h> 18 21 19 22 using namespace llvm; … … 22 25 using namespace boost::numeric::ublas; 23 26 24 /// Interesting test case: ./icgrep '[\p{Lm}\p{Meetei_Mayek}]' disableifhierarchystrategy 27 /// Interesting test cases: 28 /// ./icgrep c multiplexing '[\p{Lm}\p{Meetei_Mayek}]' disableifhierarchystrategy 29 30 /// ./icgrep c multiplexing '\p{Imperial_Aramaic}(?<!\p{Sm})' disableifhierarchystrategy 31 32 33 static cl::OptionCategory MultiplexingOptions("Multiplexing Optimization Options", "These options control the Pablo Multiplexing optimization pass."); 34 35 #ifdef NDEBUG 36 #define INITIAL_SEED_VALUE (std::random_device()()) 37 #else 38 #define INITIAL_SEED_VALUE (83234827342) 39 #endif 40 41 static cl::opt<std::mt19937::result_type> Seed("multiplexingseed", cl::init(INITIAL_SEED_VALUE), 42 cl::desc("randomization seed used when performing any nondeterministic operations."), 43 cl::cat(MultiplexingOptions)); 44 45 #undef INITIAL_SEED_VALUE 46 47 static cl::opt<unsigned> SetLimit("multiplexingsetlimit", cl::init(std::numeric_limits<unsigned>::max()), 48 cl::desc("maximum size of any candidate set."), 49 cl::cat(MultiplexingOptions)); 50 51 static cl::opt<unsigned> SelectionLimit("multiplexingselectionlimit", cl::init(100), 52 cl::desc("maximum number of selections from any partial candidate set."), 53 cl::cat(MultiplexingOptions)); 54 55 static cl::opt<unsigned> WindowSize("multiplexingwindowsize", cl::init(1), 56 cl::desc("maximum depth difference for computing mutual exclusion of Advance nodes."), 57 cl::cat(MultiplexingOptions)); 58 59 static cl::opt<unsigned> Samples("multiplexingsamples", cl::init(1), 60 cl::desc("number of times the Advance constraint graph is sampled to find multiplexing opportunities."), 61 cl::cat(MultiplexingOptions)); 62 63 64 enum SelectionStrategy {Greedy, WorkingSet}; 65 66 static cl::opt<SelectionStrategy> Strategy(cl::desc("Choose set selection strategy:"), 67 cl::values( 68 clEnumVal(Greedy, "choose the largest multiplexing sets possible (w.r.t. the multiplexingsetlimit)."), 69 clEnumVal(WorkingSet, "choose multiplexing sets that share common input values."), 70 clEnumValEnd), 71 cl::init(Greedy), 72 cl::cat(MultiplexingOptions)); 25 73 26 74 // #define PRINT_DEBUG_OUTPUT … … 111 159 using TypeId = PabloAST::ClassTypeId; 112 160 113 bool MultiplexingPass::optimize(PabloFunction & function, const unsigned limit, const unsigned maxSelections, const unsigned windowSize, const bool independent) { 114 115 std::random_device rd; 116 const RNG::result_type seed = rd(); 117 // const RNG::result_type seed = 83234827342; 118 119 LOG("Seed: " << seed); 161 template<typename Graph> 162 static Graph construct(PabloBlock * const block); 163 164 template<typename Graph, typename Map> 165 static Graph construct(PabloBlock * const block, Map & M); 166 167 bool MultiplexingPass::optimize(PabloFunction & function, const bool independent) { 168 169 LOG("Seed: " << Seed); 120 170 121 171 #ifdef PRINT_TIMING_INFORMATION 122 MultiplexingPass::SEED = seed;172 MultiplexingPass::SEED = Seed; 123 173 MultiplexingPass::NODES_ALLOCATED = 0; 124 174 MultiplexingPass::NODES_USED = 0; 125 175 #endif 126 176 127 MultiplexingPass mp( seed, limit, maxSelections, windowSize);177 MultiplexingPass mp(Seed); 128 178 RECORD_TIMESTAMP(start_initialize); 129 179 const unsigned advances = mp.initialize(function, independent); … … 169 219 170 220 RECORD_TIMESTAMP(start_select_multiplex_sets); 171 mp.selectMultiplexSets(); 221 if (Strategy == SelectionStrategy::Greedy) { 222 mp.selectMultiplexSetsGreedy(); 223 } else if (Strategy == SelectionStrategy::WorkingSet) { 224 mp.selectMultiplexSetsWorkingSet(); 225 } 172 226 RECORD_TIMESTAMP(end_select_multiplex_sets); 173 227 LOG("SelectMultiplexSets: " << (end_select_multiplex_sets  start_select_multiplex_sets)); … … 256 310 bdd_setcacheratio(64); 257 311 bdd_setmaxincrease(10000000); 258 bdd_autoreorder(BDD_REORDER_SIFT); 312 bdd_autoreorder(BDD_REORDER_SIFT); // BDD_REORDER_SIFT 259 313 260 314 // Map the constants and input variables … … 364 418 const PabloBlock * advanceScope[advances]; 365 419 mAdvance.resize(advances, nullptr); 366 mAdvance Depth.resize(advances, 0);420 mAdvanceRank.resize(advances, 0); 367 421 mAdvanceNegatedVariable.reserve(advances); 368 422 for (Statement * stmt = entryBlock>front(); ; ) { … … 380 434 for (unsigned i = 0; i != k; ++i) { 381 435 if (edge(i, k, mConstraintGraph).second  (advanceScope[i] != advanceScope[k])) { 382 depth = std::max<int>(depth, mAdvance Depth[i]);436 depth = std::max<int>(depth, mAdvanceRank[i]); 383 437 } 384 438 } 385 mAdvance Depth[k++] = ++depth;439 mAdvanceRank[k++] = ++depth; 386 440 maxDepth = std::max(maxDepth, depth); 387 441 } … … 396 450 assert (k == advances); 397 451 398 LOG("Window Size / Max Depth: " << mWindowSize << " of " << maxDepth);452 LOG("Window Size / Max Depth: " << WindowSize << " of " << maxDepth); 399 453 } 400 454 … … 409 463 if (LLVM_UNLIKELY(isa<If>(stmt))) { 410 464 characterize(cast<If>(stmt)>getBody()); 465 for (const Assign * def : cast<If>(stmt)>getDefined()) { 466 if (LLVM_LIKELY(escapes(def))) { 467 bdd_addref(get(def)); 468 } 469 } 411 470 } else if (LLVM_UNLIKELY(isa<While>(stmt))) { 412 471 characterize(cast<While>(stmt)>getBody()); 413 472 for (const Next * var : cast<While>(stmt)>getVariants()) { 414 BDD & assign = get(var>getInitial()); 415 assign = bdd_addref(bdd_or(assign, get(var))); 473 if (LLVM_LIKELY(escapes(var))) { 474 BDD & initial = get(var>getInitial()); 475 BDD & escaped = get(var); 476 initial = bdd_addref(bdd_or(initial, escaped)); 477 escaped = initial; 478 } 416 479 } 417 480 } else { … … 548 611 for (unsigned i = 0; i != k; ++i) { 549 612 if (unconstrained[i]) { 550 // Note: this algorithm deems two streams are mutually exclusive if and only if the conjuntion of their BDDs results 551 // in a contradiction. To generate a contradiction when comparing Advances, the BDD of each Advance is represented by 552 // the conjunction of variable representing the kth Advance and the negation of all variables for the Advances whose 553 // inputs are mutually exclusive with the kth input. For example, if the input of the ith Advance is mutually exclusive 554 // with the input of the jth and kth Advance, the BDD of the ith Advance is Ai â§ Â¬Aj â§ Â¬Ak. 555 const Advance * const ithAdv = mAdvance[i]; 613 // This algorithm deems two streams mutually exclusive if and only if the conjuntion of their BDDs is a contradiction. 614 // To generate a contradiction when comparing Advances, the BDD of each Advance is represented by the conjunction of 615 // variables representing the kth Advance and the negation of all variables for the Advances whose inputs are mutually 616 // exclusive with the kth input. 617 618 // For example, if the input of the ith Advance is mutually exclusive with the input of the jth and kth Advance, the 619 // BDD of the ith Advance is Ai â§ Â¬Aj â§ Â¬Ak. Similarly, the j and kth Advance is Aj â§ Â¬Ai and Ak â§ Â¬Ai, respectively 620 // (assuming that the jth and kth Advance are not mutually exclusive.) 621 556 622 const BDD Ni = mAdvanceNegatedVariable[i]; 557 BDD & Ai = get( ithAdv);623 BDD & Ai = get(mAdvance[i]); 558 624 Ai = bdd_addref(bdd_and(Ai, Nk)); 559 625 Ak = bdd_addref(bdd_and(Ak, Ni)); 560 if (independent(i, k) && (adv>getParent() == ithAdv>getParent())) {626 if (independent(i, k) && (adv>getParent() == mAdvance[i]>getParent())) { 561 627 continue; 562 628 } … … 564 630 add_edge(i, k, mConstraintGraph); 565 631 } 566 // To minimize the number of BDD computations, store the negated variable instead of negating it each time.632 // To minimize the number of BDD computations, we store the negated variable instead of negating it each time. 567 633 mAdvanceNegatedVariable.emplace_back(Nk); 568 634 return Ak; … … 581 647 **  */ 582 648 inline bool MultiplexingPass::exceedsWindowSize(const ConstraintVertex i, const ConstraintVertex j) const { 583 assert (i < mAdvance Depth.size() && j < mAdvanceDepth.size());584 return (std::abs<int>(mAdvance Depth[i]  mAdvanceDepth[j]) > mWindowSize);649 assert (i < mAdvanceRank.size() && j < mAdvanceRank.size()); 650 return (std::abs<int>(mAdvanceRank[i]  mAdvanceRank[j]) > WindowSize); 585 651 } 586 652 … … 644 710 645 711 /**  * 646 * @brief generate MultiplexSets712 * @brief generateCandidateSets 647 713 **  */ 648 714 bool MultiplexingPass::generateCandidateSets() { 649 715 650 // What if we generated a "constraint free" graph instead? By taking each connected component of it 651 // and computing the complement of it (with the same lesser to greater index ordering), we should 652 // have the same problem here but decomposed into subgraphs. 653 654 VertexVector S; 655 std::vector<ConstraintGraph::degree_size_type> D(num_vertices(mConstraintGraph)); 656 S.reserve(15); 657 658 mMultiplexSetGraph = MultiplexSetGraph(num_vertices(mConstraintGraph)); 659 660 // Push all source nodes into the (initial) independent set S 661 for (auto v : make_iterator_range(vertices(mConstraintGraph))) { 662 const auto d = in_degree(v, mConstraintGraph); 663 D[v] = d; 664 if (d == 0) { 665 S.push_back(v); 666 } 667 } 668 669 assert (S.size() > 0); 670 671 auto remainingVerticies = num_vertices(mConstraintGraph)  S.size(); 672 673 do { 674 675 addCandidateSet(S); 676 677 bool noNewElements = true; 678 do { 716 Constraints S; 717 718 ConstraintGraph::degree_size_type D[num_vertices(mConstraintGraph)]; 719 720 mCandidateGraph = CandidateGraph(num_vertices(mConstraintGraph)); 721 722 for (unsigned iteration = 0; iteration < Samples; ++iteration) { 723 724 // Push all source nodes into the (initial) independent set S 725 for (const auto v : make_iterator_range(vertices(mConstraintGraph))) { 726 const auto d = in_degree(v, mConstraintGraph); 727 D[v] = d; 728 if (d == 0) { 729 S.push_back(v); 730 } 731 } 732 733 auto remaining = num_vertices(mConstraintGraph)  S.size(); 734 735 for (;;) { 679 736 assert (S.size() > 0); 680 // Randomly choose a vertex in S and discard it. 681 const auto i = S.begin() + IntDistribution(0, S.size()  1)(mRNG); 682 assert (i != S.end()); 683 const ConstraintVertex u = *i; 684 S.erase(i); 685 686 for (auto e : make_iterator_range(out_edges(u, mConstraintGraph))) { 687 const ConstraintVertex v = target(e, mConstraintGraph); 688 if ((D[v]) == 0) { 689 S.push_back(v); 690 remainingVerticies; 691 noNewElements = false; 692 } 693 } 694 } 695 while (noNewElements && remainingVerticies); 696 } 697 while (remainingVerticies); 698 699 return num_vertices(mMultiplexSetGraph) > num_vertices(mConstraintGraph); 737 addCandidateSet(S); 738 if (LLVM_UNLIKELY(remaining == 0)) { 739 break; 740 } 741 for (;;) { 742 assert (S.size() > 0); 743 // Randomly choose a vertex in S and discard it. 744 const auto i = S.begin() + IntDistribution(0, S.size()  1)(mRNG); 745 assert (i != S.end()); 746 const auto u = *i; 747 S.erase(i); 748 bool checkCandidate = false; 749 for (auto e : make_iterator_range(out_edges(u, mConstraintGraph))) { 750 const auto v = target(e, mConstraintGraph); 751 assert ("Constraint set degree subtraction error!" && (D[v] != 0)); 752 if ((D[v]) == 0) { 753 assert ("Error v is already in S!" && std::count(S.begin(), S.end(), v) == 0); 754 S.push_back(v); 755 assert (remaining != 0); 756 remaining; 757 if (LLVM_LIKELY(S.size() >= 3)) { 758 checkCandidate = true; 759 } 760 } 761 } 762 if (checkCandidate  LLVM_UNLIKELY(remaining == 0)) { 763 break; 764 } 765 } 766 } 767 768 S.clear(); 769 } 770 771 return num_vertices(mCandidateGraph) > num_vertices(mConstraintGraph); 700 772 } 701 773 … … 727 799 * James McCaffrey's algorithm for "Generating the mth Lexicographical Element of a Mathematical Combination" 728 800 **  */ 729 void select(const unsigned n, const unsigned k, const unsigned m, unsigned *element) {801 void MultiplexingPass::selectCandidateSet(const unsigned n, const unsigned k, const unsigned m, const Constraints & S, ConstraintVertex * const element) { 730 802 unsigned long a = n; 731 803 unsigned long b = k; … … 736 808 x = x  y; 737 809 b = b  1; 738 element[i] = (n  1)  a; 739 } 810 element[i] = S[(n  1)  a]; 811 } 812 } 813 814 /**  * 815 * @brief updateCandidateSet 816 **  */ 817 void MultiplexingPass::updateCandidateSet(ConstraintVertex * const begin, ConstraintVertex * const end) { 818 819 using Vertex = CandidateGraph::vertex_descriptor; 820 821 const auto n = num_vertices(mConstraintGraph); 822 const auto m = num_vertices(mCandidateGraph); 823 const auto d = end  begin; 824 825 std::sort(begin, end); 826 827 Vertex u = 0; 828 829 for (Vertex i = n; i != m; ++i) { 830 831 if (LLVM_UNLIKELY(degree(i, mCandidateGraph) == 0)) { 832 u = i; 833 continue; 834 } 835 836 const auto adj = adjacent_vertices(i, mCandidateGraph); 837 if (degree(i, mCandidateGraph) < d) { 838 // set_i can only be a subset of the new set 839 if (LLVM_UNLIKELY(std::includes(begin, end, adj.first, adj.second))) { 840 clear_vertex(i, mCandidateGraph); 841 u = i; 842 } 843 } else if (LLVM_UNLIKELY(std::includes(adj.first, adj.second, begin, end))) { 844 // the new set is a subset of set_i; discard it. 845 return; 846 } 847 848 } 849 850 if (LLVM_LIKELY(u == 0)) { // n must be at least 3 so u is 0 if and only if we're not reusing a set vertex. 851 u = add_vertex(mCandidateGraph); 852 } 853 854 for (ConstraintVertex * i = begin; i != end; ++i) { 855 add_edge(u, *i, mCandidateGraph); 856 } 857 740 858 } 741 859 … … 744 862 * @param S an independent set 745 863 **  */ 746 inline void MultiplexingPass::addCandidateSet(const VertexVector& S) {864 inline void MultiplexingPass::addCandidateSet(const Constraints & S) { 747 865 if (S.size() >= 3) { 748 if (S.size() <= mMultiplexingSetSizeLimit) {749 const auto u = add_vertex(mMultiplexSetGraph);750 for (const auto v : S) {751 add_edge(u, v, mMultiplexSetGraph);752 }866 const unsigned setLimit = SetLimit; 867 if (S.size() <= setLimit) { 868 ConstraintVertex E[S.size()]; 869 std::copy(S.cbegin(), S.cend(), E); 870 updateCandidateSet(E, E + S.size()); 753 871 } else { 754 const auto max = choose(S.size(), mMultiplexingSetSizeLimit); 755 unsigned element[mMultiplexingSetSizeLimit]; 756 if (LLVM_UNLIKELY(max <= mMaxMultiplexingSetSelections)) { 872 assert (setLimit > 0); 873 ConstraintVertex E[setLimit]; 874 const auto max = choose(S.size(), setLimit); 875 if (LLVM_UNLIKELY(max <= SelectionLimit)) { 757 876 for (unsigned i = 0; i != max; ++i) { 758 select(S.size(), mMultiplexingSetSizeLimit, i, element); 759 const auto u = add_vertex(mMultiplexSetGraph); 760 for (unsigned j = 0; j != mMultiplexingSetSizeLimit; ++j) { 761 add_edge(u, S[element[j]], mMultiplexSetGraph); 762 } 877 selectCandidateSet(S.size(), setLimit, i, S, E); 878 updateCandidateSet(E, E + setLimit); 763 879 } 764 880 } else { // take m random samples 765 for (unsigned i = 0; i != mMaxMultiplexingSetSelections; ++i) { 766 select(S.size(), mMultiplexingSetSizeLimit, mRNG() % max, element); 767 const auto u = add_vertex(mMultiplexSetGraph); 768 for (unsigned j = 0; j != mMultiplexingSetSizeLimit; ++j) { 769 add_edge(u, S[element[j]], mMultiplexSetGraph); 770 } 881 for (unsigned i = 0; i != SelectionLimit; ++i) { 882 selectCandidateSet(S.size(), setLimit, mRNG() % max, S, E); 883 updateCandidateSet(E, E + setLimit); 771 884 } 772 885 } … … 783 896 784 897 /**  * 785 * @brief selectMultiplexSets 898 * @brief selectMultiplexSetsGreedy 786 899 * 787 900 * This algorithm is simply computes a greedy set cover. We want an exact maxweight set cover but can generate new … … 791 904 , A â€ B < C, and C â (A âª B). 792 905 **  */ 793 void MultiplexingPass::selectMultiplexSets() { 794 795 using SetIterator = graph_traits<MultiplexSetGraph>::in_edge_iterator; 796 using ElementIterator = graph_traits<MultiplexSetGraph>::out_edge_iterator; 797 using degree_t = MultiplexSetGraph::degree_size_type; 798 using vertex_t = MultiplexSetGraph::vertex_descriptor; 906 void MultiplexingPass::selectMultiplexSetsGreedy() { 907 908 using AdjIterator = graph_traits<CandidateGraph>::adjacency_iterator; 909 using degree_t = CandidateGraph::degree_size_type; 910 using vertex_t = CandidateGraph::vertex_descriptor; 799 911 800 912 const size_t m = num_vertices(mConstraintGraph); 801 const size_t n = num_vertices(m MultiplexSetGraph)  m;913 const size_t n = num_vertices(mCandidateGraph)  m; 802 914 803 915 degree_t remaining[n]; … … 805 917 806 918 for (unsigned i = 0; i != n; ++i) { 807 remaining[i] = out_degree(i + m, mMultiplexSetGraph);919 remaining[i] = degree(i + m, mCandidateGraph); 808 920 } 809 921 for (unsigned i = 0; i != m; ++i) { … … 820 932 if (r > 2) { // if this set has at least 3 elements. 821 933 r *= r; 822 ElementIterator begin, end;823 std::tie(begin, end) = out_edges(i + m, mMultiplexSetGraph);934 AdjIterator begin, end; 935 std::tie(begin, end) = adjacent_vertices(i + m, mCandidateGraph); 824 936 for (auto ei = begin; ei != end; ++ei) { 825 937 for (auto ej = ei; ++ej != end; ) { 826 if (edge( target(*ei, mMultiplexSetGraph), target(*ej, mMultiplexSetGraph), mUsageGraph).second) {938 if (edge(*ei, *ej, mUsageGraph).second) { 827 939 ++r; 828 940 } … … 841 953 } 842 954 843 for (const auto ei : make_iterator_range(out_edges(k + m, mMultiplexSetGraph))) {844 const vertex_t j = target(ei, mMultiplexSetGraph);845 if (chosen_set[j] == 0) {846 chosen_set[j] = (k + m);847 for (const auto ej : make_iterator_range(in_edges(j, mMultiplexSetGraph))) {848 remaining[ source(ej, mMultiplexSetGraph) m];955 for (const auto u : make_iterator_range(adjacent_vertices(k + m, mCandidateGraph))) { 956 if (chosen_set[u] == 0) { 957 chosen_set[u] = (k + m); 958 for (const auto v : make_iterator_range(adjacent_vertices(u, mCandidateGraph))) { 959 assert (v >= m); 960 remaining[v  m]; 849 961 } 850 962 } … … 861 973 if (chosen_set[i] == (k + m)) { 862 974 degree_t r = 1; 863 for (const auto ej : make_iterator_range(in_edges(i, mMultiplexSetGraph))) {975 for (const auto u : make_iterator_range(adjacent_vertices(i, mCandidateGraph))) { 864 976 // strongly prefer adding weight to unvisited sets that have more remaining vertices 865 r += std::pow(remaining[ source(ej, mMultiplexSetGraph) m], 2);977 r += std::pow(remaining[u  m], 2); 866 978 } 867 979 if (w < r) { … … 873 985 assert (w > 0); 874 986 chosen_set[j] = 0; 875 for (const auto ej : make_iterator_range(in_edges(j, mMultiplexSetGraph))) { 876 remaining[source(ej, mMultiplexSetGraph)  m]++; 877 } 878 } 987 for (const auto u : make_iterator_range(adjacent_vertices(j, mCandidateGraph))) { 988 assert (u >= m); 989 remaining[u  m]++; 990 } 991 } 992 993 // If Samples > 1 then our candidate sets were generated by more than one traversal through the constraint graph. 994 // Sets generated by differing traversals may generate a cycle in the AST if multiplex even when they are not 995 // multiplexed together. For example, 996 997 // Assume we're multiplexing set {A,B,C} and {D,E,F} and that no constraint exists between any nodes in 998 // either set. If A is dependent on D and E is dependent on B, multiplexing both sets would result in a cycle 999 // in the AST. To fix this, we'd have to remove A, D, B or E. 1000 1001 // This cannot occur with only one traversal (or between sets generated by the same traversal) because of the 1002 // DAG traversal strategy used in "generateCandidateSets". 1003 1004 879 1005 } 880 1006 881 1007 for (unsigned i = 0; i != m; ++i) { 882 SetIterator ei, ei_end;883 std::tie(ei, ei_end) = in_edges(i, mMultiplexSetGraph);1008 AdjIterator ei, ei_end; 1009 std::tie(ei, ei_end) = adjacent_vertices(i, mCandidateGraph); 884 1010 for (auto next = ei; ei != ei_end; ei = next) { 885 1011 ++next; 886 if ( source(*ei, mMultiplexSetGraph)!= chosen_set[i]) {887 remove_edge( *ei, mMultiplexSetGraph);1012 if (*ei != chosen_set[i]) { 1013 remove_edge(i, *ei, mCandidateGraph); 888 1014 } 889 1015 } … … 892 1018 #ifndef NDEBUG 893 1019 for (unsigned i = 0; i != m; ++i) { 894 assert ( in_degree(i, mMultiplexSetGraph) <= 1);1020 assert (degree(i, mCandidateGraph) <= 1); 895 1021 } 896 1022 for (unsigned i = m; i != (m + n); ++i) { 897 assert ( out_degree(i, mMultiplexSetGraph) == 0  out_degree(i, mMultiplexSetGraph) >= 3);1023 assert (degree(i, mCandidateGraph) == 0  degree(i, mCandidateGraph) >= 3); 898 1024 } 899 1025 #endif … … 901 1027 902 1028 /**  * 1029 * @brief selectMultiplexSetsWorkingSet 1030 **  */ 1031 void MultiplexingPass::selectMultiplexSetsWorkingSet() { 1032 1033 // The inputs to each Advance must be different; otherwise the SimplificationPass would consider all but 1034 // one of the Advances redundant. However, if the input is short lived, we can ignore it in favour of its 1035 // operands, which *may* be shared amongst more than one of the Advances (or may be short lived themselves, 1036 // in which we can consider their operands instead.) Ideally, if we can keep the set of live values small, 1037 // we may be able to reduce register pressure. 1038 1039 // using Graph = adjacency_list<hash_setS, vecS, bidirectionalS, Statement *, unsigned>; 1040 // using Map = flat_map<const Statement *, typename Graph::vertex_descriptor>; 1041 1042 // const size_t m = num_vertices(mConstraintGraph); 1043 // const size_t n = num_vertices(mMultiplexSetGraph)  m; 1044 1045 // for (unsigned i = 0; i != n; ++i) { 1046 1047 // Map M; 1048 // Graph G = construct<Graph>(block, M); 1049 1050 1051 1052 1053 1054 1055 1056 // } 1057 1058 1059 1060 1061 1062 1063 } 1064 1065 /**  * 903 1066 * @brief eliminateSubsetConstraints 904 1067 **  */ 905 1068 void MultiplexingPass::eliminateSubsetConstraints() { 1069 using SubsetEdgeIterator = graph_traits<SubsetGraph>::edge_iterator; 906 1070 // If Ai â Aj then the subset graph will contain the arc (i, j). Remove all arcs corresponding to vertices 907 1071 // that are not elements of the same multiplexing set. … … 912 1076 const auto u = source(*ei, mSubsetGraph); 913 1077 const auto v = target(*ei, mSubsetGraph); 914 if ( in_degree(u, mMultiplexSetGraph) != 0 && in_degree(v, mMultiplexSetGraph) != 0) {915 assert ( in_degree(u, mMultiplexSetGraph) == 1);916 const auto su = source(*(in_edges(u, mMultiplexSetGraph).first), mMultiplexSetGraph);917 assert (in_degree(v, mMultiplexSetGraph) == 1);918 const auto sv = source(*(in_edges(v, mMultiplexSetGraph).first), mMultiplexSetGraph);1078 if (degree(u, mCandidateGraph) != 0 && degree(v, mCandidateGraph) != 0) { 1079 assert (degree(u, mCandidateGraph) == 1); 1080 assert (degree(v, mCandidateGraph) == 1); 1081 const auto su = *(adjacent_vertices(u, mCandidateGraph).first); 1082 const auto sv = *(adjacent_vertices(v, mCandidateGraph).first); 919 1083 if (su == sv) { 920 1084 continue; … … 952 1116 flat_set<PabloBlock *> modified; 953 1117 const auto first_set = num_vertices(mConstraintGraph); 954 const auto last_set = num_vertices(m MultiplexSetGraph);1118 const auto last_set = num_vertices(mCandidateGraph); 955 1119 for (auto idx = first_set; idx != last_set; ++idx) { 956 const size_t n = out_degree(idx, mMultiplexSetGraph); 1120 const size_t n = degree(idx, mCandidateGraph); 1121 assert (n == 0  n > 2); 957 1122 if (n) { 958 1123 const size_t m = log2_plus_one(n); … … 1011 1176 } 1012 1177 for (PabloBlock * block : modified) { 1013 topologicalSort(block);1178 rewriteAST(block); 1014 1179 } 1015 1180 } … … 1018 1183 * @brief orderMultiplexSet 1019 1184 **  */ 1020 inline MultiplexingPass:: MultiplexVector MultiplexingPass::orderMultiplexSet(const MultiplexSetGraph::vertex_descriptor u) {1021 MultiplexVectorset;1022 set.reserve( out_degree(u, mMultiplexSetGraph));1023 for (const auto e : make_iterator_range( out_edges(u, mMultiplexSetGraph))) {1024 set.push_back( target(e, mMultiplexSetGraph));1185 inline MultiplexingPass::Candidates MultiplexingPass::orderMultiplexSet(const CandidateGraph::vertex_descriptor u) { 1186 Candidates set; 1187 set.reserve(degree(u, mCandidateGraph)); 1188 for (const auto e : make_iterator_range(adjacent_vertices(u, mCandidateGraph))) { 1189 set.push_back(e); 1025 1190 } 1026 1191 std::sort(set.begin(), set.end()); 1027 MultiplexVectorclique;1028 MultiplexVectorresult;1029 result.reserve( out_degree(u, mMultiplexSetGraph));1192 Candidates clique; 1193 Candidates result; 1194 result.reserve(degree(u, mCandidateGraph)); 1030 1195 while (set.size() > 0) { 1031 1196 const auto v = *set.begin(); … … 1057 1222 } 1058 1223 1059 /**  * 1060 * @brief topologicalSort 1061 **  */ 1062 void MultiplexingPass::topologicalSort(PabloBlock * const block) { 1063 1064 using Vertex = OrderingGraph::vertex_descriptor; 1065 1066 OrderingGraph G; 1067 OrderingMap M; 1068 1069 for (Statement * stmt : *block ) { 1070 const auto u = add_vertex(G); 1071 G[u] = stmt; 1072 M.emplace(stmt, u); 1073 if (LLVM_UNLIKELY(isa<If>(stmt))) { 1074 for (Assign * def : cast<If>(stmt)>getDefined()) { 1075 M.emplace(def, u); 1076 } 1077 } else if (LLVM_UNLIKELY(isa<While>(stmt))) { 1078 for (Next * var : cast<While>(stmt)>getVariants()) { 1079 M.emplace(var, u); 1080 } 1081 } 1082 } 1083 1084 Vertex u = 0; 1085 for (Statement * stmt : *block ) { 1086 1087 for (unsigned i = 0; i != stmt>getNumOperands(); ++i) { 1088 PabloAST * const op = stmt>getOperand(i); 1089 if (isa<Statement>(op)) { 1090 auto f = M.find(cast<Statement>(op)); 1091 if (f != M.end()) { 1092 add_edge(f>second, u, G); 1093 } 1094 } 1095 } 1096 1097 if (LLVM_UNLIKELY(isa<If>(stmt))) { 1098 for (Assign * def : cast<If>(stmt)>getDefined()) { 1099 topologicalSort(u, block, def, G, M); 1100 } 1101 } else if (LLVM_UNLIKELY(isa<While>(stmt))) { 1102 for (Next * var : cast<While>(stmt)>getVariants()) { 1103 topologicalSort(u, block, var, G, M); 1104 } 1105 } else { 1106 topologicalSort(u, block, stmt, G, M); 1107 } 1108 1109 ++u; 1110 1111 } 1112 1113 circular_buffer<Vertex> Q(num_vertices(G)); 1114 topological_sort(G, std::back_inserter(Q)); 1115 1116 block>setInsertPoint(nullptr); 1117 while (Q.size() > 0) { 1118 const Vertex u = Q.back(); Q.pop_back(); 1119 block>insert(G[u]); 1120 } 1121 1122 } 1123 1124 /**  * 1125 * @brief topologicalSort 1126 **  */ 1127 void MultiplexingPass::topologicalSort(const OrderingGraph::vertex_descriptor u, const PabloBlock * const block, const Statement * const stmt, OrderingGraph & G, OrderingMap & M) { 1128 for (const PabloAST * user : stmt>users()) { 1129 if (LLVM_LIKELY(isa<Statement>(user))) { 1130 const Statement * use = cast<Statement>(user); 1131 auto f = M.find(use); 1132 if (LLVM_UNLIKELY(f == M.end())) { 1133 const PabloBlock * parent = use>getParent(); 1134 for (;;) { 1135 if (parent == block) { 1224 1225 1226 1227 /**  * 1228 * @brief rewriteAST 1229 * 1230 * Multiplexing ignores defuse ordering when muxing and demuxing the Advances; this will likely lead to an illegal 1231 * ordering but, by virtue of the multiplexing algorithm, some ordering of the IR must be legal. However, an 1232 * arbritary topological ordering will likely lead to poor performance due to excessive register spills; this 1233 * algorithm attempts to mitigate this by using a simple bottomup ordering scheme. 1234 **  */ 1235 void MultiplexingPass::rewriteAST(PabloBlock * const block) { 1236 1237 using Graph = adjacency_list<hash_setS, vecS, bidirectionalS, Statement *>; 1238 using Vertex = Graph::vertex_descriptor; 1239 using ReadySet = std::vector<Vertex>; 1240 using TypeId = PabloAST::ClassTypeId; 1241 1242 Graph G = construct<Graph>(block); 1243 1244 1245 std::vector<unsigned> rank(num_vertices(G), 0); 1246 1247 { 1248 circular_buffer<Vertex> Q(num_vertices(G)); 1249 // Compute the rank of each statement 1250 for (const Vertex u : make_iterator_range(vertices(G))) { 1251 if (out_degree(u, G) == 0 && rank[u] == 0) { 1252 Q.push_back(u); 1253 } 1254 } 1255 1256 while (Q.size() > 0) { 1257 1258 const Vertex u = Q.front(); 1259 Q.pop_front(); 1260 1261 assert (rank[u] == 0); 1262 1263 unsigned work = 0; 1264 switch (G[u]>getClassTypeId()) { 1265 case TypeId::And: 1266 case TypeId::Or: 1267 case TypeId::Xor: 1268 work = 2; 1269 break; 1270 // case TypeId::Not: 1271 case TypeId::Assign: 1272 case TypeId::Next: 1273 work = 1; 1274 break; 1275 case TypeId::Sel: 1276 work = 6; 1277 break; 1278 case TypeId::Advance: 1279 case TypeId::ScanThru: 1280 work = 33; 1281 break; 1282 case TypeId::MatchStar: 1283 work = 51; 1284 break; 1285 case TypeId::If: 1286 case TypeId::While: 1287 case TypeId::Call: 1288 work = 10000; // < try to push If, While and Call nodes as high as possible 1289 break; 1290 default: break; 1291 } 1292 1293 unsigned r = 0; 1294 for (const auto e : make_iterator_range(out_edges(u, G))) { 1295 r = std::max(r, rank[target(e, G)]); 1296 } 1297 1298 rank[u] = work + r; 1299 1300 for (const auto ei : make_iterator_range(in_edges(u, G))) { 1301 const auto v = source(ei, G); 1302 assert (rank[v] == 0); 1303 bool ready = true; 1304 for (const auto ej : make_iterator_range(out_edges(v, G))) { 1305 if (rank[target(ej, G)] == 0) { 1306 ready = false; 1136 1307 break; 1137 1308 } 1138 use = parent>getBranch(); 1139 parent = parent>getParent(); 1140 if (parent == nullptr) { 1141 return; 1309 } 1310 if (ready) { 1311 Q.push_back(v); 1312 } 1313 } 1314 1315 } 1316 } 1317 1318 // Compute the initial ready set 1319 ReadySet readySet; 1320 for (const Vertex u : make_iterator_range(vertices(G))) { 1321 if (in_degree(u, G) == 0) { 1322 readySet.emplace_back(u); 1323 } 1324 } 1325 1326 auto by_nonincreasing_rank = [&rank](const Vertex u, const Vertex v) > bool { 1327 return rank[u] > rank[v]; 1328 }; 1329 1330 std::sort(readySet.begin(), readySet.end(), by_nonincreasing_rank); 1331 1332 block>setInsertPoint(nullptr); 1333 // Rewrite the AST using the bottomup ordering 1334 while (readySet.size() > 0) { 1335 // Scan through the ready set to determine which one 'kills' the greatest number of inputs 1336 double best = 0.0; 1337 auto rk = readySet.begin(); 1338 for (auto ri = readySet.begin(); ri != readySet.end(); ++ri) { 1339 double p = 0.0; 1340 assert (rank[*ri] != 0); 1341 for (auto ei : make_iterator_range(in_edges(*ri, G))) { 1342 const auto v = source(ei, G); 1343 unsigned unscheduled = 0; 1344 for (auto ej : make_iterator_range(out_edges(v, G))) { 1345 if (rank[target(ej, G)] != 0) { // if this edge leads to an unscheduled statement 1346 ++unscheduled; 1142 1347 } 1143 1348 } 1144 f = M.find(use); 1145 assert (f != M.end()); 1146 M.emplace(use, f>second); 1147 } 1148 const auto v = f>second; 1149 if (LLVM_UNLIKELY(u != v)) { 1150 add_edge(u, v, G); 1151 } 1152 } 1153 } 1349 assert (unscheduled > 0); 1350 assert (unscheduled <= out_degree(v, G)); 1351 const double uses = out_degree(v, G); 1352 p += std::pow((uses  (double)(unscheduled  1)) / uses, 2); 1353 } 1354 if (p > best) { 1355 rk = ri; 1356 best = p; 1357 } 1358 } 1359 const auto u = *rk; 1360 readySet.erase(rk); 1361 // Write the statement back to the AST ... 1362 block>insert(G[u]); 1363 // ... and mark it as written 1364 rank[u] = 0; 1365 // Now check whether any new statements are ready 1366 for (auto ei : make_iterator_range(out_edges(u, G))) { 1367 bool ready = true; 1368 const auto v = target(ei, G); 1369 for (auto ej : make_iterator_range(in_edges(v, G))) { 1370 if (rank[source(ej, G)] != 0) { 1371 ready = false; 1372 break; 1373 } 1374 } 1375 if (ready) { 1376 assert (rank[v] != 0); 1377 readySet.insert(std::lower_bound(readySet.begin(), readySet.end(), v, by_nonincreasing_rank), v); 1378 assert (std::is_sorted(readySet.cbegin(), readySet.cend(), by_nonincreasing_rank)); 1379 } 1380 } 1381 } 1382 1154 1383 } 1155 1384 … … 1289 1518 } 1290 1519 1520 /**  * 1521 * @brief computeDAG 1522 **  */ 1523 template<typename Graph, typename Map> 1524 Graph construct(PabloBlock * const block, Map & M) { 1525 1526 using Vertex = typename Graph::vertex_descriptor; 1527 1528 const auto size = std::distance(block>begin(), block>end()); 1529 1530 Graph G(size); 1531 M.reserve(size); 1532 1533 Vertex u = 0; 1534 for (Statement * stmt : *block ) { 1535 G[u] = stmt; 1536 M.emplace(stmt, u); 1537 if (LLVM_UNLIKELY(isa<If>(stmt))) { 1538 for (Assign * def : cast<If>(stmt)>getDefined()) { 1539 M.emplace(def, u); 1540 } 1541 } else if (LLVM_UNLIKELY(isa<While>(stmt))) { 1542 for (Next * var : cast<While>(stmt)>getVariants()) { 1543 M.emplace(var, u); 1544 } 1545 } 1546 ++u; 1547 } 1548 1549 /// The following is a lamda function that adds any users of "stmt" to the graph after resolving 1550 /// which vertex it maps to w.r.t. the current block. 1551 auto addUsers = [&](const Vertex u, const Statement * const stmt) > void { 1552 for (const PabloAST * user : stmt>users()) { 1553 if (LLVM_LIKELY(isa<Statement>(user))) { 1554 const Statement * use = cast<Statement>(user); 1555 auto f = M.find(use); 1556 if (LLVM_UNLIKELY(f == M.end())) { 1557 const PabloBlock * parent = use>getParent(); 1558 for (;;) { 1559 if (parent == block) { 1560 break; 1561 } 1562 use = parent>getBranch(); 1563 parent = parent>getParent(); 1564 if (parent == nullptr) { 1565 return; 1566 } 1567 } 1568 f = M.find(use); 1569 assert (f != M.end()); 1570 M.emplace(use, f>second); 1571 } 1572 const auto v = f>second; 1573 if (LLVM_UNLIKELY(u != v)) { 1574 add_edge(u, v, G); 1575 } 1576 } 1577 } 1578 }; 1579 1580 u = 0; 1581 for (Statement * stmt : *block ) { 1582 1583 for (unsigned i = 0; i != stmt>getNumOperands(); ++i) { 1584 PabloAST * const op = stmt>getOperand(i); 1585 if (isa<Statement>(op)) { 1586 auto f = M.find(cast<Statement>(op)); 1587 if (f != M.end()) { 1588 add_edge(f>second, u, G); 1589 } 1590 } 1591 } 1592 1593 if (LLVM_UNLIKELY(isa<If>(stmt))) { 1594 for (Assign * def : cast<If>(stmt)>getDefined()) { 1595 addUsers(u, def); 1596 } 1597 } else if (LLVM_UNLIKELY(isa<While>(stmt))) { 1598 for (Next * var : cast<While>(stmt)>getVariants()) { 1599 addUsers(u, var); 1600 } 1601 } else { 1602 addUsers(u, stmt); 1603 } 1604 1605 ++u; 1606 } 1607 1608 #ifndef NDEBUG 1609 std::vector<typename Graph::vertex_descriptor> nothing; 1610 topological_sort(G, std::back_inserter(nothing)); 1611 #endif 1612 1613 return G; 1614 } 1615 1616 1617 /**  * 1618 * @brief computeDAG 1619 **  */ 1620 template<typename Graph> 1621 Graph construct(PabloBlock * const block) { 1622 using Map = flat_map<const Statement *, typename Graph::vertex_descriptor>; 1623 Map M; 1624 return construct<Graph, Map>(block, M); 1625 } 1626 1291 1627 } // end of namespace pablo
Note: See TracChangeset
for help on using the changeset viewer.