Changeset 4890 for icGREP/icgrepdevel/icgrep/pablo/optimizers
 Timestamp:
 Dec 9, 2015, 4:59:02 PM (4 years ago)
 Location:
 icGREP/icgrepdevel/icgrep/pablo/optimizers
 Files:

 3 edited
Legend:
 Unmodified
 Added
 Removed

icGREP/icgrepdevel/icgrep/pablo/optimizers/pablo_automultiplexing.cpp
r4888 r4890 15 15 #include <bdd.h> 16 16 17 /// TODO: Investigate why ./icgrep c multiplexingwindowsize=13,14...,20 "^\p{l}$" causes segfault in BuDDy. 18 17 19 using namespace llvm; 18 20 using namespace boost; … … 106 108 // const auto seed = rd(); 107 109 const auto seed = 83234827342; 108 RNG rng(seed);109 110 110 111 LOG("Seed: " << seed); 111 112 112 MultiplexingPass mp( limit, maxSelections, windowSize);113 MultiplexingPass mp(seed, limit, maxSelections, windowSize); 113 114 RECORD_TIMESTAMP(start_initialize); 114 115 const unsigned advances = mp.initialize(function, independent); … … 127 128 RECORD_TIMESTAMP(end_characterize); 128 129 129 LOG("Characterize: " << (end_characterize  start_characterize));130 131 LOG("Nodes in BDD: " << bdd_getnodenum() << " of " << bdd_getallocnum());130 LOG("Characterize: " << (end_characterize  start_characterize)); 131 132 LOG("Nodes in BDD: " << bdd_getnodenum() << " of " << bdd_getallocnum()); 132 133 133 134 RECORD_TIMESTAMP(start_shutdown); 134 135 bdd_done(); 135 136 RECORD_TIMESTAMP(end_shutdown); 136 LOG("Shutdown: " << (end_shutdown  start_shutdown));137 LOG("Shutdown: " << (end_shutdown  start_shutdown)); 137 138 138 139 RECORD_TIMESTAMP(start_create_multiplex_graph); 139 const bool multiplex = mp.generateCandidateSets( rng);140 const bool multiplex = mp.generateCandidateSets(); 140 141 RECORD_TIMESTAMP(end_create_multiplex_graph); 141 LOG("GenerateCandidateSets: " << (end_create_multiplex_graph  start_create_multiplex_graph));142 LOG("GenerateCandidateSets: " << (end_create_multiplex_graph  start_create_multiplex_graph)); 142 143 143 144 if (multiplex) { 144 145 146 RECORD_TIMESTAMP(start_usage_weighting); 147 mp.generateUsageWeightingGraph(); 148 RECORD_TIMESTAMP(end_usage_weighting); 149 LOG("GenerateUsageWeighting: " << (end_usage_weighting  start_usage_weighting)); 150 145 151 RECORD_TIMESTAMP(start_select_multiplex_sets); 146 mp.selectMultiplexSets( rng);152 mp.selectMultiplexSets(); 147 153 RECORD_TIMESTAMP(end_select_multiplex_sets); 148 LOG("SelectMultiplexSets: " << (end_select_multiplex_sets  start_select_multiplex_sets));154 LOG("SelectMultiplexSets: " << (end_select_multiplex_sets  start_select_multiplex_sets)); 149 155 150 156 RECORD_TIMESTAMP(start_subset_constraints); 151 157 mp.eliminateSubsetConstraints(); 152 158 RECORD_TIMESTAMP(end_subset_constraints); 153 LOG("ApplySubsetConstraints: " << (end_subset_constraints  start_subset_constraints));159 LOG("ApplySubsetConstraints: " << (end_subset_constraints  start_subset_constraints)); 154 160 155 161 RECORD_TIMESTAMP(start_select_independent_sets); 156 162 mp.multiplexSelectedIndependentSets(function); 157 163 RECORD_TIMESTAMP(end_select_independent_sets); 158 LOG("SelectedIndependentSets: " << (end_select_independent_sets  start_select_independent_sets)); 159 164 LOG("SelectedIndependentSets: " << (end_select_independent_sets  start_select_independent_sets)); 165 166 RECORD_TIMESTAMP(start_topological_sort); 160 167 MultiplexingPass::topologicalSort(function); 168 RECORD_TIMESTAMP(end_topological_sort); 169 LOG("TopologicalSort: " << (end_topological_sort  start_topological_sort)); 170 161 171 #ifndef NDEBUG 162 172 PabloVerifier::verify(function, "postmultiplexing"); … … 165 175 166 176 LOG_NUMBER_OF_ADVANCES(function.getEntryBlock()); 177 167 178 return multiplex; 168 179 } … … 310 321 assert (k == advances); 311 322 312 // Compute the base constraint graph as the union of TRANSPOSE(G) without any reflective loops and the set of edges 313 // marking which advances are in differing scope blocks. 314 323 // Initialize the base constraint graph by effectively transposing G and removing reflective loops 315 324 mConstraintGraph = ConstraintGraph(advances); 316 325 for (unsigned i = 0; i != advances; ++i) { … … 332 341 * @brief initializeAdvanceDepth 333 342 **  */ 334 inline void MultiplexingPass::initializeAdvanceDepth(PabloBlock * const block, const unsigned advances) {343 inline void MultiplexingPass::initializeAdvanceDepth(PabloBlock * const entryBlock, const unsigned advances) { 335 344 336 345 std::stack<Statement *> scope; 337 346 unsigned k = 0; 338 347 int maxDepth = 0; 339 const Advance * advance[advances]; 348 const PabloBlock * advanceScope[advances]; 349 mAdvance.resize(advances, nullptr); 340 350 mAdvanceDepth.resize(advances, 0); 341 for (Statement * stmt = block>front(); ; ) { 351 mAdvanceNegatedVariable.reserve(advances); 352 for (Statement * stmt = entryBlock>front(); ; ) { 342 353 while ( stmt ) { 343 354 if (LLVM_UNLIKELY(isa<If>(stmt)  isa<While>(stmt))) { … … 349 360 } else if (LLVM_UNLIKELY(isa<Advance>(stmt))) { 350 361 int depth = 0; 351 advance[k] = cast<Advance>(stmt); 362 mAdvance[k] = cast<Advance>(stmt); 363 advanceScope[k] = cast<Advance>(stmt)>getParent(); 352 364 for (unsigned i = 0; i != k; ++i) { 353 if (edge(i, k, mConstraintGraph).second  (advance [i]>getParent() != advance[k]>getParent())) {365 if (edge(i, k, mConstraintGraph).second  (advanceScope[i] != advanceScope[k])) { 354 366 depth = std::max<int>(depth, mAdvanceDepth[i]); 355 367 } … … 382 394 characterize(cast<If>(stmt)>getBody()); 383 395 } else if (LLVM_UNLIKELY(isa<While>(stmt))) { 384 const auto & variants = cast<While>(stmt)>getVariants();385 std::vector<BDD> assignments(variants.size());386 for (unsigned i = 0; i != variants.size(); ++i) {387 assignments[i] = get(variants[i]>getInitial());388 }389 396 characterize(cast<While>(stmt)>getBody()); 390 for ( unsigned i = 0; i != variants.size(); ++i) {391 BDD & var = get(variants[i]>getInitial());392 var = bdd_addref(bdd_or(var, assignments[i]));397 for (const Next * var : cast<While>(stmt)>getVariants()) { 398 BDD & assign = get(var>getInitial()); 399 assign = bdd_addref(bdd_or(assign, get(var))); 393 400 } 394 401 } else { … … 413 420 **  */ 414 421 inline BDD MultiplexingPass::characterize(Statement * const stmt) { 422 assert (stmt>getNumOperands() > 0); 415 423 BDD bdd = get(stmt>getOperand(0)); 416 424 switch (stmt>getClassTypeId()) { … … 457 465 **  */ 458 466 inline BDD MultiplexingPass::characterize(Advance * const adv, const BDD Ik) { 459 const auto k = mAdvanceAttributes.size(); 467 const auto k = mAdvanceNegatedVariable.size(); 468 assert (mAdvance[k] == adv); 460 469 std::vector<bool> unconstrained(k , false); 461 470 for (unsigned i = 0; i != k; ++i) { 471 462 472 // Are we interested in testing these streams to see whether they are mutually exclusive? 463 473 if (exceedsWindowSize(i, k)) continue; 474 464 475 // Have we already proven that they are unconstrained by their subset relationship? 465 476 if (unconstrained[i]) continue; 477 466 478 // If these Advances are mutually exclusive, in the same scope, transitively independent, and shift their 467 479 // values by the same amount, we can safely multiplex them. Otherwise mark the constraint in the graph. 468 const Advance * const ithAdv = std::get<0>(mAdvanceAttributes[i]);480 const Advance * const ithAdv = mAdvance[i]; 469 481 if ((mTestConstrainedAdvances  independent(i, k)) && (ithAdv>getOperand(1) == adv>getOperand(1))) { 470 482 const BDD Ii = get(ithAdv>getOperand(0)); … … 506 518 } 507 519 508 BDD Ck = bdd_ithvar(mVariables++);509 const BDD Nk = bdd_addref(bdd_not( Ck));520 BDD Ak = bdd_ithvar(mVariables++); 521 const BDD Nk = bdd_addref(bdd_not(Ak)); 510 522 for (unsigned i = 0; i != k; ++i) { 511 523 if (unconstrained[i]) { 512 const Advance * const ithAdv = std::get<0>(mAdvanceAttributes[i]); 513 const BDD Ni = std::get<1>(mAdvanceAttributes[i]); 514 515 BDD & Ci = get(ithAdv); 516 Ci = bdd_addref(bdd_and(Ci, Nk)); 517 Ck = bdd_addref(bdd_and(Ck, Ni)); 518 if ((!mTestConstrainedAdvances  independent(i, k)) && (adv>getParent() == ithAdv>getParent())) { 524 // Note: this algorithm deems two streams are mutually exclusive if and only if the conjuntion of their BDDs results 525 // in a contradiction. To generate a contradiction when comparing Advances, the BDD of each Advance is represented by 526 // the conjunction of variable representing that Advance and the negation of all variables for the Advances in which 527 // the inputs are deemed to be mutually exclusive. For example, if the input of the ith Advance is mutually exclusive 528 // with the input of the jth and kth Advance, the BDD of the ith Advance is Ai â§ Â¬Aj â§ Â¬Ak. 529 const Advance * const ithAdv = mAdvance[i]; 530 const BDD Ni = mAdvanceNegatedVariable[i]; 531 BDD & Ai = get(ithAdv); 532 Ai = bdd_addref(bdd_and(Ai, Nk)); 533 Ak = bdd_addref(bdd_and(Ak, Ni)); 534 if (independent(i, k) && (adv>getParent() == ithAdv>getParent())) { 519 535 continue; 520 536 } … … 522 538 add_edge(i, k, mConstraintGraph); 523 539 } 524 mAdvanceAttributes.emplace_back(adv, Nk); 525 return Ck; 540 // To minimize the number of BDD computations, store the negated variable instead of negating it each time. 541 mAdvanceNegatedVariable.emplace_back(Nk); 542 return Ak; 526 543 } 527 544 … … 543 560 544 561 /**  * 562 * @brief is_power_of_2 563 * @param n an integer 564 **  */ 565 static inline bool is_power_of_2(const size_t n) { 566 return ((n & (n  1)) == 0); 567 } 568 569 /**  * 570 * @brief generateUsageWeightingGraph 571 * 572 * Prior to generating our candidate sets, scan through the constraint graph and generate a clique in the usage 573 * weighting graph each set of constraintfree Advance nodes that have the same users. We may be able optimize 574 * the demultiplexing calculations using range expressions. 575 * 576 * Note: it'd be preferable to contract vertices in the constraint graph prior to scanning through it but that 577 * leaves us with a more difficult problem. Namely, an Advance node may belong to more than one clique and we 578 * want to avoid generating a multiplexing set whose size is 2^n for any n â â€* but don't want to needlessly 579 * limit the size of any clique. Look into this further later. 580 **  */ 581 void MultiplexingPass::generateUsageWeightingGraph() { 582 const auto n = num_vertices(mConstraintGraph); assert (n > 2); 583 // Let G be the complement of the constraint graph âª the subset graph restricted to only the edges corresponding 584 // to pairs of Advances with the same users. 585 CliqueGraph G(n); 586 for (unsigned i = 0; i != (n  1); ++i) { 587 const Advance * const advI = mAdvance[i]; 588 for (unsigned j = i + 1; j != n; ++j) { 589 const Advance * const advJ = mAdvance[j]; 590 if (LLVM_UNLIKELY(advI>getNumUsers() == advJ>getNumUsers()) && independent(i, j)) { 591 if (LLVM_UNLIKELY(std::equal(advI>user_begin(), advI>user_end(), advJ>user_begin()))) { 592 // INVESTIGATE: we should be able to ignore subset relations if these are going to become a 593 // range expression. Look into making a proof for it once the range expression calculation 594 // is finished. 595 if (!(edge(i, j, mSubsetGraph).second  edge(j, i, mSubsetGraph).second)) { 596 add_edge(i, j, G); 597 } 598 } 599 } 600 } 601 } 602 if (num_edges(G) > 0) { 603 const CliqueSets S = findMaximalCliques(G); 604 for (unsigned i = 0; i != n; ++i) { 605 clear_vertex(i, G); 606 } 607 for (const std::vector<CliqueGraph::vertex_descriptor> & C : S) { 608 const unsigned m = C.size(); assert (m > 1); 609 for (unsigned i = 1; i != m; ++i) { 610 for (unsigned j = 0; j != i; ++j) { 611 add_edge(C[j], C[i], G); 612 } 613 } 614 } 615 } 616 mUsageWeightingGraph = G; 617 } 618 619 /**  * 545 620 * @brief generateMultiplexSets 546 * @param RNG random number generator 547 **  */ 548 bool MultiplexingPass::generateCandidateSets(RNG & rng) { 549 550 using degree_t = graph_traits<ConstraintGraph>::degree_size_type; 621 **  */ 622 bool MultiplexingPass::generateCandidateSets() { 551 623 552 624 // What if we generated a "constraint free" graph instead? By taking each connected component of it … … 555 627 556 628 VertexVector S; 557 std::vector< degree_t> D(num_vertices(mConstraintGraph));629 std::vector<ConstraintGraph::degree_size_type> D(num_vertices(mConstraintGraph)); 558 630 S.reserve(15); 559 631 560 632 mMultiplexSetGraph = MultiplexSetGraph(num_vertices(mConstraintGraph)); 561 633 562 // Push all source nodes into the new independent set N634 // Push all source nodes into the (initial) independent set S 563 635 for (auto v : make_iterator_range(vertices(mConstraintGraph))) { 564 636 const auto d = in_degree(v, mConstraintGraph); … … 575 647 do { 576 648 577 addCandidateSet(S , rng);649 addCandidateSet(S); 578 650 579 651 bool noNewElements = true; … … 581 653 assert (S.size() > 0); 582 654 // Randomly choose a vertex in S and discard it. 583 const auto i = S.begin() + IntDistribution(0, S.size()  1)( rng);655 const auto i = S.begin() + IntDistribution(0, S.size()  1)(mRNG); 584 656 assert (i != S.end()); 585 657 const ConstraintVertex u = *i; … … 604 676 /**  * 605 677 * @brief choose 606 **  */ 607 inline unsigned long choose(const unsigned n, const unsigned k) { 678 * 679 * Compute n choose k 680 **  */ 681 __attribute__ ((const)) inline unsigned long choose(const unsigned n, const unsigned k) { 608 682 if (n < k) 609 683 return 0; … … 632 706 unsigned long x = (choose(n, k)  1)  m; 633 707 for (unsigned i = 0; i != k; ++i) { 634 while (choose(a, b) > x); 635 x = x  choose(a, b); 708 unsigned long y = 0; 709 while ((y = choose(a, b)) > x); 710 x = x  y; 636 711 b = b  1; 637 712 element[i] = (n  1)  a; … … 643 718 * @param S an independent set 644 719 **  */ 645 inline void MultiplexingPass::addCandidateSet(const VertexVector & S , RNG & rng) {720 inline void MultiplexingPass::addCandidateSet(const VertexVector & S) { 646 721 if (S.size() >= 3) { 647 722 if (S.size() <= mMultiplexingSetSizeLimit) { … … 663 738 } else { // take m random samples 664 739 for (unsigned i = 0; i != mMaxMultiplexingSetSelections; ++i) { 665 select(S.size(), mMultiplexingSetSizeLimit, rng() % max, element);740 select(S.size(), mMultiplexingSetSizeLimit, mRNG() % max, element); 666 741 const auto u = add_vertex(mMultiplexSetGraph); 667 742 for (unsigned j = 0; j != mMultiplexingSetSizeLimit; ++j) { … … 675 750 676 751 /**  * 677 * @brief is_power_of_2678 * @param n an integer679 **  */680 static inline bool is_power_of_2(const size_t n) {681 return ((n & (n  1)) == 0) ;682 }683 684 /**  *685 752 * @brief log2_plus_one 686 753 **  */ 687 754 static inline size_t log2_plus_one(const size_t n) { 688 return std::log2<size_t>(n) + 1; // use a faster builtin function for this?755 return std::log2<size_t>(n) + 1; 689 756 } 690 757 691 758 /**  * 692 759 * @brief selectMultiplexSets 693 * @param RNG random number generator694 760 * 695 761 * This algorithm is simply computes a greedy set cover. We want an exact maxweight set cover but can generate new 696 762 * sets by taking a subset of any existing set. With a few modifications, the greedy approach seems to work well 697 * enough but can be trivially shown to produce a suboptimal solution if there are three (or more) sets in which 698 * two, labelled A and B, are disjoint and the third larger set, C, that consists of elements of A and B. 699 **  */ 700 void MultiplexingPass::selectMultiplexSets(RNG &) { 701 702 using InEdgeIterator = graph_traits<MultiplexSetGraph>::in_edge_iterator; 763 * enough but can be shown to produce a suboptimal solution if there are three candidate sets labelled A, B and C, 764 * in which A â© B = â 765 , A â€ B < C < (A + B), and C â© (A âª B) = C. 766 **  */ 767 void MultiplexingPass::selectMultiplexSets() { 768 769 using SetIterator = graph_traits<MultiplexSetGraph>::in_edge_iterator; 770 using ElementIterator = graph_traits<MultiplexSetGraph>::out_edge_iterator; 703 771 using degree_t = MultiplexSetGraph::degree_size_type; 704 772 using vertex_t = MultiplexSetGraph::vertex_descriptor; … … 707 775 const size_t n = num_vertices(mMultiplexSetGraph)  m; 708 776 709 std::vector<degree_t> remaining(n, 0);710 std::vector<vertex_t> chosen_set(m, 0);777 degree_t remaining[n]; 778 vertex_t chosen_set[m]; 711 779 712 780 for (unsigned i = 0; i != n; ++i) { 713 781 remaining[i] = out_degree(i + m, mMultiplexSetGraph); 782 } 783 for (unsigned i = 0; i != m; ++i) { 784 chosen_set[i] = 0; 714 785 } 715 786 … … 720 791 degree_t w = 0; 721 792 for (vertex_t i = 0; i != n; ++i) { 722 const degree_t r = remaining[i]; 723 if (w < r) { 724 k = i; 725 w = r; 726 } 727 } 728 729 // Multiplexing requires at least 3 elements; if the best set contains fewer than 3, abort. 730 if (w < 3) { 793 degree_t r = remaining[i]; 794 if (r > 2) { // if this set has at least 3 elements. 795 r *= r; 796 ElementIterator begin, end; 797 std::tie(begin, end) = out_edges(i + m, mMultiplexSetGraph); 798 for (auto ei = begin; ei != end; ++ei) { 799 for (auto ej = ei; ++ej != end; ) { 800 if (edge(target(*ei, mMultiplexSetGraph), target(*ej, mMultiplexSetGraph), mUsageWeightingGraph).second) { 801 ++r; 802 } 803 } 804 } 805 if (w < r) { 806 k = i; 807 w = r; 808 } 809 } 810 } 811 812 // Multiplexing requires 3 or more elements; if no set contains at least 3, abort. 813 if (w == 0) { 731 814 break; 732 815 } … … 746 829 // If this contains 2^n elements for any n, discard the member that is most likely to be added 747 830 // to some future set. 748 if ( is_power_of_2(w)) {831 if (LLVM_UNLIKELY(is_power_of_2(w))) { 749 832 vertex_t j = 0; 750 833 degree_t w = 0; … … 771 854 772 855 for (unsigned i = 0; i != m; ++i) { 773 InEdgeIterator ei, ei_end;856 SetIterator ei, ei_end; 774 857 std::tie(ei, ei_end) = in_edges(i, mMultiplexSetGraph); 775 858 for (auto next = ei; ei != ei_end; ei = next) { … … 795 878 **  */ 796 879 void MultiplexingPass::eliminateSubsetConstraints() { 797 798 using SubsetEdgeIterator = graph_traits<SubsetGraph>::edge_iterator;799 800 880 // If Ai â Aj then the subset graph will contain the arc (i, j). Remove all arcs corresponding to vertices 801 881 // that are not elements of the same multiplexing set. … … 827 907 // Afterwards modify the AST to ensure that multiplexing algorithm can ignore any subset constraints 828 908 for (auto e : make_iterator_range(edges(mSubsetGraph))) { 829 Advance * adv1 = std::get<0>(mAdvanceAttributes[source(e, mSubsetGraph)]);830 Advance * adv2 = std::get<0>(mAdvanceAttributes[target(e, mSubsetGraph)]);909 Advance * const adv1 = mAdvance[source(e, mSubsetGraph)]; 910 Advance * const adv2 = mAdvance[target(e, mSubsetGraph)]; 831 911 assert (adv1>getParent() == adv2>getParent()); 832 912 PabloBlock * const pb = adv1>getParent(); … … 844 924 **  */ 845 925 void MultiplexingPass::multiplexSelectedIndependentSets(PabloFunction &) { 846 847 const unsigned first_set = num_vertices(mConstraintGraph); 848 const unsigned last_set = num_vertices(mMultiplexSetGraph); 849 850 // Preallocate the structures based on the size of the largest multiplex set 851 size_t max_n = 3; 852 for (unsigned idx = first_set; idx != last_set; ++idx) { 853 max_n = std::max<unsigned>(max_n, out_degree(idx, mMultiplexSetGraph)); 854 } 855 856 circular_buffer<PabloAST *> Q(max_n); 857 858 // When entering thus function, the multiplex set graph M is a DAG with edges denoting the set 859 // relationships of our independent sets. 860 861 for (unsigned idx = first_set; idx != last_set; ++idx) { 926 const auto first_set = num_vertices(mConstraintGraph); 927 const auto last_set = num_vertices(mMultiplexSetGraph); 928 for (auto idx = first_set; idx != last_set; ++idx) { 862 929 const size_t n = out_degree(idx, mMultiplexSetGraph); 863 930 if (n) { … … 865 932 Advance * input[n]; 866 933 Advance * muxed[m]; 867 934 // The multiplex set graph is a DAG with edges denoting the set relationships of our independent sets. 868 935 unsigned i = 0; 869 936 for (const auto e : make_iterator_range(out_edges(idx, mMultiplexSetGraph))) { 870 input[i++] = std::get<0>(mAdvanceAttributes[target(e, mMultiplexSetGraph)]); 871 } 872 937 input[i++] = mAdvance[target(e, mMultiplexSetGraph)]; 938 } 873 939 Advance * const adv = input[0]; 874 940 PabloBlock * const block = adv>getParent(); assert (block); 875 PabloBuilder builder(block); 876 block>setInsertPoint(block>back()); 877 941 block>setInsertPoint(nullptr); 878 942 /// Perform ntom Multiplexing 879 943 for (size_t j = 0; j != m; ++j) { 880 881 944 std::ostringstream prefix; 882 945 prefix << "mux" << n << "to" << m << '.' << (j + 1); 946 Or * muxing = block>createOr(n); 883 947 for (size_t i = 0; i != n; ++i) { 884 948 if (((i + 1) & (1UL << j)) != 0) { 885 949 assert (input[i]>getParent() == block); 886 Q.push_back(input[i]>getOperand(0)); 887 } 888 } 889 890 while (Q.size() > 1) { 891 PabloAST * a1 = Q.front(); Q.pop_front(); assert (a1); 892 PabloAST * a2 = Q.front(); Q.pop_front(); assert (a2); 893 assert (!Q.full()); 894 Q.push_back(builder.createOr(a2, a1, "muxing")); 895 } 896 897 PabloAST * mux = Q.front(); Q.pop_front(); assert (mux); 898 // The only way this did not return an Advance statement would be if either the mux or shift amount 899 // is zero. Since these cases would have been eliminated earlier, we are safe to cast here. 900 muxed[j] = cast<Advance>(builder.createAdvance(mux, adv>getOperand(1), prefix.str())); 901 } 902 950 muxing>addOperand(input[i]>getOperand(0)); 951 } 952 } 953 muxed[j] = cast<Advance>(block>createAdvance(muxing, adv>getOperand(1), prefix.str())); 954 } 903 955 /// Perform mton Demultiplexing 904 956 for (size_t i = 0; i != n; ++i) { 905 906 // Construct the demuxed values and replaces all the users of the original advances with them.957 // Construct the demuxed values and replaces all the users of the original advances with them. 958 PabloAST * demuxing[m]; 907 959 for (size_t j = 0; j != m; ++j) { 960 demuxing[j] = muxed[j]; 908 961 if (((i + 1) & (1UL << j)) == 0) { 909 Q.push_back(muxed[j]); 910 } 911 } 912 913 if (LLVM_LIKELY(Q.size() > 0)) { 914 while (Q.size() > 1) { 915 PabloAST * a1 = Q.front(); Q.pop_front(); assert (a1); 916 PabloAST * a2 = Q.front(); Q.pop_front(); assert (a2); 917 assert (!Q.full()); 918 Q.push_back(builder.createOr(a2, a1, "demuxing")); 919 } 920 assert (Q.size() == 1); 921 PabloAST * neg = Q.front(); Q.pop_front(); assert (neg); 922 Q.push_back(builder.createNot(neg, "demuxing")); 923 } 924 925 for (unsigned j = 0; j != m; ++j) { 926 if (((i + 1) & (1ULL << j)) != 0) { 927 assert (!Q.full()); 928 Q.push_back(muxed[j]); 929 } 930 } 931 932 while (Q.size() > 1) { 933 PabloAST * a1 = Q.front(); Q.pop_front(); assert (a1); 934 PabloAST * a2 = Q.front(); Q.pop_front(); assert (a2); 935 assert (!Q.full()); 936 Q.push_back(builder.createAnd(a1, a2, "demuxing")); 937 } 938 939 PabloAST * demuxed = Q.front(); Q.pop_front(); assert (demuxed); 962 demuxing[j] = block>createNot(demuxing[j]); 963 } 964 } 965 And * demuxed = block>createAnd(m); 966 for (size_t j = 0; j != m; ++j) { 967 demuxed>addOperand(demuxing[j]); 968 } 940 969 input[i]>replaceWith(demuxed, true, true); 941 970 } … … 945 974 946 975 /**  * 976 * @brief OrderingVerifier 977 **  */ 978 struct OrderingVerifier { 979 OrderingVerifier() : mParent(nullptr) {} 980 OrderingVerifier(const OrderingVerifier & parent) : mParent(&parent) {} 981 bool count(const PabloAST * expr) const { 982 if (mSet.count(expr)) { 983 return true; 984 } else if (mParent) { 985 return mParent>count(expr); 986 } 987 return false; 988 } 989 void insert(const PabloAST * expr) { 990 mSet.insert(expr); 991 } 992 private: 993 const OrderingVerifier * const mParent; 994 std::unordered_set<const PabloAST *> mSet; 995 }; 996 997 /**  * 947 998 * @brief topologicalSort 948 *949 * After transforming the IR, we need to run this in order to always have a valid program. Each multiplex set950 * contains vertices corresponding to an Advance in the IR. While we know each Advance within a set is independent951 * w.r.t. the transitive closure of their dependencies in the IR, the position of each Advance's dependencies and952 * users within the IR isn't taken into consideration. Thus while there must be a valid ordering for the program,953 * it's not necessarily the original ordering.954 999 **  */ 955 1000 void MultiplexingPass::topologicalSort(PabloFunction & function) { 956 // Note: not a real topological sort. I expect the original order to be very close to the resulting one. 957 std::unordered_set<const PabloAST *> encountered; 958 std::stack<Statement *> scope; 959 for (Statement * stmt = function.getEntryBlock()>front(); ; ) { restart: 960 while ( stmt ) { 961 for (unsigned i = 0; i != stmt>getNumOperands(); ++i) { 962 PabloAST * const op = stmt>getOperand(i); 963 if (LLVM_LIKELY(isa<Statement>(op))) { 964 if (LLVM_UNLIKELY(encountered.count(op) == 0)) { 965 if (LLVM_UNLIKELY(isa<While>(stmt) && isa<Next>(op))) { 966 if (encountered.count(cast<Next>(op)>getInitial()) != 0) { 967 continue; 1001 OrderingVerifier set; 1002 set.insert(PabloBlock::createZeroes()); 1003 set.insert(PabloBlock::createOnes()); 1004 for (unsigned i = 0; i != function.getNumOfParameters(); ++i) { 1005 set.insert(function.getParameter(i)); 1006 } 1007 topologicalSort(function.getEntryBlock(), set); 1008 } 1009 1010 /**  * 1011 * @brief topologicalSort 1012 **  */ 1013 void MultiplexingPass::topologicalSort(PabloBlock * block, OrderingVerifier & parent) { 1014 OrderingVerifier encountered(parent); 1015 for (Statement * stmt = block>front(); stmt; ) { 1016 if (LLVM_UNLIKELY(isa<If>(stmt))) { 1017 topologicalSort(cast<If>(stmt)>getBody(), encountered); 1018 for (Assign * def : cast<If>(stmt)>getDefined()) { 1019 encountered.insert(def); 1020 } 1021 } else if (LLVM_UNLIKELY(isa<While>(stmt))) { 1022 topologicalSort(cast<While>(stmt)>getBody(), encountered); 1023 for (Next * var : cast<While>(stmt)>getVariants()) { 1024 encountered.insert(var); 1025 } 1026 } 1027 bool unmodified = true; 1028 for (unsigned i = 0; i != stmt>getNumOperands(); ++i) { 1029 PabloAST * const op = stmt>getOperand(i); 1030 if (LLVM_UNLIKELY(encountered.count(op) == 0 && isa<Statement>(op))) { 1031 Statement * const next = stmt>getNextNode(); 1032 Statement * pos = cast<Statement>(op); 1033 if (cast<Statement>(op)>getParent() != block) { 1034 // If we haven't already encountered the Assign or Next node, it must come from a If or 1035 // While node that we haven't processed yet. Scan ahead and try to locate it. 1036 pos = nullptr; 1037 if (isa<Assign>(pos)) { 1038 for (pos = cast<Statement>(op); pos; pos = pos>getNextNode()) { 1039 if (LLVM_UNLIKELY(isa<If>(pos))) { 1040 const auto & defs = cast<If>(pos)>getDefined(); 1041 if (LLVM_LIKELY(std::find(defs.begin(), defs.end(), op) != defs.end())) { 1042 break; 1043 } 968 1044 } 969 1045 } 970 Statement * const next = stmt>getNextNode(); 971 stmt>insertAfter(cast<Statement>(op)); 972 stmt = next; 973 goto restart; 974 } 975 } 976 } 977 if (LLVM_UNLIKELY(isa<If>(stmt)  isa<While>(stmt))) { 978 // Set the next statement to be the first statement of the inner scope and push the 979 // next statement of the current statement into the scope stack. 980 const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)>getBody() : cast<While>(stmt)>getBody(); 981 scope.push(stmt>getNextNode()); 982 stmt = nested>front(); 983 continue; 984 } 1046 } else if (isa<Next>(pos)) { 1047 for (pos = cast<Statement>(op); pos; pos = pos>getNextNode()) { 1048 if (LLVM_UNLIKELY(isa<While>(pos))) { 1049 const auto & vars = cast<While>(pos)>getVariants(); 1050 if (LLVM_LIKELY(std::find(vars.begin(), vars.end(), op) != vars.end())) { 1051 break; 1052 } 1053 } 1054 } 1055 } 1056 if (LLVM_UNLIKELY(pos == nullptr)) { 1057 throw std::runtime_error("Unexpected error: MultiplexingPass failed to topologically sort the function!"); 1058 } 1059 } 1060 stmt>insertAfter(pos); 1061 stmt = next; 1062 unmodified = false; 1063 break; 1064 } 1065 } 1066 if (unmodified) { 985 1067 encountered.insert(stmt); 986 1068 stmt = stmt>getNextNode(); 987 1069 } 988 if (scope.empty()) {989 break;990 }991 stmt = scope.top();992 scope.pop();993 1070 } 994 1071 } … … 1025 1102 } 1026 1103 1104 1105 /**  * 1106 * @brief findMaximalCliques 1107 * 1108 * Adaptation of the BronKerbosch algorithm. 1109 **  */ 1110 inline MultiplexingPass::CliqueSets MultiplexingPass::findMaximalCliques(const CliqueGraph & G) { 1111 CliqueSets S; 1112 const auto n = num_vertices(G); 1113 std::vector<CliqueGraph::vertex_descriptor> ordering; 1114 ordering.reserve(n); 1115 for (auto u : make_iterator_range(vertices(G))) { 1116 if (degree(u, G)) { 1117 ordering.push_back(u); 1118 } 1119 } 1120 CliqueSet R; 1121 CliqueSet P(ordering.begin(), ordering.end()); 1122 CliqueSet X; 1123 X.reserve(ordering.size()); 1124 // compute a degeneracy ordering of G 1125 std::sort(ordering.begin(), ordering.end(), [&G](const CliqueGraph::vertex_descriptor i, const CliqueGraph::vertex_descriptor j){ return degree(i, G) < degree(j, G); }); 1126 for (auto v : ordering) { 1127 R.insert(v); 1128 CliqueSet PN, XN; 1129 for (const auto u : make_iterator_range(adjacent_vertices(v, G))) { 1130 if (P.count(u)) PN.insert(u); 1131 if (X.count(u)) XN.insert(u); 1132 } 1133 findMaximalCliques(G, R, std::move(PN), std::move(XN), S); // ({v}, P â© N(v), X â© N(v)) 1134 R.clear(); 1135 P.erase(v); 1136 X.insert(v); 1137 } 1138 return S; 1139 } 1140 1141 /**  * 1142 * @brief findMaximalCliques 1143 **  */ 1144 void MultiplexingPass::findMaximalCliques(const CliqueGraph & G, CliqueSet & R, CliqueSet && P, CliqueSet && X, CliqueSets & S) { 1145 if (LLVM_UNLIKELY(P.empty() && X.empty())) { // Report R as a maximal clique 1146 S.emplace(R.begin(), R.end()); 1147 } else { 1148 // choose the pivot vertex u in P âª X as the vertex with the highest number of neighbors in P (Tomita et al. 2006.) 1149 CliqueSet N; 1150 CliqueGraph::degree_size_type size = 0; 1151 for (const CliqueGraph::vertex_descriptor u : P) { 1152 if (degree(u, G) >= size) { 1153 CliqueGraph::degree_size_type neighbours = 0; 1154 for (const CliqueGraph::vertex_descriptor v : make_iterator_range(adjacent_vertices(u, G))) { 1155 neighbours += P.count(v); 1156 } 1157 if (size <= neighbours) { 1158 if (size < neighbours) { 1159 size = neighbours; 1160 N.clear(); 1161 } 1162 N.insert(u); 1163 } 1164 } 1165 } 1166 for (const CliqueGraph::vertex_descriptor u : X) { 1167 if (degree(u, G) >= size) { 1168 CliqueGraph::degree_size_type neighbours = 0; 1169 for (const CliqueGraph::vertex_descriptor v : make_iterator_range(adjacent_vertices(u, G))) { 1170 neighbours += P.count(v); 1171 } 1172 if (size <= neighbours) { 1173 if (size < neighbours) { 1174 size = neighbours; 1175 N.clear(); 1176 } 1177 N.insert(u); 1178 } 1179 } 1180 } 1181 const CliqueGraph::vertex_descriptor u = *(N.nth(IntDistribution(0, N.size()  1)(mRNG))); 1182 // for each vertex v in P \ N(u): 1183 for (auto v = P.begin(); v != P.end(); v = P.erase(v)) { 1184 if (edge(u, *v, G).second) continue; 1185 const bool added = R.insert(*v).second; 1186 CliqueSet PN, XN; 1187 for (const CliqueGraph::vertex_descriptor u : make_iterator_range(adjacent_vertices(*v, G))) { 1188 if (P.count(u)) PN.insert(u); 1189 if (X.count(u)) XN.insert(u); 1190 } 1191 findMaximalCliques(G, R, std::move(PN), std::move(XN), S); // (R âª {v}, P â© N(v), X â© N(v)) 1192 if (LLVM_LIKELY(added)) R.erase(*v); 1193 X.insert(*v); 1194 } 1195 } 1196 } 1197 1027 1198 /**  * 1028 1199 * @brief get 
icGREP/icgrepdevel/icgrep/pablo/optimizers/pablo_automultiplexing.hpp
r4888 r4890 4 4 #include <pablo/codegenstate.h> 5 5 #include <slab_allocator.h> 6 #include <queue>7 6 #include <boost/graph/adjacency_list.hpp> 8 7 #include <boost/graph/adjacency_matrix.hpp> 8 #include <boost/container/flat_set.hpp> 9 9 #include <boost/container/flat_map.hpp> 10 10 #include <boost/numeric/ublas/matrix.hpp> … … 19 19 class PabloBuilder; 20 20 class PabloFunction; 21 struct OrderingVerifier; 21 22 22 23 class MultiplexingPass { … … 29 30 using MultiplexSetGraph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::bidirectionalS>; 30 31 using SubsetGraph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::bidirectionalS>; 32 using SubsetEdgeIterator = boost::graph_traits<SubsetGraph>::edge_iterator; 33 using CliqueGraph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::undirectedS>; 34 using CliqueSet = boost::container::flat_set<CliqueGraph::vertex_descriptor>; 35 using CliqueSets = boost::container::flat_set<std::vector<CliqueGraph::vertex_descriptor>>; 36 37 using AdvanceVector = std::vector<Advance *>; 31 38 using AdvanceDepth = std::vector<int>; 32 using Advance Attributes = std::vector<std::pair<Advance *, BDD>>; // the Advance pointer and its respective base BDD variable39 using AdvanceVariable = std::vector<BDD>; 33 40 using VertexVector = std::vector<ConstraintVertex>; 34 using ScopeMap = boost::container::flat_map<const PabloBlock *, Statement *>;35 41 36 42 public: 37 static bool optimize(PabloFunction & function, const unsigned limit = std::numeric_limits<unsigned>::max(), const unsigned maxSelections = 100, const unsigned windowSize = 10, const bool independent = false); 43 44 static bool optimize(PabloFunction & function, const unsigned limit = std::numeric_limits<unsigned>::max(), const unsigned maxSelections = 100, const unsigned windowSize = 1, const bool independent = false); 45 38 46 protected: 47 39 48 unsigned initialize(PabloFunction & function, const bool independent); 40 49 void initializeBaseConstraintGraph(PabloBlock * const block, const unsigned statements, const unsigned advances); … … 46 55 bool independent(const ConstraintVertex i, const ConstraintVertex j) const; 47 56 bool exceedsWindowSize(const ConstraintVertex i, const ConstraintVertex j) const; 48 bool generateCandidateSets(RNG & rng);49 57 50 void addCandidateSet(const VertexVector & S, RNG & rng); 51 void selectMultiplexSets(RNG & rng); 58 void generateUsageWeightingGraph(); 59 CliqueSets findMaximalCliques(const CliqueGraph & G); 60 void findMaximalCliques(const CliqueGraph & G, CliqueSet & R, CliqueSet && P, CliqueSet && X, CliqueSets & S); 61 62 bool generateCandidateSets(); 63 void addCandidateSet(const VertexVector & S); 64 void selectMultiplexSets(); 65 66 void eliminateSubsetConstraints(); 52 67 void doTransitiveReductionOfSubsetGraph(); 53 void eliminateSubsetConstraints(); 68 54 69 void multiplexSelectedIndependentSets(PabloFunction & function); 70 55 71 static void topologicalSort(PabloFunction & function); 72 static void topologicalSort(PabloBlock * block, OrderingVerifier & parent); 73 56 74 BDD & get(const PabloAST * const expr); 57 75 58 inline MultiplexingPass(const unsigned limit, const unsigned maxSelections, const unsigned windowSize)76 inline MultiplexingPass(const RNG::result_type seed, const unsigned limit, const unsigned maxSelections, const unsigned windowSize) 59 77 : mMultiplexingSetSizeLimit(limit) 60 78 , mMaxMultiplexingSetSelections(maxSelections) … … 62 80 , mTestConstrainedAdvances(true) 63 81 , mVariables(0) 82 , mRNG(seed) 64 83 , mConstraintGraph(0) 84 , mAdvance(0, nullptr) 65 85 , mAdvanceDepth(0, 0) 86 , mAdvanceNegatedVariable(0, 0) 66 87 { 67 88 … … 74 95 const bool mTestConstrainedAdvances; 75 96 unsigned mVariables; 97 RNG mRNG; 76 98 CharacterizationMap mCharacterization; 77 ConstraintGraph mConstraintGraph; 99 ConstraintGraph mConstraintGraph; 100 AdvanceVector mAdvance; 78 101 AdvanceDepth mAdvanceDepth; 102 AdvanceVariable mAdvanceNegatedVariable; 79 103 SubsetGraph mSubsetGraph; 80 AdvanceAttributes mAdvanceAttributes;104 CliqueGraph mUsageWeightingGraph; 81 105 MultiplexSetGraph mMultiplexSetGraph; 82 ScopeMap mResolvedScopes;83 106 }; 84 107 
icGREP/icgrepdevel/icgrep/pablo/optimizers/pablo_simplifier.cpp
r4886 r4890 141 141 if (isReassociative(stmt)) { 142 142 return fold(cast<Variadic>(stmt), block); 143 } else if (isa<Not>(stmt)) { 144 PabloAST * value = stmt>getOperand(0); 145 if (LLVM_UNLIKELY(isa<Not>(value))) { 146 return cast<Not>(value)>getOperand(0); 147 } else if (LLVM_UNLIKELY(isa<Zeroes>(value))) { 148 return block>createOnes(); 149 } else if (LLVM_UNLIKELY(isa<Ones>(value))) { 150 return block>createZeroes(); 151 } 152 } else if (isa<Advance>(stmt)) { 153 if (LLVM_UNLIKELY(isa<Zeroes>(stmt>getOperand(0)))) { 154 return block>createZeroes(); 155 } 143 156 } else { 144 157 for (unsigned i = 0; i != stmt>getNumOperands(); ++i) { 145 158 if (LLVM_UNLIKELY(isa<Zeroes>(stmt>getOperand(i)))) { 146 159 switch (stmt>getClassTypeId()) { 147 case PabloAST::ClassTypeId::Advance:148 return block>createZeroes();149 case PabloAST::ClassTypeId::Not:150 return block>createOnes();151 160 case PabloAST::ClassTypeId::Sel: 152 161 block>setInsertPoint(stmt>getPrevNode()); … … 164 173 block>setInsertPoint(stmt>getPrevNode()); 165 174 switch (stmt>getClassTypeId()) { 166 case PabloAST::ClassTypeId::Not:167 return block>createZeroes();168 175 case PabloAST::ClassTypeId::Sel: 169 176 block>setInsertPoint(stmt>getPrevNode()); … … 186 193 } 187 194 } 188 } 189 return nullptr;190 }195 } 196 } 197 return nullptr; 191 198 } 192 199
Note: See TracChangeset
for help on using the changeset viewer.