Changeset 4937 for icGREP/icgrep-devel


Ignore:
Timestamp:
Feb 21, 2016, 1:05:57 PM (4 years ago)
Author:
nmedfort
Message:

Check in of misc changes prior to symbol table work.

Location:
icGREP/icgrep-devel/icgrep
Files:
14 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r4922 r4937  
    140140IF(ENABLE_MULTIPLEXING)
    141141message(STATUS "Enabling Multiplexing")
    142 SET(BUDDY_ROOT "${CMAKE_SOURCE_DIR}/../buddy-2.4")
    143 file(GLOB BUDDY_SOURCES "${BUDDY_ROOT}/src/*.cpp")
     142SET(BUDDY_ROOT "${CMAKE_SOURCE_DIR}/../buddy-2.4/src")
     143SET(BUDDY_SOURCES ${BUDDY_ROOT}/bddop.cpp ${BUDDY_ROOT}/cache.cpp ${BUDDY_ROOT}/imatrix.cpp ${BUDDY_ROOT}/kernel.cpp)
     144SET(BUDDY_SOURCES ${BUDDY_SOURCES} ${BUDDY_ROOT}/prime.cpp ${BUDDY_ROOT}/pairs.cpp ${BUDDY_ROOT}/reorder.cpp ${BUDDY_ROOT}/tree.cpp)
    144145add_library(BUDDY ${BUDDY_SOURCES})
    145 include_directories(${BUDDY_ROOT}/src)
     146include_directories(${BUDDY_ROOT})
    146147target_link_libraries (PabloADT BUDDY)
    147148SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_MULTIPLEXING")
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_builder.cpp

    r4900 r4937  
    2424void IDISA_Builder::genPrintRegister(std::string regName, Value * bitblockValue) {
    2525    if (mPrintRegisterFunction == nullptr) {
    26         mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(mMod->getContext()), Type::getInt8PtrTy(mMod->getContext()), mBitBlockType, NULL);
     26        mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(mMod->getContext()), Type::getInt8PtrTy(mMod->getContext()), mBitBlockType, nullptr);
    2727    }
    2828    Constant * regNameData = ConstantDataArray::getString(mMod->getContext(), regName);
  • icGREP/icgrep-devel/icgrep/do_grep.cpp

    r4904 r4937  
    250250    }
    251251    else {
    252         mFileBuffer = (char *) mmap(NULL, mFileSize, PROT_READ, MAP_PRIVATE, fdSrc, 0);
     252        mFileBuffer = (char *) mmap(nullptr, mFileSize, PROT_READ, MAP_PRIVATE, fdSrc, 0);
    253253        if (mFileBuffer == MAP_FAILED) {
    254254            if (errno ==  ENOMEM) {
  • icGREP/icgrep-devel/icgrep/hrtime.h

    r4919 r4937  
    1616
    1717  // open proc/cpuinfo
    18   if ((fp = fopen("/proc/cpuinfo", "r")) == NULL)
     18  if ((fp = fopen("/proc/cpuinfo", "r")) == nullptr)
    1919    return -1;
    2020
    2121  // ignore all lines until we reach MHz information
    22   while (fgets(line, 1024, fp) != NULL) {
    23     if (strstr(line, search_str) != NULL) {
     22  while (fgets(line, 1024, fp) != nullptr) {
     23    if (strstr(line, search_str) != nullptr) {
    2424      // ignore all characters in line up to :
    2525      for (s = line; *s && (*s != ':'); ++s)
     
    3131  }
    3232
    33   if (fp != NULL)
     33  if (fp != nullptr)
    3434    fclose(fp);
    3535  return mhz;
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r4922 r4937  
    410410pablo/optimizers/codemotionpass.cpp
    411411../buddy-2.4/src/bdd.h
    412 ../buddy-2.4/src/bddio.cpp
    413412../buddy-2.4/src/bddop.cpp
    414 ../buddy-2.4/src/bvec.cpp
    415 ../buddy-2.4/src/bvec.h
    416413../buddy-2.4/src/cache.cpp
    417414../buddy-2.4/src/cache.h
    418 ../buddy-2.4/src/fdd.cpp
    419 ../buddy-2.4/src/fdd.h
    420415../buddy-2.4/src/imatrix.cpp
    421416../buddy-2.4/src/imatrix.h
     
    426421../buddy-2.4/src/prime.h
    427422../buddy-2.4/src/reorder.cpp
    428 ../buddy-2.4/src/tree.cpp
    429423../buddy-2.4/src/bdd.h
    430424../buddy-2.4/src/bddio.c
    431425../buddy-2.4/src/bddop.c
    432426../buddy-2.4/src/bddtest.cxx
    433 ../buddy-2.4/src/bddtree.h
    434427../buddy-2.4/src/cache.h
    435428../buddy-2.4/src/cache.c
     
    447440../buddy-2.4/src/kernel.h
    448441../buddy-2.4/src/kernel.c
    449 ../buddy-2.4/src/config.h
    450442../buddy-2.4/src/bddtree.h
    451443UCD/EastAsianWidth.h
     
    504496IDISA/idisa_avx_builder.cpp
    505497IDISA/idisa_builder.h
     498../buddy-2.4/src/bddtree.h
     499../buddy-2.4/src/tree.cpp
  • icGREP/icgrep-devel/icgrep/include/simd-lib/bitblock_iterator.hpp

    r4033 r4937  
    245245
    246246protected:
    247         Scanner(): strm(NULL), pos(EOS), blk(-1), scan_blk(-1) {}
     247        Scanner(): strm(nullptr), pos(EOS), blk(-1), scan_blk(-1) {}
    248248        Scanner(const bitblock_t * s, uint32_t start_pos, uint32_t start_blk, scanblock_t start_scan_blk): strm(s), pos(start_pos), blk(start_blk), scan_blk(start_scan_blk) {}
    249249
     
    511511{
    512512public:
    513         BitStreamIterator():pos(EOS), blk(-1), blk_pos(-1), strm(NULL), scan_blk(-1), scan_blk_cnt(0)
     513        BitStreamIterator():pos(EOS), blk(-1), blk_pos(-1), strm(nullptr), scan_blk(-1), scan_blk_cnt(0)
    514514        {
    515515                // default constructor defines past-the-end of bit stream semantics, pos == EOS
  • icGREP/icgrep-devel/icgrep/include/simd-lib/builtins.hpp

    r3850 r4937  
    1313#include "config.hpp"
    1414
    15 static IDISA_ALWAYS_INLINE long likely(long x);
    16 static IDISA_ALWAYS_INLINE long unlikely(long x);
     15static IDISA_ALWAYS_INLINE long LIKELY(long x);
     16static IDISA_ALWAYS_INLINE long UNLIKELY(long x);
    1717
    1818#if defined (_MSC_VER)
     
    2626#elif defined (__GNUC__)
    2727
    28         IDISA_ALWAYS_INLINE long likely(long x) {
     28        IDISA_ALWAYS_INLINE long LIKELY(long x) {
    2929                return __builtin_expect(x, 1);
    3030        }
    3131
    32         IDISA_ALWAYS_INLINE long unlikely(long x) {
     32        IDISA_ALWAYS_INLINE long UNLIKELY(long x) {
    3333                return __builtin_expect(x, 0);
    3434        }
  • icGREP/icgrep-devel/icgrep/ispc.cpp

    r4889 r4937  
    440440
    441441Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, bool printTarget, std::string genericAsSmth) :
    442     m_target(NULL),
    443     m_targetMachine(NULL),
    444     m_dataLayout(NULL),
     442    m_target(nullptr),
     443    m_targetMachine(nullptr),
     444    m_dataLayout(nullptr),
    445445    m_valid(false),
    446446    m_isa(SSE2),
     
    451451    m_attributes(""),
    452452#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
    453     m_tf_attributes(NULL),
     453    m_tf_attributes(nullptr),
    454454#endif
    455455    m_nativeVectorWidth(-1),
     
    482482    }
    483483
    484     if (isa == NULL) {
     484    if (isa == nullptr) {
    485485        // If a CPU was specified explicitly, try to pick the best
    486486        // possible ISA based on that.
     
    555555    }
    556556
    557     if (arch == NULL) {
     557    if (arch == nullptr) {
    558558#ifdef ISPC_ARM_ENABLED
    559559        if (!strncmp(isa, "neon", 4))
     
    589589        }
    590590    }
    591     if (this->m_target == NULL) {
     591    if (this->m_target == nullptr) {
    592592        fprintf(stderr, "Invalid architecture \"%s\"\nOptions: ", arch);
    593593        llvm::TargetRegistry::iterator iter;
     
    985985    if (CPUID == CPU_None) {
    986986#ifndef ISPC_ARM_ENABLED
    987         if (isa == NULL) {
     987        if (isa == nullptr) {
    988988#endif
    989989            std::string hostCPU = llvm::sys::getHostCPUName();
     
    10401040            m_target->createTargetMachine(triple, m_cpu, featuresString, options,
    10411041                    relocModel);
    1042         Assert(m_targetMachine != NULL);
     1042        Assert(m_targetMachine != nullptr);
    10431043
    10441044#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
     
    13031303    llvm::ArrayType *at =
    13041304        llvm::dyn_cast<llvm::ArrayType>(type);
    1305     if (at != NULL)
     1305    if (at != nullptr)
    13061306        return lGenericTypeLayoutIndeterminate(at->getElementType());
    13071307
    13081308    llvm::PointerType *pt =
    13091309        llvm::dyn_cast<llvm::PointerType>(type);
    1310     if (pt != NULL)
     1310    if (pt != nullptr)
    13111311        return false;
    13121312
    13131313    llvm::StructType *st =
    13141314        llvm::dyn_cast<llvm::StructType>(type);
    1315     if (st != NULL) {
     1315    if (st != nullptr) {
    13161316        for (int i = 0; i < (int)st->getNumElements(); ++i)
    13171317            if (lGenericTypeLayoutIndeterminate(st->getElementType(i)))
     
    13891389    llvm::StructType *structType =
    13901390        llvm::dyn_cast<llvm::StructType>(type);
    1391     if (structType == NULL || structType->isSized() == false) {
     1391    if (structType == nullptr || structType->isSized() == false) {
    13921392        Assert(m->errorCount > 0);
    1393         return NULL;
     1393        return nullptr;
    13941394    }
    13951395
    13961396    const llvm::StructLayout *sl = getDataLayout()->getStructLayout(structType);
    1397     Assert(sl != NULL);
     1397    Assert(sl != nullptr);
    13981398
    13991399    uint64_t offset = sl->getElementOffset(element);
     
    14651465    _getcwd(currentDirectory, sizeof(currentDirectory));
    14661466#else
    1467     if (getcwd(currentDirectory, sizeof(currentDirectory)) == NULL)
     1467    if (getcwd(currentDirectory, sizeof(currentDirectory)) == nullptr)
    14681468        FATAL("Current directory path too long!");
    14691469#endif
     
    14771477SourcePos::SourcePos(const char *n, int fl, int fc, int ll, int lc) {
    14781478    name = n;
    1479     if (name == NULL) {
    1480         if (m != NULL)
     1479    if (name == nullptr) {
     1480        if (m != nullptr)
    14811481            name = m->module->getModuleIdentifier().c_str();
    14821482        else
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r4936 r4937  
    100100    recordNum_input_parm->setName("lineNum");
    101101   
    102     Constant * matchProcessor = m->getOrInsertFunction("wrapped_report_match", Type::getVoidTy(ctxt), T, T, T, NULL);
     102    Constant * matchProcessor = m->getOrInsertFunction("wrapped_report_match", Type::getVoidTy(ctxt), T, T, T, nullptr);
    103103
    104104   
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_automultiplexing.cpp

    r4927 r4937  
    1616#include <unordered_set>
    1717#include <bdd.h>
     18#include <functional>
     19
     20#include <llvm/Support/CommandLine.h>
    1821
    1922using namespace llvm;
     
    2225using namespace boost::numeric::ublas;
    2326
    24 /// Interesting test case: ./icgrep '[\p{Lm}\p{Meetei_Mayek}]' -disable-if-hierarchy-strategy
     27/// Interesting test cases:
     28/// ./icgrep -c -multiplexing '[\p{Lm}\p{Meetei_Mayek}]' -disable-if-hierarchy-strategy
     29
     30/// ./icgrep -c -multiplexing '\p{Imperial_Aramaic}(?<!\p{Sm})' -disable-if-hierarchy-strategy
     31
     32
     33static cl::OptionCategory MultiplexingOptions("Multiplexing Optimization Options", "These options control the Pablo Multiplexing optimization pass.");
     34
     35#ifdef NDEBUG
     36#define INITIAL_SEED_VALUE (std::random_device()())
     37#else
     38#define INITIAL_SEED_VALUE (83234827342)
     39#endif
     40
     41static cl::opt<std::mt19937::result_type> Seed("multiplexing-seed", cl::init(INITIAL_SEED_VALUE),
     42                                        cl::desc("randomization seed used when performing any non-deterministic operations."),
     43                                        cl::cat(MultiplexingOptions));
     44
     45#undef INITIAL_SEED_VALUE
     46
     47static cl::opt<unsigned> SetLimit("multiplexing-set-limit", cl::init(std::numeric_limits<unsigned>::max()),
     48                                        cl::desc("maximum size of any candidate set."),
     49                                        cl::cat(MultiplexingOptions));
     50
     51static cl::opt<unsigned> SelectionLimit("multiplexing-selection-limit", cl::init(100),
     52                                        cl::desc("maximum number of selections from any partial candidate set."),
     53                                        cl::cat(MultiplexingOptions));
     54
     55static cl::opt<unsigned> WindowSize("multiplexing-window-size", cl::init(1),
     56                                        cl::desc("maximum depth difference for computing mutual exclusion of Advance nodes."),
     57                                        cl::cat(MultiplexingOptions));
     58
     59static cl::opt<unsigned> Samples("multiplexing-samples", cl::init(1),
     60                                 cl::desc("number of times the Advance constraint graph is sampled to find multiplexing opportunities."),
     61                                 cl::cat(MultiplexingOptions));
     62
     63
     64enum SelectionStrategy {Greedy, WorkingSet};
     65
     66static cl::opt<SelectionStrategy> Strategy(cl::desc("Choose set selection strategy:"),
     67                                             cl::values(
     68                                             clEnumVal(Greedy, "choose the largest multiplexing sets possible (w.r.t. the multiplexing-set-limit)."),
     69                                             clEnumVal(WorkingSet, "choose multiplexing sets that share common input values."),
     70                                             clEnumValEnd),
     71                                           cl::init(Greedy),
     72                                           cl::cat(MultiplexingOptions));
    2573
    2674// #define PRINT_DEBUG_OUTPUT
     
    111159using TypeId = PabloAST::ClassTypeId;
    112160
    113 bool MultiplexingPass::optimize(PabloFunction & function, const unsigned limit, const unsigned maxSelections, const unsigned windowSize, const bool independent) {
    114 
    115     std::random_device rd;
    116     const RNG::result_type seed = rd();
    117 //    const RNG::result_type seed = 83234827342;
    118 
    119     LOG("Seed:                    " << seed);
     161template<typename Graph>
     162static Graph construct(PabloBlock * const block);
     163
     164template<typename Graph, typename Map>
     165static Graph construct(PabloBlock * const block, Map & M);
     166
     167bool MultiplexingPass::optimize(PabloFunction & function, const bool independent) {
     168
     169    LOG("Seed:                    " << Seed);
    120170
    121171    #ifdef PRINT_TIMING_INFORMATION
    122     MultiplexingPass::SEED = seed;
     172    MultiplexingPass::SEED = Seed;
    123173    MultiplexingPass::NODES_ALLOCATED = 0;
    124174    MultiplexingPass::NODES_USED = 0;
    125175    #endif
    126176
    127     MultiplexingPass mp(seed, limit, maxSelections, windowSize);
     177    MultiplexingPass mp(Seed);
    128178    RECORD_TIMESTAMP(start_initialize);
    129179    const unsigned advances = mp.initialize(function, independent);
     
    169219
    170220        RECORD_TIMESTAMP(start_select_multiplex_sets);
    171         mp.selectMultiplexSets();
     221        if (Strategy == SelectionStrategy::Greedy) {
     222            mp.selectMultiplexSetsGreedy();
     223        } else if (Strategy == SelectionStrategy::WorkingSet) {
     224            mp.selectMultiplexSetsWorkingSet();
     225        }
    172226        RECORD_TIMESTAMP(end_select_multiplex_sets);
    173227        LOG("SelectMultiplexSets:      " << (end_select_multiplex_sets - start_select_multiplex_sets));
     
    256310    bdd_setcacheratio(64);
    257311    bdd_setmaxincrease(10000000);
    258     bdd_autoreorder(BDD_REORDER_SIFT);
     312    bdd_autoreorder(BDD_REORDER_SIFT); // BDD_REORDER_SIFT
    259313
    260314    // Map the constants and input variables
     
    364418    const PabloBlock * advanceScope[advances];
    365419    mAdvance.resize(advances, nullptr);
    366     mAdvanceDepth.resize(advances, 0);
     420    mAdvanceRank.resize(advances, 0);
    367421    mAdvanceNegatedVariable.reserve(advances);
    368422    for (Statement * stmt = entryBlock->front(); ; ) {
     
    380434                for (unsigned i = 0; i != k; ++i) {
    381435                    if (edge(i, k, mConstraintGraph).second || (advanceScope[i] != advanceScope[k])) {
    382                         depth = std::max<int>(depth, mAdvanceDepth[i]);
     436                        depth = std::max<int>(depth, mAdvanceRank[i]);
    383437                    }
    384438                }
    385                 mAdvanceDepth[k++] = ++depth;
     439                mAdvanceRank[k++] = ++depth;
    386440                maxDepth = std::max(maxDepth, depth);
    387441            }
     
    396450    assert (k == advances);
    397451
    398     LOG("Window Size / Max Depth: " << mWindowSize << " of " << maxDepth);
     452    LOG("Window Size / Max Depth: " << WindowSize << " of " << maxDepth);
    399453}
    400454
     
    409463        if (LLVM_UNLIKELY(isa<If>(stmt))) {
    410464            characterize(cast<If>(stmt)->getBody());
     465            for (const Assign * def : cast<If>(stmt)->getDefined()) {
     466                if (LLVM_LIKELY(escapes(def))) {
     467                    bdd_addref(get(def));
     468                }
     469            }
    411470        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
    412471            characterize(cast<While>(stmt)->getBody());
    413472            for (const Next * var : cast<While>(stmt)->getVariants()) {
    414                 BDD & assign = get(var->getInitial());
    415                 assign = bdd_addref(bdd_or(assign, get(var)));
     473                if (LLVM_LIKELY(escapes(var))) {
     474                    BDD & initial = get(var->getInitial());
     475                    BDD & escaped = get(var);
     476                    initial = bdd_addref(bdd_or(initial, escaped));
     477                    escaped = initial;
     478                }
    416479            }
    417480        } else {
     
    548611    for (unsigned i = 0; i != k; ++i) {
    549612        if (unconstrained[i]) {
    550             // Note: this algorithm deems two streams are mutually exclusive if and only if the conjuntion of their BDDs results
    551             // in a contradiction. To generate a contradiction when comparing Advances, the BDD of each Advance is represented by
    552             // the conjunction of variable representing the k-th Advance and the negation of all variables for the Advances whose
    553             // inputs are mutually exclusive with the k-th input. For example, if the input of the i-th Advance is mutually exclusive
    554             // with the input of the j-th and k-th Advance, the BDD of the i-th Advance is Ai ∧ ¬Aj ∧ ¬Ak.
    555             const Advance * const ithAdv = mAdvance[i];
     613            // This algorithm deems two streams mutually exclusive if and only if the conjuntion of their BDDs is a contradiction.
     614            // To generate a contradiction when comparing Advances, the BDD of each Advance is represented by the conjunction of
     615            // variables representing the k-th Advance and the negation of all variables for the Advances whose inputs are mutually
     616            // exclusive with the k-th input.
     617
     618            // For example, if the input of the i-th Advance is mutually exclusive with the input of the j-th and k-th Advance, the
     619            // BDD of the i-th Advance is Ai ∧ ¬Aj ∧ ¬Ak. Similarly, the j- and k-th Advance is Aj ∧ ¬Ai and Ak ∧ ¬Ai, respectively
     620            // (assuming that the j-th and k-th Advance are not mutually exclusive.)
     621
    556622            const BDD Ni = mAdvanceNegatedVariable[i];
    557             BDD & Ai = get(ithAdv);
     623            BDD & Ai = get(mAdvance[i]);
    558624            Ai = bdd_addref(bdd_and(Ai, Nk));
    559625            Ak = bdd_addref(bdd_and(Ak, Ni));
    560             if (independent(i, k) && (adv->getParent() == ithAdv->getParent())) {
     626            if (independent(i, k) && (adv->getParent() == mAdvance[i]->getParent())) {
    561627                continue;
    562628            }
     
    564630        add_edge(i, k, mConstraintGraph);
    565631    }
    566     // To minimize the number of BDD computations, store the negated variable instead of negating it each time.
     632    // To minimize the number of BDD computations, we store the negated variable instead of negating it each time.
    567633    mAdvanceNegatedVariable.emplace_back(Nk);
    568634    return Ak;
     
    581647 ** ------------------------------------------------------------------------------------------------------------- */
    582648inline bool MultiplexingPass::exceedsWindowSize(const ConstraintVertex i, const ConstraintVertex j) const {
    583     assert (i < mAdvanceDepth.size() && j < mAdvanceDepth.size());
    584     return (std::abs<int>(mAdvanceDepth[i] - mAdvanceDepth[j]) > mWindowSize);
     649    assert (i < mAdvanceRank.size() && j < mAdvanceRank.size());
     650    return (std::abs<int>(mAdvanceRank[i] - mAdvanceRank[j]) > WindowSize);
    585651}
    586652
     
    644710
    645711/** ------------------------------------------------------------------------------------------------------------- *
    646  * @brief generateMultiplexSets
     712 * @brief generateCandidateSets
    647713 ** ------------------------------------------------------------------------------------------------------------- */
    648714bool MultiplexingPass::generateCandidateSets() {
    649715
    650     // What if we generated a "constraint free" graph instead? By taking each connected component of it
    651     // and computing the complement of it (with the same lesser to greater index ordering), we should
    652     // have the same problem here but decomposed into subgraphs.
    653 
    654     VertexVector S;
    655     std::vector<ConstraintGraph::degree_size_type> D(num_vertices(mConstraintGraph));
    656     S.reserve(15);
    657 
    658     mMultiplexSetGraph = MultiplexSetGraph(num_vertices(mConstraintGraph));
    659 
    660     // Push all source nodes into the (initial) independent set S
    661     for (auto v : make_iterator_range(vertices(mConstraintGraph))) {
    662         const auto d = in_degree(v, mConstraintGraph);
    663         D[v] = d;
    664         if (d == 0) {
    665             S.push_back(v);
    666         }
    667     }
    668 
    669     assert (S.size() > 0);
    670 
    671     auto remainingVerticies = num_vertices(mConstraintGraph) - S.size();
    672 
    673     do {
    674 
    675         addCandidateSet(S);
    676 
    677         bool noNewElements = true;
    678         do {
     716    Constraints S;
     717
     718    ConstraintGraph::degree_size_type D[num_vertices(mConstraintGraph)];
     719
     720    mCandidateGraph = CandidateGraph(num_vertices(mConstraintGraph));
     721
     722    for (unsigned iteration = 0; iteration < Samples; ++iteration) {
     723
     724        // Push all source nodes into the (initial) independent set S
     725        for (const auto v : make_iterator_range(vertices(mConstraintGraph))) {
     726            const auto d = in_degree(v, mConstraintGraph);
     727            D[v] = d;
     728            if (d == 0) {
     729                S.push_back(v);
     730            }
     731        }
     732
     733        auto remaining = num_vertices(mConstraintGraph) - S.size();
     734
     735        for (;;) {
    679736            assert (S.size() > 0);
    680             // Randomly choose a vertex in S and discard it.
    681             const auto i = S.begin() + IntDistribution(0, S.size() - 1)(mRNG);
    682             assert (i != S.end());
    683             const ConstraintVertex u = *i;
    684             S.erase(i);
    685 
    686             for (auto e : make_iterator_range(out_edges(u, mConstraintGraph))) {
    687                 const ConstraintVertex v = target(e, mConstraintGraph);
    688                 if ((--D[v]) == 0) {
    689                     S.push_back(v);
    690                     --remainingVerticies;
    691                     noNewElements = false;
    692                 }
    693             }
    694         }
    695         while (noNewElements && remainingVerticies);
    696     }
    697     while (remainingVerticies);
    698 
    699     return num_vertices(mMultiplexSetGraph) > num_vertices(mConstraintGraph);
     737            addCandidateSet(S);
     738            if (LLVM_UNLIKELY(remaining == 0)) {
     739                break;
     740            }
     741            for (;;) {
     742                assert (S.size() > 0);
     743                // Randomly choose a vertex in S and discard it.
     744                const auto i = S.begin() + IntDistribution(0, S.size() - 1)(mRNG);
     745                assert (i != S.end());
     746                const auto u = *i;
     747                S.erase(i);
     748                bool checkCandidate = false;
     749                for (auto e : make_iterator_range(out_edges(u, mConstraintGraph))) {
     750                    const auto v = target(e, mConstraintGraph);
     751                    assert ("Constraint set degree subtraction error!" && (D[v] != 0));
     752                    if ((--D[v]) == 0) {
     753                        assert ("Error v is already in S!" && std::count(S.begin(), S.end(), v) == 0);
     754                        S.push_back(v);
     755                        assert (remaining != 0);
     756                        --remaining;
     757                        if (LLVM_LIKELY(S.size() >= 3)) {
     758                            checkCandidate = true;
     759                        }
     760                    }
     761                }
     762                if (checkCandidate || LLVM_UNLIKELY(remaining == 0)) {
     763                    break;
     764                }
     765            }
     766        }
     767
     768        S.clear();
     769    }
     770
     771    return num_vertices(mCandidateGraph) > num_vertices(mConstraintGraph);
    700772}
    701773
     
    727799 * James McCaffrey's algorithm for "Generating the mth Lexicographical Element of a Mathematical Combination"
    728800 ** ------------------------------------------------------------------------------------------------------------- */
    729 void select(const unsigned n, const unsigned k, const unsigned m, unsigned * element) {
     801void MultiplexingPass::selectCandidateSet(const unsigned n, const unsigned k, const unsigned m, const Constraints & S, ConstraintVertex * const element) {
    730802    unsigned long a = n;
    731803    unsigned long b = k;
     
    736808        x = x - y;
    737809        b = b - 1;
    738         element[i] = (n - 1) - a;
    739     }
     810        element[i] = S[(n - 1) - a];
     811    }
     812}
     813
     814/** ------------------------------------------------------------------------------------------------------------- *
     815 * @brief updateCandidateSet
     816 ** ------------------------------------------------------------------------------------------------------------- */
     817void MultiplexingPass::updateCandidateSet(ConstraintVertex * const begin, ConstraintVertex * const end) {
     818
     819    using Vertex = CandidateGraph::vertex_descriptor;
     820
     821    const auto n = num_vertices(mConstraintGraph);
     822    const auto m = num_vertices(mCandidateGraph);
     823    const auto d = end - begin;
     824
     825    std::sort(begin, end);
     826
     827    Vertex u = 0;
     828
     829    for (Vertex i = n; i != m; ++i) {
     830
     831        if (LLVM_UNLIKELY(degree(i, mCandidateGraph) == 0)) {
     832            u = i;
     833            continue;
     834        }
     835
     836        const auto adj = adjacent_vertices(i, mCandidateGraph);
     837        if (degree(i, mCandidateGraph) < d) {
     838            // set_i can only be a subset of the new set
     839            if (LLVM_UNLIKELY(std::includes(begin, end, adj.first, adj.second))) {
     840                clear_vertex(i, mCandidateGraph);
     841                u = i;
     842            }
     843        } else if (LLVM_UNLIKELY(std::includes(adj.first, adj.second, begin, end))) {
     844            // the new set is a subset of set_i; discard it.
     845            return;
     846        }
     847
     848    }
     849
     850    if (LLVM_LIKELY(u == 0)) { // n must be at least 3 so u is 0 if and only if we're not reusing a set vertex.
     851        u = add_vertex(mCandidateGraph);
     852    }
     853
     854    for (ConstraintVertex * i = begin; i != end; ++i) {
     855        add_edge(u, *i, mCandidateGraph);
     856    }
     857
    740858}
    741859
     
    744862 * @param S an independent set
    745863 ** ------------------------------------------------------------------------------------------------------------- */
    746 inline void MultiplexingPass::addCandidateSet(const VertexVector & S) {
     864inline void MultiplexingPass::addCandidateSet(const Constraints & S) {
    747865    if (S.size() >= 3) {
    748         if (S.size() <= mMultiplexingSetSizeLimit) {
    749             const auto u = add_vertex(mMultiplexSetGraph);
    750             for (const auto v : S) {
    751                 add_edge(u, v, mMultiplexSetGraph);
    752             }
     866        const unsigned setLimit = SetLimit;
     867        if (S.size() <= setLimit) {
     868            ConstraintVertex E[S.size()];
     869            std::copy(S.cbegin(), S.cend(), E);
     870            updateCandidateSet(E, E + S.size());
    753871        } else {
    754             const auto max = choose(S.size(), mMultiplexingSetSizeLimit);
    755             unsigned element[mMultiplexingSetSizeLimit];
    756             if (LLVM_UNLIKELY(max <= mMaxMultiplexingSetSelections)) {
     872            assert (setLimit > 0);
     873            ConstraintVertex E[setLimit];
     874            const auto max = choose(S.size(), setLimit);
     875            if (LLVM_UNLIKELY(max <= SelectionLimit)) {
    757876                for (unsigned i = 0; i != max; ++i) {
    758                     select(S.size(), mMultiplexingSetSizeLimit, i, element);
    759                     const auto u = add_vertex(mMultiplexSetGraph);
    760                     for (unsigned j = 0; j != mMultiplexingSetSizeLimit; ++j) {
    761                         add_edge(u, S[element[j]], mMultiplexSetGraph);
    762                     }
     877                    selectCandidateSet(S.size(), setLimit, i, S, E);
     878                    updateCandidateSet(E, E + setLimit);
    763879                }
    764880            } else { // take m random samples
    765                 for (unsigned i = 0; i != mMaxMultiplexingSetSelections; ++i) {
    766                     select(S.size(), mMultiplexingSetSizeLimit, mRNG() % max, element);
    767                     const auto u = add_vertex(mMultiplexSetGraph);
    768                     for (unsigned j = 0; j != mMultiplexingSetSizeLimit; ++j) {
    769                         add_edge(u, S[element[j]], mMultiplexSetGraph);
    770                     }
     881                for (unsigned i = 0; i != SelectionLimit; ++i) {
     882                    selectCandidateSet(S.size(), setLimit, mRNG() % max, S, E);
     883                    updateCandidateSet(E, E + setLimit);
    771884                }
    772885            }
     
    783896
    784897/** ------------------------------------------------------------------------------------------------------------- *
    785  * @brief selectMultiplexSets
     898 * @brief selectMultiplexSetsGreedy
    786899 *
    787900 * This algorithm is simply computes a greedy set cover. We want an exact max-weight set cover but can generate new
     
    791904, |A| ≀ |B| < |C|, and C ⊂ (A ∪ B).
    792905 ** ------------------------------------------------------------------------------------------------------------- */
    793 void MultiplexingPass::selectMultiplexSets() {
    794 
    795     using SetIterator = graph_traits<MultiplexSetGraph>::in_edge_iterator;
    796     using ElementIterator = graph_traits<MultiplexSetGraph>::out_edge_iterator;
    797     using degree_t = MultiplexSetGraph::degree_size_type;
    798     using vertex_t = MultiplexSetGraph::vertex_descriptor;
     906void MultiplexingPass::selectMultiplexSetsGreedy() {
     907
     908    using AdjIterator = graph_traits<CandidateGraph>::adjacency_iterator;
     909    using degree_t = CandidateGraph::degree_size_type;
     910    using vertex_t = CandidateGraph::vertex_descriptor;
    799911
    800912    const size_t m = num_vertices(mConstraintGraph);
    801     const size_t n = num_vertices(mMultiplexSetGraph) - m;
     913    const size_t n = num_vertices(mCandidateGraph) - m;
    802914
    803915    degree_t remaining[n];
     
    805917
    806918    for (unsigned i = 0; i != n; ++i) {
    807         remaining[i] = out_degree(i + m, mMultiplexSetGraph);
     919        remaining[i] = degree(i + m, mCandidateGraph);
    808920    }
    809921    for (unsigned i = 0; i != m; ++i) {
     
    820932            if (r > 2) { // if this set has at least 3 elements.
    821933                r *= r;
    822                 ElementIterator begin, end;
    823                 std::tie(begin, end) = out_edges(i + m, mMultiplexSetGraph);
     934                AdjIterator begin, end;
     935                std::tie(begin, end) = adjacent_vertices(i + m, mCandidateGraph);
    824936                for (auto ei = begin; ei != end; ++ei) {
    825937                    for (auto ej = ei; ++ej != end; ) {
    826                         if (edge(target(*ei, mMultiplexSetGraph), target(*ej, mMultiplexSetGraph), mUsageGraph).second) {
     938                        if (edge(*ei, *ej, mUsageGraph).second) {
    827939                            ++r;
    828940                        }
     
    841953        }
    842954
    843         for (const auto ei : make_iterator_range(out_edges(k + m, mMultiplexSetGraph))) {
    844             const vertex_t j = target(ei, mMultiplexSetGraph);
    845             if (chosen_set[j] == 0) {
    846                 chosen_set[j] = (k + m);
    847                 for (const auto ej : make_iterator_range(in_edges(j, mMultiplexSetGraph))) {
    848                     remaining[source(ej, mMultiplexSetGraph) - m]--;
     955        for (const auto u : make_iterator_range(adjacent_vertices(k + m, mCandidateGraph))) {
     956            if (chosen_set[u] == 0) {
     957                chosen_set[u] = (k + m);
     958                for (const auto v : make_iterator_range(adjacent_vertices(u, mCandidateGraph))) {
     959                    assert (v >= m);
     960                    remaining[v - m]--;
    849961                }
    850962            }
     
    861973                if (chosen_set[i] == (k + m)) {
    862974                    degree_t r = 1;
    863                     for (const auto ej : make_iterator_range(in_edges(i, mMultiplexSetGraph))) {
     975                    for (const auto u : make_iterator_range(adjacent_vertices(i, mCandidateGraph))) {
    864976                        // strongly prefer adding weight to unvisited sets that have more remaining vertices
    865                         r += std::pow(remaining[source(ej, mMultiplexSetGraph) - m], 2);
     977                        r += std::pow(remaining[u - m], 2);
    866978                    }
    867979                    if (w < r) {
     
    873985            assert (w > 0);
    874986            chosen_set[j] = 0;
    875             for (const auto ej : make_iterator_range(in_edges(j, mMultiplexSetGraph))) {
    876                 remaining[source(ej, mMultiplexSetGraph) - m]++;
    877             }
    878         }
     987            for (const auto u : make_iterator_range(adjacent_vertices(j, mCandidateGraph))) {
     988                assert (u >= m);
     989                remaining[u - m]++;
     990            }
     991        }
     992
     993        // If Samples > 1 then our candidate sets were generated by more than one traversal through the constraint graph.
     994        // Sets generated by differing traversals may generate a cycle in the AST if multiplex even when they are not
     995        // multiplexed together. For example,
     996
     997        // Assume we're multiplexing set {A,B,C} and {D,E,F} and that no constraint exists between any nodes in
     998        // either set. If A is dependent on D and E is dependent on B, multiplexing both sets would result in a cycle
     999        // in the AST. To fix this, we'd have to remove A, D, B or E.
     1000
     1001        // This cannot occur with only one traversal (or between sets generated by the same traversal) because of the
     1002        // DAG traversal strategy used in "generateCandidateSets".
     1003
     1004
    8791005    }
    8801006
    8811007    for (unsigned i = 0; i != m; ++i) {
    882         SetIterator ei, ei_end;
    883         std::tie(ei, ei_end) = in_edges(i, mMultiplexSetGraph);
     1008        AdjIterator ei, ei_end;
     1009        std::tie(ei, ei_end) = adjacent_vertices(i, mCandidateGraph);
    8841010        for (auto next = ei; ei != ei_end; ei = next) {
    8851011            ++next;
    886             if (source(*ei, mMultiplexSetGraph) != chosen_set[i]) {
    887                 remove_edge(*ei, mMultiplexSetGraph);
     1012            if (*ei != chosen_set[i]) {
     1013                remove_edge(i, *ei, mCandidateGraph);
    8881014            }
    8891015        }
     
    8921018    #ifndef NDEBUG
    8931019    for (unsigned i = 0; i != m; ++i) {
    894         assert (in_degree(i, mMultiplexSetGraph) <= 1);
     1020        assert (degree(i, mCandidateGraph) <= 1);
    8951021    }
    8961022    for (unsigned i = m; i != (m + n); ++i) {
    897         assert (out_degree(i, mMultiplexSetGraph) == 0 || out_degree(i, mMultiplexSetGraph) >= 3);
     1023        assert (degree(i, mCandidateGraph) == 0 || degree(i, mCandidateGraph) >= 3);
    8981024    }
    8991025    #endif
     
    9011027
    9021028/** ------------------------------------------------------------------------------------------------------------- *
     1029 * @brief selectMultiplexSetsWorkingSet
     1030 ** ------------------------------------------------------------------------------------------------------------- */
     1031void MultiplexingPass::selectMultiplexSetsWorkingSet() {
     1032
     1033    // The inputs to each Advance must be different; otherwise the SimplificationPass would consider all but
     1034    // one of the Advances redundant. However, if the input is short lived, we can ignore it in favour of its
     1035    // operands, which *may* be shared amongst more than one of the Advances (or may be short lived themselves,
     1036    // in which we can consider their operands instead.) Ideally, if we can keep the set of live values small,
     1037    // we may be able to reduce register pressure.
     1038
     1039//    using Graph = adjacency_list<hash_setS, vecS, bidirectionalS, Statement *, unsigned>;
     1040//    using Map = flat_map<const Statement *, typename Graph::vertex_descriptor>;
     1041
     1042//    const size_t m = num_vertices(mConstraintGraph);
     1043//    const size_t n = num_vertices(mMultiplexSetGraph) - m;
     1044
     1045//    for (unsigned i = 0; i != n; ++i) {
     1046
     1047//        Map M;
     1048//        Graph G = construct<Graph>(block, M);
     1049
     1050
     1051
     1052
     1053
     1054
     1055
     1056//    }
     1057
     1058
     1059
     1060
     1061
     1062
     1063}
     1064
     1065/** ------------------------------------------------------------------------------------------------------------- *
    9031066 * @brief eliminateSubsetConstraints
    9041067 ** ------------------------------------------------------------------------------------------------------------- */
    9051068void MultiplexingPass::eliminateSubsetConstraints() {
     1069    using SubsetEdgeIterator = graph_traits<SubsetGraph>::edge_iterator;
    9061070    // If Ai ⊂ Aj then the subset graph will contain the arc (i, j). Remove all arcs corresponding to vertices
    9071071    // that are not elements of the same multiplexing set.
     
    9121076        const auto u = source(*ei, mSubsetGraph);
    9131077        const auto v = target(*ei, mSubsetGraph);
    914         if (in_degree(u, mMultiplexSetGraph) != 0 && in_degree(v, mMultiplexSetGraph) != 0) {
    915             assert (in_degree(u, mMultiplexSetGraph) == 1);
    916             const auto su = source(*(in_edges(u, mMultiplexSetGraph).first), mMultiplexSetGraph);
    917             assert (in_degree(v, mMultiplexSetGraph) == 1);
    918             const auto sv = source(*(in_edges(v, mMultiplexSetGraph).first), mMultiplexSetGraph);
     1078        if (degree(u, mCandidateGraph) != 0 && degree(v, mCandidateGraph) != 0) {
     1079            assert (degree(u, mCandidateGraph) == 1);
     1080            assert (degree(v, mCandidateGraph) == 1);
     1081            const auto su = *(adjacent_vertices(u, mCandidateGraph).first);
     1082            const auto sv = *(adjacent_vertices(v, mCandidateGraph).first);
    9191083            if (su == sv) {
    9201084                continue;
     
    9521116    flat_set<PabloBlock *> modified;
    9531117    const auto first_set = num_vertices(mConstraintGraph);
    954     const auto last_set = num_vertices(mMultiplexSetGraph);
     1118    const auto last_set = num_vertices(mCandidateGraph);
    9551119    for (auto idx = first_set; idx != last_set; ++idx) {
    956         const size_t n = out_degree(idx, mMultiplexSetGraph);
     1120        const size_t n = degree(idx, mCandidateGraph);
     1121        assert (n == 0 || n > 2);
    9571122        if (n) {
    9581123            const size_t m = log2_plus_one(n);
     
    10111176    }
    10121177    for (PabloBlock * block : modified) {
    1013         topologicalSort(block);
     1178        rewriteAST(block);
    10141179    }
    10151180}
     
    10181183 * @brief orderMultiplexSet
    10191184 ** ------------------------------------------------------------------------------------------------------------- */
    1020 inline MultiplexingPass::MultiplexVector MultiplexingPass::orderMultiplexSet(const MultiplexSetGraph::vertex_descriptor u) {
    1021     MultiplexVector set;
    1022     set.reserve(out_degree(u, mMultiplexSetGraph));
    1023     for (const auto e : make_iterator_range(out_edges(u, mMultiplexSetGraph))) {
    1024         set.push_back(target(e, mMultiplexSetGraph));
     1185inline MultiplexingPass::Candidates MultiplexingPass::orderMultiplexSet(const CandidateGraph::vertex_descriptor u) {
     1186    Candidates set;
     1187    set.reserve(degree(u, mCandidateGraph));
     1188    for (const auto e : make_iterator_range(adjacent_vertices(u, mCandidateGraph))) {
     1189        set.push_back(e);
    10251190    }
    10261191    std::sort(set.begin(), set.end());
    1027     MultiplexVector clique;
    1028     MultiplexVector result;
    1029     result.reserve(out_degree(u, mMultiplexSetGraph));
     1192    Candidates clique;
     1193    Candidates result;
     1194    result.reserve(degree(u, mCandidateGraph));
    10301195    while (set.size() > 0) {
    10311196        const auto v = *set.begin();
     
    10571222}
    10581223
    1059 /** ------------------------------------------------------------------------------------------------------------- *
    1060  * @brief topologicalSort
    1061  ** ------------------------------------------------------------------------------------------------------------- */
    1062 void MultiplexingPass::topologicalSort(PabloBlock * const block) {
    1063 
    1064     using Vertex = OrderingGraph::vertex_descriptor;
    1065 
    1066     OrderingGraph G;
    1067     OrderingMap M;
    1068 
    1069     for (Statement * stmt : *block ) {
    1070         const auto u = add_vertex(G);
    1071         G[u] = stmt;
    1072         M.emplace(stmt, u);
    1073         if (LLVM_UNLIKELY(isa<If>(stmt))) {
    1074             for (Assign * def : cast<If>(stmt)->getDefined()) {
    1075                 M.emplace(def, u);
    1076             }
    1077         } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
    1078             for (Next * var : cast<While>(stmt)->getVariants()) {
    1079                 M.emplace(var, u);
    1080             }
    1081         }
    1082     }
    1083 
    1084     Vertex u = 0;
    1085     for (Statement * stmt : *block ) {
    1086 
    1087         for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
    1088             PabloAST * const op = stmt->getOperand(i);
    1089             if (isa<Statement>(op)) {
    1090                 auto f = M.find(cast<Statement>(op));
    1091                 if (f != M.end()) {
    1092                     add_edge(f->second, u, G);
    1093                 }
    1094             }
    1095         }
    1096 
    1097         if (LLVM_UNLIKELY(isa<If>(stmt))) {
    1098             for (Assign * def : cast<If>(stmt)->getDefined()) {
    1099                 topologicalSort(u, block, def, G, M);
    1100             }
    1101         } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
    1102             for (Next * var : cast<While>(stmt)->getVariants()) {
    1103                 topologicalSort(u, block, var, G, M);
    1104             }
    1105         } else {
    1106             topologicalSort(u, block, stmt, G, M);
    1107         }
    1108 
    1109         ++u;
    1110 
    1111     }
    1112 
    1113     circular_buffer<Vertex> Q(num_vertices(G));
    1114     topological_sort(G, std::back_inserter(Q));
    1115 
    1116     block->setInsertPoint(nullptr);
    1117     while (Q.size() > 0) {
    1118         const Vertex u = Q.back(); Q.pop_back();
    1119         block->insert(G[u]);
    1120     }
    1121 
    1122 }
    1123 
    1124 /** ------------------------------------------------------------------------------------------------------------- *
    1125  * @brief topologicalSort
    1126  ** ------------------------------------------------------------------------------------------------------------- */
    1127 void MultiplexingPass::topologicalSort(const OrderingGraph::vertex_descriptor u, const PabloBlock * const block, const Statement * const stmt, OrderingGraph & G, OrderingMap & M) {
    1128     for (const PabloAST * user : stmt->users()) {
    1129         if (LLVM_LIKELY(isa<Statement>(user))) {
    1130             const Statement * use = cast<Statement>(user);
    1131             auto f = M.find(use);
    1132             if (LLVM_UNLIKELY(f == M.end())) {
    1133                 const PabloBlock * parent = use->getParent();
    1134                 for (;;) {
    1135                     if (parent == block) {
     1224
     1225
     1226
     1227/** ------------------------------------------------------------------------------------------------------------- *
     1228 * @brief rewriteAST
     1229 *
     1230 * Multiplexing ignores def-use ordering when muxing and demuxing the Advances; this will likely lead to an illegal
     1231 * ordering but, by virtue of the multiplexing algorithm, some ordering of the IR must be legal. However, an
     1232 * arbritary topological ordering will likely lead to poor performance due to excessive register spills; this
     1233 * algorithm attempts to mitigate this by using a simple bottom-up ordering scheme.
     1234 ** ------------------------------------------------------------------------------------------------------------- */
     1235void MultiplexingPass::rewriteAST(PabloBlock * const block) {
     1236
     1237    using Graph = adjacency_list<hash_setS, vecS, bidirectionalS, Statement *>;
     1238    using Vertex = Graph::vertex_descriptor;
     1239    using ReadySet = std::vector<Vertex>;
     1240    using TypeId = PabloAST::ClassTypeId;
     1241
     1242    Graph G = construct<Graph>(block);
     1243
     1244
     1245    std::vector<unsigned> rank(num_vertices(G), 0);
     1246
     1247    {
     1248        circular_buffer<Vertex> Q(num_vertices(G));
     1249        // Compute the rank of each statement
     1250        for (const Vertex u : make_iterator_range(vertices(G))) {
     1251            if (out_degree(u, G) == 0 && rank[u] == 0) {
     1252                Q.push_back(u);
     1253            }
     1254        }
     1255
     1256        while (Q.size() > 0) {
     1257
     1258            const Vertex u = Q.front();
     1259            Q.pop_front();
     1260
     1261            assert (rank[u] == 0);
     1262
     1263            unsigned work = 0;
     1264            switch (G[u]->getClassTypeId()) {
     1265                case TypeId::And:
     1266                case TypeId::Or:
     1267                case TypeId::Xor:
     1268                    work = 2;
     1269                    break;
     1270//                case TypeId::Not:
     1271                case TypeId::Assign:
     1272                case TypeId::Next:
     1273                    work = 1;
     1274                    break;
     1275                case TypeId::Sel:
     1276                    work = 6;
     1277                    break;
     1278                case TypeId::Advance:
     1279                case TypeId::ScanThru:
     1280                    work = 33;
     1281                    break;
     1282                case TypeId::MatchStar:
     1283                    work = 51;
     1284                    break;
     1285                case TypeId::If:
     1286                case TypeId::While:
     1287                case TypeId::Call:
     1288                    work = 10000; // <-- try to push If, While and Call nodes as high as possible
     1289                    break;
     1290                default: break;
     1291            }
     1292
     1293            unsigned r = 0;
     1294            for (const auto e : make_iterator_range(out_edges(u, G))) {
     1295                r = std::max(r, rank[target(e, G)]);
     1296            }
     1297
     1298            rank[u] = work + r;
     1299
     1300            for (const auto ei : make_iterator_range(in_edges(u, G))) {
     1301                const auto v = source(ei, G);
     1302                assert (rank[v] == 0);
     1303                bool ready = true;
     1304                for (const auto ej : make_iterator_range(out_edges(v, G))) {
     1305                    if (rank[target(ej, G)] == 0) {
     1306                        ready = false;
    11361307                        break;
    11371308                    }
    1138                     use = parent->getBranch();
    1139                     parent = parent->getParent();
    1140                     if (parent == nullptr) {
    1141                         return;
     1309                }
     1310                if (ready) {
     1311                    Q.push_back(v);
     1312                }
     1313            }
     1314
     1315        }
     1316    }
     1317
     1318    // Compute the initial ready set
     1319    ReadySet readySet;
     1320    for (const Vertex u : make_iterator_range(vertices(G))) {
     1321        if (in_degree(u, G) == 0) {
     1322            readySet.emplace_back(u);
     1323        }
     1324    }
     1325
     1326    auto by_nonincreasing_rank = [&rank](const Vertex u, const Vertex v) -> bool {
     1327        return rank[u] > rank[v];
     1328    };
     1329
     1330    std::sort(readySet.begin(), readySet.end(), by_nonincreasing_rank);
     1331
     1332    block->setInsertPoint(nullptr);
     1333    // Rewrite the AST using the bottom-up ordering
     1334    while (readySet.size() > 0) {
     1335        // Scan through the ready set to determine which one 'kills' the greatest number of inputs
     1336        double best = 0.0;
     1337        auto rk = readySet.begin();
     1338        for (auto ri = readySet.begin(); ri != readySet.end(); ++ri) {
     1339            double p = 0.0;
     1340            assert (rank[*ri] != 0);
     1341            for (auto ei : make_iterator_range(in_edges(*ri, G))) {
     1342                const auto v = source(ei, G);
     1343                unsigned unscheduled = 0;
     1344                for (auto ej : make_iterator_range(out_edges(v, G))) {
     1345                    if (rank[target(ej, G)] != 0) { // if this edge leads to an unscheduled statement
     1346                        ++unscheduled;
    11421347                    }
    11431348                }
    1144                 f = M.find(use);
    1145                 assert (f != M.end());
    1146                 M.emplace(use, f->second);
    1147             }
    1148             const auto v = f->second;
    1149             if (LLVM_UNLIKELY(u != v)) {
    1150                 add_edge(u, v, G);
    1151             }
    1152         }
    1153     }
     1349                assert (unscheduled > 0);
     1350                assert (unscheduled <= out_degree(v, G));
     1351                const double uses = out_degree(v, G);
     1352                p += std::pow((uses - (double)(unscheduled - 1)) / uses, 2);
     1353            }
     1354            if (p > best) {
     1355                rk = ri;
     1356                best = p;
     1357            }
     1358        }
     1359        const auto u = *rk;
     1360        readySet.erase(rk);
     1361        // Write the statement back to the AST ...
     1362        block->insert(G[u]);
     1363        // ... and mark it as written
     1364        rank[u] = 0;
     1365        // Now check whether any new statements are ready
     1366        for (auto ei : make_iterator_range(out_edges(u, G))) {
     1367            bool ready = true;
     1368            const auto v = target(ei, G);
     1369            for (auto ej : make_iterator_range(in_edges(v, G))) {
     1370                if (rank[source(ej, G)] != 0) {
     1371                    ready = false;
     1372                    break;
     1373                }
     1374            }
     1375            if (ready) {
     1376                assert (rank[v] != 0);
     1377                readySet.insert(std::lower_bound(readySet.begin(), readySet.end(), v, by_nonincreasing_rank), v);
     1378                assert (std::is_sorted(readySet.cbegin(), readySet.cend(), by_nonincreasing_rank));
     1379            }
     1380        }
     1381    }
     1382
    11541383}
    11551384
     
    12891518}
    12901519
     1520/** ------------------------------------------------------------------------------------------------------------- *
     1521 * @brief computeDAG
     1522 ** ------------------------------------------------------------------------------------------------------------- */
     1523template<typename Graph, typename Map>
     1524Graph construct(PabloBlock * const block, Map & M) {
     1525
     1526    using Vertex = typename Graph::vertex_descriptor;
     1527
     1528    const auto size = std::distance(block->begin(), block->end());
     1529
     1530    Graph G(size);
     1531    M.reserve(size);
     1532
     1533    Vertex u = 0;
     1534    for (Statement * stmt : *block ) {
     1535        G[u] = stmt;
     1536        M.emplace(stmt, u);
     1537        if (LLVM_UNLIKELY(isa<If>(stmt))) {
     1538            for (Assign * def : cast<If>(stmt)->getDefined()) {
     1539                M.emplace(def, u);
     1540            }
     1541        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
     1542            for (Next * var : cast<While>(stmt)->getVariants()) {
     1543                M.emplace(var, u);
     1544            }
     1545        }
     1546        ++u;
     1547    }
     1548
     1549    /// The following is a lamda function that adds any users of "stmt" to the graph after resolving
     1550    /// which vertex it maps to w.r.t. the current block.
     1551    auto addUsers = [&](const Vertex u, const Statement * const stmt) -> void {
     1552        for (const PabloAST * user : stmt->users()) {
     1553            if (LLVM_LIKELY(isa<Statement>(user))) {
     1554                const Statement * use = cast<Statement>(user);
     1555                auto f = M.find(use);
     1556                if (LLVM_UNLIKELY(f == M.end())) {
     1557                    const PabloBlock * parent = use->getParent();
     1558                    for (;;) {
     1559                        if (parent == block) {
     1560                            break;
     1561                        }
     1562                        use = parent->getBranch();
     1563                        parent = parent->getParent();
     1564                        if (parent == nullptr) {
     1565                            return;
     1566                        }
     1567                    }
     1568                    f = M.find(use);
     1569                    assert (f != M.end());
     1570                    M.emplace(use, f->second);
     1571                }
     1572                const auto v = f->second;
     1573                if (LLVM_UNLIKELY(u != v)) {
     1574                    add_edge(u, v, G);
     1575                }
     1576            }
     1577        }
     1578    };
     1579
     1580    u = 0;
     1581    for (Statement * stmt : *block ) {
     1582
     1583        for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
     1584            PabloAST * const op = stmt->getOperand(i);
     1585            if (isa<Statement>(op)) {
     1586                auto f = M.find(cast<Statement>(op));
     1587                if (f != M.end()) {
     1588                    add_edge(f->second, u, G);
     1589                }
     1590            }
     1591        }
     1592
     1593        if (LLVM_UNLIKELY(isa<If>(stmt))) {
     1594            for (Assign * def : cast<If>(stmt)->getDefined()) {
     1595                addUsers(u, def);
     1596            }
     1597        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
     1598            for (Next * var : cast<While>(stmt)->getVariants()) {
     1599                addUsers(u, var);
     1600            }
     1601        } else {
     1602            addUsers(u, stmt);
     1603        }
     1604
     1605        ++u;
     1606    }
     1607
     1608    #ifndef NDEBUG
     1609    std::vector<typename Graph::vertex_descriptor> nothing;
     1610    topological_sort(G, std::back_inserter(nothing));
     1611    #endif
     1612
     1613    return G;
     1614}
     1615
     1616
     1617/** ------------------------------------------------------------------------------------------------------------- *
     1618 * @brief computeDAG
     1619 ** ------------------------------------------------------------------------------------------------------------- */
     1620template<typename Graph>
     1621Graph construct(PabloBlock * const block) {
     1622    using Map = flat_map<const Statement *, typename Graph::vertex_descriptor>;
     1623    Map M;
     1624    return construct<Graph, Map>(block, M);
     1625}
     1626
    12911627} // end of namespace pablo
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_automultiplexing.hpp

    r4927 r4937  
    2424
    2525    using CharacterizationMap = llvm::DenseMap<const PabloAST *, BDD>;
     26
    2627    using ConstraintGraph = boost::adjacency_matrix<boost::directedS>;
    2728    using ConstraintVertex = ConstraintGraph::vertex_descriptor;
     29    using Constraints = std::vector<ConstraintVertex>;
     30
    2831    using RNG = std::mt19937;
    2932    using IntDistribution = std::uniform_int_distribution<RNG::result_type>;
    30     using MultiplexSetGraph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::bidirectionalS>;
    31     using MultiplexVector = std::vector<MultiplexSetGraph::vertex_descriptor>;
     33
     34    using CandidateGraph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::undirectedS>;
     35    using Candidates = std::vector<CandidateGraph::vertex_descriptor>;
     36
    3237    using SubsetGraph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::bidirectionalS>;
    33     using SubsetEdgeIterator = boost::graph_traits<SubsetGraph>::edge_iterator;
     38
    3439    using CliqueGraph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::undirectedS>;
    3540    using CliqueSet = boost::container::flat_set<CliqueGraph::vertex_descriptor>;
    3641    using CliqueSets = boost::container::flat_set<std::vector<CliqueGraph::vertex_descriptor>>;
    37     using OrderingGraph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::bidirectionalS, Statement *>;
    38     using OrderingMap = boost::container::flat_map<const Statement *, OrderingGraph::vertex_descriptor>;
    3942
    4043    using AdvanceVector = std::vector<Advance *>;
    4144    using AdvanceDepth = std::vector<int>;
    4245    using AdvanceVariable = std::vector<BDD>;
    43     using VertexVector = std::vector<ConstraintVertex>;
    4446
    4547public:
    4648
    47     static bool optimize(PabloFunction & function, const unsigned limit = std::numeric_limits<unsigned>::max(), const unsigned maxSelections = 100, const unsigned windowSize = 1, const bool independent = false);
     49    static bool optimize(PabloFunction & function, const bool independent = false);
    4850    #ifdef PRINT_TIMING_INFORMATION
    4951    using seed_t = RNG::result_type;
     
    6971
    7072    bool generateCandidateSets();
    71     void addCandidateSet(const VertexVector & S);
    72     void selectMultiplexSets();
     73    void addCandidateSet(const Constraints & S);
     74    void updateCandidateSet(ConstraintVertex * const begin, ConstraintVertex * const end);
     75    void selectCandidateSet(const unsigned n, const unsigned k, const unsigned m, const Constraints & S, ConstraintVertex * const element);
     76
     77    void selectMultiplexSetsGreedy();
     78    void selectMultiplexSetsWorkingSet();
    7379
    7480    void eliminateSubsetConstraints();
    7581    void doTransitiveReductionOfSubsetGraph();
    7682
    77     MultiplexVector orderMultiplexSet(const MultiplexSetGraph::vertex_descriptor u);
     83    Candidates orderMultiplexSet(const CandidateGraph::vertex_descriptor u);
    7884    void multiplexSelectedSets(PabloFunction & function);
    7985
    80     static void topologicalSort(PabloBlock * const block);
    81     static void topologicalSort(const OrderingGraph::vertex_descriptor u, const PabloBlock * const block, const Statement * const stmt, OrderingGraph & G, OrderingMap & M);
     86    static void rewriteAST(PabloBlock * const block);
    8287
    8388    BDD & get(const PabloAST * const expr);
    8489
    85     inline MultiplexingPass(const RNG::result_type seed, const unsigned limit, const unsigned maxSelections, const unsigned windowSize)
    86     : mMultiplexingSetSizeLimit(limit)
    87     , mMaxMultiplexingSetSelections(maxSelections)
    88     , mWindowSize(windowSize)
    89     , mTestConstrainedAdvances(true)
     90    inline MultiplexingPass(const RNG::result_type seed)
     91    : mTestConstrainedAdvances(true)
    9092    , mSubsetImplicationsAdhereToWindowingSizeConstraint(false)
    9193    , mVariables(0)
     
    9395    , mConstraintGraph(0)
    9496    , mAdvance(0, nullptr)
    95     , mAdvanceDepth(0, 0)
     97    , mAdvanceRank(0, 0)
    9698    , mAdvanceNegatedVariable(0, 0)
    9799    {
     
    100102
    101103private:
    102     const unsigned              mMultiplexingSetSizeLimit;
    103     const unsigned              mMaxMultiplexingSetSelections;
    104     const unsigned              mWindowSize;
    105104    const bool                  mTestConstrainedAdvances;
    106105    const bool                  mSubsetImplicationsAdhereToWindowingSizeConstraint;
     
    110109    ConstraintGraph             mConstraintGraph;   
    111110    AdvanceVector               mAdvance;
    112     AdvanceDepth                mAdvanceDepth;
     111    AdvanceDepth                mAdvanceRank;
    113112    AdvanceVariable             mAdvanceNegatedVariable;
    114113    SubsetGraph                 mSubsetGraph;
    115114    CliqueGraph                 mUsageGraph;
    116     MultiplexSetGraph           mMultiplexSetGraph;
     115    CandidateGraph           mCandidateGraph;
    117116};
    118117
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_simplifier.cpp

    r4927 r4937  
    545545 * @brief strengthReduction
    546546 *
    547  * Find and replace any Pablo operations with the less expensive equivalent operations whenever possible.
     547 * Find and replace any Pablo operations with a less expensive equivalent operation whenever possible.
    548548 ** ------------------------------------------------------------------------------------------------------------- */
    549549void Simplifier::strengthReduction(PabloBlock * const block) {
     
    578578                    op->eraseFromParent(false);
    579579                }
    580             }
     580            } else if (isa<And>(scanThru->getOperand(0))) {
     581                // Suppose B is an arbitrary bitstream and A = Advance(B, 1). ScanThru(B ∧ ¬A, B) will leave a marker on the position
     582                // following the end of any run of 1-bits in B. But this is equivalent to computing A ∧ ¬B since A will have exactly
     583                // one 1-bit past the end of any run of 1-bits in B.
     584
     585
     586
     587
     588
     589            }
     590
     591
     592
     593
    581594        }
    582595        stmt = stmt->getNextNode();
     
    592605    PabloVerifier::verify(function, "post-eliminate-redundant-code");
    593606    #endif
     607    strengthReduction(function.getEntryBlock());
     608    #ifndef NDEBUG
     609    PabloVerifier::verify(function, "post-strength-reduction");
     610    #endif
    594611    deadCodeElimination(function.getEntryBlock());
    595612    #ifndef NDEBUG
    596613    PabloVerifier::verify(function, "post-dead-code-elimination");
    597614    #endif
    598     strengthReduction(function.getEntryBlock());
    599     #ifndef NDEBUG
    600     PabloVerifier::verify(function, "post-strength-reduction");
    601     #endif
    602615    return true;
    603616}
  • icGREP/icgrep-devel/icgrep/papi_helper.hpp

    r4919 r4937  
    7171PapiCounter<N>::~PapiCounter() {
    7272    // Call PAPI stop on destruction
    73     int rval = PAPI_stop(fEventSet, NULL);
     73    int rval = PAPI_stop(fEventSet, nullptr);
    7474    if (rval != PAPI_OK) {
    7575        throw std::runtime_error(" PAPI code: " + std::string(PAPI_strerror(rval)));
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r4922 r4937  
    9696static cl::opt<bool> EnableMultiplexing("multiplexing", cl::init(false),
    9797                                        cl::desc("combine Advances whose inputs are mutual exclusive into the fewest number of advances possible (expensive)."),
    98                                         cl::cat(cPabloOptimizationsOptions));
    99 
    100 static cl::opt<unsigned> MultiplexingSetLimit("multiplexing-set-limit", cl::init(std::numeric_limits<unsigned>::max()),
    101                                         cl::desc("maximum size of any candidate multiplexing set."),
    102                                         cl::cat(cPabloOptimizationsOptions));
    103 
    104 static cl::opt<unsigned> MultiplexingSelectionLimit("multiplexing-selection-limit", cl::init(100),
    105                                         cl::desc("maximum number of selections from any partial candidate multiplexing set."),
    106                                         cl::cat(cPabloOptimizationsOptions));
    107 
    108 static cl::opt<unsigned> MultiplexingWindowSize("multiplexing-window-size", cl::init(1),
    109                                         cl::desc("maximum depth difference for computing mutual exclusion of Advance nodes."),
    11098                                        cl::cat(cPabloOptimizationsOptions));
    11199
     
    307295    if (EnableMultiplexing) {
    308296        READ_CYCLE_COUNTER(multiplexing_start);
    309         MultiplexingPass::optimize(*function, MultiplexingSetLimit, MultiplexingSelectionLimit, MultiplexingWindowSize);
     297        MultiplexingPass::optimize(*function);
    310298        READ_CYCLE_COUNTER(multiplexing_end);
    311299        if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
Note: See TracChangeset for help on using the changeset viewer.