Ignore:
Timestamp:
Jan 21, 2016, 5:15:33 PM (4 years ago)
Author:
nmedfort
Message:

Work on lowering + some timing and papi information that will be cleaned up later.

Location:
icGREP/icgrep-devel/icgrep
Files:
3 added
24 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r4908 r4919  
    1717option(USE_BOOST_MMAP "Using mmap from Boost.Iostreams")
    1818option(ENABLE_PREGENERATED_UCD_FUNCTIONS "Enable compiling the pregenerated UCD functions")
    19 
     19option(PRINT_TIMING_INFORMATION "Write compilation and execution timing information to standard error stream")
    2020
    2121# configure a header file to pass some of the CMake settings
     
    3737
    3838# We incorporate the CMake features provided by LLVM:
    39 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${LLVM_ROOT}/share/llvm/cmake")
     39set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${LLVM_ROOT}/share/llvm/cmake;${CMAKE_CURRENT_SOURCE_DIR}/cmake")
     40
    4041include(LLVMConfig)
    4142
     
    4748# Let's suppose we want to build a JIT compiler with support for
    4849# binary code (no interpreter):
    49 llvm_map_components_to_libnames(REQ_LLVM_LIBRARIES mcjit native IRReader)
     50llvm_map_components_to_libnames(REQ_LLVM_LIBRARIES mcjit native IRReader) # ipo
    5051
    5152message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
     
    124125    target_link_libraries(icgrep ${Boost_LIBRARIES})
    125126ENDIF()
     127IF (PRINT_TIMING_INFORMATION)
     128    find_package(PAPI REQUIRED)
     129    include_directories(${PAPI_INCLUDE_DIRS})
     130    target_link_libraries(icgrep ${PAPI_LIBRARIES})
     131ENDIF()
     132
    126133
    127134target_link_libraries (icgrep UCDlib PabloADT RegExpCompiler CCADT ${REQ_LLVM_LIBRARIES})
     
    314321ENDIF()
    315322
     323IF (PRINT_TIMING_INFORMATION)   
     324    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPRINT_TIMING_INFORMATION")
     325ENDIF()
     326
    316327SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG")
    317328IF (${CMAKE_SYSTEM} MATCHES "Linux")
  • icGREP/icgrep-devel/icgrep/hrtime.h

    r3851 r4919  
    3636}
    3737
     38typedef uint64_t timestamp_t;
     39
    3840// get the number of CPU cycles since startup using rdtsc instruction
    39 inline unsigned long long get_hrcycles() {
    40   unsigned int tmp[2];
    41   asm ("rdtsc" : "=a" (tmp[1]), "=d" (tmp[0]));
    42   return (((unsigned long long)tmp[0] << 32 | tmp[1]));
     41static inline timestamp_t read_cycle_counter() {
     42#ifdef __GNUC__
     43timestamp_t ts;
     44#ifdef __x86_64__
     45  unsigned int eax, edx;
     46  asm volatile("rdtsc" : "=a" (eax), "=d" (edx));
     47  ts = ((timestamp_t) eax) | (((timestamp_t) edx) << 32);
     48#else
     49  asm volatile("rdtsc\n" : "=A" (ts));
     50#endif
     51  return(ts);
     52#endif
     53#ifdef _MSC_VER
     54  return __rdtsc();
     55#endif
    4356}
    4457
     
    4861  if (CPU_HZ == 0)
    4962    CPU_HZ = getMHZ() * 1000000;
    50   return (get_hrcycles() / CPU_HZ);
     63  return (read_cycle_counter() / CPU_HZ);
    5164}
    5265
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r4896 r4919  
    493493pablo/optimizers/schedulingprepass.h
    494494pablo/optimizers/schedulingprepass.cpp
     495papi_helper.hpp
  • icGREP/icgrep-devel/icgrep/icgrep-devel.includes

    r4876 r4919  
    1212../buddy-2.4/src
    1313pablo/passes
     14../llvm-build/lib
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r4917 r4919  
    2525#include <llvm/Support/Host.h>
    2626#include <llvm/IR/Verifier.h>
    27 
    2827#include <kernels/s2p_gen.h>
    2928#include <kernels/scanmatchgen.h>
     
    3635#include <pablo/function.h>
    3736
    38 #include "do_grep.h"
     37#include <iostream>
     38
     39#include <do_grep.h>
     40#include <hrtime.h>
     41
     42#ifdef PRINT_TIMING_INFORMATION
     43#include "papi_helper.hpp"
     44#endif
    3945
    4046static cl::OptionCategory aRegexSourceOptions("Regular Expression Options",
     
    6167static cl::opt<std::string> IRFileName("precompiled", cl::desc("Use precompiled regular expression"), cl::value_desc("LLVM IR file"), cl::init(""), cl::cat(aRegexSourceOptions));
    6268
    63 
     69using namespace llvm;
    6470
    6571static unsigned firstInputFile = 1;  // Normal case when first positional arg is a regex.
     
    154160        re_ast = regular_expression_passes(encoding, re_ast);
    155161       
     162        #ifdef PRINT_TIMING_INFORMATION
     163        const timestamp_t regex_compilation_start = read_cycle_counter();
     164        #endif
    156165        pablo::PabloFunction * function = re2pablo_compiler(encoding, re_ast);
     166        #ifdef PRINT_TIMING_INFORMATION
     167        const timestamp_t regex_compilation_end = read_cycle_counter();
     168        std::cerr << "REGEX COMPILATION TIME: " << (regex_compilation_end - regex_compilation_start) << std::endl;
     169        #endif
    157170
    158171        pablo_function_passes(function);
    159        
    160172       
    161173        pablo::PabloCompiler pablo_compiler(M, idb);
     
    181193    llvm::Function * s2p_IR = M->getFunction("s2p_block");
    182194   
    183     llvm::Function * scanRoutine = M->getFunction("scan_matches_in_bitblock");
     195   // llvm::Function * scanRoutine = M->getFunction("scan_matches_in_bitblock");
    184196   
    185197    if (s2p_IR == nullptr) {
     
    200212    void * icgrep_MCptr = engine->getPointerToFunction(icgrep_IR);
    201213    void * s2p_MCptr = engine->getPointerToFunction(s2p_IR);
    202     void * scan_MCptr = engine->getPointerToFunction(scanRoutine);
     214    // void * scan_MCptr = engine->getPointerToFunction(scanRoutine);
    203215    if (s2p_MCptr == nullptr) {
    204216        std::cerr << "No s2p_MCptr!\n";
    205217        exit(1);
    206218    }
    207    
     219
    208220    if (icgrep_MCptr) {
    209221        GrepExecutor grepEngine(s2p_MCptr, icgrep_init_carry_ptr, icgrep_MCptr);
     
    215227            grepEngine.setShowFileNameOption();
    216228        }
     229        #ifdef PRINT_TIMING_INFORMATION
     230        papi::PapiCounter<4> papiCounters({PAPI_RES_STL, PAPI_STL_CCY, PAPI_FUL_CCY, PAPI_MEM_WCY});
     231        #endif
    217232        for (unsigned i = firstInputFile; i != inputFiles.size(); ++i) {
     233            #ifdef PRINT_TIMING_INFORMATION
     234            papiCounters.start();
     235            const timestamp_t execution_start = read_cycle_counter();
     236            #endif
    218237            grepEngine.doGrep(inputFiles[i]);
     238            #ifdef PRINT_TIMING_INFORMATION
     239            const timestamp_t execution_end = read_cycle_counter();
     240            papiCounters.stop();
     241            std::cerr << "EXECUTION TIME: " << inputFiles[i] << ":" << "CYCLES|" << (execution_end - execution_start) << papiCounters << std::endl;
     242            #endif
    219243        }
    220244    }
  • icGREP/icgrep-devel/icgrep/pablo/analysis/pabloverifier.cpp

    r4899 r4919  
    169169    PabloPrinter::print(user, str);
    170170    if (count == 0) {
    171         str << " is not recorded in ";
     171        str << " is not considered a user of ";
    172172    } else if (count > 0) {
    173         str << " is recorded" << count << " times in ";
     173        str << " was recorded too many times (" << count << ") in the user list of ";
    174174    }
    175175    PabloPrinter::print(def, str);
    176     str << "'s user list.";
     176    throw std::runtime_error(str.str());
     177}
     178
     179/** ------------------------------------------------------------------------------------------------------------- *
     180 * @brief throwReportedScopeError
     181 ** ------------------------------------------------------------------------------------------------------------- */
     182static void throwReportedScopeError(const Statement * const stmt) {
     183    std::string tmp;
     184    raw_string_ostream str(tmp);
     185    str << "PabloVerifier: structure error: ";
     186    PabloPrinter::print(stmt, str);
     187    str << " is not contained in its reported scope block";
     188    throw std::runtime_error(str.str());
     189}
     190
     191/** ------------------------------------------------------------------------------------------------------------- *
     192 * @brief throwMisreportedBranchError
     193 ** ------------------------------------------------------------------------------------------------------------- */
     194static void throwMisreportedBranchError(const Statement * const stmt, const Statement * const branch) {
     195    std::string tmp;
     196    raw_string_ostream str(tmp);
     197    str << "PabloVerifier: structure error: ";
     198    PabloPrinter::print(stmt, str);
     199    str << " branches into a scope block that reports ";
     200    PabloPrinter::print(stmt, str);
     201    str << " as its branching statement.";
     202    throw std::runtime_error(str.str());
     203}
     204
     205/** ------------------------------------------------------------------------------------------------------------- *
     206 * @brief throwReflexiveIfConditionError
     207 ** ------------------------------------------------------------------------------------------------------------- */
     208static void throwReflexiveIfConditionError(const PabloAST * const ifNode) {
     209    std::string tmp;
     210    raw_string_ostream str(tmp);
     211    str << "PabloVerifier: structure error: the condition of ";
     212    PabloPrinter::print(ifNode, str);
     213    str << " cannot be defined by the If node itself.";
    177214    throw std::runtime_error(str.str());
    178215}
     
    206243        if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
    207244            const PabloBlock * nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
    208             if (LLVM_UNLIKELY(nested->getParent() != block)) {
    209                 std::string tmp;
    210                 raw_string_ostream str(tmp);
    211                 str << "PabloVerifier: structure error: body of ";
    212                 PabloPrinter::print(stmt, str);
    213                 str << " is not nested within the expected scope block";
    214                 throw std::runtime_error(str.str());
     245            if (LLVM_UNLIKELY(nested->getBranch() != stmt)) {
     246                throwMisreportedBranchError(stmt, nested->getBranch());
     247            } else if (LLVM_UNLIKELY(nested->getParent() != block)) {
     248                throwReportedScopeError(stmt);
    215249            }
    216250            if (isa<If>(stmt)) {
    217251                for (const Assign * def : cast<If>(stmt)->getDefined()) {
    218252                    if (LLVM_UNLIKELY(def == cast<If>(stmt)->getCondition())) {
    219                         std::string tmp;
    220                         raw_string_ostream str(tmp);
    221                         str << "PabloVerifier: structure error: the condition of ";
    222                         PabloPrinter::print(cast<PabloAST>(stmt), str);
    223                         str << " cannot be defined by the If node itself.";
    224                         throw std::runtime_error(str.str());
    225                     }
    226                 }
    227             }
    228             if (isa<If>(stmt)) {
    229                 for (const Assign * def : cast<If>(stmt)->getDefined()) {
    230                     if (LLVM_UNLIKELY(unreachable(def, nested))) {
     253                        throwReflexiveIfConditionError(stmt);
     254                    } else if (LLVM_UNLIKELY(unreachable(def, nested))) {
    231255                        throwUncontainedEscapedValueError(stmt, def);
    232256                    }
     
    250274                    }
    251275                    count = std::count(var->user_begin(), var->user_end(), stmt);
    252                     if (LLVM_UNLIKELY(count != 1)) {
     276                    if (LLVM_UNLIKELY(count != ((cast<While>(stmt)->getCondition() == var) ? 2 : 1))) {
    253277                        throwEscapedValueUsageError(stmt, var, var, stmt, count);
    254278                    }
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/booleanreassociationpass.cpp

    r4899 r4919  
    2222
    2323using TypeId = PabloAST::ClassTypeId;
    24 using Graph = BooleanReassociationPass::Graph;
    25 using Vertex = Graph::vertex_descriptor;
     24using DependencyGraph = BooleanReassociationPass::Graph;
     25using Vertex = DependencyGraph::vertex_descriptor;
    2626using VertexData = BooleanReassociationPass::VertexData;
    2727using SinkMap = BooleanReassociationPass::Map;
    2828using DistributionGraph = adjacency_list<hash_setS, vecS, bidirectionalS, Vertex>;
    29 using DistributionMap = flat_map<Graph::vertex_descriptor, DistributionGraph::vertex_descriptor>;
     29using DistributionMap = flat_map<DependencyGraph::vertex_descriptor, DistributionGraph::vertex_descriptor>;
    3030using VertexSet = std::vector<Vertex>;
    3131using VertexSets = std::vector<VertexSet>;
     
    3939 ** ------------------------------------------------------------------------------------------------------------- */
    4040template<typename Iterator>
    41 inline Graph::edge_descriptor first(const std::pair<Iterator, Iterator> & range) {
     41inline DependencyGraph::edge_descriptor first(const std::pair<Iterator, Iterator> & range) {
    4242    assert (range.first != range.second);
    4343    return *range.first;
     
    4545
    4646#ifndef NDEBUG
    47 static bool no_path(const Vertex u, const Vertex v, const Graph & G) {
     47static bool no_path(const Vertex u, const Vertex v, const DependencyGraph & G) {
    4848    if (u == v) return false;
    4949    flat_set<Vertex> V;
     
    7070#endif
    7171
    72 inline void add_edge(PabloAST * expr, const Vertex u, const Vertex v, Graph & G) {
     72inline void add_edge(PabloAST * expr, const Vertex u, const Vertex v, DependencyGraph & G) {
    7373    assert (no_path(v, u, G));
    7474    // Make sure each edge is unique
     
    821821 * there are not enough vertices in the biclique to make distribution profitable, eliminate the clique.
    822822 ** ------------------------------------------------------------------------------------------------------------- */
    823 static BicliqueSet && removeUnhelpfulBicliques(BicliqueSet && cliques, const Graph & G, DistributionGraph & H) {
     823static BicliqueSet && removeUnhelpfulBicliques(BicliqueSet && cliques, const DependencyGraph & G, DistributionGraph & H) {
    824824    for (auto ci = cliques.begin(); ci != cliques.end(); ) {
    825825        const auto cardinalityA = std::get<0>(*ci).size();
     
    844844 * @brief safeDistributionSets
    845845 ** ------------------------------------------------------------------------------------------------------------- */
    846 static DistributionSets safeDistributionSets(const Graph & G, DistributionGraph & H) {
     846static DistributionSets safeDistributionSets(const DependencyGraph & G, DistributionGraph & H) {
    847847
    848848    VertexSet sinks;
     
    868868 * @brief generateDistributionGraph
    869869 ** ------------------------------------------------------------------------------------------------------------- */
    870 void generateDistributionGraph(const Graph & G, DistributionGraph & H) {
     870void generateDistributionGraph(const DependencyGraph & G, DistributionGraph & H) {
    871871    DistributionMap M;
    872872    for (const Vertex u : make_iterator_range(vertices(G))) {
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/codemotionpass.cpp

    r4896 r4919  
    1313 ** ------------------------------------------------------------------------------------------------------------- */
    1414bool CodeMotionPass::optimize(PabloFunction & function) {
    15     CodeMotionPass::process(function.getEntryBlock());
     15    CodeMotionPass::global(function.getEntryBlock());
    1616    #ifndef NDEBUG
    1717    PabloVerifier::verify(function, "post-code-motion");
     
    2323 * @brief process
    2424 ** ------------------------------------------------------------------------------------------------------------- */
    25 void CodeMotionPass::process(PabloBlock * const block) {
     25void CodeMotionPass::global(PabloBlock * const block) {
    2626    sink(block);
    2727    for (Statement * stmt : *block) {
    2828        if (isa<If>(stmt)) {
    29             process(cast<If>(stmt)->getBody());
     29            global(cast<If>(stmt)->getBody());
    3030        } else if (isa<While>(stmt)) {
    31             process(cast<While>(stmt)->getBody());
     31            global(cast<While>(stmt)->getBody());
    3232            // TODO: if we analyzed the probability of this loop being executed once, twice, or many times, we could
    3333            // determine whether hoisting will helpful or harmful to the expected run time.
     
    132132                // must be the LCA of our original scopes.
    133133                while (scope1 != scope2) {
     134                    assert (scope1 && scope2);
    134135                    scope1 = scope1->getParent();
    135136                    scope2 = scope2->getParent();
    136137                }
    137                 assert (scope1 && scope2);
     138                assert (scope1);
    138139                // But if the LCA is the current block, we can't sink the statement.
    139140                if (scope1 == block) {
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/codemotionpass.h

    r4896 r4919  
    3131    static bool optimize(PabloFunction & function);
    3232protected:
    33     static void process(PabloBlock * const block);
     33    static void global(PabloBlock * const block);
    3434    static bool isAcceptableTarget(Statement *stmt, ScopeSet & scopeSet, const PabloBlock * const block);
    3535    static void sink(PabloBlock * const block);
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/distributivepass.cpp

    r4899 r4919  
    1414namespace pablo {
    1515
    16 using Graph = adjacency_list<hash_setS, vecS, bidirectionalS, PabloAST *>;
    17 using Vertex = Graph::vertex_descriptor;
     16using DependencyGraph = adjacency_list<hash_setS, vecS, bidirectionalS, PabloAST *>;
     17using Vertex = DependencyGraph::vertex_descriptor;
    1818using SinkMap = flat_map<PabloAST *, Vertex>;
    1919using VertexSet = std::vector<Vertex>;
     
    2828 * @brief getVertex
    2929 ** ------------------------------------------------------------------------------------------------------------- */
    30 static inline Vertex getVertex(PabloAST * value, Graph & G, SinkMap & M) {
     30static inline Vertex getVertex(PabloAST * value, DependencyGraph & G, SinkMap & M) {
    3131    const auto f = M.find(value);
    3232    if (f != M.end()) {
     
    4343 * Generate a graph G describing the potential applications of the distributive law for the given block.
    4444 ** ------------------------------------------------------------------------------------------------------------- */
    45 VertexSet generateDistributionGraph(PabloBlock * block, Graph & G) {
     45VertexSet generateDistributionGraph(PabloBlock * block, DependencyGraph & G) {
    4646    SinkMap M;
    4747    VertexSet distSet;
     
    126126 ** ------------------------------------------------------------------------------------------------------------- */
    127127template <unsigned side>
    128 inline static BicliqueSet && independentCliqueSets(BicliqueSet && cliques, const unsigned minimum) {
     128inline static BicliqueSet && independentCliqueSets(BicliqueSet && bicliques, const unsigned minimum) {
    129129    using IndependentSetGraph = adjacency_list<hash_setS, vecS, undirectedS, unsigned>;
    130130
    131     const auto l = cliques.size();
     131    const auto l = bicliques.size();
    132132    IndependentSetGraph I(l);
    133133
    134     // Initialize our weights
     134    // Initialize our weights and determine the constraints
    135135    for (unsigned i = 0; i != l; ++i) {
    136         I[i] = std::pow(std::get<side>(cliques[i]).size(), 2);
    137     }
    138 
    139     // Determine our constraints
    140     for (unsigned i = 0; i != l; ++i) {
     136        I[i] = std::pow(std::get<side>(bicliques[i]).size(), 2);
    141137        for (unsigned j = i + 1; j != l; ++j) {
    142             if (intersects(std::get<side>(cliques[i]), std::get<side>(cliques[j]))) {
     138            if (intersects(std::get<side>(bicliques[i]), std::get<side>(bicliques[j]))) {
    143139                add_edge(i, j, I);
    144140            }
    145141        }
    146142    }
     143
     144//    // Initialize our weights and determine the constraints
     145//    for (auto i = bicliques.begin(); i != bicliques.end(); ++i) {
     146//        I[std::distance(bicliques.begin(), i)] = std::pow(std::get<side>(*i).size(), 2);
     147//        for (auto j = i; ++j != bicliques.end(); ) {
     148//            if (intersects(i->second, j->second) && intersects(i->first, j->first)) {
     149//                add_edge(std::distance(bicliques.begin(), i), std::distance(bicliques.begin(), j), I);
     150//            }
     151//        }
     152//    }
    147153
    148154    // Use the greedy algorithm to choose our independent set
     
    167173    // Sort the selected list and then remove the unselected cliques
    168174    std::sort(selected.begin(), selected.end(), std::greater<Vertex>());
    169     auto end = cliques.end();
     175    auto end = bicliques.end();
    170176    for (const unsigned offset : selected) {
    171         end = cliques.erase(cliques.begin() + offset + 1, end) - 1;
    172     }
    173     cliques.erase(cliques.begin(), end);
    174 
    175     return std::move(cliques);
     177        end = bicliques.erase(bicliques.begin() + offset + 1, end) - 1;
     178    }
     179    bicliques.erase(bicliques.begin(), end);
     180
     181    return std::move(bicliques);
    176182}
    177183
     
    183189 * bipartition A and their adjacencies to be in B.
    184190  ** ------------------------------------------------------------------------------------------------------------- */
    185 static BicliqueSet enumerateBicliques(const Graph & G, const VertexSet & A) {
     191static BicliqueSet enumerateBicliques(const DependencyGraph & G, const VertexSet & A) {
    186192    using IntersectionSets = std::set<VertexSet>;
    187193
     
    265271 * there are not enough vertices in the biclique to make distribution profitable, eliminate the clique.
    266272 ** ------------------------------------------------------------------------------------------------------------- */
    267 static BicliqueSet && removeUnhelpfulBicliques(BicliqueSet && cliques, Graph & G) {
     273static BicliqueSet && removeUnhelpfulBicliques(BicliqueSet && cliques, DependencyGraph & G) {
    268274    for (auto ci = cliques.begin(); ci != cliques.end(); ) {
    269275        const auto cardinalityA = std::get<0>(*ci).size();
     
    288294 * @brief safeDistributionSets
    289295 ** ------------------------------------------------------------------------------------------------------------- */
    290 static DistributionSets safeDistributionSets(Graph & G, const VertexSet & distSet) {
     296static DistributionSets safeDistributionSets(DependencyGraph & G, const VertexSet & distSet) {
    291297    DistributionSets T;
    292298    BicliqueSet lowerSet = independentCliqueSets<1>(removeUnhelpfulBicliques(enumerateBicliques(G, distSet), G), 1);
     
    309315        // FlattenAssociativeDFG::coalesce(block, false);
    310316
    311         Graph G;
     317        DependencyGraph G;
    312318
    313319        const VertexSet distSet = generateDistributionGraph(block, G);
     
    350356                outerOp->addOperand(G[u]);
    351357            }
    352             FlattenAssociativeDFG::coalesce(outerOp);
     358            CoalesceDFG::coalesce(outerOp);
    353359
    354360            for (const Vertex u : sources) {
     
    359365            }
    360366            innerOp->addOperand(outerOp);
    361             FlattenAssociativeDFG::coalesce(innerOp);
     367            CoalesceDFG::coalesce(innerOp);
    362368
    363369            for (const Vertex u : sinks) {
    364                 Variadic * const resultOp = cast<Variadic>(G[u]);
    365                 resultOp->addOperand(innerOp);
    366                 FlattenAssociativeDFG::coalesce(resultOp);
     370                cast<Variadic>(G[u])->addOperand(innerOp);
    367371            }
    368372        }
     
    391395    #endif
    392396    Simplifier::optimize(function);
    393 //    FlattenAssociativeDFG::deMorgansReduction(function.getEntryBlock());
    394 //    #ifndef NDEBUG
    395 //    PabloVerifier::verify(function, "post-demorgans-reduction");
    396 //    #endif
    397 //    Simplifier::optimize(function);
    398 }
    399 
    400 
    401 }
     397}
     398
     399
     400}
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_automultiplexing.cpp

    r4899 r4919  
    1111#include <pablo/analysis/pabloverifier.hpp>
    1212#include <pablo/optimizers/pablo_simplifier.hpp>
     13#include <pablo/builder.hpp>
    1314#include <stack>
    1415#include <queue>
     
    2324using namespace boost::numeric::ublas;
    2425
    25 #define PRINT_DEBUG_OUTPUT
     26// #define PRINT_DEBUG_OUTPUT
    2627
    2728#if !defined(NDEBUG) && !defined(PRINT_DEBUG_OUTPUT)
     
    102103namespace pablo {
    103104
     105#ifdef PRINT_TIMING_INFORMATION
     106MultiplexingPass::seed_t MultiplexingPass::SEED = 0;
     107unsigned MultiplexingPass::NODES_ALLOCATED = 0;
     108unsigned MultiplexingPass::NODES_USED = 0;
     109#endif
     110
    104111using TypeId = PabloAST::ClassTypeId;
    105112
    106113bool MultiplexingPass::optimize(PabloFunction & function, const unsigned limit, const unsigned maxSelections, const unsigned windowSize, const bool independent) {
    107114
    108 //    std::random_device rd;
    109 //    const auto seed = rd();
    110     const auto seed = 83234827342;
     115    std::random_device rd;
     116    const RNG::result_type seed = rd();
     117    // const RNG::result_type seed = 83234827342;
    111118
    112119    LOG("Seed:                    " << seed);
     120
     121    #ifdef PRINT_TIMING_INFORMATION
     122    MultiplexingPass::SEED = seed;
     123    MultiplexingPass::NODES_ALLOCATED = 0;
     124    MultiplexingPass::NODES_USED = 0;
     125    #endif
    113126
    114127    MultiplexingPass mp(seed, limit, maxSelections, windowSize);
     
    132145
    133146    LOG("Nodes in BDD:             " << bdd_getnodenum() << " of " << bdd_getallocnum());
     147
     148    #ifdef PRINT_TIMING_INFORMATION
     149    MultiplexingPass::NODES_ALLOCATED = bdd_getallocnum();
     150    MultiplexingPass::NODES_USED = bdd_getnodenum();
     151    #endif
    134152
    135153    RECORD_TIMESTAMP(start_shutdown);
     
    491509.
    492510                for (auto e : make_iterator_range(in_edges(i, mSubsetGraph))) {
    493                     unconstrained[source(e, mSubsetGraph)] = true;
     511                    const auto j = source(e, mSubsetGraph);
     512                    if (mSubsetImplicationsAdhereToWindowingSizeConstraint && exceedsWindowSize(j, k)) {
     513                        continue;
     514                    }
     515                    unconstrained[j] = true;
    494516                }
    495517                unconstrained[i] = true;
     
    503525                    const auto j = source(e, mSubsetGraph);
    504526                    add_edge(j, k, mSubsetGraph);
     527                    if (mSubsetImplicationsAdhereToWindowingSizeConstraint && exceedsWindowSize(j, k)) {
     528                        continue;
     529                    }
    505530                    unconstrained[j] = true;
    506531                }
     
    513538                    const auto j = target(e, mSubsetGraph);
    514539                    add_edge(k, j, mSubsetGraph);
     540                    if (mSubsetImplicationsAdhereToWindowingSizeConstraint && exceedsWindowSize(j, k)) {
     541                        continue;
     542                    }
    515543                    unconstrained[j] = true;
    516544                }
     
    527555            // Note: this algorithm deems two streams are mutually exclusive if and only if the conjuntion of their BDDs results
    528556            // in a contradiction. To generate a contradiction when comparing Advances, the BDD of each Advance is represented by
    529             // the conjunction of variable representing that Advance and the negation of all variables for the Advances in which
    530             // the inputs are deemed to be mutually exclusive. For example, if the input of the i-th Advance is mutually exclusive
     557            // the conjunction of variable representing the k-th Advance and the negation of all variables for the Advances whose
     558            // inputs are mutually exclusive with the k-th input. For example, if the input of the i-th Advance is mutually exclusive
    531559            // with the input of the j-th and k-th Advance, the BDD of the i-th Advance is Ai ∧ ¬Aj ∧ ¬Ak.
    532560            const Advance * const ithAdv = mAdvance[i];
     
    943971            Advance * const adv = input[0];
    944972            PabloBlock * const block = adv->getParent(); assert (block);
    945             block->setInsertPoint(nullptr);           
    946             /// Perform n-to-m Multiplexing
     973            block->setInsertPoint(nullptr);
     974            circular_buffer<PabloAST *> Q(n);
     975            PabloBuilder builder(block);
     976            /// Perform n-to-m Multiplexing           
    947977            for (size_t j = 0; j != m; ++j) {
    948978                std::ostringstream prefix;
    949979                prefix << "mux" << n << "to" << m << '.' << (j + 1);
    950                 Or * muxing = block->createOr(n);
     980//                Or * muxing = block->createOr(n);
     981//                for (size_t i = 0; i != n; ++i) {
     982//                    if (((i + 1) & (1UL << j)) != 0) {
     983//                        assert (input[i]->getParent() == block);
     984//                        muxing->addOperand(input[i]->getOperand(0));
     985//                    }
     986//                }
    951987                for (size_t i = 0; i != n; ++i) {
    952988                    if (((i + 1) & (1UL << j)) != 0) {
    953                         assert (input[i]->getParent() == block);
    954                         muxing->addOperand(input[i]->getOperand(0));
    955                     }
    956                 }
    957                 muxed[j] = block->createAdvance(muxing, adv->getOperand(1), prefix.str());
    958                 muxed_n[j] = block->createNot(muxed[j]);
    959             }
    960             /// Perform m-to-n Demultiplexing
     989                        Q.push_back(input[i]->getOperand(0));
     990                    }
     991                }
     992                while (Q.size() > 1) {
     993                    PabloAST * a = Q.front(); Q.pop_front();
     994                    PabloAST * b = Q.front(); Q.pop_front();
     995                    Q.push_back(builder.createOr(a, b));
     996                }
     997                PabloAST * muxing =  Q.front(); Q.clear();
     998                muxed[j] = builder.createAdvance(muxing, adv->getOperand(1), prefix.str());
     999                muxed_n[j] = builder.createNot(muxed[j]);
     1000            }
     1001            /// Perform m-to-n Demultiplexing           
    9611002            for (size_t i = 0; i != n; ++i) {
    962                 // Construct the demuxed values and replaces all the users of the original advances with them.               
    963                 And * demuxed = block->createAnd(m);
     1003                // Construct the demuxed values and replaces all the users of the original advances with them.
     1004                assert (Q.empty());
    9641005                for (size_t j = 0; j != m; ++j) {
    965                     demuxed->addOperand((((i + 1) & (1UL << j)) != 0) ? muxed[j] : muxed_n[j]);
    966                 }
     1006                    Q.push_back((((i + 1) & (1UL << j)) != 0) ? muxed[j] : muxed_n[j]);
     1007                }
     1008                while (Q.size() > 1) {
     1009                    PabloAST * a = Q.front(); Q.pop_front();
     1010                    PabloAST * b = Q.front(); Q.pop_front();
     1011                    Q.push_back(builder.createAnd(a, b));
     1012                }
     1013                PabloAST * demuxed =  Q.front(); Q.clear();
     1014//                And * demuxed = block->createAnd(m);
     1015//                for (size_t j = 0; j != m; ++j) {
     1016//                    demuxed->addOperand((((i + 1) & (1UL << j)) != 0) ? muxed[j] : muxed_n[j]);
     1017//                }
    9671018                input[i]->replaceWith(demuxed, true, true);
    9681019            }
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_automultiplexing.hpp

    r4896 r4919  
    4444
    4545    static bool optimize(PabloFunction & function, const unsigned limit = std::numeric_limits<unsigned>::max(), const unsigned maxSelections = 100, const unsigned windowSize = 1, const bool independent = false);
    46 
     46    #ifdef PRINT_TIMING_INFORMATION
     47    using seed_t = RNG::result_type;
     48    static seed_t SEED;
     49    static unsigned NODES_ALLOCATED;
     50    static unsigned NODES_USED;
     51    #endif
    4752protected:
    4853
     
    8186    , mWindowSize(windowSize)
    8287    , mTestConstrainedAdvances(true)
     88    , mSubsetImplicationsAdhereToWindowingSizeConstraint(false)
    8389    , mVariables(0)
    8490    , mRNG(seed)
     
    96102    const unsigned              mWindowSize;
    97103    const bool                  mTestConstrainedAdvances;
     104    const bool                  mSubsetImplicationsAdhereToWindowingSizeConstraint;
    98105    unsigned                    mVariables;
    99106    RNG                         mRNG;
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_simplifier.cpp

    r4899 r4919  
    55#include <pablo/printer_pablos.h>
    66#include <pablo/analysis/pabloverifier.hpp>
    7 #ifdef USE_BOOST
    87#include <boost/container/flat_set.hpp>
    9 #else
    10 #include <unordered_set>
    11 #endif
    128
    139namespace pablo {
    1410
    15 #ifdef USE_BOOST
    1611template <typename Type>
    1712using SmallSet = boost::container::flat_set<Type>;
    18 #else
    19 template <typename Type>
    20 using SmallSet = std::unordered_set<Type>;
    21 #endif
    2213
    2314/** ------------------------------------------------------------------------------------------------------------- *
     
    2516 ** ------------------------------------------------------------------------------------------------------------- */
    2617bool Simplifier::optimize(PabloFunction & function) {
     18    // negationsShouldImmediatelySucceedTheirLiteral(function.getEntryBlock());
     19    #ifndef NDEBUG
     20    PabloVerifier::verify(function, "post-negation-must-immediately-succeed-their-literal");
     21    #endif
    2722    eliminateRedundantCode(function.getEntryBlock());
    2823    #ifndef NDEBUG
     
    476471}
    477472
    478 }
     473/** ------------------------------------------------------------------------------------------------------------- *
     474 * @brief negationsShouldImmediatelySucceedTheirLiteral
     475 *
     476 * Assuming that most negations are actually replaced by an ANDC operations or that we only need a literal or its
     477 * negation, by pushing all negations up to immediately succeed the literal we should generate equally efficient
     478 * code whilst simplifying analysis.
     479 *
     480 * TODO: this theory needs evaluation.
     481 ** ------------------------------------------------------------------------------------------------------------- */
     482void Simplifier::negationsShouldImmediatelySucceedTheirLiteral(PabloBlock * const block) {
     483    Statement * stmt = block->front();
     484    while (stmt) {
     485        Statement * next = stmt->getNextNode();
     486        if (isa<If>(stmt)) {
     487            negationsShouldImmediatelySucceedTheirLiteral(cast<If>(stmt)->getBody());
     488        } else if (isa<While>(stmt)) {
     489            negationsShouldImmediatelySucceedTheirLiteral(cast<While>(stmt)->getBody());
     490        } else if (isa<Not>(stmt)) {
     491            PabloAST * op = cast<Not>(stmt)->getExpr();
     492            if (LLVM_LIKELY(isa<Statement>(op))) {
     493                PabloBlock * scope = cast<Statement>(op)->getParent();
     494                // If the operand is not in a nested scope, we can move it.
     495                for (;;) {
     496                    if (LLVM_UNLIKELY(scope == nullptr)) {
     497                        stmt->insertAfter(cast<Statement>(op));
     498                        break;
     499                    }
     500                    scope = scope->getParent();
     501                    if (LLVM_UNLIKELY(scope == block)) {
     502                        break;
     503                    }
     504                }
     505            }
     506        }
     507        stmt = next;
     508    }
     509}
     510
     511}
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_simplifier.hpp

    r4886 r4919  
    1515    Simplifier() = default;
    1616private:
     17    static void negationsShouldImmediatelySucceedTheirLiteral(PabloBlock * const block);
    1718    static void eliminateRedundantCode(PabloBlock * const block, ExpressionTable * predecessor = nullptr);
    1819    static PabloAST * fold(Variadic * const var, PabloBlock * const block);
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/schedulingprepass.cpp

    r4899 r4919  
    33#include <boost/circular_buffer.hpp>
    44#include <boost/container/flat_set.hpp>
     5#include <boost/container/flat_map.hpp>
    56#include <pablo/analysis/pabloverifier.hpp>
     7#include <boost/graph/adjacency_list.hpp>
     8#include <unordered_map>
    69
    710#include <pablo/printer_pablos.h>
     
    1316namespace pablo {
    1417
     18using DependencyGraph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::bidirectionalS, std::vector<PabloAST *>>;
     19using Vertex = DependencyGraph::vertex_descriptor;
     20using Map = std::unordered_map<const PabloAST *, Vertex>;
     21using ReadyPair = std::pair<unsigned, Vertex>;
     22using ReadySet = std::vector<ReadyPair>;
     23
     24using weight_t = unsigned;
     25using TypeId = PabloAST::ClassTypeId;
     26using LiveSet = flat_set<Vertex>;
     27
     28void schedule(PabloBlock * const block);
     29
     30/** ------------------------------------------------------------------------------------------------------------- *
     31 * @brief printGraph
     32 ** ------------------------------------------------------------------------------------------------------------- */
     33template <typename DependencyGraph>
     34static void printGraph(const DependencyGraph & G, const std::vector<unsigned> & ordering, const std::string name, raw_ostream & out) {
     35    out << "*******************************************\n";
     36    out << "digraph " << name << " {\n";
     37    for (auto u : make_iterator_range(vertices(G))) {
     38        if (G[u].empty()) {
     39            continue;
     40        }
     41        out << "v" << u << " [label=\"" << ordering[u] << " : ";
     42        bool newLine = false;
     43        for (PabloAST * expr : G[u]) {
     44            if (newLine) {
     45                out << '\n';
     46            }
     47            newLine = true;
     48            if (isa<Statement>(expr)) {
     49                PabloPrinter::print(cast<Statement>(expr), out);
     50            } else {
     51                PabloPrinter::print(expr, out);
     52            }
     53        }
     54        out << "\"];\n";
     55    }
     56    for (auto e : make_iterator_range(edges(G))) {
     57        out << "v" << source(e, G) << " -> v" << target(e, G);
     58        out << ";\n";
     59    }
     60
     61    out << "}\n\n";
     62    out.flush();
     63}
     64
    1565/** ------------------------------------------------------------------------------------------------------------- *
    1666 * @brief resolveNestedUsages
    1767 ** ------------------------------------------------------------------------------------------------------------- */
    18 void SchedulingPrePass::resolveNestedUsages(Statement * const root, Statement * const stmt, Graph & G, Map & M, PabloBlock * const block) {
     68static void resolveNestedUsages(Statement * const root, Statement * const stmt, DependencyGraph & G, Map & M, PabloBlock * const block) {
    1969    for (PabloAST * use : stmt->users()) {
    2070        if (LLVM_LIKELY(isa<Statement>(use))) {
     
    2373                for (PabloBlock * prior = scope->getParent(); prior; scope = prior, prior = prior->getParent()) {
    2474                    if (prior == block) {
    25                         auto s = mResolvedScopes.find(scope);
    26                         assert (s != mResolvedScopes.end());
    27                         assert (s->second);
    28                         auto v = M.find(s->second);
     75                        assert (scope->getBranch());
     76                        auto v = M.find(scope->getBranch());
    2977                        assert (v != M.end());
    3078                        auto u = M.find(root);
     
    4189}
    4290
    43 ///** ------------------------------------------------------------------------------------------------------------- *
    44 // * @brief printGraph
    45 // ** ------------------------------------------------------------------------------------------------------------- */
    46 //template <typename Graph>
    47 //static void printGraph(const Graph & G, const std::vector<unsigned> & ordering, const std::string name, raw_ostream & out) {
    48 //    out << "*******************************************\n";
    49 //    out << "digraph " << name << " {\n";
    50 //    for (auto u : make_iterator_range(vertices(G))) {
    51 //        out << "v" << u << " [label=\"" << ordering[u] << " : ";
    52 //        PabloPrinter::print(G[u], out);
    53 //        out << "\"];\n";
    54 //    }
    55 //    for (auto e : make_iterator_range(edges(G))) {
    56 //        out << "v" << source(e, G) << " -> v" << target(e, G);
    57 //        out << ";\n";
    58 //    }
    59 
    60 //    out << "}\n\n";
    61 //    out.flush();
    62 //}
    63 
    64 /** ------------------------------------------------------------------------------------------------------------- *
    65  * @brief schedule
    66  ** ------------------------------------------------------------------------------------------------------------- */
    67 void SchedulingPrePass::schedule(PabloBlock * const block) {
    68     Graph G;
     91/** ------------------------------------------------------------------------------------------------------------- *
     92 * @brief resolveNestedUsages
     93 ** ------------------------------------------------------------------------------------------------------------- */
     94static void computeDependencyGraph(DependencyGraph & G, PabloBlock * const block) {
    6995    Map M;
    70 
    7196    // Construct a use-def graph G representing the current scope block
    7297    for (Statement * stmt : *block) {
    73         if (isa<If>(stmt) || isa<While>(stmt)) {
    74             PabloBlock * body = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
    75             mResolvedScopes.emplace(body, stmt);
    76             schedule(body);
    77         }
    78         const auto u = add_vertex(stmt, G);
     98        if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     99            schedule(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody());
     100        }
     101        const auto u = add_vertex({stmt}, G);
    79102        M.insert(std::make_pair(stmt, u));
    80103        for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
     
    94117                        // Was this instruction computed by a nested block?
    95118                        if (prior == block) {
    96                             auto s = mResolvedScopes.find(scope);
    97                             assert (s != mResolvedScopes.end());
    98                             assert (s->second);
    99                             auto v = M.find(s->second);
     119                            assert (scope->getBranch());
     120                            auto v = M.find(scope->getBranch());
    100121                            assert (v != M.end());
    101122                            if (v->second != u) {
     
    109130        }
    110131    }
    111     // Do a second pass to ensure that we've accounted for any nested usage of a statement
     132    // Do a second pass to ensure that we've accounted for any nested usage of an If or While statement
    112133    for (Statement * stmt : *block) {
    113134        if (LLVM_UNLIKELY(isa<If>(stmt))) {
     
    123144        }
    124145    }
    125 
     146    // Contract the graph
     147    for (;;) {
     148        bool done = true;
     149        for (const Vertex u : make_iterator_range(vertices(G))) {
     150            if (out_degree(u, G) == 1) {
     151                const Vertex v = target(*(out_edges(u, G).first), G);
     152                G[v].insert(G[v].begin(), G[u].begin(), G[u].end());
     153                for (auto e : make_iterator_range(in_edges(u, G))) {
     154                    add_edge(source(e, G), v, G);
     155                }
     156                G[u].clear();
     157                clear_vertex(u, G);
     158                done = false;
     159            }
     160        }
     161        if (done) {
     162            break;
     163        }
     164    }
     165
     166
     167}
     168
     169/** ------------------------------------------------------------------------------------------------------------- *
     170 * @brief computeGraphOrdering
     171 ** ------------------------------------------------------------------------------------------------------------- */
     172std::vector<weight_t> computeGraphOrdering(const DependencyGraph & G) {
    126173    // Determine the bottom-up ordering of G
    127     std::vector<unsigned> ordering(num_vertices(G));
     174    std::vector<weight_t> ordering(num_vertices(G));
    128175    circular_buffer<Vertex> Q(num_vertices(G));
    129176    for (const Vertex u : make_iterator_range(vertices(G))) {
    130         if (out_degree(u, G) == 0) {
     177        if (out_degree(u, G) == 0 && G[u].size() > 0) {
    131178            ordering[u] = 1;
    132179            Q.push_back(u);
    133180        }
    134181    }
    135 
    136182    while (!Q.empty()) {
    137183        const Vertex u = Q.front();
     
    141187            const Vertex v = source(ei, G);
    142188            if (ordering[v] == 0) {
    143                 unsigned weight = 0;
     189                weight_t weight = 0;
    144190                bool ready = true;
    145191                for (const auto ej : make_iterator_range(out_edges(v, G))) {
    146192                    const Vertex w = target(ej, G);
    147                     const unsigned t = ordering[w];
     193                    const weight_t t = ordering[w];
    148194                    if (t == 0) {
    149195                        ready = false;
     
    155201                    assert (weight < std::numeric_limits<unsigned>::max());
    156202                    assert (weight > 0);
    157                     ordering[v] = (weight + 1);
     203                    ordering[v] = weight + 1;
    158204                    Q.push_back(v);
    159205                }
     
    161207        }
    162208    }
     209    return ordering;
     210}
     211
     212/** ------------------------------------------------------------------------------------------------------------- *
     213 * @brief schedule
     214 ** ------------------------------------------------------------------------------------------------------------- */
     215void schedule(PabloBlock * const block) {
     216    DependencyGraph G;
     217    computeDependencyGraph(G, block);
     218    std::vector<weight_t> ordering = computeGraphOrdering(G);
     219
     220//    raw_os_ostream out(std::cerr);
     221//    printGraph(G, ordering, "G", out);
    163222
    164223    // Compute the initial ready set
    165224    ReadySet readySet;
    166225    for (const Vertex u : make_iterator_range(vertices(G))) {
    167         if (in_degree(u, G) == 0) {
     226        if (in_degree(u, G) == 0 && G[u].size() > 0) {
    168227            readySet.emplace_back(ordering[u], u);
    169228        }
     
    180239    block->setInsertPoint(nullptr);
    181240
    182 
    183     // Rewrite the AST using the bottom-up ordering
     241    // Rewrite the AST
    184242    while (!readySet.empty()) {
    185 
    186         // Scan through the ready set to determine which one 'kills' the greatest number of inputs
    187         // NOTE: the readySet is kept in sorted "distance to sink" order; thus those closest to a
    188         // sink will be evaluated first.
    189         double best = 0;
     243        DependencyGraph::degree_size_type killed = 0;
    190244        auto chosen = readySet.begin();
    191245        for (auto ri = readySet.begin(); ri != readySet.end(); ++ri) {
    192             const Vertex u = std::get<1>(*ri);
    193             const PabloAST * const expr = G[u];
    194             if (expr && (isa<Assign>(expr) || isa<Next>(expr))) {
     246            DependencyGraph::degree_size_type kills = 0;
     247            for (auto e : make_iterator_range(in_edges(ri->first, G))) {
     248                if (out_degree(source(e, G), G) == 1) {
     249                    ++kills;
     250                }
     251            }
     252            if (kills > killed) {
    195253                chosen = ri;
    196                 break;
    197             }
    198             double progress = 0;
    199             for (auto ei : make_iterator_range(in_edges(u, G))) {
    200                 const auto v = source(ei, G);
    201                 const auto totalUsesOfIthOperand = out_degree(v, G);
    202                 if (LLVM_UNLIKELY(totalUsesOfIthOperand == 0)) {
    203                     progress += 1.0;
    204                 } else {
    205                     unsigned unscheduledUsesOfIthOperand = 0;
    206                     for (auto ej : make_iterator_range(out_edges(v, G))) {
    207                         if (ordering[target(ej, G)]) { // if this edge leads to an unscheduled statement
    208                             ++unscheduledUsesOfIthOperand;
    209                         }
    210                     }
    211                     progress += std::pow((double)(totalUsesOfIthOperand - unscheduledUsesOfIthOperand + 1) / (double)(totalUsesOfIthOperand), 2);
    212                 }
    213             }
    214             if (progress > best) {
    215                 chosen = ri;
    216                 best = progress;
     254                killed = kills;
    217255            }
    218256        }
     
    221259        std::tie(weight, u) = *chosen;
    222260        readySet.erase(chosen);
     261        clear_in_edges(u, G);
    223262
    224263        assert ("Error: SchedulingPrePass is attempting to reschedule a statement!" && (weight > 0));
    225264
    226265        // insert the statement then mark it as written ...
    227         block->insert(cast<Statement>(G[u]));
     266        for (PabloAST * expr : G[u]) {
     267            block->insert(cast<Statement>(expr));
     268        }
     269
    228270        ordering[u] = 0;
    229271        // Now check whether any new instructions are ready
     
    245287        }
    246288    }
     289
    247290}
    248291
     
    250293 * @brief schedule
    251294 ** ------------------------------------------------------------------------------------------------------------- */
    252 void SchedulingPrePass::schedule(PabloFunction & function) {
    253     mResolvedScopes.emplace(function.getEntryBlock(), nullptr);
     295void schedule(PabloFunction & function) {
    254296    schedule(function.getEntryBlock());
    255297}
     
    259301 ** ------------------------------------------------------------------------------------------------------------- */
    260302bool SchedulingPrePass::optimize(PabloFunction & function) {
    261     SchedulingPrePass pp;
    262     pp.schedule(function);
     303    schedule(function);
    263304    #ifndef NDEBUG
    264305    PabloVerifier::verify(function, "post-scheduling-prepass");
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/schedulingprepass.h

    r4896 r4919  
    11#ifndef SCHEDULINGPREPASS_H
    22#define SCHEDULINGPREPASS_H
    3 
    4 #include <boost/container/flat_map.hpp>
    5 #include <boost/graph/adjacency_list.hpp>
    6 #include <unordered_map>
    73
    84namespace pablo {
    95
    106class PabloFunction;
    11 class PabloBlock;
    12 class Statement;
    13 class PabloAST;
    147
    158class SchedulingPrePass {
    169public:
    17     using Graph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::bidirectionalS, PabloAST *>;
    18     using Vertex = Graph::vertex_descriptor;
    19     using Map = std::unordered_map<const PabloAST *, Vertex>;
    20     using ScopeMap = boost::container::flat_map<const PabloBlock *, Statement *>;
    21     using ReadyPair = std::pair<unsigned, Vertex>;
    22     using ReadySet = std::vector<ReadyPair>;
    23 public:
    2410    static bool optimize(PabloFunction & function);
    2511protected:
    26     void schedule(PabloFunction & function);
    27     void schedule(PabloBlock * const block);
    28     void resolveNestedUsages(Statement * const root, Statement * const stmt, Graph & G, Map & M, PabloBlock * const block);
    2912    SchedulingPrePass() = default;
    30 private:
    31     ScopeMap mResolvedScopes;
    3213};
    33 
    3414
    3515}
    3616
    37 
    3817#endif // SCHEDULINGPREPASS_H
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r4900 r4919  
    4747#include <llvm/ADT/Twine.h>
    4848#include <iostream>
     49#include <llvm/Support/raw_ostream.h>
     50#include <llvm/Support/FileSystem.h>
     51
     52//#include <llvm/PassManager.h>
     53//#include <llvm/Transforms/IPO/PassManagerBuilder.h>
     54
     55#include <hrtime.h>
    4956
    5057static cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
    5158static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("Print LLVM IR generated by Pablo Compiler."), cl::cat(eIRDumpOptions));
     59static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(eIRDumpOptions));
     60
    5261
    5362static cl::OptionCategory fTracingOptions("Run-time Tracing Options", "These options control execution traces.");
     
    7685llvm::Function * PabloCompiler::compile(PabloFunction * function) {
    7786
     87    #ifdef PRINT_TIMING_INFORMATION
     88    const timestamp_t pablo_compilation_start = read_cycle_counter();
     89    #endif
    7890 
    7991    PabloBlock * const mainScope = function->getEntryBlock();
     
    126138   
    127139    // Clean up
    128     delete mCarryManager; mCarryManager = nullptr;
    129    
     140    delete mCarryManager;
     141    mCarryManager = nullptr;
     142   
     143    #ifdef PRINT_TIMING_INFORMATION
     144    const timestamp_t pablo_compilation_end = read_cycle_counter();
     145    std::cerr << "PABLO COMPILATION TIME: " << (pablo_compilation_end - pablo_compilation_start) << std::endl;
     146    #endif
     147
     148//    llvm::PassManager pm;
     149//    llvm::PassManagerBuilder pmb;
     150//    pmb.OptLevel = 3;
     151//    pmb.populateModulePassManager(pm);
     152//    pm.run(*mMod);
     153
    130154    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
    131         mMod->dump();
     155
     156        if (IROutputFilename.empty()) {
     157            mMod->dump();
     158        } else {
     159            std::error_code error;
     160            llvm::raw_fd_ostream out(IROutputFilename, error, sys::fs::OpenFlags::F_None);
     161            mMod->print(out, nullptr);
     162        }
    132163    }
    133164
  • icGREP/icgrep-devel/icgrep/pablo/passes/factorizedfg.cpp

    r4899 r4919  
    66#include <pablo/passes/flattenassociativedfg.h>
    77#include <boost/container/flat_set.hpp>
     8#include <boost/circular_buffer.hpp>
     9#include <boost/graph/adjacency_list.hpp>
     10#include <boost/graph/adjacency_matrix.hpp>
     11#include <boost/graph/topological_sort.hpp>
     12#include <queue>
     13#include <tuple>
     14
    815#include <set>
     16#include <type_traits>
     17
     18#include <pablo/printer_pablos.h>
     19#include <iostream>
    920
    1021using namespace boost;
    1122using namespace boost::container;
    1223
    13 
    1424namespace pablo {
    1525
    16 using VertexSet = std::vector<PabloAST *>;
    17 using Biclique = std::pair<VertexSet, VertexSet>; // [{Operands}, {Users}]
    18 using BicliqueSet = std::vector<Biclique>;
    1926using TypeId = PabloAST::ClassTypeId;
    2027
    21 ///** ------------------------------------------------------------------------------------------------------------- *
    22 // * @brief lower
    23 // ** ------------------------------------------------------------------------------------------------------------- */
    24 //inline PabloAST * FactorizeDFG::lower(Variadic * const var, PabloBlock * block, OrderingMap & order) {
    25 //    PabloAST * result = nullptr;
    26 //    assert (var->getNumOperands() > 2);
    27 ////    // Sort the operands by their order in the current AST
    28 ////    std::sort(var->begin(), var->end(),
    29 ////        [&order](const PabloAST * const a, const PabloAST * const b)
    30 ////    {
    31 ////        return order.of(a) > order.of(b);
    32 ////    });
    33 ////    unsigned operands = var->getNumOperands();
    34 ////    // Swap the final two operands so that we can just append the temporary calculations onto the end
    35 ////    // and trust that the PENULTIMATE operand is the "latest" in the AST.
    36 ////    PabloAST * const item = var->getOperand(operands - 1);
    37 ////    var->setOperand(operands - 1, var->getOperand(operands - 2));
    38 ////    var->setOperand(operands - 2, item);
    39 //    // Finally rewrite all of the variadic statements as binary operations
    40 //    block->setInsertPoint(var->getPrevNode());
    41 //    while (var->getNumOperands() > 1) {
    42 //        PabloAST * const op2 = var->removeOperand(1);
    43 //        PabloAST * const op1 = var->removeOperand(0);
    44 //        assert (op1 != op2);
    45 //        if (isa<And>(var)) {
    46 //            result = block->createAnd(op1, op2);
    47 //        } else if (isa<Or>(var)) {
    48 //            result = block->createOr(op1, op2);
    49 //        } else { // if (isa<Xor>(var)) {
    50 //            result = block->createXor(op1, op2);
    51 //        }
    52 //        var->addOperand(result);
    53 //    }
    54 //    assert (result);
    55 //    return result;
    56 //}
    57 
    58 ///** ------------------------------------------------------------------------------------------------------------- *
    59 // * @brief lower
    60 // ** ------------------------------------------------------------------------------------------------------------- */
    61 //void FactorizeDFG::lower(PabloBlock * const block, OrderingMap & parent) {
    62 //    OrderingMap order(parent);
    63 //    Statement * stmt = block->front();
    64 //    while (stmt) {
    65 //        if (isa<If>(stmt) || isa<While>(stmt)) {
    66 //            lower(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody(), order);
    67 //            if (isa<If>(stmt)) {
    68 //                for (Assign * def : cast<If>(stmt)->getDefined()) {
    69 //                    order.enumerate(def);
    70 //                }
    71 //            } else { // if (isa<While>(stmt)) {
    72 //                for (Next * var : cast<While>(stmt)->getVariants()) {
    73 //                    order.enumerate(var);
    74 //                }
    75 //            }
    76 //        } else {
    77 //            if ((isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt)) && (stmt->getNumOperands() > 2)) {
    78 //                // We should maintain an ordering list and sort each Variadic according to it prior to writing them out.
    79 //                PabloAST * result = lower(cast<Variadic>(stmt), block, order);
    80 //                order.enumerate(result);
    81 //                stmt = stmt->replaceWith(result, true);
    82 //                continue;
    83 //            }
    84 //            order.enumerate(stmt);
    85 //        }
    86 //        stmt = stmt->getNextNode();
    87 //    }
    88 //}
    89 
    90 /** ------------------------------------------------------------------------------------------------------------- *
    91  * @brief finalize
    92  ** ------------------------------------------------------------------------------------------------------------- */
    93 inline Statement * FactorizeDFG::lower(Variadic * const var, PabloBlock * block) {
    94     PabloAST * result = nullptr;
     28// TODO: move the "tryToPartiallyExtractVariadic" from the coalescedfg class into here to run prior to lowering?
     29
     30/** ------------------------------------------------------------------------------------------------------------- *
     31 * @brief lower
     32 *
     33 * The goal of this function is to try and lower a variadic operation into binary form while attempting to mitigate
     34 * register pressure under the assumption that LLVM is not going to significantly change the IR (which was observed
     35 * when assessing the ASM output of LLVM 3.6.1 using O3.)
     36 ** ------------------------------------------------------------------------------------------------------------- */
     37inline Statement * FactorizeDFG::lower(Variadic * const var, PabloBlock * block) const {
     38
     39    using Graph = adjacency_list<hash_setS, vecS, bidirectionalS, PabloAST *, int>;
     40    using Vertex = Graph::vertex_descriptor;
     41    using Map = flat_map<const PabloAST *, Vertex>;
     42    using SchedulingData = std::pair<unsigned, PabloAST *>;
     43    using SchedulingQueue = std::priority_queue<SchedulingData, std::vector<SchedulingData>, std::greater<SchedulingData>>;
     44
     45    const unsigned NOT_STEP = 1;
     46    const unsigned BOOLEAN_STEP = 10;
     47    const unsigned OTHER_STEP = 30;
     48    const unsigned DESIRED_GAP = 30;
     49
     50    assert (var->getParent() == block);
    9551    assert (var->getNumOperands() > 2);
    96     block->setInsertPoint(var->getPrevNode());
    97     while (var->getNumOperands() > 1) {
    98         PabloAST * const op2 = var->removeOperand(1);
    99         PabloAST * const op1 = var->removeOperand(0);
    100         assert (op1 != op2);
     52
     53    // Begin by computing a graph G depicting which operands are used by which statements. We know that
     54    // all of them are used by the Variadic but they might be used elsewhere in the current block.
     55
     56
     57    // If this operand was defined in this block, we want to indicate that in G as well so that we
     58    // don't mistakingly pair them together.
     59
     60    const unsigned operands = var->getNumOperands();
     61
     62    Graph G(operands + 1);
     63    Map M;
     64
     65    G[operands] = var;
     66    M.emplace(var, operands);
     67
     68    for (Vertex u = 0; u != operands; ++u) {
     69        PabloAST * const op = var->getOperand(u);
     70        G[u] = op;
     71        M.emplace(op, u);
     72        assert ("AST structural error!" && (op->getNumUses() > 0));
     73        for (PabloAST * user : op->users()) {
     74            if (LLVM_LIKELY(isa<Statement>(user))) {
     75                Statement * usage = cast<Statement>(user);
     76                PabloBlock * scope = usage->getParent();
     77                while (scope) {
     78                    if (scope == block) {
     79                        auto f = M.find(usage);
     80                        Vertex v = 0;
     81                        if (f == M.end()) {
     82                            v = add_vertex(usage, G);
     83                            M.emplace(usage, v);
     84                        } else {
     85                            v = f->second;
     86                        }
     87                        G[add_edge(u, v, G).first] = 0;
     88                        break;
     89                    }
     90                    usage = scope->getBranch();
     91                    scope = scope->getParent();
     92                }
     93            }
     94        }
     95    }
     96
     97    assert (M.count(var) == 1);
     98
     99    unsigned estQuantum = 0;
     100    circular_buffer<std::pair<unsigned, Vertex>> defs(operands);
     101    for (Statement * stmt : *block) {
     102        switch (stmt->getClassTypeId()) {
     103            case TypeId::And:
     104            case TypeId::Or:
     105            case TypeId::Xor:
     106                estQuantum += BOOLEAN_STEP;
     107                break;
     108            case TypeId::Not:
     109                estQuantum += NOT_STEP;
     110                break;
     111            default:
     112                estQuantum += OTHER_STEP;
     113        }
     114        auto f = M.find(stmt);
     115        if (LLVM_UNLIKELY(f != M.end())) {
     116            const auto u = f->second;
     117            if (LLVM_UNLIKELY(u == operands)) {
     118                assert (stmt == var);
     119                break;
     120            }
     121            for (auto e : make_iterator_range(in_edges(u, G)))   {
     122                G[e] = estQuantum;
     123            }
     124            if (u < operands) {
     125                defs.push_back(std::make_pair(estQuantum + DESIRED_GAP, u));
     126            }
     127        }
     128        // Annotate G to indicate when we expect a statement will be available
     129        while (defs.size() > 0) {
     130            unsigned availQuantum = 0;
     131            Vertex u = 0;
     132            std::tie(availQuantum, u) = defs.front();
     133            if (availQuantum > estQuantum) {
     134                break;
     135            }
     136            defs.pop_front();
     137            auto f = M.find(stmt);
     138            Vertex v = 0;
     139            if (f == M.end()) {
     140                v = add_vertex(stmt, G);
     141                M.emplace(stmt, v);
     142            } else {
     143                v = f->second;
     144            }
     145            G[add_edge(u, v, G).first] = estQuantum;
     146        }
     147    }
     148
     149    for (Vertex u = 0; u < operands; ++u) {
     150        unsigned quantum = 0;
     151        Vertex v = operands;
     152        for (auto e : make_iterator_range(out_edges(u, G))) {
     153            if (quantum < G[e]) {
     154                quantum = G[e];
     155                v = target(e, G);
     156            }
     157        }
     158        clear_out_edges(u, G);
     159        G[add_edge(u, v, G).first] = quantum;
     160    }
     161
     162    for (auto e : make_iterator_range(in_edges(operands, G)))   {
     163        G[e] = estQuantum;
     164    }
     165
     166    assert (num_edges(G) == var->getNumOperands());
     167
     168    SchedulingQueue Q;
     169    while (num_edges(G) > 0) {
     170
     171        Graph::edge_descriptor f;
     172        unsigned quantum = std::numeric_limits<unsigned>::max();
     173        for (auto e : make_iterator_range(edges(G))) {
     174            if (in_degree(source(e, G), G) == 0) {
     175                if (quantum > G[e]) {
     176                    quantum = G[e];
     177                    f = e;
     178                }
     179            }
     180        }
     181
     182        assert ("No edge selected!" && (quantum < std::numeric_limits<unsigned>::max()));
     183
     184        const auto u = source(f, G);
     185        assert (u < operands);
     186        const auto v = target(f, G);
     187        assert (isa<Statement>(G[v]));
     188        // Since this might have been a target of a prior pairing, read the original operand value instead of
     189        // G when checking which value is indicated by u.
     190        PabloAST * const op1 = var->getOperand(u);
     191        block->setInsertPoint(cast<Statement>(G[v]));
     192        remove_edge(f, G);
     193
     194        if (LLVM_LIKELY(Q.size() > 0)) {
     195            unsigned minQuantum = 0; PabloAST * op2 = nullptr;
     196            std::tie(minQuantum, op2) = Q.top();
     197            if (minQuantum < quantum) {
     198                Q.pop();
     199                PabloAST * result = nullptr;
     200                if (isa<And>(var)) {
     201                    result = block->createAnd(op1, op2);
     202                } else if (isa<Or>(var)) {
     203                    result = block->createOr(op1, op2);
     204                } else { // if (isa<Xor>(var)) {
     205                    result = block->createXor(op1, op2);
     206                }
     207                Q.emplace(quantum + DESIRED_GAP, result);
     208                G[v] = result; // update the insertion point node value
     209                continue;
     210            }
     211        }
     212        Q.emplace(quantum, op1);
     213    }
     214
     215    // If we've done our best to schedule the statements and have operands remaining in our queue, generate a
     216    // tree of the remaining operands.
     217    while (Q.size() > 1) {
     218        unsigned q1 = 0; PabloAST * op1 = nullptr;
     219        std::tie(q1, op1) = Q.top();
     220        Q.pop();
     221
     222        unsigned q2 = 0; PabloAST * op2 = nullptr;
     223        std::tie(q2, op2) = Q.top();
     224        Q.pop();
     225
     226        PabloAST * result = nullptr;
    101227        if (isa<And>(var)) {
    102228            result = block->createAnd(op1, op2);
     
    106232            result = block->createXor(op1, op2);
    107233        }
    108         var->addOperand(result);
    109     }
     234        Q.emplace(std::max(q1, q2) + DESIRED_GAP, result);
     235    }
     236
     237    assert (Q.size() == 1);
     238    return var->replaceWith(std::get<1>(Q.top()));
     239}
     240
     241#if 0
     242using EnumerationMap = flat_map<const PabloAST *, unsigned>;
     243
     244inline void enumerate(PabloBlock * const block, EnumerationMap & M, std::vector<Statement *> & S) {
     245    for (Statement * stmt : *block) {
     246        M.emplace(stmt, static_cast<int>(S.size()));
     247        S.push_back(stmt);
     248    }
     249}
     250
     251inline static unsigned indexOf(const PabloAST * const expr, const EnumerationMap & M) {
     252    assert (expr);
     253    const auto f = M.find(expr);
     254    assert (f != M.end());
     255    return f->second;
     256}
     257
     258/** ------------------------------------------------------------------------------------------------------------- *
     259 * @brief lower
     260 *
     261 * The goal of this function is to try and lower a variadic operation into binary form while attempting to mitigate
     262 * register pressure under the assumption that LLVM is not going to significantly change the IR (which was observed
     263 * when assessing the ASM output of LLVM 3.6.1 using O3.)
     264 ** ------------------------------------------------------------------------------------------------------------- */
     265inline Statement * FactorizeDFG::lower(Variadic * const var, PabloBlock * block) const {
     266
     267    assert (var->getNumOperands() > 2);
     268
     269    EnumerationMap M;
     270    std::vector<Statement *> S;
     271
     272    enumerate(block, M, S);
     273
     274    const int varIdx = indexOf(var, M);
     275
     276    circular_buffer<std::tuple<unsigned, unsigned, PabloAST *>> lowered(var->getNumOperands());
     277    for (unsigned i = 0; i != var->getNumOperands(); ++i) {
     278        PabloAST * const op = var->getOperand(i);
     279
     280        unsigned defIdx = 0;
     281        if (LLVM_LIKELY(isa<Statement>(op))) {
     282            Statement * producer = cast<Statement>(op);
     283            PabloBlock * scope = producer->getParent();
     284            while (scope && (scope != block)) {
     285                producer = scope->getBranch();
     286                scope = scope->getParent();
     287            }
     288            defIdx = producer ? indexOf(producer, M) : 0;
     289        }
     290
     291        unsigned useIdx = defIdx;
     292        for (PabloAST * user : op->users()) {
     293            // if this user is in the current scope or a nested scope of this block
     294            if ((cast<Statement>(user)->getParent() == block) && (user != var)) {
     295                useIdx = std::max(useIdx, indexOf(user, M));
     296            }
     297        }
     298        if (useIdx > varIdx) {
     299            useIdx = varIdx;
     300        }
     301        assert (!lowered.full());
     302        lowered.push_back(std::make_tuple(useIdx, defIdx, op));
     303    }
     304
     305    std::sort(lowered.begin(), lowered.end());
     306
     307    PabloAST * result = nullptr;
     308    while (lowered.size() > 1) {
     309
     310        PabloAST * op1, * op2;
     311        unsigned def1, def2;
     312        unsigned use1, use2;
     313
     314        std::tie(use1, def1, op1) = lowered.front(); lowered.pop_front();
     315        std::tie(use2, def2, op2) = lowered.front(); lowered.pop_front();
     316
     317//        if (((def1 < def2) ? ((def1 + 3) > def2) : ((def2 + 3) > def1)) && (lowered.size() > 0)) {
     318//            unsigned def3 = def1;
     319//            unsigned use3 = use1;
     320//            PabloAST * op3 = op1;
     321//            std::tie(use1, def1, op1) = lowered.front(); lowered.pop_front();
     322//            lowered.push_front(std::make_tuple(use3, def3, op3));
     323//        }
     324
     325        // Is the last use of op1 prior to the definition of op2?
     326        if (use1 < def2) {
     327            use1 = def2;
     328        }
     329
     330        block->setInsertPoint(S[use1]);
     331
     332        if (isa<And>(var)) {
     333            result = block->createAnd(op1, op2);
     334        } else if (isa<Or>(var)) {
     335            result = block->createOr(op1, op2);
     336        } else { // if (isa<Xor>(var)) {
     337            result = block->createXor(op1, op2);
     338        }
     339
     340        S[use1] = cast<Statement>(result);
     341
     342        assert (!lowered.full());
     343        lowered.push_front(std::make_tuple(use1, use1, result));
     344    }
     345    assert (result);
     346
    110347    return var->replaceWith(result, true);
    111348}
    112 
    113 /** ------------------------------------------------------------------------------------------------------------- *
    114  * @brief finalize
    115  ** ------------------------------------------------------------------------------------------------------------- */
    116 void FactorizeDFG::lower(PabloBlock * const block) {
     349#endif
     350
     351/** ------------------------------------------------------------------------------------------------------------- *
     352 * @brief lower
     353 ** ------------------------------------------------------------------------------------------------------------- */
     354void FactorizeDFG::lower(PabloBlock * const block) const {
    117355    Statement * stmt = block->front();
    118356    while (stmt) {
    119357        if (isa<If>(stmt) || isa<While>(stmt)) {
    120358            lower(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody());
    121         } else if ((isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt)) && (stmt->getNumOperands() > 2)) {
    122             // We should maintain an ordering list and sort each Variadic according to it prior to writing them out.
     359        } else if ((stmt->getNumOperands() > 2) && (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt))) {
    123360            stmt = lower(cast<Variadic>(stmt), block);
    124361            continue;
     
    128365}
    129366
    130 ///** ------------------------------------------------------------------------------------------------------------- *
    131 // * @brief lower
    132 // ** ------------------------------------------------------------------------------------------------------------- */
    133 //inline void FactorizeDFG::lower(PabloFunction & function) {
    134 //    OrderingMap ordering;
    135 //    for (unsigned i = 0; i != function.getNumOfParameters(); ++i) {
    136 //        ordering.enumerate(function.getParameter(i));
    137 //    }
    138 //    lower(function.getEntryBlock(), ordering);
    139 //}
     367/** ------------------------------------------------------------------------------------------------------------- *
     368 * @brief lower
     369 ** ------------------------------------------------------------------------------------------------------------- */
     370inline void FactorizeDFG::lower(PabloFunction & function) const {
     371    lower(function.getEntryBlock());
     372}
     373
     374#if 0
     375
     376using Graph = boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, PabloAST *>;
     377using Vertex = Graph::vertex_descriptor;
     378using Map = flat_map<const PabloAST *, Vertex>;
     379
     380using VertexSet = std::vector<PabloAST *>;
     381using Biclique = std::pair<VertexSet, VertexSet>; // [{Operands}, {Users}]
     382using BicliqueSet = std::set<Biclique>;
     383using TypeId = PabloAST::ClassTypeId;
    140384
    141385/** ------------------------------------------------------------------------------------------------------------- *
     
    145389 * bicliques" by Alexe et. al. (2003).
    146390 ** ------------------------------------------------------------------------------------------------------------- */
    147 static BicliqueSet enumerateBicliques(Variadic * const var) {
    148     using IntersectionSets = std::set<VertexSet>;
     391inline void FactorizeDFG::enumerateBicliques(Variadic * const var, BicliqueSet & bicliques) {
     392    using IntersectionSets = flat_set<VertexSet>;
    149393
    150394    IntersectionSets B1;
     
    198442    }
    199443
    200     BicliqueSet bicliques;
    201     unsigned userSize = 2;
    202444    for (auto Bi = B.begin(); Bi != B.end(); ++Bi) {
    203         if (userSize <= Bi->size()) {
    204             VertexSet Ai(var->begin(), var->end());
    205             for (const PabloAST * user : *Bi) {
    206                 VertexSet Aj(cast<Variadic>(user)->begin(), cast<Variadic>(user)->end());
    207                 std::sort(Aj.begin(), Aj.end());
    208                 VertexSet Ak;
    209                 Ak.reserve(std::min<unsigned>(Ai.size(), Aj.size()));
    210                 std::set_intersection(Ai.begin(), Ai.end(), Aj.begin(), Aj.end(), std::back_inserter(Ak));
    211                 Ai.swap(Ak);
    212             }
    213             if (Ai.size() > 1) {
    214                 if (userSize < Bi->size()) {
    215                     bicliques.clear();
    216                     userSize = Bi->size();
    217                 }
    218                 bicliques.emplace_back(std::move(Ai), std::move(*Bi));
    219             }
    220         }
    221     }
    222     return bicliques;
    223 }
    224 
    225 /** ------------------------------------------------------------------------------------------------------------- *
    226  * @brief pick
    227  ** ------------------------------------------------------------------------------------------------------------- */
    228 inline static Biclique pick(BicliqueSet && bicliques) {
    229     unsigned best_w = 0;
    230     auto best_c = bicliques.begin();
    231     for (auto c = bicliques.begin(); c != bicliques.end(); ++c) {
    232         const unsigned w = std::get<0>(*c).size();
    233         if (best_w < w) {
    234             best_c = c;
    235             best_w = w;
    236         }
    237     }
    238     return std::move(*best_c);
    239 }
    240 
    241 /** ------------------------------------------------------------------------------------------------------------- *
    242  * @brief chooseInsertionPoint
    243  ** ------------------------------------------------------------------------------------------------------------- */
    244 inline PabloBlock * FactorizeDFG::chooseInsertionScope(const ASTVector & users) {
     445        VertexSet Ai(var->begin(), var->end());
     446        for (const PabloAST * user : *Bi) {
     447            VertexSet Aj(cast<Variadic>(user)->begin(), cast<Variadic>(user)->end());
     448            std::sort(Aj.begin(), Aj.end());
     449            VertexSet Ak;
     450            Ak.reserve(std::min<unsigned>(Ai.size(), Aj.size()));
     451            std::set_intersection(Ai.begin(), Ai.end(), Aj.begin(), Aj.end(), std::back_inserter(Ak));
     452            Ai.swap(Ak);
     453        }
     454        if (Ai.size() > 1 && Bi->size() > 1) {
     455            bicliques.emplace(std::move(Ai), std::move(*Bi));
     456        }
     457    }
     458}
     459
     460/** ------------------------------------------------------------------------------------------------------------- *
     461 * @brief intersects
     462 ** ------------------------------------------------------------------------------------------------------------- */
     463template <class Type>
     464inline bool intersects(const Type & A, const Type & B) {
     465    auto first1 = A.begin(), last1 = A.end();
     466    auto first2 = B.begin(), last2 = B.end();
     467    while (first1 != last1 && first2 != last2) {
     468        if (*first1 < *first2) {
     469            ++first1;
     470        } else if (*first2 < *first1) {
     471            ++first2;
     472        } else {
     473            return true;
     474        }
     475    }
     476    return false;
     477}
     478
     479/** ------------------------------------------------------------------------------------------------------------- *
     480 * @brief independentCliqueSets
     481 ** ------------------------------------------------------------------------------------------------------------- */
     482inline void FactorizeDFG::independentCliqueSets(BicliqueSet & bicliques) {
     483    using IndependentSetGraph = adjacency_list<hash_setS, vecS, undirectedS, unsigned>;
     484
     485    const auto l = bicliques.size();
     486    IndependentSetGraph I(l);
     487
     488    // Initialize our weights and determine the constraints
     489    for (auto i = bicliques.begin(); i != bicliques.end(); ++i) {
     490        I[std::distance(bicliques.begin(), i)] = std::pow(std::get<0>(*i).size(), 2);
     491        for (auto j = i; ++j != bicliques.end(); ) {
     492            if (intersects(i->second, j->second) && intersects(i->first, j->first)) {
     493                add_edge(std::distance(bicliques.begin(), i), std::distance(bicliques.begin(), j), I);
     494            }
     495        }
     496    }
     497
     498    // Use the greedy algorithm to choose our independent set
     499    std::vector<Vertex> selected;
     500    for (;;) {
     501        unsigned w = 0;
     502        Vertex u = 0;
     503        for (unsigned i = 0; i != l; ++i) {
     504            if (I[i] > w) {
     505                w = I[i];
     506                u = i;
     507            }
     508        }
     509        if (w < 2) break;
     510        selected.push_back(u);
     511        I[u] = 0;
     512        for (auto v : make_iterator_range(adjacent_vertices(u, I))) {
     513            I[v] = 0;
     514        }
     515    }
     516
     517    // Sort the selected list and then remove the unselected cliques
     518    std::sort(selected.begin(), selected.end(), std::greater<Vertex>());
     519    auto end = bicliques.end();
     520    for (const unsigned offset : selected) {
     521        end = bicliques.erase(bicliques.begin() + offset + 1, end) - 1;
     522    }
     523    bicliques.erase(bicliques.begin(), end);
     524}
     525
     526/** ------------------------------------------------------------------------------------------------------------- *
     527 * @brief findInsertionPoint
     528 *
     529 * Look for the first user in this scope; if none can be found, look for the last operand.
     530 ** ------------------------------------------------------------------------------------------------------------- */
     531inline PabloBlock * FactorizeDFG::findInsertionPoint(NodeSet & operands, NodeSet & users) const {
    245532
    246533    ScopeSet scopes;
    247534    scopes.reserve(users.size());
    248535
    249     flat_set<PabloBlock *> visited;
    250     visited.reserve(users.size());
    251 
    252536    for (PabloAST * user : users) {
    253         PabloBlock * const scope = cast<Statement>(user)->getParent();
    254         assert (scope);
    255         if (visited.insert(scope).second) {
     537        PabloBlock * const scope = cast<Statement>(user)->getParent(); assert (scope);
     538        if (std::find(scopes.begin(), scopes.end(), scope) == scopes.end()) {
    256539            scopes.push_back(scope);
    257540        }
     541    }
     542
     543    if (LLVM_UNLIKELY(scopes.size() == 1)) {
     544        PabloBlock * const scope = scopes.front();
     545        Statement * stmt = scope->front();
     546        std::sort(users.begin(), users.end());
     547        while (stmt) {
     548            if (LLVM_UNLIKELY(std::binary_search(users.begin(), users.end(), stmt))) {
     549                scope->setInsertPoint(stmt->getPrevNode());
     550                return scope;
     551            }
     552            stmt = stmt->getNextNode();
     553        }
     554        llvm_unreachable("Failed to locate user in single scope block!");
    258555    }
    259556
     
    286583    }
    287584    assert (scopes.size() == 1);
    288     return scopes[0];
    289 }
    290 
    291 /** ------------------------------------------------------------------------------------------------------------- *
    292  * @brief findInsertionPoint
    293  ** ------------------------------------------------------------------------------------------------------------- */
    294 void FactorizeDFG::findInsertionPoint(const ASTVector & operands, PabloBlock * const scope) {
    295     const flat_set<const PabloAST *> ops(operands.begin(), operands.end());
     585
     586    PabloBlock * const scope = scopes.front();
    296587    Statement * stmt = scope->back();
    297     scope->setInsertPoint(nullptr);
     588    std::sort(operands.begin(), operands.end());
    298589    while (stmt) {
    299590        if (isa<If>(stmt)) {
    300591            for (Assign * def : cast<If>(stmt)->getDefined()) {
    301                 if (ops.count(def)) {
     592                if (LLVM_UNLIKELY(std::binary_search(operands.begin(), operands.end(), def))) {
    302593                    scope->setInsertPoint(stmt);
    303                     return;
     594                    return scope;
    304595                }
    305596            }
    306597        } else if (isa<While>(stmt)) {
    307598            for (Next * var : cast<While>(stmt)->getVariants()) {
    308                 if (ops.count(var)) {
     599                if (LLVM_UNLIKELY(std::binary_search(operands.begin(), operands.end(), var))) {
    309600                    scope->setInsertPoint(stmt);
    310                     return;
    311                 }
    312             }
    313         } else if (ops.count(stmt)) {
     601                    return scope;
     602                }
     603            }
     604        } else if (LLVM_UNLIKELY(std::binary_search(operands.begin(), operands.end(), stmt))) {
    314605            scope->setInsertPoint(stmt);
    315             break;
     606            return scope;
    316607        }
    317608        stmt = stmt->getPrevNode();
    318609    }
    319 }
    320 
    321 /** ------------------------------------------------------------------------------------------------------------- *
    322  * @brief cse
    323  ** ------------------------------------------------------------------------------------------------------------- */
    324 inline void FactorizeDFG::cse(Variadic * var) {
    325     while (var->getNumOperands() > 2) {
    326         BicliqueSet bicliques = enumerateBicliques(var);
    327         if (bicliques.empty()) {
    328             break;
    329         }
    330         ASTVector operands;
    331         ASTVector users;
    332         std::tie(operands, users) = pick(std::move(bicliques));
    333         PabloBlock * const block = chooseInsertionScope(users);
    334         findInsertionPoint(operands, block);
     610    scope->setInsertPoint(nullptr);
     611    return scope;
     612}
     613
     614/** ------------------------------------------------------------------------------------------------------------- *
     615 * @brief processBicliques
     616 ** ------------------------------------------------------------------------------------------------------------- */
     617void FactorizeDFG::processBicliques(BicliqueSet & bicliques) const {
     618    independentCliqueSets(bicliques);
     619    for (Biclique & B : bicliques) {
     620        VertexSet & operands = B.first;
     621        VertexSet & users = B.second;
     622        PabloBlock * const block = findInsertionPoint(operands, users);
    335623        Variadic * factored = nullptr;
    336         if (isa<And>(var)) {
     624        if (isa<And>(users.front())) {
    337625            factored = block->createAnd(operands.begin(), operands.end());
    338         } else if (isa<Or>(var)) {
     626        } else if (isa<Or>(users.front())) {
    339627            factored = block->createOr(operands.begin(), operands.end());
    340628        } else { // if (isa<Xor>(var)) {
     
    348636        }
    349637    }
    350 }
    351 
    352 /** ------------------------------------------------------------------------------------------------------------- *
    353  * @brief cse
     638    bicliques.clear();
     639}
     640
     641/** ------------------------------------------------------------------------------------------------------------- *
     642 * @brief factor
    354643 *
    355644 * Perform common subexpression elimination on the Variadic statements
    356645 ** ------------------------------------------------------------------------------------------------------------- */
    357 void FactorizeDFG::cse(PabloBlock * const block) {
     646void FactorizeDFG::factor(PabloBlock * const block, BicliqueSet & bicliques) const {
    358647    Statement * stmt = block->front();
    359648    while (stmt) {
    360         if (isa<If>(stmt) || isa<While>(stmt)) {           
    361             cse(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody());
     649        if (isa<If>(stmt) || isa<While>(stmt)) {
     650            processBicliques(bicliques);
     651            factor(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody(), bicliques);
     652        } else if (stmt->getNumOperands() > 2 && (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt))) {
     653            enumerateBicliques(cast<Variadic>(stmt), bicliques);
     654        }
     655        stmt = stmt->getNextNode();
     656    }
     657    processBicliques(bicliques);
     658}
     659
     660/** ------------------------------------------------------------------------------------------------------------- *
     661 * @brief factor
     662 ** ------------------------------------------------------------------------------------------------------------- */
     663inline void FactorizeDFG::factor(PabloFunction & function) const {
     664    BicliqueSet bicliques;
     665    factor(function.getEntryBlock(), bicliques);
     666}
     667
     668#endif
     669
     670#if 1
     671
     672using VertexSet = std::vector<PabloAST *>;
     673using Biclique = std::pair<VertexSet, VertexSet>; // [{Operands}, {Users}]
     674using BicliqueSet = std::vector<Biclique>;
     675using TypeId = PabloAST::ClassTypeId;
     676
     677/** ------------------------------------------------------------------------------------------------------------- *
     678 * @brief findAllFactoringsOf
     679 *
     680 * Adaptation of the MICA algorithm as described in "Consensus algorithms for the generation of all maximal
     681 * bicliques" by Alexe et. al. (2003).
     682 ** ------------------------------------------------------------------------------------------------------------- */
     683inline BicliqueSet FactorizeDFG::findAllFactoringsOf(Variadic * const var) {
     684    using IntersectionSets = flat_set<VertexSet>;
     685
     686    IntersectionSets B1;
     687    const TypeId typeId = var->getClassTypeId();
     688    for (unsigned i = 0; i != var->getNumOperands(); ++i) {
     689        PabloAST * const op = var->getOperand(i);
     690        VertexSet B;
     691        B.reserve(op->getNumUses());
     692        for (PabloAST * user : op->users()) {
     693            if (user->getClassTypeId() == typeId) {
     694                // Only consider a user who is in the same or a nested scope?
     695                PabloBlock * scope = cast<Variadic>(user)->getParent();
     696                while (scope) {
     697                    if (scope == var->getParent()) {
     698                        B.push_back(user);
     699                        break;
     700                    }
     701                    scope = scope->getParent();
     702                }
     703
     704//                B.push_back(user);
     705            }
     706        }
     707        std::sort(B.begin(), B.end());
     708        B1.insert(std::move(B));
     709    }
     710
     711    IntersectionSets B(B1);
     712
     713    IntersectionSets Bi;
     714
     715    VertexSet clique;
     716    for (auto i = B1.begin(); i != B1.end(); ++i) {
     717        for (auto j = i; ++j != B1.end(); ) {
     718            std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(clique));
     719            if (clique.size() > 0) {
     720                if (B.count(clique) == 0) {
     721                    Bi.insert(clique);
     722                }
     723                clique.clear();
     724            }
     725        }
     726    }
     727
     728    while (!Bi.empty()) {
     729        B.insert(Bi.begin(), Bi.end());
     730        IntersectionSets Bk;
     731        for (auto i = B1.begin(); i != B1.end(); ++i) {
     732            for (auto j = Bi.begin(); j != Bi.end(); ++j) {
     733                std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(clique));
     734                if (clique.size() > 0) {
     735                    if (B.count(clique) == 0) {
     736                        Bk.insert(clique);
     737                    }
     738                    clique.clear();
     739                }
     740            }
     741        }
     742        Bi.swap(Bk);
     743    }
     744
     745    BicliqueSet bicliques;
     746    for (auto Bi = B.begin(); Bi != B.end(); ++Bi) {
     747        VertexSet Ai(var->begin(), var->end());
     748        for (const PabloAST * user : *Bi) {
     749            VertexSet Aj(cast<Variadic>(user)->begin(), cast<Variadic>(user)->end());
     750            std::sort(Aj.begin(), Aj.end());
     751            VertexSet Ak;
     752            Ak.reserve(std::min<unsigned>(Ai.size(), Aj.size()));
     753            std::set_intersection(Ai.begin(), Ai.end(), Aj.begin(), Aj.end(), std::back_inserter(Ak));
     754            Ai.swap(Ak);
     755        }
     756        if (Ai.size() > 1 && Bi->size() > 1) {
     757            bicliques.emplace_back(std::move(Ai), std::move(*Bi));
     758        }
     759    }
     760    return bicliques;
     761}
     762
     763/** ------------------------------------------------------------------------------------------------------------- *
     764 * @brief intersects
     765 ** ------------------------------------------------------------------------------------------------------------- */
     766template <class Type>
     767inline bool intersects(const Type & A, const Type & B) {
     768    auto first1 = A.begin(), last1 = A.end();
     769    auto first2 = B.begin(), last2 = B.end();
     770    assert (std::is_sorted(first1, last1));
     771    assert (std::is_sorted(first2, last2));
     772    while (first1 != last1 && first2 != last2) {
     773        if (*first1 < *first2) {
     774            ++first1;
     775        } else if (*first2 < *first1) {
     776            ++first2;
     777        } else {
     778            return true;
     779        }
     780    }
     781    return false;
     782}
     783
     784/** ------------------------------------------------------------------------------------------------------------- *
     785 * @brief independentCliqueSets
     786 ** ------------------------------------------------------------------------------------------------------------- */
     787inline void FactorizeDFG::independentCliqueSets(BicliqueSet & bicliques) {
     788    using IndependentSetGraph = adjacency_list<hash_setS, vecS, undirectedS, unsigned>;
     789    using Vertex = IndependentSetGraph::vertex_descriptor;
     790
     791    const auto l = bicliques.size();
     792    IndependentSetGraph I(l);
     793
     794    // Initialize our weights and determine the constraints
     795    for (auto i = bicliques.begin(); i != bicliques.end(); ++i) {
     796        I[std::distance(bicliques.begin(), i)] = std::pow(std::get<0>(*i).size(), 2);
     797        for (auto j = i; ++j != bicliques.end(); ) {
     798            if (intersects(i->second, j->second) && intersects(i->first, j->first)) {
     799                add_edge(std::distance(bicliques.begin(), i), std::distance(bicliques.begin(), j), I);
     800            }
     801        }
     802    }
     803
     804    // Use the greedy algorithm to choose our independent set
     805    std::vector<Vertex> selected;
     806    for (;;) {
     807        unsigned w = 0;
     808        Vertex u = 0;
     809        for (unsigned i = 0; i != l; ++i) {
     810            if (I[i] > w) {
     811                w = I[i];
     812                u = i;
     813            }
     814        }
     815        if (w < 2) break;
     816        selected.push_back(u);
     817        I[u] = 0;
     818        for (auto v : make_iterator_range(adjacent_vertices(u, I))) {
     819            I[v] = 0;
     820        }
     821    }
     822
     823    // Sort the selected list and then remove the unselected cliques
     824    std::sort(selected.begin(), selected.end(), std::greater<Vertex>());
     825    auto end = bicliques.end();
     826    for (const unsigned offset : selected) {
     827        end = bicliques.erase(bicliques.begin() + offset + 1, end) - 1;
     828    }
     829    bicliques.erase(bicliques.begin(), end);
     830}
     831
     832/** ------------------------------------------------------------------------------------------------------------- *
     833 * @brief findInsertionScope
     834 ** ------------------------------------------------------------------------------------------------------------- */
     835inline PabloBlock * FactorizeDFG::findInsertionScope(const NodeSet & users) const {
     836    ScopeSet scopes;
     837    scopes.reserve(users.size());
     838
     839    for (PabloAST * user : users) {
     840        PabloBlock * const scope = cast<Statement>(user)->getParent(); assert (scope);
     841        if (std::find(scopes.begin(), scopes.end(), scope) == scopes.end()) {
     842            scopes.push_back(scope);
     843        }
     844    }
     845
     846    while (scopes.size() > 1) {
     847        // Find the LCA of both scopes then add the LCA back to the list of scopes.
     848        PabloBlock * scope1 = scopes.back(); scopes.pop_back();
     849        PabloBlock * scope2 = scopes.back(); scopes.pop_back();
     850        if (scope1 != scope2) {
     851            unsigned depth1 = scopeDepthOf(scope1);
     852            unsigned depth2 = scopeDepthOf(scope2);
     853            // If one of these scopes is nested deeper than the other, scan upwards through
     854            // the scope tree until both scopes are at the same depth.
     855            while (depth1 > depth2) {
     856                scope1 = scope1->getParent();
     857                --depth1;
     858            }
     859            while (depth1 < depth2) {
     860                scope2 = scope2->getParent();
     861                --depth2;
     862            }
     863            // Then iteratively step backwards until we find a matching set of scopes; this
     864            // must be the LCA of our original scopes.
     865            while (scope1 != scope2) {
     866                scope1 = scope1->getParent();
     867                scope2 = scope2->getParent();
     868            }
     869            assert (scope1 && scope2);
     870        }
     871        if (LLVM_LIKELY(std::find(scopes.begin(), scopes.end(), scope1) == scopes.end())) {
     872            scopes.push_back(scope1);
     873        }
     874    }
     875    assert (scopes.size() == 1);
     876
     877    return scopes.front();
     878}
     879
     880/** ------------------------------------------------------------------------------------------------------------- *
     881 * @brief makeCheckSet
     882 ** ------------------------------------------------------------------------------------------------------------- */
     883FactorizeDFG::CheckSet FactorizeDFG::makeCheckSet(PabloBlock * const scope, const NodeSet & values) const {
     884    CheckSet checks;
     885    assert (scope);
     886    const unsigned baseScopeDepth = scopeDepthOf(scope);
     887    for (PabloAST * value : values) {
     888        if (isa<Statement>(value)) {
     889            unsigned scopeDepth = scopeDepthOf(cast<Statement>(value)->getParent());
     890            // Is this from a preceeding scope? Or a nested scope?
     891            if (scopeDepth < baseScopeDepth) {
     892                continue;
     893            } else if (scopeDepth > baseScopeDepth) {
     894                // If we're in a nested scope, we want to know what branch statement enters it and
     895                // add that to our checks instead of the operand itself.
     896                PabloBlock * nested = cast<Statement>(value)->getParent();
     897                for (;;) {
     898                    assert (nested);
     899                    value = nested->getBranch();
     900                    nested = nested->getParent();
     901                    if (nested == scope) {
     902                        break;
     903                    }
     904                }
     905            }
     906            checks.insert(value);
     907        }
     908    }
     909    return checks;
     910}
     911
     912/** ------------------------------------------------------------------------------------------------------------- *
     913 * @brief firstIn
     914 ** ------------------------------------------------------------------------------------------------------------- */
     915inline Statement * FactorizeDFG::firstIn(PabloBlock * const scope, Statement * stmt, const NodeSet & users) const {
     916    const auto checks = makeCheckSet(scope, users);
     917    while (stmt) {
     918        if (LLVM_UNLIKELY(checks.count(stmt) != 0)) {
     919            break;
     920        }
     921        stmt = stmt->getNextNode();
     922    }
     923    return stmt;
     924}
     925
     926/** ------------------------------------------------------------------------------------------------------------- *
     927 * @brief lastIn
     928 ** ------------------------------------------------------------------------------------------------------------- */
     929inline Statement * FactorizeDFG::lastIn(PabloBlock * const scope, Statement * stmt, const NodeSet & operands) const {
     930    const auto checks = makeCheckSet(scope, operands);
     931    while (stmt) {
     932        if (LLVM_UNLIKELY(checks.count(stmt) != 0)) {
     933            break;
     934        }
     935        stmt = stmt->getPrevNode();
     936    }
     937    return stmt;
     938}
     939
     940/** ------------------------------------------------------------------------------------------------------------- *
     941 * @brief findInsertionPoint
     942 *
     943 * Look for the first user in this scope; if none can be found, look for the last operand.
     944 ** ------------------------------------------------------------------------------------------------------------- */
     945inline PabloBlock * FactorizeDFG::findInsertionPoint(const NodeSet & operands, const NodeSet & users) const {
     946    PabloBlock * const scope = findInsertionScope(users);
     947    Statement * const lastOperand = lastIn(scope, scope->back(), operands);
     948    Statement * const firstUsage = firstIn(scope, lastOperand, users);
     949    scope->setInsertPoint(firstUsage ? firstUsage->getPrevNode() : lastOperand);
     950    return scope;
     951}
     952
     953/** ------------------------------------------------------------------------------------------------------------- *
     954 * @brief factor
     955 ** ------------------------------------------------------------------------------------------------------------- */
     956inline void FactorizeDFG::factor(Variadic * const var, Variadics & factors) const {
     957    if (var->getNumOperands() > 2) {
     958        BicliqueSet S = findAllFactoringsOf(var);
     959        if (S.empty()) return;
     960        independentCliqueSets(S);
     961        assert (S.size() > 0);
     962        for (Biclique & B : S) {
     963            VertexSet & operands = B.first;
     964            VertexSet & users = B.second;
     965            PabloBlock * const block = findInsertionPoint(operands, users);
     966            Variadic * factoring = nullptr;
     967            if (isa<And>(var)) {
     968                factoring = block->createAnd(operands.begin(), operands.end());
     969            } else if (isa<Or>(var)) {
     970                factoring = block->createOr(operands.begin(), operands.end());
     971            } else { // if (isa<Xor>(var)) {
     972                factoring = block->createXor(operands.begin(), operands.end());
     973            }
     974            for (PabloAST * user : users) {
     975                for (PabloAST * op : operands) {
     976                    cast<Variadic>(user)->deleteOperand(op);
     977                }
     978                cast<Variadic>(user)->addOperand(factoring);
     979            }
     980            if (factoring->getNumOperands() > 2) {
     981                if (LLVM_UNLIKELY(factors.full())) {
     982                    factors.set_capacity(factors.capacity() + factors.capacity() / 2);
     983                }
     984                factors.push_back(factoring);
     985            }
     986        }
     987        if (var->getNumOperands() > 2) {
     988            if (LLVM_UNLIKELY(factors.full())) {
     989                factors.set_capacity(factors.capacity() + factors.capacity() / 2);
     990            }
     991            factors.push_back(var);
     992        } else if (var->getNumOperands()  == 1) {
     993            var->replaceWith(var->getOperand(0));
     994        }
     995    }
     996}
     997
     998/** ------------------------------------------------------------------------------------------------------------- *
     999 * @brief factor
     1000 *
     1001 * Perform common subexpression elimination on the Variadic statements
     1002 ** ------------------------------------------------------------------------------------------------------------- */
     1003void FactorizeDFG::factor(PabloBlock * const block, Variadics & vars) const {
     1004    Statement * stmt = block->front();
     1005    while (stmt) {
     1006        if (isa<If>(stmt) || isa<While>(stmt)) {
     1007            factor(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody(), vars);
     1008        } else if (stmt->getNumOperands() > 2 && (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt))) {
     1009            assert (!vars.full());
     1010            vars.push_back(cast<Variadic>(stmt));
     1011        }
     1012        stmt = stmt->getNextNode();
     1013    }
     1014}
     1015
     1016/** ------------------------------------------------------------------------------------------------------------- *
     1017 * @brief factor
     1018 ** ------------------------------------------------------------------------------------------------------------- */
     1019inline void FactorizeDFG::factor(PabloFunction & function) const {
     1020    Variadics vars(mNumOfVariadics);
     1021    factor(function.getEntryBlock(), vars);
     1022    while (vars.size() > 0) {
     1023        Variadic * var = vars.front();
     1024        vars.pop_front();
     1025        factor(var, vars);
     1026    }
     1027}
     1028
     1029#endif
     1030
     1031#if 0
     1032
     1033using Graph = boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, PabloAST *>;
     1034using Vertex = Graph::vertex_descriptor;
     1035using Map = flat_map<const PabloAST *, Vertex>;
     1036
     1037using VertexSet = std::vector<Vertex>;
     1038using Biclique = std::pair<VertexSet, VertexSet>; // [{Operands}, {Users}]
     1039using BicliqueSet = std::vector<Biclique>;
     1040using TypeId = PabloAST::ClassTypeId;
     1041
     1042/** ------------------------------------------------------------------------------------------------------------- *
     1043 * @brief enumerateBicliques
     1044 *
     1045 * Adaptation of the MICA algorithm as described in "Consensus algorithms for the generation of all maximal
     1046 * bicliques" by Alexe et. al. (2003). Note: this implementation considers all verticies in set A to be in
     1047 * bipartition A and their adjacencies to be in B.
     1048  ** ------------------------------------------------------------------------------------------------------------- */
     1049template <class Graph>
     1050static BicliqueSet enumerateBicliques(const Graph & G, const VertexSet & A) {
     1051    using IntersectionSets = std::set<VertexSet>;
     1052
     1053    IntersectionSets B1;
     1054    for (auto u : A) {
     1055        if (out_degree(u, G) > 0) {
     1056            VertexSet incomingAdjacencies;
     1057            incomingAdjacencies.reserve(out_degree(u, G));
     1058            for (auto e : make_iterator_range(out_edges(u, G))) {
     1059                incomingAdjacencies.push_back(target(e, G));
     1060            }
     1061            std::sort(incomingAdjacencies.begin(), incomingAdjacencies.end());
     1062            B1.insert(std::move(incomingAdjacencies));
     1063        }
     1064    }
     1065
     1066    IntersectionSets B(B1);
     1067
     1068    IntersectionSets Bi;
     1069
     1070    VertexSet clique;
     1071    for (auto i = B1.begin(); i != B1.end(); ++i) {
     1072        for (auto j = i; ++j != B1.end(); ) {
     1073            std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(clique));
     1074            if (clique.size() > 0) {
     1075                if (B.count(clique) == 0) {
     1076                    Bi.insert(clique);
     1077                }
     1078                clique.clear();
     1079            }
     1080        }
     1081    }
     1082
     1083    for (;;) {
     1084        if (Bi.empty()) {
     1085            break;
     1086        }
     1087        B.insert(Bi.begin(), Bi.end());
     1088        IntersectionSets Bk;
     1089        for (auto i = B1.begin(); i != B1.end(); ++i) {
     1090            for (auto j = Bi.begin(); j != Bi.end(); ++j) {
     1091                std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(clique));
     1092                if (clique.size() > 0) {
     1093                    if (B.count(clique) == 0) {
     1094                        Bk.insert(clique);
     1095                    }
     1096                    clique.clear();
     1097                }
     1098            }
     1099        }
     1100        Bi.swap(Bk);
     1101    }
     1102
     1103    BicliqueSet cliques;
     1104    cliques.reserve(B.size());
     1105    for (auto Bi = B.begin(); Bi != B.end(); ++Bi) {
     1106        VertexSet Ai(A);
     1107        for (const Vertex u : *Bi) {
     1108            VertexSet Aj;
     1109            Aj.reserve(in_degree(u, G));
     1110            for (auto e : make_iterator_range(in_edges(u, G))) {
     1111                Aj.push_back(source(e, G));
     1112            }
     1113            std::sort(Aj.begin(), Aj.end());
     1114            VertexSet Ak;
     1115            Ak.reserve(std::min(Ai.size(), Aj.size()));
     1116            std::set_intersection(Ai.begin(), Ai.end(), Aj.begin(), Aj.end(), std::back_inserter(Ak));
     1117            Ai.swap(Ak);
     1118        }
     1119        assert (Ai.size() > 0); // cannot happen if this algorithm is working correctly
     1120        cliques.emplace_back(std::move(Ai), std::move(*Bi));
     1121    }
     1122    return std::move(cliques);
     1123}
     1124
     1125/** ------------------------------------------------------------------------------------------------------------- *
     1126 * @brief intersects
     1127 ** ------------------------------------------------------------------------------------------------------------- */
     1128template <class Type>
     1129inline bool intersects(const Type & A, const Type & B) {
     1130    auto first1 = A.begin(), last1 = A.end();
     1131    auto first2 = B.begin(), last2 = B.end();
     1132    while (first1 != last1 && first2 != last2) {
     1133        if (*first1 < *first2) {
     1134            ++first1;
     1135        } else if (*first2 < *first1) {
     1136            ++first2;
     1137        } else {
     1138            return true;
     1139        }
     1140    }
     1141    return false;
     1142}
     1143
     1144/** ------------------------------------------------------------------------------------------------------------- *
     1145 * @brief getMaximalIndependentBicliques
     1146 ** ------------------------------------------------------------------------------------------------------------- */
     1147template <unsigned side = 1>
     1148inline static BicliqueSet && maximalIndependentBicliques(BicliqueSet && cliques, const unsigned minimum = 1) {
     1149    using IndependentSetGraph = adjacency_list<hash_setS, vecS, undirectedS, unsigned>;
     1150
     1151    static_assert((side == 0 || side == 1), "Side must be 0 (bipartition A) or 1 (bipartition B)");
     1152    assert ("Minimum must be at least 1 or an infinite number of empty sets would be generated!" && minimum > 0);
     1153
     1154    const auto l = cliques.size();
     1155    IndependentSetGraph I(l);
     1156
     1157    // Initialize our weights
     1158    for (unsigned i = 0; i != l; ++i) {
     1159        I[i] = std::pow(std::get<side>(cliques[i]).size(), 2);
     1160    }
     1161
     1162    // Determine our constraints
     1163    for (unsigned i = 0; i != l; ++i) {
     1164        for (unsigned j = i + 1; j != l; ++j) {
     1165            if (intersects(std::get<side>(cliques[i]), std::get<side>(cliques[j]))) {
     1166                add_edge(i, j, I);
     1167            }
     1168        }
     1169    }
     1170
     1171    // Use the greedy algorithm to choose our independent set
     1172    VertexSet selected;
     1173    for (;;) {
     1174        unsigned w = 0;
     1175        Vertex u = 0;
     1176        for (unsigned i = 0; i != l; ++i) {
     1177            if (I[i] > w) {
     1178                w = I[i];
     1179                u = i;
     1180            }
     1181        }
     1182        if (w < minimum) break;
     1183        selected.push_back(u);
     1184        I[u] = 0;
     1185        for (auto v : make_iterator_range(adjacent_vertices(u, I))) {
     1186            I[v] = 0;
     1187        }
     1188    }
     1189
     1190    // Sort the selected list and then remove the unselected cliques
     1191    std::sort(selected.begin(), selected.end(), std::greater<Vertex>());
     1192    auto end = cliques.end();
     1193    for (const unsigned offset : selected) {
     1194        end = cliques.erase(cliques.begin() + offset + 1, end) - 1;
     1195    }
     1196    cliques.erase(cliques.begin(), end);
     1197
     1198    return std::move(cliques);
     1199}
     1200
     1201/** ------------------------------------------------------------------------------------------------------------- *
     1202 * @brief factor
     1203 ** ------------------------------------------------------------------------------------------------------------- */
     1204template<class Type>
     1205inline void factorAny(Graph & G, VertexSet & A, const Biclique & Si, PabloBlock * const block) {
     1206
     1207    static_assert (std::is_same<Type, And>::value || std::is_same<Type, Or>::value || std::is_same<Type, Xor>::value, "Can only factor And, Or or Xor statements here!");
     1208
     1209    flat_set<Statement *> S;
     1210    for (auto u : Si.first) {
     1211        if (isa<Type>(G[u])) {
     1212            S.insert(cast<Type>(G[u]));
     1213        }
     1214    }
     1215    if (S.empty()) {
     1216        return;
     1217    }
     1218    // find the insertion point for this statement type
     1219    for (Statement * ip : *block) {
     1220        if (S.count(ip)) {
     1221            block->setInsertPoint(ip->getPrevNode());
     1222            break;
     1223        }
     1224    }
     1225    Variadic * factored = nullptr;
     1226    if (std::is_same<Type, And>::value) {
     1227        factored = block->createAnd(Si.second.size());
     1228    } else if (std::is_same<Type, Or>::value) {
     1229        factored = block->createOr(Si.second.size());
     1230    } else if (std::is_same<Type, Xor>::value) {
     1231        factored = block->createXor(Si.second.size());
     1232    }
     1233    const auto u = add_vertex(factored, G);
     1234    A.push_back(u);
     1235    for (auto v : Si.second) {
     1236        factored->addOperand(G[v]);
     1237        add_edge(u, v, G);
     1238    }
     1239    const auto w = add_vertex(factored, G);
     1240    for (auto u : Si.first) {
     1241        if (isa<Type>(G[u])) {
     1242            Type * factoring = cast<Type>(G[u]);
     1243            for (auto v : Si.second) {
     1244                factoring->deleteOperand(G[v]);
     1245                remove_edge(u, v, G);
     1246            }
     1247            factoring->addOperand(factored);
     1248            add_edge(u, w, G);
     1249        }
     1250    }
     1251}
     1252
     1253/** ------------------------------------------------------------------------------------------------------------- *
     1254 * @brief eliminateCommonSubexpressions
     1255 ** ------------------------------------------------------------------------------------------------------------- */
     1256void eliminateCommonSubexpressions(Graph & G, VertexSet & A, PabloBlock * const block) {
     1257    for (;;) {
     1258        const auto S = maximalIndependentBicliques<1>(enumerateBicliques(G, A), 2);
     1259        if (S.empty()) {
     1260            break;
     1261        }
     1262        for (const Biclique Si : S) {
     1263            factorAny<And>(G, A, Si, block);
     1264            factorAny<Or>(G, A, Si, block);
     1265            factorAny<Xor>(G, A, Si, block);
     1266        }
     1267    }
     1268}
     1269
     1270/** ------------------------------------------------------------------------------------------------------------- *
     1271 * @brief factor
     1272 *
     1273 * Perform common subexpression elimination on the Variadic statements
     1274 ** ------------------------------------------------------------------------------------------------------------- */
     1275void FactorizeDFG::factor(PabloBlock * const block) {
     1276
     1277    Graph G;
     1278    VertexSet A;
     1279    Map B;
     1280
     1281    Statement * stmt = block->front();
     1282    while (stmt) {
     1283        if (isa<If>(stmt) || isa<While>(stmt)) {
     1284            eliminateCommonSubexpressions(G, A, block);
     1285            G.clear(); A.clear(); B.clear();
     1286            factor(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody());
    3621287        } else if (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt)) {
    363             cse(cast<Variadic>(stmt));
     1288            // When we encounter a reassociative statement, add it and its operands to our bigraph G.
     1289            const auto u = add_vertex(stmt, G);
     1290            A.push_back(u);
     1291            for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
     1292                auto f = B.find(stmt->getOperand(i));
     1293                if (f == B.end()) {
     1294                    f = B.emplace(stmt->getOperand(i), add_vertex(stmt->getOperand(i), G)).first;
     1295                }
     1296                add_edge(f->second, u, G);
     1297            }
     1298        }
     1299        stmt = stmt->getNextNode();
     1300    }
     1301    eliminateCommonSubexpressions(G, A, block);
     1302}
     1303#endif
     1304
     1305#if 0
     1306
     1307using Graph = boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, PabloAST *>;
     1308using Matrix = adjacency_matrix<directedS>;
     1309using Vertex = Graph::vertex_descriptor;
     1310using Map = flat_map<const PabloAST *, Vertex>;
     1311using TypeId = PabloAST::ClassTypeId;
     1312
     1313/** ------------------------------------------------------------------------------------------------------------- *
     1314 * @brief getVertex
     1315 ** ------------------------------------------------------------------------------------------------------------- */
     1316static inline Vertex getVertex(PabloAST * expr, Graph & G, Map & M) {
     1317    auto f = M.find(expr);
     1318    if (LLVM_LIKELY(f != M.end())) {
     1319        return f->second;
     1320    } else {
     1321        const Vertex u = add_vertex(expr, G);
     1322        M.emplace(expr, u);
     1323        return u;
     1324    }
     1325}
     1326
     1327/** ------------------------------------------------------------------------------------------------------------- *
     1328 * @brief buildDependencyGraph
     1329 ** ------------------------------------------------------------------------------------------------------------- */
     1330static void buildDependencyGraph(PabloBlock * const block, Graph & G, Map & M) {
     1331    Statement * stmt = block->front();
     1332    while (stmt) {
     1333        if (isa<If>(stmt) || isa<While>(stmt)) {
     1334            buildDependencyGraph(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody(), G, M);
     1335        } else if (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt)) {
     1336            const Vertex u = getVertex(stmt, G, M);
     1337            for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
     1338                add_edge(getVertex(stmt->getOperand(i), G, M), u, G);
     1339            }
     1340        }
     1341        stmt = stmt->getNextNode();
     1342    }
     1343}
     1344
     1345/** ------------------------------------------------------------------------------------------------------------- *
     1346 * @brief transitiveClosure
     1347 ** ------------------------------------------------------------------------------------------------------------- */
     1348static Matrix transitiveClosureOf(const Graph & G) {
     1349    std::vector<Vertex> rto; // reverse topological ordering
     1350    rto.reserve(num_vertices(G));
     1351    topological_sort(G, std::back_inserter(rto));
     1352    Matrix M(num_vertices(G));
     1353    for (auto u : rto) {
     1354        for (auto ej : make_iterator_range(in_edges(u, G))) {
     1355            add_edge(source(ej, G), target(ej, G), M);
     1356        }
     1357        for (auto ei : make_iterator_range(out_edges(u, M))) {
     1358            for (auto ej : make_iterator_range(in_edges(u, G))) {
     1359                add_edge(source(ej, G), target(ei, M), M);
     1360            }
     1361        }
     1362        add_edge(u, u, M);
     1363    }
     1364    return std::move(M);
     1365}
     1366
     1367using VertexSet = std::vector<Vertex>;
     1368using Biclique = std::pair<VertexSet, VertexSet>; // [{Operands}, {Users}]
     1369using BicliqueSet = flat_set<Biclique>;
     1370using IntersectionSets = std::set<VertexSet>;
     1371
     1372/** ------------------------------------------------------------------------------------------------------------- *
     1373 * @brief enumerateBicliques
     1374 *
     1375 * Adaptation of the MICA algorithm as described in "Consensus algorithms for the generation of all maximal
     1376 * bicliques" by Alexe et. al. (2003). Note: this implementation considers all verticies in set A to be in
     1377 * bipartition A and their adjacencies to be in B.
     1378  ** ------------------------------------------------------------------------------------------------------------- */
     1379inline static void enumerateBicliques(VertexSet & A, VertexSet & B, const Graph & G, BicliqueSet & bicliques) {
     1380
     1381    std::sort(A.begin(), A.end());
     1382    std::sort(B.begin(), B.end());
     1383
     1384    VertexSet S;
     1385    IntersectionSets B1;
     1386    for (auto u : A) {
     1387        S.reserve(out_degree(u, G));
     1388        for (auto e : make_iterator_range(out_edges(u, G))) {
     1389            S.push_back(target(e, G));
     1390        }
     1391        assert (S.size() > 0);
     1392        std::sort(S.begin(), S.end());
     1393        VertexSet T;       
     1394        std::set_intersection(B.begin(), B.end(), S.begin(), S.end(), std::back_inserter(T));
     1395        assert (T.size() > 0);
     1396        B1.emplace(std::move(T));
     1397        S.clear();
     1398    }
     1399
     1400    IntersectionSets C(B1);
     1401    IntersectionSets Bi;
     1402    for (auto i = B1.begin(); i != B1.end(); ++i) {
     1403        for (auto j = i; ++j != B1.end(); ) {
     1404            std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(S));
     1405            if (S.size() > 0) {
     1406                if (C.count(S) == 0) {
     1407                    Bi.insert(S);
     1408                }
     1409                S.clear();
     1410            }
     1411        }
     1412    }
     1413
     1414    for (;;) {
     1415        if (Bi.empty()) {
     1416            break;
     1417        }
     1418        C.insert(Bi.begin(), Bi.end());
     1419        IntersectionSets Bk;
     1420        for (auto i = B1.begin(); i != B1.end(); ++i) {
     1421            for (auto j = Bi.begin(); j != Bi.end(); ++j) {
     1422                std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(S));
     1423                if (S.size() > 0) {
     1424                    if (C.count(S) == 0) {
     1425                        Bk.insert(S);
     1426                    }
     1427                    S.clear();
     1428                }
     1429            }
     1430        }
     1431        Bi.swap(Bk);
     1432    }
     1433
     1434    for (auto Bi = C.begin(); Bi != C.end(); ++Bi) {
     1435        VertexSet Ai(A);
     1436        for (const Vertex u : *Bi) {
     1437            VertexSet Aj;
     1438            Aj.reserve(in_degree(u, G));
     1439            for (auto e : make_iterator_range(in_edges(u, G))) {
     1440                Aj.push_back(source(e, G));
     1441            }
     1442            std::sort(Aj.begin(), Aj.end());
     1443            VertexSet Ak;
     1444            Ak.reserve(std::min(Ai.size(), Aj.size()));
     1445            std::set_intersection(Ai.begin(), Ai.end(), Aj.begin(), Aj.end(), std::back_inserter(Ak));
     1446            Ai.swap(Ak);
     1447        }
     1448        assert (Ai.size() > 0); // cannot happen if this algorithm is working correctly
     1449        if (Ai.size() > 1 && Bi->size() > 1) {
     1450            bicliques.emplace(std::move(Ai), std::move(*Bi));
     1451        }
     1452    }
     1453}
     1454
     1455/** ------------------------------------------------------------------------------------------------------------- *
     1456 * @brief analyzeGraph
     1457 ** ------------------------------------------------------------------------------------------------------------- */
     1458inline static void analyzeGraph(const Vertex u, Graph & G, const Matrix & M, BicliqueSet & bicliques, const unsigned traversalLimit = 10) {
     1459
     1460    VertexSet A;
     1461    VertexSet B;
     1462
     1463    std::vector<bool> visited(num_vertices(G), false);
     1464    circular_buffer<Vertex> Q(64);
     1465    const TypeId typeId = G[u]->getClassTypeId();
     1466
     1467    Vertex v = u;
     1468    visited[v] = true;
     1469    for (;;) {
     1470        bool independent = true;
     1471        for (auto w : B) {
     1472            if (M.get_edge(v, w)) {
     1473                independent = false;
     1474                break;
     1475            }
     1476        }
     1477        if (independent) {
     1478            B.push_back(v);
     1479            if (B.size() < traversalLimit) {
     1480                for (auto e : make_iterator_range(in_edges(v, G))) {
     1481                    const auto w = source(e, G);
     1482                    if (visited[w]) {
     1483                        continue;
     1484                    }
     1485                    bool independent = true;
     1486                    for (auto x : A) {
     1487                        if (M.get_edge(w, x)) {
     1488                            independent = false;
     1489                            break;
     1490                        }
     1491                    }
     1492                    visited[w] = true;
     1493                    if (independent) {
     1494                        A.push_back(w);
     1495                        for (auto e : make_iterator_range(out_edges(w, G))) {
     1496                            const auto x = target(e, G);
     1497                            if (visited[x]) {
     1498                                continue;
     1499                            }
     1500                            visited[x] = true;
     1501                            if (G[x]->getClassTypeId() == typeId) {
     1502                                if (LLVM_UNLIKELY(Q.full())) {
     1503                                    Q.set_capacity(Q.capacity() + (Q.capacity() / 2));
     1504                                }
     1505                                Q.push_back(x);
     1506                            }
     1507                        }
     1508                    }
     1509                }
     1510            }
     1511        }
     1512        if (Q.empty()) {
     1513            break;
     1514        }
     1515        v = Q.front();
     1516        Q.pop_front();
     1517    }
     1518
     1519    enumerateBicliques(A, B, G, bicliques);
     1520}
     1521
     1522/** ------------------------------------------------------------------------------------------------------------- *
     1523 * @brief intersects
     1524 ** ------------------------------------------------------------------------------------------------------------- */
     1525template <class Type>
     1526inline bool intersects(const Type & A, const Type & B) {
     1527    auto first1 = A.begin(), last1 = A.end();
     1528    auto first2 = B.begin(), last2 = B.end();
     1529    while (first1 != last1 && first2 != last2) {
     1530        if (*first1 < *first2) {
     1531            ++first1;
     1532        } else if (*first2 < *first1) {
     1533            ++first2;
     1534        } else {
     1535            return true;
     1536        }
     1537    }
     1538    return false;
     1539}
     1540
     1541/** ------------------------------------------------------------------------------------------------------------- *
     1542 * @brief independentCliqueSets
     1543 ** ------------------------------------------------------------------------------------------------------------- */
     1544inline static void independentCliqueSets(BicliqueSet & bicliques, const unsigned minimum) {
     1545    using IndependentSetGraph = adjacency_list<hash_setS, vecS, undirectedS, unsigned>;
     1546
     1547    const auto l = bicliques.size();
     1548    IndependentSetGraph I(l);
     1549
     1550    // Initialize our weights and determine the constraints
     1551    for (auto i = bicliques.begin(); i != bicliques.end(); ++i) {
     1552        I[std::distance(bicliques.begin(), i)] = std::pow(std::get<0>(*i).size(), 2);
     1553        for (auto j = i; ++j != bicliques.end(); ) {
     1554            if (intersects(std::get<1>(*i), std::get<1>(*j))) {
     1555                add_edge(std::distance(bicliques.begin(), i), std::distance(bicliques.begin(), j), I);
     1556            }
     1557        }
     1558    }
     1559
     1560    // Use the greedy algorithm to choose our independent set
     1561    VertexSet selected;
     1562    for (;;) {
     1563        unsigned w = 0;
     1564        Vertex u = 0;
     1565        for (unsigned i = 0; i != l; ++i) {
     1566            if (I[i] > w) {
     1567                w = I[i];
     1568                u = i;
     1569            }
     1570        }
     1571        if (w < minimum) break;
     1572        selected.push_back(u);
     1573        I[u] = 0;
     1574        for (auto v : make_iterator_range(adjacent_vertices(u, I))) {
     1575            I[v] = 0;
     1576        }
     1577    }
     1578
     1579    // Sort the selected list and then remove the unselected cliques
     1580    std::sort(selected.begin(), selected.end(), std::greater<Vertex>());
     1581    auto end = bicliques.end();
     1582    for (const unsigned offset : selected) {
     1583        end = bicliques.erase(bicliques.begin() + offset + 1, end) - 1;
     1584    }
     1585    bicliques.erase(bicliques.begin(), end);
     1586}
     1587
     1588/** ------------------------------------------------------------------------------------------------------------- *
     1589 * @brief chooseInsertionPoint
     1590 ** ------------------------------------------------------------------------------------------------------------- */
     1591inline PabloBlock * FactorizeDFG::chooseInsertionScope(const NodeSet & users) {
     1592
     1593    ScopeSet scopes;
     1594    scopes.reserve(users.size());
     1595
     1596    flat_set<PabloBlock *> visited;
     1597    visited.reserve(users.size());
     1598
     1599    for (PabloAST * user : users) {
     1600        PabloBlock * const scope = cast<Statement>(user)->getParent();
     1601        assert (scope);
     1602        if (visited.insert(scope).second) {
     1603            scopes.push_back(scope);
     1604        }
     1605    }
     1606
     1607    while (scopes.size() > 1) {
     1608        // Find the LCA of both scopes then add the LCA back to the list of scopes.
     1609        PabloBlock * scope1 = scopes.back(); scopes.pop_back();
     1610        PabloBlock * scope2 = scopes.back(); scopes.pop_back();
     1611        if (scope1 != scope2) {
     1612            unsigned depth1 = mScopeDepth.find(scope1)->second;
     1613            unsigned depth2 = mScopeDepth.find(scope2)->second;
     1614            // If one of these scopes is nested deeper than the other, scan upwards through
     1615            // the scope tree until both scopes are at the same depth.
     1616            while (depth1 > depth2) {
     1617                scope1 = scope1->getParent();
     1618                --depth1;
     1619            }
     1620            while (depth1 < depth2) {
     1621                scope2 = scope2->getParent();
     1622                --depth2;
     1623            }
     1624            // Then iteratively step backwards until we find a matching set of scopes; this
     1625            // must be the LCA of our original scopes.
     1626            while (scope1 != scope2) {
     1627                scope1 = scope1->getParent();
     1628                scope2 = scope2->getParent();
     1629            }
     1630            assert (scope1 && scope2);
     1631        }
     1632        scopes.push_back(scope1);
     1633    }
     1634    assert (scopes.size() == 1);
     1635    return scopes[0];
     1636}
     1637
     1638/** ------------------------------------------------------------------------------------------------------------- *
     1639 * @brief findInsertionPoint
     1640 ** ------------------------------------------------------------------------------------------------------------- */
     1641void FactorizeDFG::findInsertionPoint(const NodeSet & operands, PabloBlock * const scope) {
     1642    Statement * stmt = scope->back();
     1643    scope->setInsertPoint(nullptr);
     1644    assert (std::is_sorted(operands.begin(), operands.end()));
     1645    while (stmt) {
     1646        if (isa<If>(stmt)) {
     1647            for (Assign * def : cast<If>(stmt)->getDefined()) {
     1648                if (std::binary_search(operands.begin(), operands.end(), def)) {
     1649                    scope->setInsertPoint(stmt);
     1650                    return;
     1651                }
     1652            }
     1653        } else if (isa<While>(stmt)) {
     1654            for (Next * var : cast<While>(stmt)->getVariants()) {
     1655                if (std::binary_search(operands.begin(), operands.end(), var)) {
     1656                    scope->setInsertPoint(stmt);
     1657                    return;
     1658                }
     1659            }
     1660        } else if (std::binary_search(operands.begin(), operands.end(), stmt)) {
     1661            scope->setInsertPoint(stmt);
     1662            break;
     1663        }
     1664        stmt = stmt->getPrevNode();
     1665    }
     1666}
     1667
     1668/** ------------------------------------------------------------------------------------------------------------- *
     1669 * @brief factor
     1670 ** ------------------------------------------------------------------------------------------------------------- */
     1671inline void FactorizeDFG::factor(PabloFunction & function) {
     1672
     1673//    raw_os_ostream out(std::cerr);
     1674
     1675//    out << "BEFORE:\n\n";
     1676//    PabloPrinter::print(function, out);
     1677//    out << "******************************************\n";
     1678//    out.flush();
     1679
     1680    for (;;) {
     1681
     1682        Graph G;
     1683        {
     1684            Map M;
     1685            // Let G be a DAG representing each associative statement and its inputs
     1686            buildDependencyGraph(function.getEntryBlock(), G, M);
     1687        }
     1688
     1689        BicliqueSet S;
     1690        {
     1691            const Matrix M = transitiveClosureOf(G);
     1692            for (const Vertex u : make_iterator_range(vertices(M))) {
     1693                if ((in_degree(u, G) > 2) && (isa<And>(G[u]) || isa<Or>(G[u]) || isa<Xor>(G[u]))) {
     1694                    analyzeGraph(u, G, M, S);
     1695                }
     1696            }
     1697        }
     1698
     1699        independentCliqueSets(S, 2);
     1700
     1701        if (S.empty()) {
     1702            break;
     1703        }
     1704
     1705        std::vector<PabloAST *> operands;
     1706        std::vector<PabloAST *> users;
     1707
     1708        for (const Biclique & B : S) {
     1709            for (auto u : std::get<0>(B)) {
     1710                operands.push_back(G[u]);
     1711            }
     1712            std::sort(operands.begin(), operands.end());
     1713
     1714            for (auto u : std::get<1>(B)) {
     1715                users.push_back(G[u]);
     1716            }
     1717            std::sort(users.begin(), users.end());
     1718
     1719//            out << " -- factoring {";
     1720//            for (PabloAST * operand : operands) {
     1721//                out << ' ';
     1722//                PabloPrinter::print(operand, out);
     1723//            }
     1724//            out << " } from { ";
     1725//            for (PabloAST * user : users) {
     1726//                out << ' ';
     1727//                PabloPrinter::print(user, out);
     1728//            }
     1729//            out << " } -> ";
     1730
     1731            const TypeId typeId = users.front()->getClassTypeId();
     1732            assert(typeId == TypeId::And || typeId == TypeId::Or || typeId == TypeId::Xor);
     1733            #ifndef NDEBUG
     1734            for (PabloAST * user : users) {
     1735                assert(user->getClassTypeId() == typeId);
     1736            }
     1737            #endif
     1738            PabloBlock * const block = chooseInsertionScope(users);
     1739            findInsertionPoint(operands, block);
     1740            Variadic * factored = nullptr;
     1741            if (typeId == TypeId::And) {
     1742                factored = block->createAnd(operands.begin(), operands.end());
     1743            } else if (typeId == TypeId::Or) {
     1744                factored = block->createOr(operands.begin(), operands.end());
     1745            } else { // if (isa<Xor>(var)) {
     1746                factored = block->createXor(operands.begin(), operands.end());
     1747            }
     1748
     1749//            PabloPrinter::print(factored, out);
     1750//            out << '\n';
     1751//            out.flush();
     1752
     1753            for (PabloAST * user : users) {
     1754                for (PabloAST * op : operands) {
     1755                    cast<Variadic>(user)->deleteOperand(op);
     1756                }
     1757                cast<Variadic>(user)->addOperand(factored);
     1758            }
     1759            operands.clear();
     1760            users.clear();
     1761        }
     1762//        out << "-----------------------------------------------------------------\n";
     1763    }
     1764
     1765//    out << "AFTER:\n\n";
     1766//    PabloPrinter::print(function, out);
     1767//    out << "******************************************\n";
     1768
     1769}
     1770
     1771
     1772#endif
     1773
     1774/** ------------------------------------------------------------------------------------------------------------- *
     1775 * @brief deMorgansExpansion
     1776 *
     1777 * Apply the De Morgans' law to any negated And or Or statement with the intent of further coalescing its operands
     1778 * thereby allowing the Simplifier to check for tautologies and contradictions.
     1779 ** ------------------------------------------------------------------------------------------------------------- */
     1780inline void FactorizeDFG::deMorgansExpansion(Not * const var, PabloBlock * const block) {
     1781    PabloAST * const negatedVar = var->getOperand(0);
     1782    if (isa<And>(negatedVar) || isa<Or>(negatedVar)) {
     1783        const TypeId desiredTypeId = isa<And>(negatedVar) ? TypeId::Or : TypeId::And;
     1784        bool canApplyDeMorgans = true;
     1785        for (PabloAST * user : var->users()) {
     1786            if (desiredTypeId != user->getClassTypeId()) {
     1787                canApplyDeMorgans = false;
     1788                break;
     1789            }
     1790        }
     1791        if (canApplyDeMorgans) {
     1792            const unsigned operands = cast<Variadic>(negatedVar)->getNumOperands();
     1793            PabloAST * negations[operands];
     1794            block->setInsertPoint(var);
     1795            for (unsigned i = 0; i != operands; ++i) {
     1796                negations[i] = block->createNot(cast<Variadic>(negatedVar)->getOperand(i));
     1797            }
     1798            const unsigned users = var->getNumUses();
     1799            PabloAST * user[users];
     1800            std::copy(var->user_begin(), var->user_end(), user);
     1801            for (unsigned i = 0; i != users; ++i) {
     1802                cast<Variadic>(user[i])->deleteOperand(var);
     1803                for (unsigned j = 0; j != operands; ++j) {
     1804                    cast<Variadic>(user[i])->addOperand(negations[j]);
     1805                }
     1806            }
     1807            var->eraseFromParent(true);
     1808        }
     1809    }
     1810}
     1811
     1812/** ------------------------------------------------------------------------------------------------------------- *
     1813 * @brief deMorgansExpansion
     1814 ** ------------------------------------------------------------------------------------------------------------- */
     1815void FactorizeDFG::deMorgansExpansion(PabloBlock * const block) {
     1816    Statement * stmt = block->front();
     1817    while (stmt) {
     1818        if (isa<If>(stmt) || isa<While>(stmt)) {
     1819            deMorgansExpansion(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody());
     1820        } else if (isa<Not>(stmt)) {
     1821            deMorgansExpansion(cast<Not>(stmt), block);
    3641822        }
    3651823        stmt = stmt->getNextNode();
     
    3771835            mScopeDepth.emplace(body, depth);
    3781836            enumerateScopeDepth(body, depth + 1);
     1837        } else if (stmt->getNumOperands() > 2 && (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt))) {
     1838            ++mNumOfVariadics;
    3791839        }
    3801840        stmt = stmt->getNextNode();
     
    3861846 ** ------------------------------------------------------------------------------------------------------------- */
    3871847inline void FactorizeDFG::enumerateScopeDepth(const PabloFunction & function) {
    388     mScopeDepth.emplace(function.getEntryBlock(), 0);
    389     enumerateScopeDepth(function.getEntryBlock(), 1);
     1848    mScopeDepth.emplace(nullptr, 0);
     1849    mScopeDepth.emplace(function.getEntryBlock(), 1);
     1850    enumerateScopeDepth(function.getEntryBlock(), 2);
     1851}
     1852
     1853/** ------------------------------------------------------------------------------------------------------------- *
     1854 * @brief scopeDepthOf
     1855 ** ------------------------------------------------------------------------------------------------------------- */
     1856inline unsigned FactorizeDFG::scopeDepthOf(const PabloAST * const expr) const {
     1857    return LLVM_LIKELY(isa<Statement>(expr)) ? scopeDepthOf(cast<Statement>(expr)->getParent()) : 0;
     1858}
     1859
     1860/** ------------------------------------------------------------------------------------------------------------- *
     1861 * @brief scopeDepthOf
     1862 ** ------------------------------------------------------------------------------------------------------------- */
     1863inline unsigned FactorizeDFG::scopeDepthOf(const PabloBlock * const block) const {
     1864    assert (block);
     1865    const auto f = mScopeDepth.find(block);
     1866    assert (f != mScopeDepth.end());
     1867    return f->second;
     1868}
     1869
     1870/** ------------------------------------------------------------------------------------------------------------- *
     1871 * @brief ensureLegality
     1872 *
     1873 * We don't want to run the simplifier after this as it might undo some of the ordering work we've done. Instead
     1874 * just do the minimum possible to ensure that each variadic is legal prior to compilation.
     1875 ** ------------------------------------------------------------------------------------------------------------- */
     1876void FactorizeDFG::ensureLegality(PabloBlock * const block) {
     1877    Statement * stmt = block->front();
     1878    while (stmt) {
     1879        if (isa<If>(stmt) || isa<While>(stmt)) {
     1880            ensureLegality(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody());
     1881        } else if (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt)) {
     1882            assert (stmt->getNumOperands() <= 2);
     1883            if (LLVM_UNLIKELY(stmt->getNumOperands() < 2)) {
     1884                if (LLVM_UNLIKELY(stmt->getNumOperands() == 0)) {
     1885                    stmt = stmt->eraseFromParent(true);
     1886                } else {
     1887                    stmt = stmt->replaceWith(stmt->getOperand(0), true, true);
     1888                }
     1889                continue;
     1890            }
     1891        }
     1892        stmt = stmt->getNextNode();
     1893    }
    3901894}
    3911895
     
    3951899void FactorizeDFG::transform(PabloFunction & function) {
    3961900    FactorizeDFG ldfg;
     1901//    ldfg.deMorgansExpansion(function.getEntryBlock());
     1902//    #ifndef NDEBUG
     1903//    PabloVerifier::verify(function, "post-demorgans-expansion");
     1904//    #endif
    3971905    ldfg.enumerateScopeDepth(function);
    398     ldfg.cse(function.getEntryBlock());
     1906    ldfg.factor(function);
    3991907    #ifndef NDEBUG
    400     PabloVerifier::verify(function, "post-cse");
     1908    PabloVerifier::verify(function, "post-factoring");
    4011909    #endif
    402     Simplifier::optimize(function);
    403     ldfg.lower(function.getEntryBlock());
     1910    ldfg.lower(function);
    4041911    #ifndef NDEBUG
    4051912    PabloVerifier::verify(function, "post-lowering");
    406     #endif   
    407 }
    408 
    409 }
     1913    #endif
     1914    ldfg.ensureLegality(function.getEntryBlock());
     1915}
     1916
     1917}
  • icGREP/icgrep-devel/icgrep/pablo/passes/factorizedfg.h

    r4899 r4919  
    33
    44#include <boost/container/flat_map.hpp>
     5#include <boost/circular_buffer.hpp>
    56#include <vector>
     7#include <set>
    68
    79namespace pablo {
     
    1719    using ScopeDepth = boost::container::flat_map<const PabloBlock *, unsigned>;
    1820    using ScopeSet = std::vector<PabloBlock *>;
    19     using ASTVector = std::vector<PabloAST *>;
    20     struct OrderingMap {
    21         unsigned of(const PabloAST * const expr) const {
    22             auto f = mMap.find(expr);
    23             if (f == mMap.end()) {
    24                 return mParent ? mParent->of(expr) : 0;
    25             }
    26             return f->second;
    27         }
    28         inline void enumerate(const PabloAST * const expr) {
    29             mMap.emplace(expr, ++mOrderingCount);
    30         }
    31         OrderingMap() :  mParent(nullptr), mOrderingCount(0) {}
    32         OrderingMap(OrderingMap * const parent) :  mParent(parent), mOrderingCount(parent->mOrderingCount) {}
    33         ~OrderingMap() { if (mParent) { mParent->mOrderingCount = mOrderingCount; } }
    34     private:
    35         OrderingMap * const mParent;
    36         unsigned mOrderingCount;
    37         boost::container::flat_map<const PabloAST *, unsigned> mMap;
    38     };
     21    using NodeSet = std::vector<PabloAST *>;
     22    // using Variadics = std::vector<Variadic *>;
     23    using Variadics = boost::circular_buffer<Variadic *>;
    3924
     25    using VertexSet = std::vector<PabloAST *>;
     26    using Biclique = std::pair<VertexSet, VertexSet>; // [{Operands}, {Users}]
     27    // using BicliqueSet = boost::container::flat_set<Biclique>;
     28    using BicliqueSet = std::vector<Biclique>;
     29    using CheckSet = boost::container::flat_set<PabloAST *>;
    4030public:
    4131    static void transform(PabloFunction & function);
     
    4333    void enumerateScopeDepth(const PabloFunction & function);
    4434    void enumerateScopeDepth(const PabloBlock * const block, const unsigned depth);
    45     void cse(PabloBlock * const block);
    46     void cse(Variadic * const var);
    47     PabloBlock * chooseInsertionScope(const ASTVector & users);
    48     void findInsertionPoint(const ASTVector & operands, PabloBlock * const scope);
    49     void lower(PabloFunction & function);
    50 //    void lower(PabloBlock * const block, OrderingMap & parent);
    51 //    PabloAST * lower(Variadic * const var, PabloBlock * block, OrderingMap & order);
    52     void lower(PabloBlock * const block);
    53     Statement * lower(Variadic * const var, PabloBlock * block);
    54     FactorizeDFG() = default;
     35    static void deMorgansExpansion(Not * const var, PabloBlock * const block);
     36    static void deMorgansExpansion(PabloBlock * const block);
     37    void factor(PabloFunction & function) const;
     38    // static void factor(PabloBlock * const block, BicliqueSet & bicliques);
     39    // static void enumerateBicliques(Variadic * const var, BicliqueSet & bicliques);
     40    static BicliqueSet findAllFactoringsOf(Variadic * const var);
     41    static void independentCliqueSets(BicliqueSet & bicliques);
     42    void processBicliques(BicliqueSet & bicliques) const;
     43    // void factor(PabloBlock * const block, BicliqueSet & vars) const;
     44    void factor(PabloBlock * const block, Variadics & vars) const;
     45    void factor(Variadic * const var, Variadics & Q) const;
     46    PabloBlock * findInsertionScope(const NodeSet & users) const;
     47    CheckSet makeCheckSet(PabloBlock * const scope, const NodeSet & values) const;
     48    Statement * firstIn(PabloBlock * const scope, Statement * stmt, const NodeSet & operands) const;
     49    Statement * lastIn(PabloBlock * const scope, Statement * stmt, const NodeSet & users) const;
     50    PabloBlock * findInsertionPoint(const NodeSet & operands, const NodeSet & users) const;
     51    void lower(PabloFunction & function) const;
     52    void lower(PabloBlock * const block) const;
     53    Statement * lower(Variadic * const var, PabloBlock * block) const;
     54    unsigned scopeDepthOf(const PabloBlock * const block) const;
     55    unsigned scopeDepthOf(const PabloAST * const expr) const;
     56    static void ensureLegality(PabloBlock * const block);
     57    FactorizeDFG() : mNumOfVariadics(0) {}
    5558private:
    56     ScopeDepth  mScopeDepth;
     59    unsigned        mNumOfVariadics;
     60    ScopeDepth      mScopeDepth;
    5761};
    5862
  • icGREP/icgrep-devel/icgrep/pablo/passes/flattenassociativedfg.cpp

    r4899 r4919  
    1818 * @brief coalesce
    1919 ** ------------------------------------------------------------------------------------------------------------- */
    20 inline void FlattenAssociativeDFG::coalesce(Variadic * const var) {
     20inline Variadic * CoalesceDFG::coalesce(Variadic * const var) {
    2121    const TypeId typeId = var->getClassTypeId();
    2222    for (unsigned i = 0; i < var->getNumOperands(); ) {
     
    3636        ++i;
    3737    }
     38    return var;
    3839}
    3940
     
    4142 * @brief coalesce
    4243 ** ------------------------------------------------------------------------------------------------------------- */
    43 void FlattenAssociativeDFG::coalesce(PabloBlock * const block, const bool traverse) {
     44void CoalesceDFG::coalesce(PabloBlock * const block, const bool traverse) {
    4445    Statement * stmt = block->front();
    4546    while (stmt) {
     
    4950        } else if (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt)) {
    5051            coalesce(cast<Variadic>(stmt));
    51         } else if (isa<Not>(stmt)) {
     52        }/* else if (isa<Not>(stmt)) {
    5253            deMorgansExpansion(cast<Not>(stmt), block);
    53         }
     54        }*/
    5455        stmt = next;
    5556    }
     
    6263 * thereby allowing the Simplifier to check for tautologies and contradictions.
    6364 ** ------------------------------------------------------------------------------------------------------------- */
    64 inline void FlattenAssociativeDFG::deMorgansExpansion(Not * const var, PabloBlock * const block) {
     65inline void CoalesceDFG::deMorgansExpansion(Not * const var, PabloBlock * const block) {
    6566    PabloAST * const negatedVar = var->getOperand(0);
    6667    if (isa<And>(negatedVar) || isa<Or>(negatedVar)) {
    67         Variadic * src = cast<Variadic>(negatedVar);
    68         const unsigned operands = src->getNumOperands();
    69         Variadic * replacement = nullptr;
    70         block->setInsertPoint(var->getPrevNode());
    71         if (isa<And>(negatedVar)) {
    72             replacement = block->createOr(operands);
    73         } else {
    74             replacement = block->createAnd(operands);
    75         }
    76         block->setInsertPoint(replacement->getPrevNode());
    77         for (unsigned i = 0; i != operands; ++i) {
    78             replacement->addOperand(block->createNot(src->getOperand(i)));
    79         }
    80         coalesce(replacement);
    81         var->replaceWith(replacement, true, true);
     68        const TypeId desiredTypeId = isa<And>(negatedVar) ? TypeId::Or : TypeId::And;
     69        bool canApplyDeMorgans = true;
     70        for (PabloAST * user : var->users()) {
     71            if (desiredTypeId != user->getClassTypeId()) {
     72                canApplyDeMorgans = false;
     73                break;
     74            }
     75        }
     76        if (canApplyDeMorgans) {
     77            const unsigned operands = cast<Variadic>(negatedVar)->getNumOperands();
     78            PabloAST * negations[operands];
     79            block->setInsertPoint(var);
     80            for (unsigned i = 0; i != operands; ++i) {
     81                negations[i] = block->createNot(cast<Variadic>(negatedVar)->getOperand(i));
     82            }
     83            const unsigned users = var->getNumUses();
     84            PabloAST * user[users];
     85            std::copy(var->user_begin(), var->user_end(), user);
     86            for (unsigned i = 0; i != users; ++i) {
     87                cast<Variadic>(user[i])->deleteOperand(var);
     88                for (unsigned j = 0; j != operands; ++j) {
     89                    cast<Variadic>(user[i])->addOperand(negations[j]);
     90                }
     91            }
     92            var->eraseFromParent(true);
     93        }
    8294    }
    8395}
     
    8698 * @brief deMorgansReduction
    8799 ** ------------------------------------------------------------------------------------------------------------- */
    88 inline void FlattenAssociativeDFG::deMorgansReduction(Variadic * const var, PabloBlock * const block) {
     100inline void CoalesceDFG::deMorgansReduction(Variadic * const var, PabloBlock * const block) {
    89101    unsigned negations = 0;
    90102    for (unsigned i = 0; i < var->getNumOperands(); ++i) {
     
    117129 * @brief deMorgansReduction
    118130 ** ------------------------------------------------------------------------------------------------------------- */
    119 void FlattenAssociativeDFG::deMorgansReduction(PabloBlock * const block, const bool traverse) {
     131void CoalesceDFG::deMorgansReduction(PabloBlock * const block, const bool traverse) {
    120132    for (Statement * stmt : *block) {
    121133        if (LLVM_UNLIKELY((isa<If>(stmt) || isa<While>(stmt)) && traverse)) {
     
    134146    explicit VertexData(TypeId typeId) : typeId(typeId) { }
    135147};
    136 using Graph = adjacency_list<vecS, vecS, bidirectionalS, VertexData, Variadic *>;
    137 using Vertex = Graph::vertex_descriptor;
     148using DependencyGraph = adjacency_list<vecS, vecS, bidirectionalS, VertexData, Variadic *>;
     149using Vertex = DependencyGraph::vertex_descriptor;
    138150using SourceMap = flat_map<Assign *, Vertex>;
    139151using SinkMap = flat_map<TypeId, Vertex>;
     
    145157 * @brief addToVariadicGraph
    146158 ** ------------------------------------------------------------------------------------------------------------- */
    147 static bool addToVariadicGraph(Assign * const def, Graph & G, SourceMap & A, SinkMap & B) {
     159static bool addToVariadicGraph(Assign * const def, DependencyGraph & G, SourceMap & A, SinkMap & B) {
    148160
    149161    if (LLVM_LIKELY(A.count(def) == 0)) {
     
    191203 * bipartition A and their adjacencies to be in B.
    192204  ** ------------------------------------------------------------------------------------------------------------- */
    193 static BicliqueSet enumerateBicliques(const Graph & G, const VertexSet & A) {
     205static BicliqueSet enumerateBicliques(const DependencyGraph & G, const VertexSet & A) {
    194206    using IntersectionSets = std::set<VertexSet>;
    195207
     
    273285    auto first1 = A.begin(), last1 = A.end();
    274286    auto first2 = B.begin(), last2 = B.end();
     287    assert (std::is_sorted(first1, last1));
     288    assert (std::is_sorted(first2, last2));
    275289    while (first1 != last1 && first2 != last2) {
    276290        if (*first1 < *first2) {
     
    289303 ** ------------------------------------------------------------------------------------------------------------- */
    290304template <unsigned side>
    291 inline static BicliqueSet independentCliqueSets(BicliqueSet && cliques, const unsigned minimum) {
     305inline static BicliqueSet independentCliqueSets(BicliqueSet && bicliques, const unsigned minimum) {
    292306    using IndependentSetGraph = adjacency_list<hash_setS, vecS, undirectedS, unsigned>;
    293307
    294     const auto l = cliques.size();
     308    const auto l = bicliques.size();
    295309    IndependentSetGraph I(l);
    296310
    297     // Initialize our weights
    298     for (unsigned i = 0; i != l; ++i) {
    299         I[i] = std::pow(std::get<side>(cliques[i]).size(), 2);
    300     }
    301 
    302     // Determine our constraints
    303     for (unsigned i = 0; i != l; ++i) {
    304         for (unsigned j = i + 1; j != l; ++j) {
    305             if (intersects(std::get<side>(cliques[i]), std::get<side>(cliques[j]))) {
    306                 add_edge(i, j, I);
     311    // Initialize our weights and determine the constraints
     312    for (auto i = bicliques.begin(); i != bicliques.end(); ++i) {
     313        I[std::distance(bicliques.begin(), i)] = std::pow(std::get<side>(*i).size(), 2);
     314        for (auto j = i; ++j != bicliques.end(); ) {
     315            if (intersects(i->second, j->second) && intersects(i->first, j->first)) {
     316                add_edge(std::distance(bicliques.begin(), i), std::distance(bicliques.begin(), j), I);
    307317            }
    308318        }
     
    330340    // Sort the selected list and then remove the unselected cliques
    331341    std::sort(selected.begin(), selected.end(), std::greater<Vertex>());
    332     auto end = cliques.end();
     342    auto end = bicliques.end();
    333343    for (const unsigned offset : selected) {
    334         end = cliques.erase(cliques.begin() + offset + 1, end) - 1;
    335     }
    336     cliques.erase(cliques.begin(), end);
    337 
    338     return std::move(cliques);
     344        end = bicliques.erase(bicliques.begin() + offset + 1, end) - 1;
     345    }
     346    bicliques.erase(bicliques.begin(), end);
     347
     348    return std::move(bicliques);
    339349}
    340350
     
    342352 * @brief tryToPartiallyExtractVariadic
    343353 ** ------------------------------------------------------------------------------------------------------------- */
    344 inline void FlattenAssociativeDFG::tryToPartiallyExtractVariadic(Variadic * const var) {
    345 
     354inline void CoalesceDFG::tryToPartiallyExtractVariadic(Variadic * const var) {
    346355    for (unsigned i = 0; i < var->getNumOperands(); ++i) {
    347356        PabloAST * const op = var->getOperand(i);
    348357        if (isa<Assign>(op)) {
    349 
    350             // Have we found a variadic operation that can sunk into a nested scope?
     358            // Look for two Assign statements in this variadic; we don't want to generate a dependency graph unless
     359            // we expect some optimization can occur.
    351360            for (unsigned j = i + 1; j != var->getNumOperands(); ++j) {
    352361                bool invalid = false;
    353362                if (LLVM_UNLIKELY(matches(op, var->getOperand(j)))) {
    354                     Graph G;
     363                    DependencyGraph G;
    355364                    SourceMap A;
    356365                    SinkMap B;
     
    375384
    376385                        const auto S = independentCliqueSets<0>(std::move(enumerateBicliques(G, H)), 2);
    377                         assert (S.size() > 0);
     386                        if (LLVM_UNLIKELY(S.empty())) {
     387                            break;
     388                        }
    378389                        for (const Biclique & C : S) {
    379390                            const VertexSet & sources = std::get<0>(C);
     
    405416                                    joiner->addOperand(def->getOperand(0));                                   
    406417                                }
    407 
    408                                 coalesce(joiner);
    409                                 Assign * const joined = block->createAssign("m", joiner);
    410 
     418                                Assign * const joined = block->createAssign("m", coalesce(joiner));
    411419                                for (Assign * def : defs) {
    412420                                    def->replaceWith(joined);
     
    430438 * @brief tryToPartiallyExtractVariadic
    431439 ** ------------------------------------------------------------------------------------------------------------- */
    432 void FlattenAssociativeDFG::tryToPartiallyExtractVariadic(PabloBlock * const block) {
     440void CoalesceDFG::tryToPartiallyExtractVariadic(PabloBlock * const block) {
    433441    for (Statement * stmt = block->back(); stmt; stmt = stmt->getPrevNode()) {
    434442        if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     
    573581 * TODO: make this only iterate over the scope blocks and test the scope branch.
    574582 ** ------------------------------------------------------------------------------------------------------------- */
    575 inline void FlattenAssociativeDFG::removeFalseScopeDependencies(PabloFunction & function) {
     583inline void CoalesceDFG::removeFalseScopeDependencies(PabloFunction & function) {
    576584    ScopeDependencyGraph G;
    577585    {
     
    586594 * @brief transform
    587595 ** ------------------------------------------------------------------------------------------------------------- */
    588 void FlattenAssociativeDFG::transform(PabloFunction & function) {
    589 
    590     FlattenAssociativeDFG::coalesce(function.getEntryBlock(), true);
     596void CoalesceDFG::transform(PabloFunction & function) {
     597
     598    CoalesceDFG::coalesce(function.getEntryBlock(), true);
    591599    #ifndef NDEBUG
    592600    PabloVerifier::verify(function, "post-coalescence");
     
    595603    Simplifier::optimize(function);
    596604
    597     FlattenAssociativeDFG::deMorgansReduction(function.getEntryBlock(), true);
    598     #ifndef NDEBUG
    599     PabloVerifier::verify(function, "post-demorgans-reduction");
    600     #endif
    601 
    602     Simplifier::optimize(function);
    603 
    604     FlattenAssociativeDFG::removeFalseScopeDependencies(function);
     605//    CoalesceDFG::deMorgansReduction(function.getEntryBlock(), true);
     606//    #ifndef NDEBUG
     607//    PabloVerifier::verify(function, "post-demorgans-reduction");
     608//    #endif
     609
     610//    Simplifier::optimize(function);
     611
     612    CoalesceDFG::removeFalseScopeDependencies(function);
    605613    #ifndef NDEBUG
    606614    PabloVerifier::verify(function, "post-remove-false-scope-dependencies");
    607615    #endif
    608616
    609     FlattenAssociativeDFG::tryToPartiallyExtractVariadic(function.getEntryBlock());
     617    Simplifier::optimize(function);
     618
     619    CoalesceDFG::tryToPartiallyExtractVariadic(function.getEntryBlock());
    610620    #ifndef NDEBUG
    611621    PabloVerifier::verify(function, "post-partial-variadic-extraction");
  • icGREP/icgrep-devel/icgrep/pablo/passes/flattenassociativedfg.h

    r4899 r4919  
    1111class Assign;
    1212
    13 class FlattenAssociativeDFG {
     13class CoalesceDFG {
    1414    friend class DistributivePass;
    1515    friend class FactorizeDFG;
     
    1818protected:
    1919    static void coalesce(PabloBlock * const block, const bool traverse);
    20     static void coalesce(Variadic * const var);
     20    static Variadic *coalesce(Variadic * const var);
    2121    static void deMorgansExpansion(Not * const var, PabloBlock * const block);
    2222    static void deMorgansReduction(PabloBlock * const block, const bool traverse);
     
    2525    static void tryToPartiallyExtractVariadic(Variadic * const var);
    2626    static void removeFalseScopeDependencies(PabloFunction & function);
    27     FlattenAssociativeDFG() = default;
     27    CoalesceDFG() = default;
    2828};
    2929
  • icGREP/icgrep-devel/icgrep/pablo/printer_pablos.cpp

    r4876 r4919  
    3535        out << "Next(" << next->getName() << ") = ";
    3636        print(next->getExpr(), out);
    37     } else if (const If * ifstmt = dyn_cast<const If>(stmt)) {
    38         out << "if ";
    39         print(ifstmt->getCondition(), out);
     37    } else if (const If * ifNode = dyn_cast<const If>(stmt)) {
     38        out << "If ";
     39        print(ifNode->getCondition(), out);
    4040        if (expandNested) {
    4141            out << ":\n";
    42             print(ifstmt->getBody(), out, true, indent + BlockIndenting);
     42            print(ifNode->getBody(), out, true, indent + BlockIndenting);
    4343            out.indent(indent);
    44             out << "else:\n";
    45             print_vars(ifstmt->getDefined(), out, indent + BlockIndenting);
     44            out << "Else:\n";
     45            print_vars(ifNode->getDefined(), out, indent + BlockIndenting);
    4646        }
    4747    } else if (const While * whileNode = dyn_cast<const While>(stmt)) {
    48         out << "while ";
     48        out << "While ";
    4949        print(whileNode->getCondition(), out);
    5050        if (expandNested) {
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4870 r4919  
    3939static cl::opt<bool> DisableLog2BoundedRepetition("disable-log2-bounded-repetition", cl::init(false),
    4040                     cl::desc("disable log2 optimizations for bounded repetition of bytes"), cl::cat(fREcompilationOptions));
     41static cl::opt<bool> DisableIfHierarchy("disable-if-hierarchy-strategy", cl::init(false),
     42                     cl::desc("disable nested if hierarchy for generated Unicode classes (not recommended)"), cl::cat(fREcompilationOptions));
    4143static cl::opt<int> IfInsertionGap("if-insertion-gap", cl::init(3), cl::desc("minimum number of nonempty elements between inserted if short-circuit tests"), cl::cat(fREcompilationOptions));
    4244static cl::opt<bool> DisableMatchStar("disable-matchstar", cl::init(false),
     
    303305    if (LLVM_LIKELY(nameMap.size() > 0)) {
    304306        UCD::UCDCompiler ucdCompiler(mCCCompiler);
    305         ucdCompiler.generateWithDefaultIfHierarchy(nameMap, mPB);
     307        if (LLVM_UNLIKELY(DisableIfHierarchy)) {
     308            ucdCompiler.generateWithoutIfHierarchy(nameMap, mPB);
     309        } else {
     310            ucdCompiler.generateWithDefaultIfHierarchy(nameMap, mPB);
     311        }
    306312        for (auto t : nameMap) {
    307313            if (t.second) {
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r4909 r4919  
    2222#include <llvm/Support/TargetSelect.h>
    2323#include <llvm/Support/Host.h>
     24#include <llvm/Support/FileSystem.h>
     25
    2426
    2527#include <IDISA/idisa_avx_builder.h>
     
    5355#include <pablo/printer_pablos.h>
    5456
    55 #include "do_grep.h"
     57#include <hrtime.h>
     58#include <do_grep.h>
    5659
    5760using namespace pablo;
     
    6568static cl::opt<bool> PrintUTF8REs("print-utf8-REs", cl::init(false), cl::desc("print out UTF-8 REs"), cl::cat(cRegexOutputOptions));
    6669static cl::opt<bool> PrintSimplifiedREs("print-simplified-REs", cl::init(false), cl::desc("print out final simplified REs"), cl::cat(cRegexOutputOptions));
    67 static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options",
    68                                             "These options control printing of intermediate Pablo code.");
     70static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options", "These options control printing of intermediate Pablo code.");
    6971
    7072static cl::opt<bool> PrintOptimizedREcode("print-pablo", cl::init(false), cl::desc("print final optimized Pablo code"), cl::cat(dPabloDumpOptions));
    7173static cl::opt<bool> PrintCompiledCCcode("print-CC-pablo", cl::init(false), cl::desc("print Pablo output from character class compiler"), cl::cat(dPabloDumpOptions));
    7274static cl::opt<bool> PrintCompiledREcode("print-RE-pablo", cl::init(false), cl::desc("print Pablo output from the regular expression compiler"), cl::cat(dPabloDumpOptions));
     75static cl::opt<std::string> PabloOutputFilename("print-pablo-output", cl::init(""), cl::desc("output Pablo filename"), cl::cat(dPabloDumpOptions));
    7376
    7477static cl::OptionCategory cPabloOptimizationsOptions("Pablo Optimizations", "These options control Pablo optimization passes.");
    7578
    76 static cl::opt<bool> DisablePabloCSE("disable-CSE", cl::init(false),
    77                                      cl::desc("Disable Pablo common subexpression elimination/dead code elimination"),
     79static cl::opt<bool> DisableSimplification("disable-simplification", cl::init(false),
     80                                     cl::desc("Disable Pablo Simplification pass (not recommended)"),
    7881                                     cl::cat(cPabloOptimizationsOptions));
     82
    7983static cl::opt<bool> PabloSinkingPass("sinking", cl::init(false),
    8084                                      cl::desc("Moves all instructions into the innermost legal If-scope so that they are only executed when needed."),
    8185                                      cl::cat(cPabloOptimizationsOptions));
    8286
     87static cl::OptionCategory cMachineCodeOptimization("Machine Code Optimizations", "These options control back-end compilier optimization levels.");
     88
     89
     90static cl::opt<char> OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O0')"),
     91                              cl::cat(cMachineCodeOptimization), cl::Prefix, cl::ZeroOrMore, cl::init('0'));
     92
    8393#ifdef ENABLE_MULTIPLEXING
    8494static cl::opt<bool> PrintUnloweredCode("print-unlowered-pablo", cl::init(false), cl::desc("print Pablo output prior to lowering. "), cl::cat(dPabloDumpOptions));
     
    91101                                        cl::desc("maximum size of any candidate multiplexing set."),
    92102                                        cl::cat(cPabloOptimizationsOptions));
     103
    93104static cl::opt<unsigned> MultiplexingSelectionLimit("multiplexing-selection-limit", cl::init(100),
    94105                                        cl::desc("maximum number of selections from any partial candidate multiplexing set."),
    95106                                        cl::cat(cPabloOptimizationsOptions));
     107
    96108static cl::opt<unsigned> MultiplexingWindowSize("multiplexing-window-size", cl::init(1),
    97109                                        cl::desc("maximum depth difference for computing mutual exclusion of Advance nodes."),
     
    101113                                         cl::desc("coalesce associative functions prior to optimization passes."),
    102114                                         cl::cat(cPabloOptimizationsOptions));
     115
    103116static cl::opt<bool> EnablePreDistribution("pre-dist", cl::init(false),
    104                                          cl::desc("apply distribution law optimization."),
     117                                         cl::desc("apply distribution law optimization prior to multiplexing."),
    105118                                         cl::cat(cPabloOptimizationsOptions));
     119
    106120static cl::opt<bool> EnablePostDistribution("post-dist", cl::init(false),
    107                                          cl::desc("apply distribution law optimization."),
     121                                         cl::desc("apply distribution law optimization after multiplexing."),
     122                                         cl::cat(cPabloOptimizationsOptions));
     123
     124static cl::opt<bool> EnablePrePassScheduling("pre-pass-scheduling", cl::init(false),
     125                                         cl::desc("apply pre-pass scheduling prior to LLVM IR generation."),
    108126                                         cl::cat(cPabloOptimizationsOptions));
    109127#endif
     
    154172}
    155173
    156 void pablo_function_passes(PabloFunction * function) {
     174#ifdef PRINT_TIMING_INFORMATION
     175#define READ_CYCLE_COUNTER(name) name = read_cycle_counter();
     176#else
     177#define READ_CYCLE_COUNTER(name)
     178#endif
     179
     180#ifdef PRINT_TIMING_INFORMATION
     181unsigned COUNT_STATEMENTS(const PabloFunction * const entry) {
     182    std::stack<const Statement *> scope;
     183    unsigned statements = 0;
     184    // Scan through and collect all the advances, calls, scanthrus and matchstars ...
     185    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
     186        while ( stmt ) {
     187            ++statements;
     188            if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     189                // Set the next statement to be the first statement of the inner scope and push the
     190                // next statement of the current statement into the scope stack.
     191                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
     192                scope.push(stmt->getNextNode());
     193                stmt = nested->front();
     194                assert (stmt);
     195                continue;
     196            }
     197            stmt = stmt->getNextNode();
     198        }
     199        if (scope.empty()) {
     200            break;
     201        }
     202        stmt = scope.top();
     203        scope.pop();
     204    }
     205    return statements;
     206}
     207
     208unsigned COUNT_ADVANCES(const PabloFunction * const entry) {
     209
     210    std::stack<const Statement *> scope;
     211    unsigned advances = 0;
     212
     213    // Scan through and collect all the advances, calls, scanthrus and matchstars ...
     214    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
     215        while ( stmt ) {
     216            if (isa<Advance>(stmt)) {
     217                ++advances;
     218            }
     219            else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     220                // Set the next statement to be the first statement of the inner scope and push the
     221                // next statement of the current statement into the scope stack.
     222                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
     223                scope.push(stmt->getNextNode());
     224                stmt = nested->front();
     225                assert (stmt);
     226                continue;
     227            }
     228            stmt = stmt->getNextNode();
     229        }
     230        if (scope.empty()) {
     231            break;
     232        }
     233        stmt = scope.top();
     234        scope.pop();
     235    }
     236    return advances;
     237}
     238
     239using DistributionMap = boost::container::flat_map<unsigned, unsigned>;
     240
     241DistributionMap SUMMARIZE_VARIADIC_DISTRIBUTION(const PabloFunction * const entry) {
     242    std::stack<const Statement *> scope;
     243    DistributionMap distribution;
     244    // Scan through and collect all the advances, calls, scanthrus and matchstars ...
     245    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
     246        while ( stmt ) {
     247            if (isa<Variadic>(stmt)) {
     248                auto f = distribution.find(stmt->getNumOperands());
     249                if (f == distribution.end()) {
     250                    distribution.emplace(stmt->getNumOperands(), 1);
     251                } else {
     252                    f->second += 1;
     253                }
     254            }
     255            else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     256                // Set the next statement to be the first statement of the inner scope and push the
     257                // next statement of the current statement into the scope stack.
     258                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
     259                scope.push(stmt->getNextNode());
     260                stmt = nested->front();
     261                assert (stmt);
     262                continue;
     263            }
     264            stmt = stmt->getNextNode();
     265        }
     266        if (scope.empty()) {
     267            break;
     268        }
     269        stmt = scope.top();
     270        scope.pop();
     271    }
     272    return distribution;
     273}
     274#endif
     275
     276void pablo_function_passes(PabloFunction * function) {   
    157277    // Scan through the pablo code and perform DCE and CSE
    158     if (!DisablePabloCSE) {
     278
     279#ifdef PRINT_TIMING_INFORMATION
     280    timestamp_t simplification_start = 0, simplification_end = 0;
     281    timestamp_t coalescing_start = 0, coalescing_end = 0;
     282    timestamp_t sinking_start = 0, sinking_end = 0;
     283    timestamp_t pre_distribution_start = 0, pre_distribution_end = 0;
     284    timestamp_t multiplexing_start = 0, multiplexing_end = 0;
     285    timestamp_t post_distribution_start = 0, post_distribution_end = 0;
     286    timestamp_t lowering_start = 0, lowering_end = 0;
     287    timestamp_t scheduling_start = 0, scheduling_end = 0;
     288    DistributionMap distribution;
     289    const timestamp_t optimization_start = read_cycle_counter();
     290#endif
     291    if (!DisableSimplification) {
     292        READ_CYCLE_COUNTER(simplification_start);
    159293        Simplifier::optimize(*function);
     294        READ_CYCLE_COUNTER(simplification_end);
    160295    }
    161296#ifdef ENABLE_MULTIPLEXING
    162     if (EnableLowering || EnablePreDistribution || EnablePostDistribution || EnableMultiplexing) {
    163         FlattenAssociativeDFG::transform(*function);
     297    if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
     298        READ_CYCLE_COUNTER(coalescing_start);
     299        CoalesceDFG::transform(*function);
     300        READ_CYCLE_COUNTER(coalescing_end);
     301    }
     302    if (EnablePreDistribution) {
     303        READ_CYCLE_COUNTER(pre_distribution_start);
     304        DistributivePass::optimize(*function);
     305        READ_CYCLE_COUNTER(pre_distribution_end);
     306    }
     307    if (EnableMultiplexing) {
     308        READ_CYCLE_COUNTER(multiplexing_start);
     309        MultiplexingPass::optimize(*function, MultiplexingSetLimit, MultiplexingSelectionLimit, MultiplexingWindowSize);
     310        READ_CYCLE_COUNTER(multiplexing_end);
     311        if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
     312            CoalesceDFG::transform(*function);
     313        }
     314    }
     315    if (EnablePostDistribution) {
     316        READ_CYCLE_COUNTER(post_distribution_start);
     317        DistributivePass::optimize(*function);
     318        READ_CYCLE_COUNTER(post_distribution_end);
    164319    }
    165320#endif
    166321    if (PabloSinkingPass) {
     322        READ_CYCLE_COUNTER(sinking_start);
    167323        CodeMotionPass::optimize(*function);
    168     }
    169 #ifdef ENABLE_MULTIPLEXING   
    170     if (EnablePreDistribution) {
    171         DistributivePass::optimize(*function);
    172     }
    173     if (EnableMultiplexing) {
    174         MultiplexingPass::optimize(*function, MultiplexingSetLimit, MultiplexingSelectionLimit, MultiplexingWindowSize);
    175     }
    176     if (EnablePostDistribution) {
    177         DistributivePass::optimize(*function);
    178     }
    179     SchedulingPrePass::optimize(*function);
     324        READ_CYCLE_COUNTER(sinking_end);
     325    }
     326#ifdef ENABLE_MULTIPLEXING
    180327    if (PrintUnloweredCode) {
    181328        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
     
    183330        cerr << "Unlowered Pablo AST:\n";
    184331        PabloPrinter::print(*function, cerr);
    185     }
    186     if (EnableLowering || EnablePreDistribution || EnablePostDistribution || EnableMultiplexing) {
     332    }   
     333    #ifdef PRINT_TIMING_INFORMATION
     334    distribution = SUMMARIZE_VARIADIC_DISTRIBUTION(function);
     335    #endif
     336    if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
     337        READ_CYCLE_COUNTER(lowering_start);
    187338        FactorizeDFG::transform(*function);
    188     }
     339        READ_CYCLE_COUNTER(lowering_end);
     340    }
     341    if (EnablePrePassScheduling) {
     342        READ_CYCLE_COUNTER(scheduling_start);
     343        SchedulingPrePass::optimize(*function);
     344        READ_CYCLE_COUNTER(scheduling_end);
     345    }
     346#endif
     347#ifdef PRINT_TIMING_INFORMATION
     348    const timestamp_t optimization_end = read_cycle_counter();
    189349#endif
    190350    if (PrintOptimizedREcode) {
    191         PabloVerifier::verify(*function, "post-optimization");
    192         //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
    193         llvm::raw_os_ostream cerr(std::cerr);
    194         cerr << "Final Pablo AST:\n";
    195         PabloPrinter::print(*function, cerr);
    196     }
     351        if (PabloOutputFilename.empty()) {
     352            //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
     353            llvm::raw_os_ostream cerr(std::cerr);
     354            cerr << "Final Pablo AST:\n";
     355            PabloPrinter::print(*function, cerr);
     356        } else {
     357            std::error_code error;
     358            llvm::raw_fd_ostream out(PabloOutputFilename, error, sys::fs::OpenFlags::F_None);
     359            PabloPrinter::print(*function, out);
     360        }
     361    }
     362#ifdef PRINT_TIMING_INFORMATION
     363    std::cerr << "PABLO OPTIMIZATION TIME: " << (optimization_end - optimization_start) << std::endl;
     364    std::cerr << "  SIMPLIFICATION TIME: " << (simplification_end - simplification_start) << std::endl;
     365    std::cerr << "  COALESCING TIME: " << (coalescing_end - coalescing_start) << std::endl;
     366    std::cerr << "  SINKING TIME: " << (sinking_end - sinking_start) << std::endl;
     367    std::cerr << "  PRE-DISTRIBUTION TIME: " << (pre_distribution_end - pre_distribution_start) << std::endl;
     368    std::cerr << "  MULTIPLEXING TIME: " << (multiplexing_end - multiplexing_start) << std::endl;
     369    std::cerr << "  MULTIPLEXING SEED: " << MultiplexingPass::SEED << std::endl;
     370    std::cerr << "  MULTIPLEXING NODES USED: " << MultiplexingPass::NODES_USED << std::endl;
     371    std::cerr << "  MULTIPLEXING NODES ALLOCATED: " << MultiplexingPass::NODES_ALLOCATED << std::endl;
     372    std::cerr << "  LOWERING TIME: " << (lowering_end - lowering_start) << std::endl;
     373    std::cerr << "  POST-DISTRIBUTION TIME: " << (post_distribution_end - post_distribution_start) << std::endl;
     374    std::cerr << "  SCHEDULING TIME: " << (scheduling_end - scheduling_start) << std::endl;
     375    std::cerr << "PABLO STATEMENTS: " << COUNT_STATEMENTS(function) << std::endl;
     376    std::cerr << "PABLO ADVANCES: " << COUNT_ADVANCES(function) << std::endl;
     377    std::cerr << "PRE-LOWERING VARIADIC DISTRIBUTION: ";
     378    bool join = false;
     379    for (auto dist : distribution) {
     380        if (join) {
     381            std::cerr << ';';
     382        }
     383        std::cerr << dist.first << '|' << dist.second;
     384        join = true;
     385    }
     386    std::cerr << std::endl;
     387#endif
    197388}
    198389
     
    232423    builder.setErrorStr(&errMessage);
    233424    builder.setMCPU(sys::getHostCPUName());
    234     builder.setOptLevel(CodeGenOpt::Level::None);
     425    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
     426    switch (OptLevel) {
     427        case '0': optLevel = CodeGenOpt::None; break;
     428        case '1': optLevel = CodeGenOpt::Less; break;
     429        case '2': optLevel = CodeGenOpt::Default; break;
     430        case '3': optLevel = CodeGenOpt::Aggressive; break;
     431        default: errs() << OptLevel << " is an invalid optimization level.\n";
     432    }
     433    builder.setOptLevel(optLevel);
    235434
    236435#if (BLOCK_SIZE == 256)
Note: See TracChangeset for help on using the changeset viewer.