Changeset 4919
- Timestamp:
- Jan 21, 2016, 5:15:33 PM (3 years ago)
- Location:
- icGREP/icgrep-devel/icgrep
- Files:
-
- 3 added
- 24 edited
Legend:
- Unmodified
- Added
- Removed
-
icGREP/icgrep-devel/icgrep/CMakeLists.txt
r4908 r4919 17 17 option(USE_BOOST_MMAP "Using mmap from Boost.Iostreams") 18 18 option(ENABLE_PREGENERATED_UCD_FUNCTIONS "Enable compiling the pregenerated UCD functions") 19 19 option(PRINT_TIMING_INFORMATION "Write compilation and execution timing information to standard error stream") 20 20 21 21 # configure a header file to pass some of the CMake settings … … 37 37 38 38 # We incorporate the CMake features provided by LLVM: 39 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${LLVM_ROOT}/share/llvm/cmake") 39 set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${LLVM_ROOT}/share/llvm/cmake;${CMAKE_CURRENT_SOURCE_DIR}/cmake") 40 40 41 include(LLVMConfig) 41 42 … … 47 48 # Let's suppose we want to build a JIT compiler with support for 48 49 # binary code (no interpreter): 49 llvm_map_components_to_libnames(REQ_LLVM_LIBRARIES mcjit native IRReader) 50 llvm_map_components_to_libnames(REQ_LLVM_LIBRARIES mcjit native IRReader) # ipo 50 51 51 52 message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") … … 124 125 target_link_libraries(icgrep ${Boost_LIBRARIES}) 125 126 ENDIF() 127 IF (PRINT_TIMING_INFORMATION) 128 find_package(PAPI REQUIRED) 129 include_directories(${PAPI_INCLUDE_DIRS}) 130 target_link_libraries(icgrep ${PAPI_LIBRARIES}) 131 ENDIF() 132 126 133 127 134 target_link_libraries (icgrep UCDlib PabloADT RegExpCompiler CCADT ${REQ_LLVM_LIBRARIES}) … … 314 321 ENDIF() 315 322 323 IF (PRINT_TIMING_INFORMATION) 324 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPRINT_TIMING_INFORMATION") 325 ENDIF() 326 316 327 SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG") 317 328 IF (${CMAKE_SYSTEM} MATCHES "Linux") -
icGREP/icgrep-devel/icgrep/hrtime.h
r3851 r4919 36 36 } 37 37 38 typedef uint64_t timestamp_t; 39 38 40 // get the number of CPU cycles since startup using rdtsc instruction 39 inline unsigned long long get_hrcycles() { 40 unsigned int tmp[2]; 41 asm ("rdtsc" : "=a" (tmp[1]), "=d" (tmp[0])); 42 return (((unsigned long long)tmp[0] << 32 | tmp[1])); 41 static inline timestamp_t read_cycle_counter() { 42 #ifdef __GNUC__ 43 timestamp_t ts; 44 #ifdef __x86_64__ 45 unsigned int eax, edx; 46 asm volatile("rdtsc" : "=a" (eax), "=d" (edx)); 47 ts = ((timestamp_t) eax) | (((timestamp_t) edx) << 32); 48 #else 49 asm volatile("rdtsc\n" : "=A" (ts)); 50 #endif 51 return(ts); 52 #endif 53 #ifdef _MSC_VER 54 return __rdtsc(); 55 #endif 43 56 } 44 57 … … 48 61 if (CPU_HZ == 0) 49 62 CPU_HZ = getMHZ() * 1000000; 50 return ( get_hrcycles() / CPU_HZ);63 return (read_cycle_counter() / CPU_HZ); 51 64 } 52 65 -
icGREP/icgrep-devel/icgrep/icgrep-devel.files
r4896 r4919 493 493 pablo/optimizers/schedulingprepass.h 494 494 pablo/optimizers/schedulingprepass.cpp 495 papi_helper.hpp -
icGREP/icgrep-devel/icgrep/icgrep-devel.includes
r4876 r4919 12 12 ../buddy-2.4/src 13 13 pablo/passes 14 ../llvm-build/lib -
icGREP/icgrep-devel/icgrep/icgrep.cpp
r4917 r4919 25 25 #include <llvm/Support/Host.h> 26 26 #include <llvm/IR/Verifier.h> 27 28 27 #include <kernels/s2p_gen.h> 29 28 #include <kernels/scanmatchgen.h> … … 36 35 #include <pablo/function.h> 37 36 38 #include "do_grep.h" 37 #include <iostream> 38 39 #include <do_grep.h> 40 #include <hrtime.h> 41 42 #ifdef PRINT_TIMING_INFORMATION 43 #include "papi_helper.hpp" 44 #endif 39 45 40 46 static cl::OptionCategory aRegexSourceOptions("Regular Expression Options", … … 61 67 static cl::opt<std::string> IRFileName("precompiled", cl::desc("Use precompiled regular expression"), cl::value_desc("LLVM IR file"), cl::init(""), cl::cat(aRegexSourceOptions)); 62 68 63 69 using namespace llvm; 64 70 65 71 static unsigned firstInputFile = 1; // Normal case when first positional arg is a regex. … … 154 160 re_ast = regular_expression_passes(encoding, re_ast); 155 161 162 #ifdef PRINT_TIMING_INFORMATION 163 const timestamp_t regex_compilation_start = read_cycle_counter(); 164 #endif 156 165 pablo::PabloFunction * function = re2pablo_compiler(encoding, re_ast); 166 #ifdef PRINT_TIMING_INFORMATION 167 const timestamp_t regex_compilation_end = read_cycle_counter(); 168 std::cerr << "REGEX COMPILATION TIME: " << (regex_compilation_end - regex_compilation_start) << std::endl; 169 #endif 157 170 158 171 pablo_function_passes(function); 159 160 172 161 173 pablo::PabloCompiler pablo_compiler(M, idb); … … 181 193 llvm::Function * s2p_IR = M->getFunction("s2p_block"); 182 194 183 llvm::Function * scanRoutine = M->getFunction("scan_matches_in_bitblock");195 // llvm::Function * scanRoutine = M->getFunction("scan_matches_in_bitblock"); 184 196 185 197 if (s2p_IR == nullptr) { … … 200 212 void * icgrep_MCptr = engine->getPointerToFunction(icgrep_IR); 201 213 void * s2p_MCptr = engine->getPointerToFunction(s2p_IR); 202 void * scan_MCptr = engine->getPointerToFunction(scanRoutine);214 // void * scan_MCptr = engine->getPointerToFunction(scanRoutine); 203 215 if (s2p_MCptr == nullptr) { 204 216 std::cerr << "No s2p_MCptr!\n"; 205 217 exit(1); 206 218 } 207 219 208 220 if (icgrep_MCptr) { 209 221 GrepExecutor grepEngine(s2p_MCptr, icgrep_init_carry_ptr, icgrep_MCptr); … … 215 227 grepEngine.setShowFileNameOption(); 216 228 } 229 #ifdef PRINT_TIMING_INFORMATION 230 papi::PapiCounter<4> papiCounters({PAPI_RES_STL, PAPI_STL_CCY, PAPI_FUL_CCY, PAPI_MEM_WCY}); 231 #endif 217 232 for (unsigned i = firstInputFile; i != inputFiles.size(); ++i) { 233 #ifdef PRINT_TIMING_INFORMATION 234 papiCounters.start(); 235 const timestamp_t execution_start = read_cycle_counter(); 236 #endif 218 237 grepEngine.doGrep(inputFiles[i]); 238 #ifdef PRINT_TIMING_INFORMATION 239 const timestamp_t execution_end = read_cycle_counter(); 240 papiCounters.stop(); 241 std::cerr << "EXECUTION TIME: " << inputFiles[i] << ":" << "CYCLES|" << (execution_end - execution_start) << papiCounters << std::endl; 242 #endif 219 243 } 220 244 } -
icGREP/icgrep-devel/icgrep/pablo/analysis/pabloverifier.cpp
r4899 r4919 169 169 PabloPrinter::print(user, str); 170 170 if (count == 0) { 171 str << " is not recorded in";171 str << " is not considered a user of "; 172 172 } else if (count > 0) { 173 str << " is recorded" << count << " times in";173 str << " was recorded too many times (" << count << ") in the user list of "; 174 174 } 175 175 PabloPrinter::print(def, str); 176 str << "'s user list."; 176 throw std::runtime_error(str.str()); 177 } 178 179 /** ------------------------------------------------------------------------------------------------------------- * 180 * @brief throwReportedScopeError 181 ** ------------------------------------------------------------------------------------------------------------- */ 182 static void throwReportedScopeError(const Statement * const stmt) { 183 std::string tmp; 184 raw_string_ostream str(tmp); 185 str << "PabloVerifier: structure error: "; 186 PabloPrinter::print(stmt, str); 187 str << " is not contained in its reported scope block"; 188 throw std::runtime_error(str.str()); 189 } 190 191 /** ------------------------------------------------------------------------------------------------------------- * 192 * @brief throwMisreportedBranchError 193 ** ------------------------------------------------------------------------------------------------------------- */ 194 static void throwMisreportedBranchError(const Statement * const stmt, const Statement * const branch) { 195 std::string tmp; 196 raw_string_ostream str(tmp); 197 str << "PabloVerifier: structure error: "; 198 PabloPrinter::print(stmt, str); 199 str << " branches into a scope block that reports "; 200 PabloPrinter::print(stmt, str); 201 str << " as its branching statement."; 202 throw std::runtime_error(str.str()); 203 } 204 205 /** ------------------------------------------------------------------------------------------------------------- * 206 * @brief throwReflexiveIfConditionError 207 ** ------------------------------------------------------------------------------------------------------------- */ 208 static void throwReflexiveIfConditionError(const PabloAST * const ifNode) { 209 std::string tmp; 210 raw_string_ostream str(tmp); 211 str << "PabloVerifier: structure error: the condition of "; 212 PabloPrinter::print(ifNode, str); 213 str << " cannot be defined by the If node itself."; 177 214 throw std::runtime_error(str.str()); 178 215 } … … 206 243 if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) { 207 244 const PabloBlock * nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody(); 208 if (LLVM_UNLIKELY(nested->getParent() != block)) { 209 std::string tmp; 210 raw_string_ostream str(tmp); 211 str << "PabloVerifier: structure error: body of "; 212 PabloPrinter::print(stmt, str); 213 str << " is not nested within the expected scope block"; 214 throw std::runtime_error(str.str()); 245 if (LLVM_UNLIKELY(nested->getBranch() != stmt)) { 246 throwMisreportedBranchError(stmt, nested->getBranch()); 247 } else if (LLVM_UNLIKELY(nested->getParent() != block)) { 248 throwReportedScopeError(stmt); 215 249 } 216 250 if (isa<If>(stmt)) { 217 251 for (const Assign * def : cast<If>(stmt)->getDefined()) { 218 252 if (LLVM_UNLIKELY(def == cast<If>(stmt)->getCondition())) { 219 std::string tmp; 220 raw_string_ostream str(tmp); 221 str << "PabloVerifier: structure error: the condition of "; 222 PabloPrinter::print(cast<PabloAST>(stmt), str); 223 str << " cannot be defined by the If node itself."; 224 throw std::runtime_error(str.str()); 225 } 226 } 227 } 228 if (isa<If>(stmt)) { 229 for (const Assign * def : cast<If>(stmt)->getDefined()) { 230 if (LLVM_UNLIKELY(unreachable(def, nested))) { 253 throwReflexiveIfConditionError(stmt); 254 } else if (LLVM_UNLIKELY(unreachable(def, nested))) { 231 255 throwUncontainedEscapedValueError(stmt, def); 232 256 } … … 250 274 } 251 275 count = std::count(var->user_begin(), var->user_end(), stmt); 252 if (LLVM_UNLIKELY(count != 1)) {276 if (LLVM_UNLIKELY(count != ((cast<While>(stmt)->getCondition() == var) ? 2 : 1))) { 253 277 throwEscapedValueUsageError(stmt, var, var, stmt, count); 254 278 } -
icGREP/icgrep-devel/icgrep/pablo/optimizers/booleanreassociationpass.cpp
r4899 r4919 22 22 23 23 using TypeId = PabloAST::ClassTypeId; 24 using Graph = BooleanReassociationPass::Graph;25 using Vertex = Graph::vertex_descriptor;24 using DependencyGraph = BooleanReassociationPass::Graph; 25 using Vertex = DependencyGraph::vertex_descriptor; 26 26 using VertexData = BooleanReassociationPass::VertexData; 27 27 using SinkMap = BooleanReassociationPass::Map; 28 28 using DistributionGraph = adjacency_list<hash_setS, vecS, bidirectionalS, Vertex>; 29 using DistributionMap = flat_map< Graph::vertex_descriptor, DistributionGraph::vertex_descriptor>;29 using DistributionMap = flat_map<DependencyGraph::vertex_descriptor, DistributionGraph::vertex_descriptor>; 30 30 using VertexSet = std::vector<Vertex>; 31 31 using VertexSets = std::vector<VertexSet>; … … 39 39 ** ------------------------------------------------------------------------------------------------------------- */ 40 40 template<typename Iterator> 41 inline Graph::edge_descriptor first(const std::pair<Iterator, Iterator> & range) {41 inline DependencyGraph::edge_descriptor first(const std::pair<Iterator, Iterator> & range) { 42 42 assert (range.first != range.second); 43 43 return *range.first; … … 45 45 46 46 #ifndef NDEBUG 47 static bool no_path(const Vertex u, const Vertex v, const Graph & G) {47 static bool no_path(const Vertex u, const Vertex v, const DependencyGraph & G) { 48 48 if (u == v) return false; 49 49 flat_set<Vertex> V; … … 70 70 #endif 71 71 72 inline void add_edge(PabloAST * expr, const Vertex u, const Vertex v, Graph & G) {72 inline void add_edge(PabloAST * expr, const Vertex u, const Vertex v, DependencyGraph & G) { 73 73 assert (no_path(v, u, G)); 74 74 // Make sure each edge is unique … … 821 821 * there are not enough vertices in the biclique to make distribution profitable, eliminate the clique. 822 822 ** ------------------------------------------------------------------------------------------------------------- */ 823 static BicliqueSet && removeUnhelpfulBicliques(BicliqueSet && cliques, const Graph & G, DistributionGraph & H) {823 static BicliqueSet && removeUnhelpfulBicliques(BicliqueSet && cliques, const DependencyGraph & G, DistributionGraph & H) { 824 824 for (auto ci = cliques.begin(); ci != cliques.end(); ) { 825 825 const auto cardinalityA = std::get<0>(*ci).size(); … … 844 844 * @brief safeDistributionSets 845 845 ** ------------------------------------------------------------------------------------------------------------- */ 846 static DistributionSets safeDistributionSets(const Graph & G, DistributionGraph & H) {846 static DistributionSets safeDistributionSets(const DependencyGraph & G, DistributionGraph & H) { 847 847 848 848 VertexSet sinks; … … 868 868 * @brief generateDistributionGraph 869 869 ** ------------------------------------------------------------------------------------------------------------- */ 870 void generateDistributionGraph(const Graph & G, DistributionGraph & H) {870 void generateDistributionGraph(const DependencyGraph & G, DistributionGraph & H) { 871 871 DistributionMap M; 872 872 for (const Vertex u : make_iterator_range(vertices(G))) { -
icGREP/icgrep-devel/icgrep/pablo/optimizers/codemotionpass.cpp
r4896 r4919 13 13 ** ------------------------------------------------------------------------------------------------------------- */ 14 14 bool CodeMotionPass::optimize(PabloFunction & function) { 15 CodeMotionPass:: process(function.getEntryBlock());15 CodeMotionPass::global(function.getEntryBlock()); 16 16 #ifndef NDEBUG 17 17 PabloVerifier::verify(function, "post-code-motion"); … … 23 23 * @brief process 24 24 ** ------------------------------------------------------------------------------------------------------------- */ 25 void CodeMotionPass:: process(PabloBlock * const block) {25 void CodeMotionPass::global(PabloBlock * const block) { 26 26 sink(block); 27 27 for (Statement * stmt : *block) { 28 28 if (isa<If>(stmt)) { 29 process(cast<If>(stmt)->getBody());29 global(cast<If>(stmt)->getBody()); 30 30 } else if (isa<While>(stmt)) { 31 process(cast<While>(stmt)->getBody());31 global(cast<While>(stmt)->getBody()); 32 32 // TODO: if we analyzed the probability of this loop being executed once, twice, or many times, we could 33 33 // determine whether hoisting will helpful or harmful to the expected run time. … … 132 132 // must be the LCA of our original scopes. 133 133 while (scope1 != scope2) { 134 assert (scope1 && scope2); 134 135 scope1 = scope1->getParent(); 135 136 scope2 = scope2->getParent(); 136 137 } 137 assert (scope1 && scope2);138 assert (scope1); 138 139 // But if the LCA is the current block, we can't sink the statement. 139 140 if (scope1 == block) { -
icGREP/icgrep-devel/icgrep/pablo/optimizers/codemotionpass.h
r4896 r4919 31 31 static bool optimize(PabloFunction & function); 32 32 protected: 33 static void process(PabloBlock * const block);33 static void global(PabloBlock * const block); 34 34 static bool isAcceptableTarget(Statement *stmt, ScopeSet & scopeSet, const PabloBlock * const block); 35 35 static void sink(PabloBlock * const block); -
icGREP/icgrep-devel/icgrep/pablo/optimizers/distributivepass.cpp
r4899 r4919 14 14 namespace pablo { 15 15 16 using Graph = adjacency_list<hash_setS, vecS, bidirectionalS, PabloAST *>;17 using Vertex = Graph::vertex_descriptor;16 using DependencyGraph = adjacency_list<hash_setS, vecS, bidirectionalS, PabloAST *>; 17 using Vertex = DependencyGraph::vertex_descriptor; 18 18 using SinkMap = flat_map<PabloAST *, Vertex>; 19 19 using VertexSet = std::vector<Vertex>; … … 28 28 * @brief getVertex 29 29 ** ------------------------------------------------------------------------------------------------------------- */ 30 static inline Vertex getVertex(PabloAST * value, Graph & G, SinkMap & M) {30 static inline Vertex getVertex(PabloAST * value, DependencyGraph & G, SinkMap & M) { 31 31 const auto f = M.find(value); 32 32 if (f != M.end()) { … … 43 43 * Generate a graph G describing the potential applications of the distributive law for the given block. 44 44 ** ------------------------------------------------------------------------------------------------------------- */ 45 VertexSet generateDistributionGraph(PabloBlock * block, Graph & G) {45 VertexSet generateDistributionGraph(PabloBlock * block, DependencyGraph & G) { 46 46 SinkMap M; 47 47 VertexSet distSet; … … 126 126 ** ------------------------------------------------------------------------------------------------------------- */ 127 127 template <unsigned side> 128 inline static BicliqueSet && independentCliqueSets(BicliqueSet && cliques, const unsigned minimum) {128 inline static BicliqueSet && independentCliqueSets(BicliqueSet && bicliques, const unsigned minimum) { 129 129 using IndependentSetGraph = adjacency_list<hash_setS, vecS, undirectedS, unsigned>; 130 130 131 const auto l = cliques.size();131 const auto l = bicliques.size(); 132 132 IndependentSetGraph I(l); 133 133 134 // Initialize our weights 134 // Initialize our weights and determine the constraints 135 135 for (unsigned i = 0; i != l; ++i) { 136 I[i] = std::pow(std::get<side>(cliques[i]).size(), 2); 137 } 138 139 // Determine our constraints 140 for (unsigned i = 0; i != l; ++i) { 136 I[i] = std::pow(std::get<side>(bicliques[i]).size(), 2); 141 137 for (unsigned j = i + 1; j != l; ++j) { 142 if (intersects(std::get<side>( cliques[i]), std::get<side>(cliques[j]))) {138 if (intersects(std::get<side>(bicliques[i]), std::get<side>(bicliques[j]))) { 143 139 add_edge(i, j, I); 144 140 } 145 141 } 146 142 } 143 144 // // Initialize our weights and determine the constraints 145 // for (auto i = bicliques.begin(); i != bicliques.end(); ++i) { 146 // I[std::distance(bicliques.begin(), i)] = std::pow(std::get<side>(*i).size(), 2); 147 // for (auto j = i; ++j != bicliques.end(); ) { 148 // if (intersects(i->second, j->second) && intersects(i->first, j->first)) { 149 // add_edge(std::distance(bicliques.begin(), i), std::distance(bicliques.begin(), j), I); 150 // } 151 // } 152 // } 147 153 148 154 // Use the greedy algorithm to choose our independent set … … 167 173 // Sort the selected list and then remove the unselected cliques 168 174 std::sort(selected.begin(), selected.end(), std::greater<Vertex>()); 169 auto end = cliques.end();175 auto end = bicliques.end(); 170 176 for (const unsigned offset : selected) { 171 end = cliques.erase(cliques.begin() + offset + 1, end) - 1;172 } 173 cliques.erase(cliques.begin(), end);174 175 return std::move( cliques);177 end = bicliques.erase(bicliques.begin() + offset + 1, end) - 1; 178 } 179 bicliques.erase(bicliques.begin(), end); 180 181 return std::move(bicliques); 176 182 } 177 183 … … 183 189 * bipartition A and their adjacencies to be in B. 184 190 ** ------------------------------------------------------------------------------------------------------------- */ 185 static BicliqueSet enumerateBicliques(const Graph & G, const VertexSet & A) {191 static BicliqueSet enumerateBicliques(const DependencyGraph & G, const VertexSet & A) { 186 192 using IntersectionSets = std::set<VertexSet>; 187 193 … … 265 271 * there are not enough vertices in the biclique to make distribution profitable, eliminate the clique. 266 272 ** ------------------------------------------------------------------------------------------------------------- */ 267 static BicliqueSet && removeUnhelpfulBicliques(BicliqueSet && cliques, Graph & G) {273 static BicliqueSet && removeUnhelpfulBicliques(BicliqueSet && cliques, DependencyGraph & G) { 268 274 for (auto ci = cliques.begin(); ci != cliques.end(); ) { 269 275 const auto cardinalityA = std::get<0>(*ci).size(); … … 288 294 * @brief safeDistributionSets 289 295 ** ------------------------------------------------------------------------------------------------------------- */ 290 static DistributionSets safeDistributionSets( Graph & G, const VertexSet & distSet) {296 static DistributionSets safeDistributionSets(DependencyGraph & G, const VertexSet & distSet) { 291 297 DistributionSets T; 292 298 BicliqueSet lowerSet = independentCliqueSets<1>(removeUnhelpfulBicliques(enumerateBicliques(G, distSet), G), 1); … … 309 315 // FlattenAssociativeDFG::coalesce(block, false); 310 316 311 Graph G;317 DependencyGraph G; 312 318 313 319 const VertexSet distSet = generateDistributionGraph(block, G); … … 350 356 outerOp->addOperand(G[u]); 351 357 } 352 FlattenAssociativeDFG::coalesce(outerOp);358 CoalesceDFG::coalesce(outerOp); 353 359 354 360 for (const Vertex u : sources) { … … 359 365 } 360 366 innerOp->addOperand(outerOp); 361 FlattenAssociativeDFG::coalesce(innerOp);367 CoalesceDFG::coalesce(innerOp); 362 368 363 369 for (const Vertex u : sinks) { 364 Variadic * const resultOp = cast<Variadic>(G[u]); 365 resultOp->addOperand(innerOp); 366 FlattenAssociativeDFG::coalesce(resultOp); 370 cast<Variadic>(G[u])->addOperand(innerOp); 367 371 } 368 372 } … … 391 395 #endif 392 396 Simplifier::optimize(function); 393 // FlattenAssociativeDFG::deMorgansReduction(function.getEntryBlock()); 394 // #ifndef NDEBUG 395 // PabloVerifier::verify(function, "post-demorgans-reduction"); 396 // #endif 397 // Simplifier::optimize(function); 398 } 399 400 401 } 397 } 398 399 400 } -
icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_automultiplexing.cpp
r4899 r4919 11 11 #include <pablo/analysis/pabloverifier.hpp> 12 12 #include <pablo/optimizers/pablo_simplifier.hpp> 13 #include <pablo/builder.hpp> 13 14 #include <stack> 14 15 #include <queue> … … 23 24 using namespace boost::numeric::ublas; 24 25 25 #define PRINT_DEBUG_OUTPUT26 // #define PRINT_DEBUG_OUTPUT 26 27 27 28 #if !defined(NDEBUG) && !defined(PRINT_DEBUG_OUTPUT) … … 102 103 namespace pablo { 103 104 105 #ifdef PRINT_TIMING_INFORMATION 106 MultiplexingPass::seed_t MultiplexingPass::SEED = 0; 107 unsigned MultiplexingPass::NODES_ALLOCATED = 0; 108 unsigned MultiplexingPass::NODES_USED = 0; 109 #endif 110 104 111 using TypeId = PabloAST::ClassTypeId; 105 112 106 113 bool MultiplexingPass::optimize(PabloFunction & function, const unsigned limit, const unsigned maxSelections, const unsigned windowSize, const bool independent) { 107 114 108 //std::random_device rd;109 // const autoseed = rd();110 const autoseed = 83234827342;115 std::random_device rd; 116 const RNG::result_type seed = rd(); 117 // const RNG::result_type seed = 83234827342; 111 118 112 119 LOG("Seed: " << seed); 120 121 #ifdef PRINT_TIMING_INFORMATION 122 MultiplexingPass::SEED = seed; 123 MultiplexingPass::NODES_ALLOCATED = 0; 124 MultiplexingPass::NODES_USED = 0; 125 #endif 113 126 114 127 MultiplexingPass mp(seed, limit, maxSelections, windowSize); … … 132 145 133 146 LOG("Nodes in BDD: " << bdd_getnodenum() << " of " << bdd_getallocnum()); 147 148 #ifdef PRINT_TIMING_INFORMATION 149 MultiplexingPass::NODES_ALLOCATED = bdd_getallocnum(); 150 MultiplexingPass::NODES_USED = bdd_getnodenum(); 151 #endif 134 152 135 153 RECORD_TIMESTAMP(start_shutdown); … … 491 509 . 492 510 for (auto e : make_iterator_range(in_edges(i, mSubsetGraph))) { 493 unconstrained[source(e, mSubsetGraph)] = true; 511 const auto j = source(e, mSubsetGraph); 512 if (mSubsetImplicationsAdhereToWindowingSizeConstraint && exceedsWindowSize(j, k)) { 513 continue; 514 } 515 unconstrained[j] = true; 494 516 } 495 517 unconstrained[i] = true; … … 503 525 const auto j = source(e, mSubsetGraph); 504 526 add_edge(j, k, mSubsetGraph); 527 if (mSubsetImplicationsAdhereToWindowingSizeConstraint && exceedsWindowSize(j, k)) { 528 continue; 529 } 505 530 unconstrained[j] = true; 506 531 } … … 513 538 const auto j = target(e, mSubsetGraph); 514 539 add_edge(k, j, mSubsetGraph); 540 if (mSubsetImplicationsAdhereToWindowingSizeConstraint && exceedsWindowSize(j, k)) { 541 continue; 542 } 515 543 unconstrained[j] = true; 516 544 } … … 527 555 // Note: this algorithm deems two streams are mutually exclusive if and only if the conjuntion of their BDDs results 528 556 // in a contradiction. To generate a contradiction when comparing Advances, the BDD of each Advance is represented by 529 // the conjunction of variable representing th at Advance and the negation of all variables for the Advances in which530 // the inputs are deemed to be mutually exclusive. For example, if the input of the i-th Advance is mutually exclusive557 // the conjunction of variable representing the k-th Advance and the negation of all variables for the Advances whose 558 // inputs are mutually exclusive with the k-th input. For example, if the input of the i-th Advance is mutually exclusive 531 559 // with the input of the j-th and k-th Advance, the BDD of the i-th Advance is Ai ⧠¬Aj ⧠¬Ak. 532 560 const Advance * const ithAdv = mAdvance[i]; … … 943 971 Advance * const adv = input[0]; 944 972 PabloBlock * const block = adv->getParent(); assert (block); 945 block->setInsertPoint(nullptr); 946 /// Perform n-to-m Multiplexing 973 block->setInsertPoint(nullptr); 974 circular_buffer<PabloAST *> Q(n); 975 PabloBuilder builder(block); 976 /// Perform n-to-m Multiplexing 947 977 for (size_t j = 0; j != m; ++j) { 948 978 std::ostringstream prefix; 949 979 prefix << "mux" << n << "to" << m << '.' << (j + 1); 950 Or * muxing = block->createOr(n); 980 // Or * muxing = block->createOr(n); 981 // for (size_t i = 0; i != n; ++i) { 982 // if (((i + 1) & (1UL << j)) != 0) { 983 // assert (input[i]->getParent() == block); 984 // muxing->addOperand(input[i]->getOperand(0)); 985 // } 986 // } 951 987 for (size_t i = 0; i != n; ++i) { 952 988 if (((i + 1) & (1UL << j)) != 0) { 953 assert (input[i]->getParent() == block); 954 muxing->addOperand(input[i]->getOperand(0)); 955 } 956 } 957 muxed[j] = block->createAdvance(muxing, adv->getOperand(1), prefix.str()); 958 muxed_n[j] = block->createNot(muxed[j]); 959 } 960 /// Perform m-to-n Demultiplexing 989 Q.push_back(input[i]->getOperand(0)); 990 } 991 } 992 while (Q.size() > 1) { 993 PabloAST * a = Q.front(); Q.pop_front(); 994 PabloAST * b = Q.front(); Q.pop_front(); 995 Q.push_back(builder.createOr(a, b)); 996 } 997 PabloAST * muxing = Q.front(); Q.clear(); 998 muxed[j] = builder.createAdvance(muxing, adv->getOperand(1), prefix.str()); 999 muxed_n[j] = builder.createNot(muxed[j]); 1000 } 1001 /// Perform m-to-n Demultiplexing 961 1002 for (size_t i = 0; i != n; ++i) { 962 // Construct the demuxed values and replaces all the users of the original advances with them. 963 And * demuxed = block->createAnd(m);1003 // Construct the demuxed values and replaces all the users of the original advances with them. 1004 assert (Q.empty()); 964 1005 for (size_t j = 0; j != m; ++j) { 965 demuxed->addOperand((((i + 1) & (1UL << j)) != 0) ? muxed[j] : muxed_n[j]); 966 } 1006 Q.push_back((((i + 1) & (1UL << j)) != 0) ? muxed[j] : muxed_n[j]); 1007 } 1008 while (Q.size() > 1) { 1009 PabloAST * a = Q.front(); Q.pop_front(); 1010 PabloAST * b = Q.front(); Q.pop_front(); 1011 Q.push_back(builder.createAnd(a, b)); 1012 } 1013 PabloAST * demuxed = Q.front(); Q.clear(); 1014 // And * demuxed = block->createAnd(m); 1015 // for (size_t j = 0; j != m; ++j) { 1016 // demuxed->addOperand((((i + 1) & (1UL << j)) != 0) ? muxed[j] : muxed_n[j]); 1017 // } 967 1018 input[i]->replaceWith(demuxed, true, true); 968 1019 } -
icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_automultiplexing.hpp
r4896 r4919 44 44 45 45 static bool optimize(PabloFunction & function, const unsigned limit = std::numeric_limits<unsigned>::max(), const unsigned maxSelections = 100, const unsigned windowSize = 1, const bool independent = false); 46 46 #ifdef PRINT_TIMING_INFORMATION 47 using seed_t = RNG::result_type; 48 static seed_t SEED; 49 static unsigned NODES_ALLOCATED; 50 static unsigned NODES_USED; 51 #endif 47 52 protected: 48 53 … … 81 86 , mWindowSize(windowSize) 82 87 , mTestConstrainedAdvances(true) 88 , mSubsetImplicationsAdhereToWindowingSizeConstraint(false) 83 89 , mVariables(0) 84 90 , mRNG(seed) … … 96 102 const unsigned mWindowSize; 97 103 const bool mTestConstrainedAdvances; 104 const bool mSubsetImplicationsAdhereToWindowingSizeConstraint; 98 105 unsigned mVariables; 99 106 RNG mRNG; -
icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_simplifier.cpp
r4899 r4919 5 5 #include <pablo/printer_pablos.h> 6 6 #include <pablo/analysis/pabloverifier.hpp> 7 #ifdef USE_BOOST8 7 #include <boost/container/flat_set.hpp> 9 #else10 #include <unordered_set>11 #endif12 8 13 9 namespace pablo { 14 10 15 #ifdef USE_BOOST16 11 template <typename Type> 17 12 using SmallSet = boost::container::flat_set<Type>; 18 #else19 template <typename Type>20 using SmallSet = std::unordered_set<Type>;21 #endif22 13 23 14 /** ------------------------------------------------------------------------------------------------------------- * … … 25 16 ** ------------------------------------------------------------------------------------------------------------- */ 26 17 bool Simplifier::optimize(PabloFunction & function) { 18 // negationsShouldImmediatelySucceedTheirLiteral(function.getEntryBlock()); 19 #ifndef NDEBUG 20 PabloVerifier::verify(function, "post-negation-must-immediately-succeed-their-literal"); 21 #endif 27 22 eliminateRedundantCode(function.getEntryBlock()); 28 23 #ifndef NDEBUG … … 476 471 } 477 472 478 } 473 /** ------------------------------------------------------------------------------------------------------------- * 474 * @brief negationsShouldImmediatelySucceedTheirLiteral 475 * 476 * Assuming that most negations are actually replaced by an ANDC operations or that we only need a literal or its 477 * negation, by pushing all negations up to immediately succeed the literal we should generate equally efficient 478 * code whilst simplifying analysis. 479 * 480 * TODO: this theory needs evaluation. 481 ** ------------------------------------------------------------------------------------------------------------- */ 482 void Simplifier::negationsShouldImmediatelySucceedTheirLiteral(PabloBlock * const block) { 483 Statement * stmt = block->front(); 484 while (stmt) { 485 Statement * next = stmt->getNextNode(); 486 if (isa<If>(stmt)) { 487 negationsShouldImmediatelySucceedTheirLiteral(cast<If>(stmt)->getBody()); 488 } else if (isa<While>(stmt)) { 489 negationsShouldImmediatelySucceedTheirLiteral(cast<While>(stmt)->getBody()); 490 } else if (isa<Not>(stmt)) { 491 PabloAST * op = cast<Not>(stmt)->getExpr(); 492 if (LLVM_LIKELY(isa<Statement>(op))) { 493 PabloBlock * scope = cast<Statement>(op)->getParent(); 494 // If the operand is not in a nested scope, we can move it. 495 for (;;) { 496 if (LLVM_UNLIKELY(scope == nullptr)) { 497 stmt->insertAfter(cast<Statement>(op)); 498 break; 499 } 500 scope = scope->getParent(); 501 if (LLVM_UNLIKELY(scope == block)) { 502 break; 503 } 504 } 505 } 506 } 507 stmt = next; 508 } 509 } 510 511 } -
icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_simplifier.hpp
r4886 r4919 15 15 Simplifier() = default; 16 16 private: 17 static void negationsShouldImmediatelySucceedTheirLiteral(PabloBlock * const block); 17 18 static void eliminateRedundantCode(PabloBlock * const block, ExpressionTable * predecessor = nullptr); 18 19 static PabloAST * fold(Variadic * const var, PabloBlock * const block); -
icGREP/icgrep-devel/icgrep/pablo/optimizers/schedulingprepass.cpp
r4899 r4919 3 3 #include <boost/circular_buffer.hpp> 4 4 #include <boost/container/flat_set.hpp> 5 #include <boost/container/flat_map.hpp> 5 6 #include <pablo/analysis/pabloverifier.hpp> 7 #include <boost/graph/adjacency_list.hpp> 8 #include <unordered_map> 6 9 7 10 #include <pablo/printer_pablos.h> … … 13 16 namespace pablo { 14 17 18 using DependencyGraph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::bidirectionalS, std::vector<PabloAST *>>; 19 using Vertex = DependencyGraph::vertex_descriptor; 20 using Map = std::unordered_map<const PabloAST *, Vertex>; 21 using ReadyPair = std::pair<unsigned, Vertex>; 22 using ReadySet = std::vector<ReadyPair>; 23 24 using weight_t = unsigned; 25 using TypeId = PabloAST::ClassTypeId; 26 using LiveSet = flat_set<Vertex>; 27 28 void schedule(PabloBlock * const block); 29 30 /** ------------------------------------------------------------------------------------------------------------- * 31 * @brief printGraph 32 ** ------------------------------------------------------------------------------------------------------------- */ 33 template <typename DependencyGraph> 34 static void printGraph(const DependencyGraph & G, const std::vector<unsigned> & ordering, const std::string name, raw_ostream & out) { 35 out << "*******************************************\n"; 36 out << "digraph " << name << " {\n"; 37 for (auto u : make_iterator_range(vertices(G))) { 38 if (G[u].empty()) { 39 continue; 40 } 41 out << "v" << u << " [label=\"" << ordering[u] << " : "; 42 bool newLine = false; 43 for (PabloAST * expr : G[u]) { 44 if (newLine) { 45 out << '\n'; 46 } 47 newLine = true; 48 if (isa<Statement>(expr)) { 49 PabloPrinter::print(cast<Statement>(expr), out); 50 } else { 51 PabloPrinter::print(expr, out); 52 } 53 } 54 out << "\"];\n"; 55 } 56 for (auto e : make_iterator_range(edges(G))) { 57 out << "v" << source(e, G) << " -> v" << target(e, G); 58 out << ";\n"; 59 } 60 61 out << "}\n\n"; 62 out.flush(); 63 } 64 15 65 /** ------------------------------------------------------------------------------------------------------------- * 16 66 * @brief resolveNestedUsages 17 67 ** ------------------------------------------------------------------------------------------------------------- */ 18 void SchedulingPrePass::resolveNestedUsages(Statement * const root, Statement * const stmt,Graph & G, Map & M, PabloBlock * const block) {68 static void resolveNestedUsages(Statement * const root, Statement * const stmt, DependencyGraph & G, Map & M, PabloBlock * const block) { 19 69 for (PabloAST * use : stmt->users()) { 20 70 if (LLVM_LIKELY(isa<Statement>(use))) { … … 23 73 for (PabloBlock * prior = scope->getParent(); prior; scope = prior, prior = prior->getParent()) { 24 74 if (prior == block) { 25 auto s = mResolvedScopes.find(scope); 26 assert (s != mResolvedScopes.end()); 27 assert (s->second); 28 auto v = M.find(s->second); 75 assert (scope->getBranch()); 76 auto v = M.find(scope->getBranch()); 29 77 assert (v != M.end()); 30 78 auto u = M.find(root); … … 41 89 } 42 90 43 ///** ------------------------------------------------------------------------------------------------------------- * 44 // * @brief printGraph 45 // ** ------------------------------------------------------------------------------------------------------------- */ 46 //template <typename Graph> 47 //static void printGraph(const Graph & G, const std::vector<unsigned> & ordering, const std::string name, raw_ostream & out) { 48 // out << "*******************************************\n"; 49 // out << "digraph " << name << " {\n"; 50 // for (auto u : make_iterator_range(vertices(G))) { 51 // out << "v" << u << " [label=\"" << ordering[u] << " : "; 52 // PabloPrinter::print(G[u], out); 53 // out << "\"];\n"; 54 // } 55 // for (auto e : make_iterator_range(edges(G))) { 56 // out << "v" << source(e, G) << " -> v" << target(e, G); 57 // out << ";\n"; 58 // } 59 60 // out << "}\n\n"; 61 // out.flush(); 62 //} 63 64 /** ------------------------------------------------------------------------------------------------------------- * 65 * @brief schedule 66 ** ------------------------------------------------------------------------------------------------------------- */ 67 void SchedulingPrePass::schedule(PabloBlock * const block) { 68 Graph G; 91 /** ------------------------------------------------------------------------------------------------------------- * 92 * @brief resolveNestedUsages 93 ** ------------------------------------------------------------------------------------------------------------- */ 94 static void computeDependencyGraph(DependencyGraph & G, PabloBlock * const block) { 69 95 Map M; 70 71 96 // Construct a use-def graph G representing the current scope block 72 97 for (Statement * stmt : *block) { 73 if (isa<If>(stmt) || isa<While>(stmt)) { 74 PabloBlock * body = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody(); 75 mResolvedScopes.emplace(body, stmt); 76 schedule(body); 77 } 78 const auto u = add_vertex(stmt, G); 98 if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) { 99 schedule(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody()); 100 } 101 const auto u = add_vertex({stmt}, G); 79 102 M.insert(std::make_pair(stmt, u)); 80 103 for (unsigned i = 0; i != stmt->getNumOperands(); ++i) { … … 94 117 // Was this instruction computed by a nested block? 95 118 if (prior == block) { 96 auto s = mResolvedScopes.find(scope); 97 assert (s != mResolvedScopes.end()); 98 assert (s->second); 99 auto v = M.find(s->second); 119 assert (scope->getBranch()); 120 auto v = M.find(scope->getBranch()); 100 121 assert (v != M.end()); 101 122 if (v->second != u) { … … 109 130 } 110 131 } 111 // Do a second pass to ensure that we've accounted for any nested usage of a statement132 // Do a second pass to ensure that we've accounted for any nested usage of an If or While statement 112 133 for (Statement * stmt : *block) { 113 134 if (LLVM_UNLIKELY(isa<If>(stmt))) { … … 123 144 } 124 145 } 125 146 // Contract the graph 147 for (;;) { 148 bool done = true; 149 for (const Vertex u : make_iterator_range(vertices(G))) { 150 if (out_degree(u, G) == 1) { 151 const Vertex v = target(*(out_edges(u, G).first), G); 152 G[v].insert(G[v].begin(), G[u].begin(), G[u].end()); 153 for (auto e : make_iterator_range(in_edges(u, G))) { 154 add_edge(source(e, G), v, G); 155 } 156 G[u].clear(); 157 clear_vertex(u, G); 158 done = false; 159 } 160 } 161 if (done) { 162 break; 163 } 164 } 165 166 167 } 168 169 /** ------------------------------------------------------------------------------------------------------------- * 170 * @brief computeGraphOrdering 171 ** ------------------------------------------------------------------------------------------------------------- */ 172 std::vector<weight_t> computeGraphOrdering(const DependencyGraph & G) { 126 173 // Determine the bottom-up ordering of G 127 std::vector< unsigned> ordering(num_vertices(G));174 std::vector<weight_t> ordering(num_vertices(G)); 128 175 circular_buffer<Vertex> Q(num_vertices(G)); 129 176 for (const Vertex u : make_iterator_range(vertices(G))) { 130 if (out_degree(u, G) == 0 ) {177 if (out_degree(u, G) == 0 && G[u].size() > 0) { 131 178 ordering[u] = 1; 132 179 Q.push_back(u); 133 180 } 134 181 } 135 136 182 while (!Q.empty()) { 137 183 const Vertex u = Q.front(); … … 141 187 const Vertex v = source(ei, G); 142 188 if (ordering[v] == 0) { 143 unsignedweight = 0;189 weight_t weight = 0; 144 190 bool ready = true; 145 191 for (const auto ej : make_iterator_range(out_edges(v, G))) { 146 192 const Vertex w = target(ej, G); 147 const unsignedt = ordering[w];193 const weight_t t = ordering[w]; 148 194 if (t == 0) { 149 195 ready = false; … … 155 201 assert (weight < std::numeric_limits<unsigned>::max()); 156 202 assert (weight > 0); 157 ordering[v] = (weight + 1);203 ordering[v] = weight + 1; 158 204 Q.push_back(v); 159 205 } … … 161 207 } 162 208 } 209 return ordering; 210 } 211 212 /** ------------------------------------------------------------------------------------------------------------- * 213 * @brief schedule 214 ** ------------------------------------------------------------------------------------------------------------- */ 215 void schedule(PabloBlock * const block) { 216 DependencyGraph G; 217 computeDependencyGraph(G, block); 218 std::vector<weight_t> ordering = computeGraphOrdering(G); 219 220 // raw_os_ostream out(std::cerr); 221 // printGraph(G, ordering, "G", out); 163 222 164 223 // Compute the initial ready set 165 224 ReadySet readySet; 166 225 for (const Vertex u : make_iterator_range(vertices(G))) { 167 if (in_degree(u, G) == 0 ) {226 if (in_degree(u, G) == 0 && G[u].size() > 0) { 168 227 readySet.emplace_back(ordering[u], u); 169 228 } … … 180 239 block->setInsertPoint(nullptr); 181 240 182 183 // Rewrite the AST using the bottom-up ordering 241 // Rewrite the AST 184 242 while (!readySet.empty()) { 185 186 // Scan through the ready set to determine which one 'kills' the greatest number of inputs 187 // NOTE: the readySet is kept in sorted "distance to sink" order; thus those closest to a 188 // sink will be evaluated first. 189 double best = 0; 243 DependencyGraph::degree_size_type killed = 0; 190 244 auto chosen = readySet.begin(); 191 245 for (auto ri = readySet.begin(); ri != readySet.end(); ++ri) { 192 const Vertex u = std::get<1>(*ri); 193 const PabloAST * const expr = G[u]; 194 if (expr && (isa<Assign>(expr) || isa<Next>(expr))) { 246 DependencyGraph::degree_size_type kills = 0; 247 for (auto e : make_iterator_range(in_edges(ri->first, G))) { 248 if (out_degree(source(e, G), G) == 1) { 249 ++kills; 250 } 251 } 252 if (kills > killed) { 195 253 chosen = ri; 196 break; 197 } 198 double progress = 0; 199 for (auto ei : make_iterator_range(in_edges(u, G))) { 200 const auto v = source(ei, G); 201 const auto totalUsesOfIthOperand = out_degree(v, G); 202 if (LLVM_UNLIKELY(totalUsesOfIthOperand == 0)) { 203 progress += 1.0; 204 } else { 205 unsigned unscheduledUsesOfIthOperand = 0; 206 for (auto ej : make_iterator_range(out_edges(v, G))) { 207 if (ordering[target(ej, G)]) { // if this edge leads to an unscheduled statement 208 ++unscheduledUsesOfIthOperand; 209 } 210 } 211 progress += std::pow((double)(totalUsesOfIthOperand - unscheduledUsesOfIthOperand + 1) / (double)(totalUsesOfIthOperand), 2); 212 } 213 } 214 if (progress > best) { 215 chosen = ri; 216 best = progress; 254 killed = kills; 217 255 } 218 256 } … … 221 259 std::tie(weight, u) = *chosen; 222 260 readySet.erase(chosen); 261 clear_in_edges(u, G); 223 262 224 263 assert ("Error: SchedulingPrePass is attempting to reschedule a statement!" && (weight > 0)); 225 264 226 265 // insert the statement then mark it as written ... 227 block->insert(cast<Statement>(G[u])); 266 for (PabloAST * expr : G[u]) { 267 block->insert(cast<Statement>(expr)); 268 } 269 228 270 ordering[u] = 0; 229 271 // Now check whether any new instructions are ready … … 245 287 } 246 288 } 289 247 290 } 248 291 … … 250 293 * @brief schedule 251 294 ** ------------------------------------------------------------------------------------------------------------- */ 252 void SchedulingPrePass::schedule(PabloFunction & function) { 253 mResolvedScopes.emplace(function.getEntryBlock(), nullptr); 295 void schedule(PabloFunction & function) { 254 296 schedule(function.getEntryBlock()); 255 297 } … … 259 301 ** ------------------------------------------------------------------------------------------------------------- */ 260 302 bool SchedulingPrePass::optimize(PabloFunction & function) { 261 SchedulingPrePass pp; 262 pp.schedule(function); 303 schedule(function); 263 304 #ifndef NDEBUG 264 305 PabloVerifier::verify(function, "post-scheduling-prepass"); -
icGREP/icgrep-devel/icgrep/pablo/optimizers/schedulingprepass.h
r4896 r4919 1 1 #ifndef SCHEDULINGPREPASS_H 2 2 #define SCHEDULINGPREPASS_H 3 4 #include <boost/container/flat_map.hpp>5 #include <boost/graph/adjacency_list.hpp>6 #include <unordered_map>7 3 8 4 namespace pablo { 9 5 10 6 class PabloFunction; 11 class PabloBlock;12 class Statement;13 class PabloAST;14 7 15 8 class SchedulingPrePass { 16 9 public: 17 using Graph = boost::adjacency_list<boost::hash_setS, boost::vecS, boost::bidirectionalS, PabloAST *>;18 using Vertex = Graph::vertex_descriptor;19 using Map = std::unordered_map<const PabloAST *, Vertex>;20 using ScopeMap = boost::container::flat_map<const PabloBlock *, Statement *>;21 using ReadyPair = std::pair<unsigned, Vertex>;22 using ReadySet = std::vector<ReadyPair>;23 public:24 10 static bool optimize(PabloFunction & function); 25 11 protected: 26 void schedule(PabloFunction & function);27 void schedule(PabloBlock * const block);28 void resolveNestedUsages(Statement * const root, Statement * const stmt, Graph & G, Map & M, PabloBlock * const block);29 12 SchedulingPrePass() = default; 30 private:31 ScopeMap mResolvedScopes;32 13 }; 33 34 14 35 15 } 36 16 37 38 17 #endif // SCHEDULINGPREPASS_H -
icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp
r4900 r4919 47 47 #include <llvm/ADT/Twine.h> 48 48 #include <iostream> 49 #include <llvm/Support/raw_ostream.h> 50 #include <llvm/Support/FileSystem.h> 51 52 //#include <llvm/PassManager.h> 53 //#include <llvm/Transforms/IPO/PassManagerBuilder.h> 54 55 #include <hrtime.h> 49 56 50 57 static cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR."); 51 58 static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("Print LLVM IR generated by Pablo Compiler."), cl::cat(eIRDumpOptions)); 59 static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(eIRDumpOptions)); 60 52 61 53 62 static cl::OptionCategory fTracingOptions("Run-time Tracing Options", "These options control execution traces."); … … 76 85 llvm::Function * PabloCompiler::compile(PabloFunction * function) { 77 86 87 #ifdef PRINT_TIMING_INFORMATION 88 const timestamp_t pablo_compilation_start = read_cycle_counter(); 89 #endif 78 90 79 91 PabloBlock * const mainScope = function->getEntryBlock(); … … 126 138 127 139 // Clean up 128 delete mCarryManager; mCarryManager = nullptr; 129 140 delete mCarryManager; 141 mCarryManager = nullptr; 142 143 #ifdef PRINT_TIMING_INFORMATION 144 const timestamp_t pablo_compilation_end = read_cycle_counter(); 145 std::cerr << "PABLO COMPILATION TIME: " << (pablo_compilation_end - pablo_compilation_start) << std::endl; 146 #endif 147 148 // llvm::PassManager pm; 149 // llvm::PassManagerBuilder pmb; 150 // pmb.OptLevel = 3; 151 // pmb.populateModulePassManager(pm); 152 // pm.run(*mMod); 153 130 154 if (LLVM_UNLIKELY(DumpGeneratedIR)) { 131 mMod->dump(); 155 156 if (IROutputFilename.empty()) { 157 mMod->dump(); 158 } else { 159 std::error_code error; 160 llvm::raw_fd_ostream out(IROutputFilename, error, sys::fs::OpenFlags::F_None); 161 mMod->print(out, nullptr); 162 } 132 163 } 133 164 -
icGREP/icgrep-devel/icgrep/pablo/passes/factorizedfg.cpp
r4899 r4919 6 6 #include <pablo/passes/flattenassociativedfg.h> 7 7 #include <boost/container/flat_set.hpp> 8 #include <boost/circular_buffer.hpp> 9 #include <boost/graph/adjacency_list.hpp> 10 #include <boost/graph/adjacency_matrix.hpp> 11 #include <boost/graph/topological_sort.hpp> 12 #include <queue> 13 #include <tuple> 14 8 15 #include <set> 16 #include <type_traits> 17 18 #include <pablo/printer_pablos.h> 19 #include <iostream> 9 20 10 21 using namespace boost; 11 22 using namespace boost::container; 12 23 13 14 24 namespace pablo { 15 25 16 using VertexSet = std::vector<PabloAST *>;17 using Biclique = std::pair<VertexSet, VertexSet>; // [{Operands}, {Users}]18 using BicliqueSet = std::vector<Biclique>;19 26 using TypeId = PabloAST::ClassTypeId; 20 27 21 ///** ------------------------------------------------------------------------------------------------------------- * 22 // * @brief lower 23 // ** ------------------------------------------------------------------------------------------------------------- */ 24 //inline PabloAST * FactorizeDFG::lower(Variadic * const var, PabloBlock * block, OrderingMap & order) { 25 // PabloAST * result = nullptr; 26 // assert (var->getNumOperands() > 2); 27 //// // Sort the operands by their order in the current AST 28 //// std::sort(var->begin(), var->end(), 29 //// [&order](const PabloAST * const a, const PabloAST * const b) 30 //// { 31 //// return order.of(a) > order.of(b); 32 //// }); 33 //// unsigned operands = var->getNumOperands(); 34 //// // Swap the final two operands so that we can just append the temporary calculations onto the end 35 //// // and trust that the PENULTIMATE operand is the "latest" in the AST. 36 //// PabloAST * const item = var->getOperand(operands - 1); 37 //// var->setOperand(operands - 1, var->getOperand(operands - 2)); 38 //// var->setOperand(operands - 2, item); 39 // // Finally rewrite all of the variadic statements as binary operations 40 // block->setInsertPoint(var->getPrevNode()); 41 // while (var->getNumOperands() > 1) { 42 // PabloAST * const op2 = var->removeOperand(1); 43 // PabloAST * const op1 = var->removeOperand(0); 44 // assert (op1 != op2); 45 // if (isa<And>(var)) { 46 // result = block->createAnd(op1, op2); 47 // } else if (isa<Or>(var)) { 48 // result = block->createOr(op1, op2); 49 // } else { // if (isa<Xor>(var)) { 50 // result = block->createXor(op1, op2); 51 // } 52 // var->addOperand(result); 53 // } 54 // assert (result); 55 // return result; 56 //} 57 58 ///** ------------------------------------------------------------------------------------------------------------- * 59 // * @brief lower 60 // ** ------------------------------------------------------------------------------------------------------------- */ 61 //void FactorizeDFG::lower(PabloBlock * const block, OrderingMap & parent) { 62 // OrderingMap order(parent); 63 // Statement * stmt = block->front(); 64 // while (stmt) { 65 // if (isa<If>(stmt) || isa<While>(stmt)) { 66 // lower(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody(), order); 67 // if (isa<If>(stmt)) { 68 // for (Assign * def : cast<If>(stmt)->getDefined()) { 69 // order.enumerate(def); 70 // } 71 // } else { // if (isa<While>(stmt)) { 72 // for (Next * var : cast<While>(stmt)->getVariants()) { 73 // order.enumerate(var); 74 // } 75 // } 76 // } else { 77 // if ((isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt)) && (stmt->getNumOperands() > 2)) { 78 // // We should maintain an ordering list and sort each Variadic according to it prior to writing them out. 79 // PabloAST * result = lower(cast<Variadic>(stmt), block, order); 80 // order.enumerate(result); 81 // stmt = stmt->replaceWith(result, true); 82 // continue; 83 // } 84 // order.enumerate(stmt); 85 // } 86 // stmt = stmt->getNextNode(); 87 // } 88 //} 89 90 /** ------------------------------------------------------------------------------------------------------------- * 91 * @brief finalize 92 ** ------------------------------------------------------------------------------------------------------------- */ 93 inline Statement * FactorizeDFG::lower(Variadic * const var, PabloBlock * block) { 94 PabloAST * result = nullptr; 28 // TODO: move the "tryToPartiallyExtractVariadic" from the coalescedfg class into here to run prior to lowering? 29 30 /** ------------------------------------------------------------------------------------------------------------- * 31 * @brief lower 32 * 33 * The goal of this function is to try and lower a variadic operation into binary form while attempting to mitigate 34 * register pressure under the assumption that LLVM is not going to significantly change the IR (which was observed 35 * when assessing the ASM output of LLVM 3.6.1 using O3.) 36 ** ------------------------------------------------------------------------------------------------------------- */ 37 inline Statement * FactorizeDFG::lower(Variadic * const var, PabloBlock * block) const { 38 39 using Graph = adjacency_list<hash_setS, vecS, bidirectionalS, PabloAST *, int>; 40 using Vertex = Graph::vertex_descriptor; 41 using Map = flat_map<const PabloAST *, Vertex>; 42 using SchedulingData = std::pair<unsigned, PabloAST *>; 43 using SchedulingQueue = std::priority_queue<SchedulingData, std::vector<SchedulingData>, std::greater<SchedulingData>>; 44 45 const unsigned NOT_STEP = 1; 46 const unsigned BOOLEAN_STEP = 10; 47 const unsigned OTHER_STEP = 30; 48 const unsigned DESIRED_GAP = 30; 49 50 assert (var->getParent() == block); 95 51 assert (var->getNumOperands() > 2); 96 block->setInsertPoint(var->getPrevNode()); 97 while (var->getNumOperands() > 1) { 98 PabloAST * const op2 = var->removeOperand(1); 99 PabloAST * const op1 = var->removeOperand(0); 100 assert (op1 != op2); 52 53 // Begin by computing a graph G depicting which operands are used by which statements. We know that 54 // all of them are used by the Variadic but they might be used elsewhere in the current block. 55 56 57 // If this operand was defined in this block, we want to indicate that in G as well so that we 58 // don't mistakingly pair them together. 59 60 const unsigned operands = var->getNumOperands(); 61 62 Graph G(operands + 1); 63 Map M; 64 65 G[operands] = var; 66 M.emplace(var, operands); 67 68 for (Vertex u = 0; u != operands; ++u) { 69 PabloAST * const op = var->getOperand(u); 70 G[u] = op; 71 M.emplace(op, u); 72 assert ("AST structural error!" && (op->getNumUses() > 0)); 73 for (PabloAST * user : op->users()) { 74 if (LLVM_LIKELY(isa<Statement>(user))) { 75 Statement * usage = cast<Statement>(user); 76 PabloBlock * scope = usage->getParent(); 77 while (scope) { 78 if (scope == block) { 79 auto f = M.find(usage); 80 Vertex v = 0; 81 if (f == M.end()) { 82 v = add_vertex(usage, G); 83 M.emplace(usage, v); 84 } else { 85 v = f->second; 86 } 87 G[add_edge(u, v, G).first] = 0; 88 break; 89 } 90 usage = scope->getBranch(); 91 scope = scope->getParent(); 92 } 93 } 94 } 95 } 96 97 assert (M.count(var) == 1); 98 99 unsigned estQuantum = 0; 100 circular_buffer<std::pair<unsigned, Vertex>> defs(operands); 101 for (Statement * stmt : *block) { 102 switch (stmt->getClassTypeId()) { 103 case TypeId::And: 104 case TypeId::Or: 105 case TypeId::Xor: 106 estQuantum += BOOLEAN_STEP; 107 break; 108 case TypeId::Not: 109 estQuantum += NOT_STEP; 110 break; 111 default: 112 estQuantum += OTHER_STEP; 113 } 114 auto f = M.find(stmt); 115 if (LLVM_UNLIKELY(f != M.end())) { 116 const auto u = f->second; 117 if (LLVM_UNLIKELY(u == operands)) { 118 assert (stmt == var); 119 break; 120 } 121 for (auto e : make_iterator_range(in_edges(u, G))) { 122 G[e] = estQuantum; 123 } 124 if (u < operands) { 125 defs.push_back(std::make_pair(estQuantum + DESIRED_GAP, u)); 126 } 127 } 128 // Annotate G to indicate when we expect a statement will be available 129 while (defs.size() > 0) { 130 unsigned availQuantum = 0; 131 Vertex u = 0; 132 std::tie(availQuantum, u) = defs.front(); 133 if (availQuantum > estQuantum) { 134 break; 135 } 136 defs.pop_front(); 137 auto f = M.find(stmt); 138 Vertex v = 0; 139 if (f == M.end()) { 140 v = add_vertex(stmt, G); 141 M.emplace(stmt, v); 142 } else { 143 v = f->second; 144 } 145 G[add_edge(u, v, G).first] = estQuantum; 146 } 147 } 148 149 for (Vertex u = 0; u < operands; ++u) { 150 unsigned quantum = 0; 151 Vertex v = operands; 152 for (auto e : make_iterator_range(out_edges(u, G))) { 153 if (quantum < G[e]) { 154 quantum = G[e]; 155 v = target(e, G); 156 } 157 } 158 clear_out_edges(u, G); 159 G[add_edge(u, v, G).first] = quantum; 160 } 161 162 for (auto e : make_iterator_range(in_edges(operands, G))) { 163 G[e] = estQuantum; 164 } 165 166 assert (num_edges(G) == var->getNumOperands()); 167 168 SchedulingQueue Q; 169 while (num_edges(G) > 0) { 170 171 Graph::edge_descriptor f; 172 unsigned quantum = std::numeric_limits<unsigned>::max(); 173 for (auto e : make_iterator_range(edges(G))) { 174 if (in_degree(source(e, G), G) == 0) { 175 if (quantum > G[e]) { 176 quantum = G[e]; 177 f = e; 178 } 179 } 180 } 181 182 assert ("No edge selected!" && (quantum < std::numeric_limits<unsigned>::max())); 183 184 const auto u = source(f, G); 185 assert (u < operands); 186 const auto v = target(f, G); 187 assert (isa<Statement>(G[v])); 188 // Since this might have been a target of a prior pairing, read the original operand value instead of 189 // G when checking which value is indicated by u. 190 PabloAST * const op1 = var->getOperand(u); 191 block->setInsertPoint(cast<Statement>(G[v])); 192 remove_edge(f, G); 193 194 if (LLVM_LIKELY(Q.size() > 0)) { 195 unsigned minQuantum = 0; PabloAST * op2 = nullptr; 196 std::tie(minQuantum, op2) = Q.top(); 197 if (minQuantum < quantum) { 198 Q.pop(); 199 PabloAST * result = nullptr; 200 if (isa<And>(var)) { 201 result = block->createAnd(op1, op2); 202 } else if (isa<Or>(var)) { 203 result = block->createOr(op1, op2); 204 } else { // if (isa<Xor>(var)) { 205 result = block->createXor(op1, op2); 206 } 207 Q.emplace(quantum + DESIRED_GAP, result); 208 G[v] = result; // update the insertion point node value 209 continue; 210 } 211 } 212 Q.emplace(quantum, op1); 213 } 214 215 // If we've done our best to schedule the statements and have operands remaining in our queue, generate a 216 // tree of the remaining operands. 217 while (Q.size() > 1) { 218 unsigned q1 = 0; PabloAST * op1 = nullptr; 219 std::tie(q1, op1) = Q.top(); 220 Q.pop(); 221 222 unsigned q2 = 0; PabloAST * op2 = nullptr; 223 std::tie(q2, op2) = Q.top(); 224 Q.pop(); 225 226 PabloAST * result = nullptr; 101 227 if (isa<And>(var)) { 102 228 result = block->createAnd(op1, op2); … … 106 232 result = block->createXor(op1, op2); 107 233 } 108 var->addOperand(result); 109 } 234 Q.emplace(std::max(q1, q2) + DESIRED_GAP, result); 235 } 236 237 assert (Q.size() == 1); 238 return var->replaceWith(std::get<1>(Q.top())); 239 } 240 241 #if 0 242 using EnumerationMap = flat_map<const PabloAST *, unsigned>; 243 244 inline void enumerate(PabloBlock * const block, EnumerationMap & M, std::vector<Statement *> & S) { 245 for (Statement * stmt : *block) { 246 M.emplace(stmt, static_cast<int>(S.size())); 247 S.push_back(stmt); 248 } 249 } 250 251 inline static unsigned indexOf(const PabloAST * const expr, const EnumerationMap & M) { 252 assert (expr); 253 const auto f = M.find(expr); 254 assert (f != M.end()); 255 return f->second; 256 } 257 258 /** ------------------------------------------------------------------------------------------------------------- * 259 * @brief lower 260 * 261 * The goal of this function is to try and lower a variadic operation into binary form while attempting to mitigate 262 * register pressure under the assumption that LLVM is not going to significantly change the IR (which was observed 263 * when assessing the ASM output of LLVM 3.6.1 using O3.) 264 ** ------------------------------------------------------------------------------------------------------------- */ 265 inline Statement * FactorizeDFG::lower(Variadic * const var, PabloBlock * block) const { 266 267 assert (var->getNumOperands() > 2); 268 269 EnumerationMap M; 270 std::vector<Statement *> S; 271 272 enumerate(block, M, S); 273 274 const int varIdx = indexOf(var, M); 275 276 circular_buffer<std::tuple<unsigned, unsigned, PabloAST *>> lowered(var->getNumOperands()); 277 for (unsigned i = 0; i != var->getNumOperands(); ++i) { 278 PabloAST * const op = var->getOperand(i); 279 280 unsigned defIdx = 0; 281 if (LLVM_LIKELY(isa<Statement>(op))) { 282 Statement * producer = cast<Statement>(op); 283 PabloBlock * scope = producer->getParent(); 284 while (scope && (scope != block)) { 285 producer = scope->getBranch(); 286 scope = scope->getParent(); 287 } 288 defIdx = producer ? indexOf(producer, M) : 0; 289 } 290 291 unsigned useIdx = defIdx; 292 for (PabloAST * user : op->users()) { 293 // if this user is in the current scope or a nested scope of this block 294 if ((cast<Statement>(user)->getParent() == block) && (user != var)) { 295 useIdx = std::max(useIdx, indexOf(user, M)); 296 } 297 } 298 if (useIdx > varIdx) { 299 useIdx = varIdx; 300 } 301 assert (!lowered.full()); 302 lowered.push_back(std::make_tuple(useIdx, defIdx, op)); 303 } 304 305 std::sort(lowered.begin(), lowered.end()); 306 307 PabloAST * result = nullptr; 308 while (lowered.size() > 1) { 309 310 PabloAST * op1, * op2; 311 unsigned def1, def2; 312 unsigned use1, use2; 313 314 std::tie(use1, def1, op1) = lowered.front(); lowered.pop_front(); 315 std::tie(use2, def2, op2) = lowered.front(); lowered.pop_front(); 316 317 // if (((def1 < def2) ? ((def1 + 3) > def2) : ((def2 + 3) > def1)) && (lowered.size() > 0)) { 318 // unsigned def3 = def1; 319 // unsigned use3 = use1; 320 // PabloAST * op3 = op1; 321 // std::tie(use1, def1, op1) = lowered.front(); lowered.pop_front(); 322 // lowered.push_front(std::make_tuple(use3, def3, op3)); 323 // } 324 325 // Is the last use of op1 prior to the definition of op2? 326 if (use1 < def2) { 327 use1 = def2; 328 } 329 330 block->setInsertPoint(S[use1]); 331 332 if (isa<And>(var)) { 333 result = block->createAnd(op1, op2); 334 } else if (isa<Or>(var)) { 335 result = block->createOr(op1, op2); 336 } else { // if (isa<Xor>(var)) { 337 result = block->createXor(op1, op2); 338 } 339 340 S[use1] = cast<Statement>(result); 341 342 assert (!lowered.full()); 343 lowered.push_front(std::make_tuple(use1, use1, result)); 344 } 345 assert (result); 346 110 347 return var->replaceWith(result, true); 111 348 } 112 113 /** ------------------------------------------------------------------------------------------------------------- * 114 * @brief finalize 115 ** ------------------------------------------------------------------------------------------------------------- */ 116 void FactorizeDFG::lower(PabloBlock * const block) { 349 #endif 350 351 /** ------------------------------------------------------------------------------------------------------------- * 352 * @brief lower 353 ** ------------------------------------------------------------------------------------------------------------- */ 354 void FactorizeDFG::lower(PabloBlock * const block) const { 117 355 Statement * stmt = block->front(); 118 356 while (stmt) { 119 357 if (isa<If>(stmt) || isa<While>(stmt)) { 120 358 lower(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody()); 121 } else if ((isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt)) && (stmt->getNumOperands() > 2)) { 122 // We should maintain an ordering list and sort each Variadic according to it prior to writing them out. 359 } else if ((stmt->getNumOperands() > 2) && (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt))) { 123 360 stmt = lower(cast<Variadic>(stmt), block); 124 361 continue; … … 128 365 } 129 366 130 ///** ------------------------------------------------------------------------------------------------------------- * 131 // * @brief lower 132 // ** ------------------------------------------------------------------------------------------------------------- */ 133 //inline void FactorizeDFG::lower(PabloFunction & function) { 134 // OrderingMap ordering; 135 // for (unsigned i = 0; i != function.getNumOfParameters(); ++i) { 136 // ordering.enumerate(function.getParameter(i)); 137 // } 138 // lower(function.getEntryBlock(), ordering); 139 //} 367 /** ------------------------------------------------------------------------------------------------------------- * 368 * @brief lower 369 ** ------------------------------------------------------------------------------------------------------------- */ 370 inline void FactorizeDFG::lower(PabloFunction & function) const { 371 lower(function.getEntryBlock()); 372 } 373 374 #if 0 375 376 using Graph = boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, PabloAST *>; 377 using Vertex = Graph::vertex_descriptor; 378 using Map = flat_map<const PabloAST *, Vertex>; 379 380 using VertexSet = std::vector<PabloAST *>; 381 using Biclique = std::pair<VertexSet, VertexSet>; // [{Operands}, {Users}] 382 using BicliqueSet = std::set<Biclique>; 383 using TypeId = PabloAST::ClassTypeId; 140 384 141 385 /** ------------------------------------------------------------------------------------------------------------- * … … 145 389 * bicliques" by Alexe et. al. (2003). 146 390 ** ------------------------------------------------------------------------------------------------------------- */ 147 static BicliqueSet enumerateBicliques(Variadic * const var) {148 using IntersectionSets = std::set<VertexSet>;391 inline void FactorizeDFG::enumerateBicliques(Variadic * const var, BicliqueSet & bicliques) { 392 using IntersectionSets = flat_set<VertexSet>; 149 393 150 394 IntersectionSets B1; … … 198 442 } 199 443 200 BicliqueSet bicliques;201 unsigned userSize = 2;202 444 for (auto Bi = B.begin(); Bi != B.end(); ++Bi) { 203 if (userSize <= Bi->size()) { 204 VertexSet Ai(var->begin(), var->end()); 205 for (const PabloAST * user : *Bi) { 206 VertexSet Aj(cast<Variadic>(user)->begin(), cast<Variadic>(user)->end()); 207 std::sort(Aj.begin(), Aj.end()); 208 VertexSet Ak; 209 Ak.reserve(std::min<unsigned>(Ai.size(), Aj.size())); 210 std::set_intersection(Ai.begin(), Ai.end(), Aj.begin(), Aj.end(), std::back_inserter(Ak)); 211 Ai.swap(Ak); 212 } 213 if (Ai.size() > 1) { 214 if (userSize < Bi->size()) { 215 bicliques.clear(); 216 userSize = Bi->size(); 217 } 218 bicliques.emplace_back(std::move(Ai), std::move(*Bi)); 219 } 220 } 221 } 222 return bicliques; 223 } 224 225 /** ------------------------------------------------------------------------------------------------------------- * 226 * @brief pick 227 ** ------------------------------------------------------------------------------------------------------------- */ 228 inline static Biclique pick(BicliqueSet && bicliques) { 229 unsigned best_w = 0; 230 auto best_c = bicliques.begin(); 231 for (auto c = bicliques.begin(); c != bicliques.end(); ++c) { 232 const unsigned w = std::get<0>(*c).size(); 233 if (best_w < w) { 234 best_c = c; 235 best_w = w; 236 } 237 } 238 return std::move(*best_c); 239 } 240 241 /** ------------------------------------------------------------------------------------------------------------- * 242 * @brief chooseInsertionPoint 243 ** ------------------------------------------------------------------------------------------------------------- */ 244 inline PabloBlock * FactorizeDFG::chooseInsertionScope(const ASTVector & users) { 445 VertexSet Ai(var->begin(), var->end()); 446 for (const PabloAST * user : *Bi) { 447 VertexSet Aj(cast<Variadic>(user)->begin(), cast<Variadic>(user)->end()); 448 std::sort(Aj.begin(), Aj.end()); 449 VertexSet Ak; 450 Ak.reserve(std::min<unsigned>(Ai.size(), Aj.size())); 451 std::set_intersection(Ai.begin(), Ai.end(), Aj.begin(), Aj.end(), std::back_inserter(Ak)); 452 Ai.swap(Ak); 453 } 454 if (Ai.size() > 1 && Bi->size() > 1) { 455 bicliques.emplace(std::move(Ai), std::move(*Bi)); 456 } 457 } 458 } 459 460 /** ------------------------------------------------------------------------------------------------------------- * 461 * @brief intersects 462 ** ------------------------------------------------------------------------------------------------------------- */ 463 template <class Type> 464 inline bool intersects(const Type & A, const Type & B) { 465 auto first1 = A.begin(), last1 = A.end(); 466 auto first2 = B.begin(), last2 = B.end(); 467 while (first1 != last1 && first2 != last2) { 468 if (*first1 < *first2) { 469 ++first1; 470 } else if (*first2 < *first1) { 471 ++first2; 472 } else { 473 return true; 474 } 475 } 476 return false; 477 } 478 479 /** ------------------------------------------------------------------------------------------------------------- * 480 * @brief independentCliqueSets 481 ** ------------------------------------------------------------------------------------------------------------- */ 482 inline void FactorizeDFG::independentCliqueSets(BicliqueSet & bicliques) { 483 using IndependentSetGraph = adjacency_list<hash_setS, vecS, undirectedS, unsigned>; 484 485 const auto l = bicliques.size(); 486 IndependentSetGraph I(l); 487 488 // Initialize our weights and determine the constraints 489 for (auto i = bicliques.begin(); i != bicliques.end(); ++i) { 490 I[std::distance(bicliques.begin(), i)] = std::pow(std::get<0>(*i).size(), 2); 491 for (auto j = i; ++j != bicliques.end(); ) { 492 if (intersects(i->second, j->second) && intersects(i->first, j->first)) { 493 add_edge(std::distance(bicliques.begin(), i), std::distance(bicliques.begin(), j), I); 494 } 495 } 496 } 497 498 // Use the greedy algorithm to choose our independent set 499 std::vector<Vertex> selected; 500 for (;;) { 501 unsigned w = 0; 502 Vertex u = 0; 503 for (unsigned i = 0; i != l; ++i) { 504 if (I[i] > w) { 505 w = I[i]; 506 u = i; 507 } 508 } 509 if (w < 2) break; 510 selected.push_back(u); 511 I[u] = 0; 512 for (auto v : make_iterator_range(adjacent_vertices(u, I))) { 513 I[v] = 0; 514 } 515 } 516 517 // Sort the selected list and then remove the unselected cliques 518 std::sort(selected.begin(), selected.end(), std::greater<Vertex>()); 519 auto end = bicliques.end(); 520 for (const unsigned offset : selected) { 521 end = bicliques.erase(bicliques.begin() + offset + 1, end) - 1; 522 } 523 bicliques.erase(bicliques.begin(), end); 524 } 525 526 /** ------------------------------------------------------------------------------------------------------------- * 527 * @brief findInsertionPoint 528 * 529 * Look for the first user in this scope; if none can be found, look for the last operand. 530 ** ------------------------------------------------------------------------------------------------------------- */ 531 inline PabloBlock * FactorizeDFG::findInsertionPoint(NodeSet & operands, NodeSet & users) const { 245 532 246 533 ScopeSet scopes; 247 534 scopes.reserve(users.size()); 248 535 249 flat_set<PabloBlock *> visited;250 visited.reserve(users.size());251 252 536 for (PabloAST * user : users) { 253 PabloBlock * const scope = cast<Statement>(user)->getParent(); 254 assert (scope); 255 if (visited.insert(scope).second) { 537 PabloBlock * const scope = cast<Statement>(user)->getParent(); assert (scope); 538 if (std::find(scopes.begin(), scopes.end(), scope) == scopes.end()) { 256 539 scopes.push_back(scope); 257 540 } 541 } 542 543 if (LLVM_UNLIKELY(scopes.size() == 1)) { 544 PabloBlock * const scope = scopes.front(); 545 Statement * stmt = scope->front(); 546 std::sort(users.begin(), users.end()); 547 while (stmt) { 548 if (LLVM_UNLIKELY(std::binary_search(users.begin(), users.end(), stmt))) { 549 scope->setInsertPoint(stmt->getPrevNode()); 550 return scope; 551 } 552 stmt = stmt->getNextNode(); 553 } 554 llvm_unreachable("Failed to locate user in single scope block!"); 258 555 } 259 556 … … 286 583 } 287 584 assert (scopes.size() == 1); 288 return scopes[0]; 289 } 290 291 /** ------------------------------------------------------------------------------------------------------------- * 292 * @brief findInsertionPoint 293 ** ------------------------------------------------------------------------------------------------------------- */ 294 void FactorizeDFG::findInsertionPoint(const ASTVector & operands, PabloBlock * const scope) { 295 const flat_set<const PabloAST *> ops(operands.begin(), operands.end()); 585 586 PabloBlock * const scope = scopes.front(); 296 587 Statement * stmt = scope->back(); 297 s cope->setInsertPoint(nullptr);588 std::sort(operands.begin(), operands.end()); 298 589 while (stmt) { 299 590 if (isa<If>(stmt)) { 300 591 for (Assign * def : cast<If>(stmt)->getDefined()) { 301 if ( ops.count(def)) {592 if (LLVM_UNLIKELY(std::binary_search(operands.begin(), operands.end(), def))) { 302 593 scope->setInsertPoint(stmt); 303 return ;594 return scope; 304 595 } 305 596 } 306 597 } else if (isa<While>(stmt)) { 307 598 for (Next * var : cast<While>(stmt)->getVariants()) { 308 if ( ops.count(var)) {599 if (LLVM_UNLIKELY(std::binary_search(operands.begin(), operands.end(), var))) { 309 600 scope->setInsertPoint(stmt); 310 return ;311 } 312 } 313 } else if ( ops.count(stmt)) {601 return scope; 602 } 603 } 604 } else if (LLVM_UNLIKELY(std::binary_search(operands.begin(), operands.end(), stmt))) { 314 605 scope->setInsertPoint(stmt); 315 break;606 return scope; 316 607 } 317 608 stmt = stmt->getPrevNode(); 318 609 } 319 } 320 321 /** ------------------------------------------------------------------------------------------------------------- * 322 * @brief cse 323 ** ------------------------------------------------------------------------------------------------------------- */ 324 inline void FactorizeDFG::cse(Variadic * var) { 325 while (var->getNumOperands() > 2) { 326 BicliqueSet bicliques = enumerateBicliques(var); 327 if (bicliques.empty()) { 328 break; 329 } 330 ASTVector operands; 331 ASTVector users; 332 std::tie(operands, users) = pick(std::move(bicliques)); 333 PabloBlock * const block = chooseInsertionScope(users); 334 findInsertionPoint(operands, block); 610 scope->setInsertPoint(nullptr); 611 return scope; 612 } 613 614 /** ------------------------------------------------------------------------------------------------------------- * 615 * @brief processBicliques 616 ** ------------------------------------------------------------------------------------------------------------- */ 617 void FactorizeDFG::processBicliques(BicliqueSet & bicliques) const { 618 independentCliqueSets(bicliques); 619 for (Biclique & B : bicliques) { 620 VertexSet & operands = B.first; 621 VertexSet & users = B.second; 622 PabloBlock * const block = findInsertionPoint(operands, users); 335 623 Variadic * factored = nullptr; 336 if (isa<And>( var)) {624 if (isa<And>(users.front())) { 337 625 factored = block->createAnd(operands.begin(), operands.end()); 338 } else if (isa<Or>( var)) {626 } else if (isa<Or>(users.front())) { 339 627 factored = block->createOr(operands.begin(), operands.end()); 340 628 } else { // if (isa<Xor>(var)) { … … 348 636 } 349 637 } 350 } 351 352 /** ------------------------------------------------------------------------------------------------------------- * 353 * @brief cse 638 bicliques.clear(); 639 } 640 641 /** ------------------------------------------------------------------------------------------------------------- * 642 * @brief factor 354 643 * 355 644 * Perform common subexpression elimination on the Variadic statements 356 645 ** ------------------------------------------------------------------------------------------------------------- */ 357 void FactorizeDFG:: cse(PabloBlock * const block){646 void FactorizeDFG::factor(PabloBlock * const block, BicliqueSet & bicliques) const { 358 647 Statement * stmt = block->front(); 359 648 while (stmt) { 360 if (isa<If>(stmt) || isa<While>(stmt)) { 361 cse(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody()); 649 if (isa<If>(stmt) || isa<While>(stmt)) { 650 processBicliques(bicliques); 651 factor(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody(), bicliques); 652 } else if (stmt->getNumOperands() > 2 && (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt))) { 653 enumerateBicliques(cast<Variadic>(stmt), bicliques); 654 } 655 stmt = stmt->getNextNode(); 656 } 657 processBicliques(bicliques); 658 } 659 660 /** ------------------------------------------------------------------------------------------------------------- * 661 * @brief factor 662 ** ------------------------------------------------------------------------------------------------------------- */ 663 inline void FactorizeDFG::factor(PabloFunction & function) const { 664 BicliqueSet bicliques; 665 factor(function.getEntryBlock(), bicliques); 666 } 667 668 #endif 669 670 #if 1 671 672 using VertexSet = std::vector<PabloAST *>; 673 using Biclique = std::pair<VertexSet, VertexSet>; // [{Operands}, {Users}] 674 using BicliqueSet = std::vector<Biclique>; 675 using TypeId = PabloAST::ClassTypeId; 676 677 /** ------------------------------------------------------------------------------------------------------------- * 678 * @brief findAllFactoringsOf 679 * 680 * Adaptation of the MICA algorithm as described in "Consensus algorithms for the generation of all maximal 681 * bicliques" by Alexe et. al. (2003). 682 ** ------------------------------------------------------------------------------------------------------------- */ 683 inline BicliqueSet FactorizeDFG::findAllFactoringsOf(Variadic * const var) { 684 using IntersectionSets = flat_set<VertexSet>; 685 686 IntersectionSets B1; 687 const TypeId typeId = var->getClassTypeId(); 688 for (unsigned i = 0; i != var->getNumOperands(); ++i) { 689 PabloAST * const op = var->getOperand(i); 690 VertexSet B; 691 B.reserve(op->getNumUses()); 692 for (PabloAST * user : op->users()) { 693 if (user->getClassTypeId() == typeId) { 694 // Only consider a user who is in the same or a nested scope? 695 PabloBlock * scope = cast<Variadic>(user)->getParent(); 696 while (scope) { 697 if (scope == var->getParent()) { 698 B.push_back(user); 699 break; 700 } 701 scope = scope->getParent(); 702 } 703 704 // B.push_back(user); 705 } 706 } 707 std::sort(B.begin(), B.end()); 708 B1.insert(std::move(B)); 709 } 710 711 IntersectionSets B(B1); 712 713 IntersectionSets Bi; 714 715 VertexSet clique; 716 for (auto i = B1.begin(); i != B1.end(); ++i) { 717 for (auto j = i; ++j != B1.end(); ) { 718 std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(clique)); 719 if (clique.size() > 0) { 720 if (B.count(clique) == 0) { 721 Bi.insert(clique); 722 } 723 clique.clear(); 724 } 725 } 726 } 727 728 while (!Bi.empty()) { 729 B.insert(Bi.begin(), Bi.end()); 730 IntersectionSets Bk; 731 for (auto i = B1.begin(); i != B1.end(); ++i) { 732 for (auto j = Bi.begin(); j != Bi.end(); ++j) { 733 std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(clique)); 734 if (clique.size() > 0) { 735 if (B.count(clique) == 0) { 736 Bk.insert(clique); 737 } 738 clique.clear(); 739 } 740 } 741 } 742 Bi.swap(Bk); 743 } 744 745 BicliqueSet bicliques; 746 for (auto Bi = B.begin(); Bi != B.end(); ++Bi) { 747 VertexSet Ai(var->begin(), var->end()); 748 for (const PabloAST * user : *Bi) { 749 VertexSet Aj(cast<Variadic>(user)->begin(), cast<Variadic>(user)->end()); 750 std::sort(Aj.begin(), Aj.end()); 751 VertexSet Ak; 752 Ak.reserve(std::min<unsigned>(Ai.size(), Aj.size())); 753 std::set_intersection(Ai.begin(), Ai.end(), Aj.begin(), Aj.end(), std::back_inserter(Ak)); 754 Ai.swap(Ak); 755 } 756 if (Ai.size() > 1 && Bi->size() > 1) { 757 bicliques.emplace_back(std::move(Ai), std::move(*Bi)); 758 } 759 } 760 return bicliques; 761 } 762 763 /** ------------------------------------------------------------------------------------------------------------- * 764 * @brief intersects 765 ** ------------------------------------------------------------------------------------------------------------- */ 766 template <class Type> 767 inline bool intersects(const Type & A, const Type & B) { 768 auto first1 = A.begin(), last1 = A.end(); 769 auto first2 = B.begin(), last2 = B.end(); 770 assert (std::is_sorted(first1, last1)); 771 assert (std::is_sorted(first2, last2)); 772 while (first1 != last1 && first2 != last2) { 773 if (*first1 < *first2) { 774 ++first1; 775 } else if (*first2 < *first1) { 776 ++first2; 777 } else { 778 return true; 779 } 780 } 781 return false; 782 } 783 784 /** ------------------------------------------------------------------------------------------------------------- * 785 * @brief independentCliqueSets 786 ** ------------------------------------------------------------------------------------------------------------- */ 787 inline void FactorizeDFG::independentCliqueSets(BicliqueSet & bicliques) { 788 using IndependentSetGraph = adjacency_list<hash_setS, vecS, undirectedS, unsigned>; 789 using Vertex = IndependentSetGraph::vertex_descriptor; 790 791 const auto l = bicliques.size(); 792 IndependentSetGraph I(l); 793 794 // Initialize our weights and determine the constraints 795 for (auto i = bicliques.begin(); i != bicliques.end(); ++i) { 796 I[std::distance(bicliques.begin(), i)] = std::pow(std::get<0>(*i).size(), 2); 797 for (auto j = i; ++j != bicliques.end(); ) { 798 if (intersects(i->second, j->second) && intersects(i->first, j->first)) { 799 add_edge(std::distance(bicliques.begin(), i), std::distance(bicliques.begin(), j), I); 800 } 801 } 802 } 803 804 // Use the greedy algorithm to choose our independent set 805 std::vector<Vertex> selected; 806 for (;;) { 807 unsigned w = 0; 808 Vertex u = 0; 809 for (unsigned i = 0; i != l; ++i) { 810 if (I[i] > w) { 811 w = I[i]; 812 u = i; 813 } 814 } 815 if (w < 2) break; 816 selected.push_back(u); 817 I[u] = 0; 818 for (auto v : make_iterator_range(adjacent_vertices(u, I))) { 819 I[v] = 0; 820 } 821 } 822 823 // Sort the selected list and then remove the unselected cliques 824 std::sort(selected.begin(), selected.end(), std::greater<Vertex>()); 825 auto end = bicliques.end(); 826 for (const unsigned offset : selected) { 827 end = bicliques.erase(bicliques.begin() + offset + 1, end) - 1; 828 } 829 bicliques.erase(bicliques.begin(), end); 830 } 831 832 /** ------------------------------------------------------------------------------------------------------------- * 833 * @brief findInsertionScope 834 ** ------------------------------------------------------------------------------------------------------------- */ 835 inline PabloBlock * FactorizeDFG::findInsertionScope(const NodeSet & users) const { 836 ScopeSet scopes; 837 scopes.reserve(users.size()); 838 839 for (PabloAST * user : users) { 840 PabloBlock * const scope = cast<Statement>(user)->getParent(); assert (scope); 841 if (std::find(scopes.begin(), scopes.end(), scope) == scopes.end()) { 842 scopes.push_back(scope); 843 } 844 } 845 846 while (scopes.size() > 1) { 847 // Find the LCA of both scopes then add the LCA back to the list of scopes. 848 PabloBlock * scope1 = scopes.back(); scopes.pop_back(); 849 PabloBlock * scope2 = scopes.back(); scopes.pop_back(); 850 if (scope1 != scope2) { 851 unsigned depth1 = scopeDepthOf(scope1); 852 unsigned depth2 = scopeDepthOf(scope2); 853 // If one of these scopes is nested deeper than the other, scan upwards through 854 // the scope tree until both scopes are at the same depth. 855 while (depth1 > depth2) { 856 scope1 = scope1->getParent(); 857 --depth1; 858 } 859 while (depth1 < depth2) { 860 scope2 = scope2->getParent(); 861 --depth2; 862 } 863 // Then iteratively step backwards until we find a matching set of scopes; this 864 // must be the LCA of our original scopes. 865 while (scope1 != scope2) { 866 scope1 = scope1->getParent(); 867 scope2 = scope2->getParent(); 868 } 869 assert (scope1 && scope2); 870 } 871 if (LLVM_LIKELY(std::find(scopes.begin(), scopes.end(), scope1) == scopes.end())) { 872 scopes.push_back(scope1); 873 } 874 } 875 assert (scopes.size() == 1); 876 877 return scopes.front(); 878 } 879 880 /** ------------------------------------------------------------------------------------------------------------- * 881 * @brief makeCheckSet 882 ** ------------------------------------------------------------------------------------------------------------- */ 883 FactorizeDFG::CheckSet FactorizeDFG::makeCheckSet(PabloBlock * const scope, const NodeSet & values) const { 884 CheckSet checks; 885 assert (scope); 886 const unsigned baseScopeDepth = scopeDepthOf(scope); 887 for (PabloAST * value : values) { 888 if (isa<Statement>(value)) { 889 unsigned scopeDepth = scopeDepthOf(cast<Statement>(value)->getParent()); 890 // Is this from a preceeding scope? Or a nested scope? 891 if (scopeDepth < baseScopeDepth) { 892 continue; 893 } else if (scopeDepth > baseScopeDepth) { 894 // If we're in a nested scope, we want to know what branch statement enters it and 895 // add that to our checks instead of the operand itself. 896 PabloBlock * nested = cast<Statement>(value)->getParent(); 897 for (;;) { 898 assert (nested); 899 value = nested->getBranch(); 900 nested = nested->getParent(); 901 if (nested == scope) { 902 break; 903 } 904 } 905 } 906 checks.insert(value); 907 } 908 } 909 return checks; 910 } 911 912 /** ------------------------------------------------------------------------------------------------------------- * 913 * @brief firstIn 914 ** ------------------------------------------------------------------------------------------------------------- */ 915 inline Statement * FactorizeDFG::firstIn(PabloBlock * const scope, Statement * stmt, const NodeSet & users) const { 916 const auto checks = makeCheckSet(scope, users); 917 while (stmt) { 918 if (LLVM_UNLIKELY(checks.count(stmt) != 0)) { 919 break; 920 } 921 stmt = stmt->getNextNode(); 922 } 923 return stmt; 924 } 925 926 /** ------------------------------------------------------------------------------------------------------------- * 927 * @brief lastIn 928 ** ------------------------------------------------------------------------------------------------------------- */ 929 inline Statement * FactorizeDFG::lastIn(PabloBlock * const scope, Statement * stmt, const NodeSet & operands) const { 930 const auto checks = makeCheckSet(scope, operands); 931 while (stmt) { 932 if (LLVM_UNLIKELY(checks.count(stmt) != 0)) { 933 break; 934 } 935 stmt = stmt->getPrevNode(); 936 } 937 return stmt; 938 } 939 940 /** ------------------------------------------------------------------------------------------------------------- * 941 * @brief findInsertionPoint 942 * 943 * Look for the first user in this scope; if none can be found, look for the last operand. 944 ** ------------------------------------------------------------------------------------------------------------- */ 945 inline PabloBlock * FactorizeDFG::findInsertionPoint(const NodeSet & operands, const NodeSet & users) const { 946 PabloBlock * const scope = findInsertionScope(users); 947 Statement * const lastOperand = lastIn(scope, scope->back(), operands); 948 Statement * const firstUsage = firstIn(scope, lastOperand, users); 949 scope->setInsertPoint(firstUsage ? firstUsage->getPrevNode() : lastOperand); 950 return scope; 951 } 952 953 /** ------------------------------------------------------------------------------------------------------------- * 954 * @brief factor 955 ** ------------------------------------------------------------------------------------------------------------- */ 956 inline void FactorizeDFG::factor(Variadic * const var, Variadics & factors) const { 957 if (var->getNumOperands() > 2) { 958 BicliqueSet S = findAllFactoringsOf(var); 959 if (S.empty()) return; 960 independentCliqueSets(S); 961 assert (S.size() > 0); 962 for (Biclique & B : S) { 963 VertexSet & operands = B.first; 964 VertexSet & users = B.second; 965 PabloBlock * const block = findInsertionPoint(operands, users); 966 Variadic * factoring = nullptr; 967 if (isa<And>(var)) { 968 factoring = block->createAnd(operands.begin(), operands.end()); 969 } else if (isa<Or>(var)) { 970 factoring = block->createOr(operands.begin(), operands.end()); 971 } else { // if (isa<Xor>(var)) { 972 factoring = block->createXor(operands.begin(), operands.end()); 973 } 974 for (PabloAST * user : users) { 975 for (PabloAST * op : operands) { 976 cast<Variadic>(user)->deleteOperand(op); 977 } 978 cast<Variadic>(user)->addOperand(factoring); 979 } 980 if (factoring->getNumOperands() > 2) { 981 if (LLVM_UNLIKELY(factors.full())) { 982 factors.set_capacity(factors.capacity() + factors.capacity() / 2); 983 } 984 factors.push_back(factoring); 985 } 986 } 987 if (var->getNumOperands() > 2) { 988 if (LLVM_UNLIKELY(factors.full())) { 989 factors.set_capacity(factors.capacity() + factors.capacity() / 2); 990 } 991 factors.push_back(var); 992 } else if (var->getNumOperands() == 1) { 993 var->replaceWith(var->getOperand(0)); 994 } 995 } 996 } 997 998 /** ------------------------------------------------------------------------------------------------------------- * 999 * @brief factor 1000 * 1001 * Perform common subexpression elimination on the Variadic statements 1002 ** ------------------------------------------------------------------------------------------------------------- */ 1003 void FactorizeDFG::factor(PabloBlock * const block, Variadics & vars) const { 1004 Statement * stmt = block->front(); 1005 while (stmt) { 1006 if (isa<If>(stmt) || isa<While>(stmt)) { 1007 factor(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody(), vars); 1008 } else if (stmt->getNumOperands() > 2 && (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt))) { 1009 assert (!vars.full()); 1010 vars.push_back(cast<Variadic>(stmt)); 1011 } 1012 stmt = stmt->getNextNode(); 1013 } 1014 } 1015 1016 /** ------------------------------------------------------------------------------------------------------------- * 1017 * @brief factor 1018 ** ------------------------------------------------------------------------------------------------------------- */ 1019 inline void FactorizeDFG::factor(PabloFunction & function) const { 1020 Variadics vars(mNumOfVariadics); 1021 factor(function.getEntryBlock(), vars); 1022 while (vars.size() > 0) { 1023 Variadic * var = vars.front(); 1024 vars.pop_front(); 1025 factor(var, vars); 1026 } 1027 } 1028 1029 #endif 1030 1031 #if 0 1032 1033 using Graph = boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, PabloAST *>; 1034 using Vertex = Graph::vertex_descriptor; 1035 using Map = flat_map<const PabloAST *, Vertex>; 1036 1037 using VertexSet = std::vector<Vertex>; 1038 using Biclique = std::pair<VertexSet, VertexSet>; // [{Operands}, {Users}] 1039 using BicliqueSet = std::vector<Biclique>; 1040 using TypeId = PabloAST::ClassTypeId; 1041 1042 /** ------------------------------------------------------------------------------------------------------------- * 1043 * @brief enumerateBicliques 1044 * 1045 * Adaptation of the MICA algorithm as described in "Consensus algorithms for the generation of all maximal 1046 * bicliques" by Alexe et. al. (2003). Note: this implementation considers all verticies in set A to be in 1047 * bipartition A and their adjacencies to be in B. 1048 ** ------------------------------------------------------------------------------------------------------------- */ 1049 template <class Graph> 1050 static BicliqueSet enumerateBicliques(const Graph & G, const VertexSet & A) { 1051 using IntersectionSets = std::set<VertexSet>; 1052 1053 IntersectionSets B1; 1054 for (auto u : A) { 1055 if (out_degree(u, G) > 0) { 1056 VertexSet incomingAdjacencies; 1057 incomingAdjacencies.reserve(out_degree(u, G)); 1058 for (auto e : make_iterator_range(out_edges(u, G))) { 1059 incomingAdjacencies.push_back(target(e, G)); 1060 } 1061 std::sort(incomingAdjacencies.begin(), incomingAdjacencies.end()); 1062 B1.insert(std::move(incomingAdjacencies)); 1063 } 1064 } 1065 1066 IntersectionSets B(B1); 1067 1068 IntersectionSets Bi; 1069 1070 VertexSet clique; 1071 for (auto i = B1.begin(); i != B1.end(); ++i) { 1072 for (auto j = i; ++j != B1.end(); ) { 1073 std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(clique)); 1074 if (clique.size() > 0) { 1075 if (B.count(clique) == 0) { 1076 Bi.insert(clique); 1077 } 1078 clique.clear(); 1079 } 1080 } 1081 } 1082 1083 for (;;) { 1084 if (Bi.empty()) { 1085 break; 1086 } 1087 B.insert(Bi.begin(), Bi.end()); 1088 IntersectionSets Bk; 1089 for (auto i = B1.begin(); i != B1.end(); ++i) { 1090 for (auto j = Bi.begin(); j != Bi.end(); ++j) { 1091 std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(clique)); 1092 if (clique.size() > 0) { 1093 if (B.count(clique) == 0) { 1094 Bk.insert(clique); 1095 } 1096 clique.clear(); 1097 } 1098 } 1099 } 1100 Bi.swap(Bk); 1101 } 1102 1103 BicliqueSet cliques; 1104 cliques.reserve(B.size()); 1105 for (auto Bi = B.begin(); Bi != B.end(); ++Bi) { 1106 VertexSet Ai(A); 1107 for (const Vertex u : *Bi) { 1108 VertexSet Aj; 1109 Aj.reserve(in_degree(u, G)); 1110 for (auto e : make_iterator_range(in_edges(u, G))) { 1111 Aj.push_back(source(e, G)); 1112 } 1113 std::sort(Aj.begin(), Aj.end()); 1114 VertexSet Ak; 1115 Ak.reserve(std::min(Ai.size(), Aj.size())); 1116 std::set_intersection(Ai.begin(), Ai.end(), Aj.begin(), Aj.end(), std::back_inserter(Ak)); 1117 Ai.swap(Ak); 1118 } 1119 assert (Ai.size() > 0); // cannot happen if this algorithm is working correctly 1120 cliques.emplace_back(std::move(Ai), std::move(*Bi)); 1121 } 1122 return std::move(cliques); 1123 } 1124 1125 /** ------------------------------------------------------------------------------------------------------------- * 1126 * @brief intersects 1127 ** ------------------------------------------------------------------------------------------------------------- */ 1128 template <class Type> 1129 inline bool intersects(const Type & A, const Type & B) { 1130 auto first1 = A.begin(), last1 = A.end(); 1131 auto first2 = B.begin(), last2 = B.end(); 1132 while (first1 != last1 && first2 != last2) { 1133 if (*first1 < *first2) { 1134 ++first1; 1135 } else if (*first2 < *first1) { 1136 ++first2; 1137 } else { 1138 return true; 1139 } 1140 } 1141 return false; 1142 } 1143 1144 /** ------------------------------------------------------------------------------------------------------------- * 1145 * @brief getMaximalIndependentBicliques 1146 ** ------------------------------------------------------------------------------------------------------------- */ 1147 template <unsigned side = 1> 1148 inline static BicliqueSet && maximalIndependentBicliques(BicliqueSet && cliques, const unsigned minimum = 1) { 1149 using IndependentSetGraph = adjacency_list<hash_setS, vecS, undirectedS, unsigned>; 1150 1151 static_assert((side == 0 || side == 1), "Side must be 0 (bipartition A) or 1 (bipartition B)"); 1152 assert ("Minimum must be at least 1 or an infinite number of empty sets would be generated!" && minimum > 0); 1153 1154 const auto l = cliques.size(); 1155 IndependentSetGraph I(l); 1156 1157 // Initialize our weights 1158 for (unsigned i = 0; i != l; ++i) { 1159 I[i] = std::pow(std::get<side>(cliques[i]).size(), 2); 1160 } 1161 1162 // Determine our constraints 1163 for (unsigned i = 0; i != l; ++i) { 1164 for (unsigned j = i + 1; j != l; ++j) { 1165 if (intersects(std::get<side>(cliques[i]), std::get<side>(cliques[j]))) { 1166 add_edge(i, j, I); 1167 } 1168 } 1169 } 1170 1171 // Use the greedy algorithm to choose our independent set 1172 VertexSet selected; 1173 for (;;) { 1174 unsigned w = 0; 1175 Vertex u = 0; 1176 for (unsigned i = 0; i != l; ++i) { 1177 if (I[i] > w) { 1178 w = I[i]; 1179 u = i; 1180 } 1181 } 1182 if (w < minimum) break; 1183 selected.push_back(u); 1184 I[u] = 0; 1185 for (auto v : make_iterator_range(adjacent_vertices(u, I))) { 1186 I[v] = 0; 1187 } 1188 } 1189 1190 // Sort the selected list and then remove the unselected cliques 1191 std::sort(selected.begin(), selected.end(), std::greater<Vertex>()); 1192 auto end = cliques.end(); 1193 for (const unsigned offset : selected) { 1194 end = cliques.erase(cliques.begin() + offset + 1, end) - 1; 1195 } 1196 cliques.erase(cliques.begin(), end); 1197 1198 return std::move(cliques); 1199 } 1200 1201 /** ------------------------------------------------------------------------------------------------------------- * 1202 * @brief factor 1203 ** ------------------------------------------------------------------------------------------------------------- */ 1204 template<class Type> 1205 inline void factorAny(Graph & G, VertexSet & A, const Biclique & Si, PabloBlock * const block) { 1206 1207 static_assert (std::is_same<Type, And>::value || std::is_same<Type, Or>::value || std::is_same<Type, Xor>::value, "Can only factor And, Or or Xor statements here!"); 1208 1209 flat_set<Statement *> S; 1210 for (auto u : Si.first) { 1211 if (isa<Type>(G[u])) { 1212 S.insert(cast<Type>(G[u])); 1213 } 1214 } 1215 if (S.empty()) { 1216 return; 1217 } 1218 // find the insertion point for this statement type 1219 for (Statement * ip : *block) { 1220 if (S.count(ip)) { 1221 block->setInsertPoint(ip->getPrevNode()); 1222 break; 1223 } 1224 } 1225 Variadic * factored = nullptr; 1226 if (std::is_same<Type, And>::value) { 1227 factored = block->createAnd(Si.second.size()); 1228 } else if (std::is_same<Type, Or>::value) { 1229 factored = block->createOr(Si.second.size()); 1230 } else if (std::is_same<Type, Xor>::value) { 1231 factored = block->createXor(Si.second.size()); 1232 } 1233 const auto u = add_vertex(factored, G); 1234 A.push_back(u); 1235 for (auto v : Si.second) { 1236 factored->addOperand(G[v]); 1237 add_edge(u, v, G); 1238 } 1239 const auto w = add_vertex(factored, G); 1240 for (auto u : Si.first) { 1241 if (isa<Type>(G[u])) { 1242 Type * factoring = cast<Type>(G[u]); 1243 for (auto v : Si.second) { 1244 factoring->deleteOperand(G[v]); 1245 remove_edge(u, v, G); 1246 } 1247 factoring->addOperand(factored); 1248 add_edge(u, w, G); 1249 } 1250 } 1251 } 1252 1253 /** ------------------------------------------------------------------------------------------------------------- * 1254 * @brief eliminateCommonSubexpressions 1255 ** ------------------------------------------------------------------------------------------------------------- */ 1256 void eliminateCommonSubexpressions(Graph & G, VertexSet & A, PabloBlock * const block) { 1257 for (;;) { 1258 const auto S = maximalIndependentBicliques<1>(enumerateBicliques(G, A), 2); 1259 if (S.empty()) { 1260 break; 1261 } 1262 for (const Biclique Si : S) { 1263 factorAny<And>(G, A, Si, block); 1264 factorAny<Or>(G, A, Si, block); 1265 factorAny<Xor>(G, A, Si, block); 1266 } 1267 } 1268 } 1269 1270 /** ------------------------------------------------------------------------------------------------------------- * 1271 * @brief factor 1272 * 1273 * Perform common subexpression elimination on the Variadic statements 1274 ** ------------------------------------------------------------------------------------------------------------- */ 1275 void FactorizeDFG::factor(PabloBlock * const block) { 1276 1277 Graph G; 1278 VertexSet A; 1279 Map B; 1280 1281 Statement * stmt = block->front(); 1282 while (stmt) { 1283 if (isa<If>(stmt) || isa<While>(stmt)) { 1284 eliminateCommonSubexpressions(G, A, block); 1285 G.clear(); A.clear(); B.clear(); 1286 factor(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody()); 362 1287 } else if (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt)) { 363 cse(cast<Variadic>(stmt)); 1288 // When we encounter a reassociative statement, add it and its operands to our bigraph G. 1289 const auto u = add_vertex(stmt, G); 1290 A.push_back(u); 1291 for (unsigned i = 0; i != stmt->getNumOperands(); ++i) { 1292 auto f = B.find(stmt->getOperand(i)); 1293 if (f == B.end()) { 1294 f = B.emplace(stmt->getOperand(i), add_vertex(stmt->getOperand(i), G)).first; 1295 } 1296 add_edge(f->second, u, G); 1297 } 1298 } 1299 stmt = stmt->getNextNode(); 1300 } 1301 eliminateCommonSubexpressions(G, A, block); 1302 } 1303 #endif 1304 1305 #if 0 1306 1307 using Graph = boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, PabloAST *>; 1308 using Matrix = adjacency_matrix<directedS>; 1309 using Vertex = Graph::vertex_descriptor; 1310 using Map = flat_map<const PabloAST *, Vertex>; 1311 using TypeId = PabloAST::ClassTypeId; 1312 1313 /** ------------------------------------------------------------------------------------------------------------- * 1314 * @brief getVertex 1315 ** ------------------------------------------------------------------------------------------------------------- */ 1316 static inline Vertex getVertex(PabloAST * expr, Graph & G, Map & M) { 1317 auto f = M.find(expr); 1318 if (LLVM_LIKELY(f != M.end())) { 1319 return f->second; 1320 } else { 1321 const Vertex u = add_vertex(expr, G); 1322 M.emplace(expr, u); 1323 return u; 1324 } 1325 } 1326 1327 /** ------------------------------------------------------------------------------------------------------------- * 1328 * @brief buildDependencyGraph 1329 ** ------------------------------------------------------------------------------------------------------------- */ 1330 static void buildDependencyGraph(PabloBlock * const block, Graph & G, Map & M) { 1331 Statement * stmt = block->front(); 1332 while (stmt) { 1333 if (isa<If>(stmt) || isa<While>(stmt)) { 1334 buildDependencyGraph(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody(), G, M); 1335 } else if (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt)) { 1336 const Vertex u = getVertex(stmt, G, M); 1337 for (unsigned i = 0; i != stmt->getNumOperands(); ++i) { 1338 add_edge(getVertex(stmt->getOperand(i), G, M), u, G); 1339 } 1340 } 1341 stmt = stmt->getNextNode(); 1342 } 1343 } 1344 1345 /** ------------------------------------------------------------------------------------------------------------- * 1346 * @brief transitiveClosure 1347 ** ------------------------------------------------------------------------------------------------------------- */ 1348 static Matrix transitiveClosureOf(const Graph & G) { 1349 std::vector<Vertex> rto; // reverse topological ordering 1350 rto.reserve(num_vertices(G)); 1351 topological_sort(G, std::back_inserter(rto)); 1352 Matrix M(num_vertices(G)); 1353 for (auto u : rto) { 1354 for (auto ej : make_iterator_range(in_edges(u, G))) { 1355 add_edge(source(ej, G), target(ej, G), M); 1356 } 1357 for (auto ei : make_iterator_range(out_edges(u, M))) { 1358 for (auto ej : make_iterator_range(in_edges(u, G))) { 1359 add_edge(source(ej, G), target(ei, M), M); 1360 } 1361 } 1362 add_edge(u, u, M); 1363 } 1364 return std::move(M); 1365 } 1366 1367 using VertexSet = std::vector<Vertex>; 1368 using Biclique = std::pair<VertexSet, VertexSet>; // [{Operands}, {Users}] 1369 using BicliqueSet = flat_set<Biclique>; 1370 using IntersectionSets = std::set<VertexSet>; 1371 1372 /** ------------------------------------------------------------------------------------------------------------- * 1373 * @brief enumerateBicliques 1374 * 1375 * Adaptation of the MICA algorithm as described in "Consensus algorithms for the generation of all maximal 1376 * bicliques" by Alexe et. al. (2003). Note: this implementation considers all verticies in set A to be in 1377 * bipartition A and their adjacencies to be in B. 1378 ** ------------------------------------------------------------------------------------------------------------- */ 1379 inline static void enumerateBicliques(VertexSet & A, VertexSet & B, const Graph & G, BicliqueSet & bicliques) { 1380 1381 std::sort(A.begin(), A.end()); 1382 std::sort(B.begin(), B.end()); 1383 1384 VertexSet S; 1385 IntersectionSets B1; 1386 for (auto u : A) { 1387 S.reserve(out_degree(u, G)); 1388 for (auto e : make_iterator_range(out_edges(u, G))) { 1389 S.push_back(target(e, G)); 1390 } 1391 assert (S.size() > 0); 1392 std::sort(S.begin(), S.end()); 1393 VertexSet T; 1394 std::set_intersection(B.begin(), B.end(), S.begin(), S.end(), std::back_inserter(T)); 1395 assert (T.size() > 0); 1396 B1.emplace(std::move(T)); 1397 S.clear(); 1398 } 1399 1400 IntersectionSets C(B1); 1401 IntersectionSets Bi; 1402 for (auto i = B1.begin(); i != B1.end(); ++i) { 1403 for (auto j = i; ++j != B1.end(); ) { 1404 std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(S)); 1405 if (S.size() > 0) { 1406 if (C.count(S) == 0) { 1407 Bi.insert(S); 1408 } 1409 S.clear(); 1410 } 1411 } 1412 } 1413 1414 for (;;) { 1415 if (Bi.empty()) { 1416 break; 1417 } 1418 C.insert(Bi.begin(), Bi.end()); 1419 IntersectionSets Bk; 1420 for (auto i = B1.begin(); i != B1.end(); ++i) { 1421 for (auto j = Bi.begin(); j != Bi.end(); ++j) { 1422 std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(S)); 1423 if (S.size() > 0) { 1424 if (C.count(S) == 0) { 1425 Bk.insert(S); 1426 } 1427 S.clear(); 1428 } 1429 } 1430 } 1431 Bi.swap(Bk); 1432 } 1433 1434 for (auto Bi = C.begin(); Bi != C.end(); ++Bi) { 1435 VertexSet Ai(A); 1436 for (const Vertex u : *Bi) { 1437 VertexSet Aj; 1438 Aj.reserve(in_degree(u, G)); 1439 for (auto e : make_iterator_range(in_edges(u, G))) { 1440 Aj.push_back(source(e, G)); 1441 } 1442 std::sort(Aj.begin(), Aj.end()); 1443 VertexSet Ak; 1444 Ak.reserve(std::min(Ai.size(), Aj.size())); 1445 std::set_intersection(Ai.begin(), Ai.end(), Aj.begin(), Aj.end(), std::back_inserter(Ak)); 1446 Ai.swap(Ak); 1447 } 1448 assert (Ai.size() > 0); // cannot happen if this algorithm is working correctly 1449 if (Ai.size() > 1 && Bi->size() > 1) { 1450 bicliques.emplace(std::move(Ai), std::move(*Bi)); 1451 } 1452 } 1453 } 1454 1455 /** ------------------------------------------------------------------------------------------------------------- * 1456 * @brief analyzeGraph 1457 ** ------------------------------------------------------------------------------------------------------------- */ 1458 inline static void analyzeGraph(const Vertex u, Graph & G, const Matrix & M, BicliqueSet & bicliques, const unsigned traversalLimit = 10) { 1459 1460 VertexSet A; 1461 VertexSet B; 1462 1463 std::vector<bool> visited(num_vertices(G), false); 1464 circular_buffer<Vertex> Q(64); 1465 const TypeId typeId = G[u]->getClassTypeId(); 1466 1467 Vertex v = u; 1468 visited[v] = true; 1469 for (;;) { 1470 bool independent = true; 1471 for (auto w : B) { 1472 if (M.get_edge(v, w)) { 1473 independent = false; 1474 break; 1475 } 1476 } 1477 if (independent) { 1478 B.push_back(v); 1479 if (B.size() < traversalLimit) { 1480 for (auto e : make_iterator_range(in_edges(v, G))) { 1481 const auto w = source(e, G); 1482 if (visited[w]) { 1483 continue; 1484 } 1485 bool independent = true; 1486 for (auto x : A) { 1487 if (M.get_edge(w, x)) { 1488 independent = false; 1489 break; 1490 } 1491 } 1492 visited[w] = true; 1493 if (independent) { 1494 A.push_back(w); 1495 for (auto e : make_iterator_range(out_edges(w, G))) { 1496 const auto x = target(e, G); 1497 if (visited[x]) { 1498 continue; 1499 } 1500 visited[x] = true; 1501 if (G[x]->getClassTypeId() == typeId) { 1502 if (LLVM_UNLIKELY(Q.full())) { 1503 Q.set_capacity(Q.capacity() + (Q.capacity() / 2)); 1504 } 1505 Q.push_back(x); 1506 } 1507 } 1508 } 1509 } 1510 } 1511 } 1512 if (Q.empty()) { 1513 break; 1514 } 1515 v = Q.front(); 1516 Q.pop_front(); 1517 } 1518 1519 enumerateBicliques(A, B, G, bicliques); 1520 } 1521 1522 /** ------------------------------------------------------------------------------------------------------------- * 1523 * @brief intersects 1524 ** ------------------------------------------------------------------------------------------------------------- */ 1525 template <class Type> 1526 inline bool intersects(const Type & A, const Type & B) { 1527 auto first1 = A.begin(), last1 = A.end(); 1528 auto first2 = B.begin(), last2 = B.end(); 1529 while (first1 != last1 && first2 != last2) { 1530 if (*first1 < *first2) { 1531 ++first1; 1532 } else if (*first2 < *first1) { 1533 ++first2; 1534 } else { 1535 return true; 1536 } 1537 } 1538 return false; 1539 } 1540 1541 /** ------------------------------------------------------------------------------------------------------------- * 1542 * @brief independentCliqueSets 1543 ** ------------------------------------------------------------------------------------------------------------- */ 1544 inline static void independentCliqueSets(BicliqueSet & bicliques, const unsigned minimum) { 1545 using IndependentSetGraph = adjacency_list<hash_setS, vecS, undirectedS, unsigned>; 1546 1547 const auto l = bicliques.size(); 1548 IndependentSetGraph I(l); 1549 1550 // Initialize our weights and determine the constraints 1551 for (auto i = bicliques.begin(); i != bicliques.end(); ++i) { 1552 I[std::distance(bicliques.begin(), i)] = std::pow(std::get<0>(*i).size(), 2); 1553 for (auto j = i; ++j != bicliques.end(); ) { 1554 if (intersects(std::get<1>(*i), std::get<1>(*j))) { 1555 add_edge(std::distance(bicliques.begin(), i), std::distance(bicliques.begin(), j), I); 1556 } 1557 } 1558 } 1559 1560 // Use the greedy algorithm to choose our independent set 1561 VertexSet selected; 1562 for (;;) { 1563 unsigned w = 0; 1564 Vertex u = 0; 1565 for (unsigned i = 0; i != l; ++i) { 1566 if (I[i] > w) { 1567 w = I[i]; 1568 u = i; 1569 } 1570 } 1571 if (w < minimum) break; 1572 selected.push_back(u); 1573 I[u] = 0; 1574 for (auto v : make_iterator_range(adjacent_vertices(u, I))) { 1575 I[v] = 0; 1576 } 1577 } 1578 1579 // Sort the selected list and then remove the unselected cliques 1580 std::sort(selected.begin(), selected.end(), std::greater<Vertex>()); 1581 auto end = bicliques.end(); 1582 for (const unsigned offset : selected) { 1583 end = bicliques.erase(bicliques.begin() + offset + 1, end) - 1; 1584 } 1585 bicliques.erase(bicliques.begin(), end); 1586 } 1587 1588 /** ------------------------------------------------------------------------------------------------------------- * 1589 * @brief chooseInsertionPoint 1590 ** ------------------------------------------------------------------------------------------------------------- */ 1591 inline PabloBlock * FactorizeDFG::chooseInsertionScope(const NodeSet & users) { 1592 1593 ScopeSet scopes; 1594 scopes.reserve(users.size()); 1595 1596 flat_set<PabloBlock *> visited; 1597 visited.reserve(users.size()); 1598 1599 for (PabloAST * user : users) { 1600 PabloBlock * const scope = cast<Statement>(user)->getParent(); 1601 assert (scope); 1602 if (visited.insert(scope).second) { 1603 scopes.push_back(scope); 1604 } 1605 } 1606 1607 while (scopes.size() > 1) { 1608 // Find the LCA of both scopes then add the LCA back to the list of scopes. 1609 PabloBlock * scope1 = scopes.back(); scopes.pop_back(); 1610 PabloBlock * scope2 = scopes.back(); scopes.pop_back(); 1611 if (scope1 != scope2) { 1612 unsigned depth1 = mScopeDepth.find(scope1)->second; 1613 unsigned depth2 = mScopeDepth.find(scope2)->second; 1614 // If one of these scopes is nested deeper than the other, scan upwards through 1615 // the scope tree until both scopes are at the same depth. 1616 while (depth1 > depth2) { 1617 scope1 = scope1->getParent(); 1618 --depth1; 1619 } 1620 while (depth1 < depth2) { 1621 scope2 = scope2->getParent(); 1622 --depth2; 1623 } 1624 // Then iteratively step backwards until we find a matching set of scopes; this 1625 // must be the LCA of our original scopes. 1626 while (scope1 != scope2) { 1627 scope1 = scope1->getParent(); 1628 scope2 = scope2->getParent(); 1629 } 1630 assert (scope1 && scope2); 1631 } 1632 scopes.push_back(scope1); 1633 } 1634 assert (scopes.size() == 1); 1635 return scopes[0]; 1636 } 1637 1638 /** ------------------------------------------------------------------------------------------------------------- * 1639 * @brief findInsertionPoint 1640 ** ------------------------------------------------------------------------------------------------------------- */ 1641 void FactorizeDFG::findInsertionPoint(const NodeSet & operands, PabloBlock * const scope) { 1642 Statement * stmt = scope->back(); 1643 scope->setInsertPoint(nullptr); 1644 assert (std::is_sorted(operands.begin(), operands.end())); 1645 while (stmt) { 1646 if (isa<If>(stmt)) { 1647 for (Assign * def : cast<If>(stmt)->getDefined()) { 1648 if (std::binary_search(operands.begin(), operands.end(), def)) { 1649 scope->setInsertPoint(stmt); 1650 return; 1651 } 1652 } 1653 } else if (isa<While>(stmt)) { 1654 for (Next * var : cast<While>(stmt)->getVariants()) { 1655 if (std::binary_search(operands.begin(), operands.end(), var)) { 1656 scope->setInsertPoint(stmt); 1657 return; 1658 } 1659 } 1660 } else if (std::binary_search(operands.begin(), operands.end(), stmt)) { 1661 scope->setInsertPoint(stmt); 1662 break; 1663 } 1664 stmt = stmt->getPrevNode(); 1665 } 1666 } 1667 1668 /** ------------------------------------------------------------------------------------------------------------- * 1669 * @brief factor 1670 ** ------------------------------------------------------------------------------------------------------------- */ 1671 inline void FactorizeDFG::factor(PabloFunction & function) { 1672 1673 // raw_os_ostream out(std::cerr); 1674 1675 // out << "BEFORE:\n\n"; 1676 // PabloPrinter::print(function, out); 1677 // out << "******************************************\n"; 1678 // out.flush(); 1679 1680 for (;;) { 1681 1682 Graph G; 1683 { 1684 Map M; 1685 // Let G be a DAG representing each associative statement and its inputs 1686 buildDependencyGraph(function.getEntryBlock(), G, M); 1687 } 1688 1689 BicliqueSet S; 1690 { 1691 const Matrix M = transitiveClosureOf(G); 1692 for (const Vertex u : make_iterator_range(vertices(M))) { 1693 if ((in_degree(u, G) > 2) && (isa<And>(G[u]) || isa<Or>(G[u]) || isa<Xor>(G[u]))) { 1694 analyzeGraph(u, G, M, S); 1695 } 1696 } 1697 } 1698 1699 independentCliqueSets(S, 2); 1700 1701 if (S.empty()) { 1702 break; 1703 } 1704 1705 std::vector<PabloAST *> operands; 1706 std::vector<PabloAST *> users; 1707 1708 for (const Biclique & B : S) { 1709 for (auto u : std::get<0>(B)) { 1710 operands.push_back(G[u]); 1711 } 1712 std::sort(operands.begin(), operands.end()); 1713 1714 for (auto u : std::get<1>(B)) { 1715 users.push_back(G[u]); 1716 } 1717 std::sort(users.begin(), users.end()); 1718 1719 // out << " -- factoring {"; 1720 // for (PabloAST * operand : operands) { 1721 // out << ' '; 1722 // PabloPrinter::print(operand, out); 1723 // } 1724 // out << " } from { "; 1725 // for (PabloAST * user : users) { 1726 // out << ' '; 1727 // PabloPrinter::print(user, out); 1728 // } 1729 // out << " } -> "; 1730 1731 const TypeId typeId = users.front()->getClassTypeId(); 1732 assert(typeId == TypeId::And || typeId == TypeId::Or || typeId == TypeId::Xor); 1733 #ifndef NDEBUG 1734 for (PabloAST * user : users) { 1735 assert(user->getClassTypeId() == typeId); 1736 } 1737 #endif 1738 PabloBlock * const block = chooseInsertionScope(users); 1739 findInsertionPoint(operands, block); 1740 Variadic * factored = nullptr; 1741 if (typeId == TypeId::And) { 1742 factored = block->createAnd(operands.begin(), operands.end()); 1743 } else if (typeId == TypeId::Or) { 1744 factored = block->createOr(operands.begin(), operands.end()); 1745 } else { // if (isa<Xor>(var)) { 1746 factored = block->createXor(operands.begin(), operands.end()); 1747 } 1748 1749 // PabloPrinter::print(factored, out); 1750 // out << '\n'; 1751 // out.flush(); 1752 1753 for (PabloAST * user : users) { 1754 for (PabloAST * op : operands) { 1755 cast<Variadic>(user)->deleteOperand(op); 1756 } 1757 cast<Variadic>(user)->addOperand(factored); 1758 } 1759 operands.clear(); 1760 users.clear(); 1761 } 1762 // out << "-----------------------------------------------------------------\n"; 1763 } 1764 1765 // out << "AFTER:\n\n"; 1766 // PabloPrinter::print(function, out); 1767 // out << "******************************************\n"; 1768 1769 } 1770 1771 1772 #endif 1773 1774 /** ------------------------------------------------------------------------------------------------------------- * 1775 * @brief deMorgansExpansion 1776 * 1777 * Apply the De Morgans' law to any negated And or Or statement with the intent of further coalescing its operands 1778 * thereby allowing the Simplifier to check for tautologies and contradictions. 1779 ** ------------------------------------------------------------------------------------------------------------- */ 1780 inline void FactorizeDFG::deMorgansExpansion(Not * const var, PabloBlock * const block) { 1781 PabloAST * const negatedVar = var->getOperand(0); 1782 if (isa<And>(negatedVar) || isa<Or>(negatedVar)) { 1783 const TypeId desiredTypeId = isa<And>(negatedVar) ? TypeId::Or : TypeId::And; 1784 bool canApplyDeMorgans = true; 1785 for (PabloAST * user : var->users()) { 1786 if (desiredTypeId != user->getClassTypeId()) { 1787 canApplyDeMorgans = false; 1788 break; 1789 } 1790 } 1791 if (canApplyDeMorgans) { 1792 const unsigned operands = cast<Variadic>(negatedVar)->getNumOperands(); 1793 PabloAST * negations[operands]; 1794 block->setInsertPoint(var); 1795 for (unsigned i = 0; i != operands; ++i) { 1796 negations[i] = block->createNot(cast<Variadic>(negatedVar)->getOperand(i)); 1797 } 1798 const unsigned users = var->getNumUses(); 1799 PabloAST * user[users]; 1800 std::copy(var->user_begin(), var->user_end(), user); 1801 for (unsigned i = 0; i != users; ++i) { 1802 cast<Variadic>(user[i])->deleteOperand(var); 1803 for (unsigned j = 0; j != operands; ++j) { 1804 cast<Variadic>(user[i])->addOperand(negations[j]); 1805 } 1806 } 1807 var->eraseFromParent(true); 1808 } 1809 } 1810 } 1811 1812 /** ------------------------------------------------------------------------------------------------------------- * 1813 * @brief deMorgansExpansion 1814 ** ------------------------------------------------------------------------------------------------------------- */ 1815 void FactorizeDFG::deMorgansExpansion(PabloBlock * const block) { 1816 Statement * stmt = block->front(); 1817 while (stmt) { 1818 if (isa<If>(stmt) || isa<While>(stmt)) { 1819 deMorgansExpansion(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody()); 1820 } else if (isa<Not>(stmt)) { 1821 deMorgansExpansion(cast<Not>(stmt), block); 364 1822 } 365 1823 stmt = stmt->getNextNode(); … … 377 1835 mScopeDepth.emplace(body, depth); 378 1836 enumerateScopeDepth(body, depth + 1); 1837 } else if (stmt->getNumOperands() > 2 && (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt))) { 1838 ++mNumOfVariadics; 379 1839 } 380 1840 stmt = stmt->getNextNode(); … … 386 1846 ** ------------------------------------------------------------------------------------------------------------- */ 387 1847 inline void FactorizeDFG::enumerateScopeDepth(const PabloFunction & function) { 388 mScopeDepth.emplace(function.getEntryBlock(), 0); 389 enumerateScopeDepth(function.getEntryBlock(), 1); 1848 mScopeDepth.emplace(nullptr, 0); 1849 mScopeDepth.emplace(function.getEntryBlock(), 1); 1850 enumerateScopeDepth(function.getEntryBlock(), 2); 1851 } 1852 1853 /** ------------------------------------------------------------------------------------------------------------- * 1854 * @brief scopeDepthOf 1855 ** ------------------------------------------------------------------------------------------------------------- */ 1856 inline unsigned FactorizeDFG::scopeDepthOf(const PabloAST * const expr) const { 1857 return LLVM_LIKELY(isa<Statement>(expr)) ? scopeDepthOf(cast<Statement>(expr)->getParent()) : 0; 1858 } 1859 1860 /** ------------------------------------------------------------------------------------------------------------- * 1861 * @brief scopeDepthOf 1862 ** ------------------------------------------------------------------------------------------------------------- */ 1863 inline unsigned FactorizeDFG::scopeDepthOf(const PabloBlock * const block) const { 1864 assert (block); 1865 const auto f = mScopeDepth.find(block); 1866 assert (f != mScopeDepth.end()); 1867 return f->second; 1868 } 1869 1870 /** ------------------------------------------------------------------------------------------------------------- * 1871 * @brief ensureLegality 1872 * 1873 * We don't want to run the simplifier after this as it might undo some of the ordering work we've done. Instead 1874 * just do the minimum possible to ensure that each variadic is legal prior to compilation. 1875 ** ------------------------------------------------------------------------------------------------------------- */ 1876 void FactorizeDFG::ensureLegality(PabloBlock * const block) { 1877 Statement * stmt = block->front(); 1878 while (stmt) { 1879 if (isa<If>(stmt) || isa<While>(stmt)) { 1880 ensureLegality(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody()); 1881 } else if (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt)) { 1882 assert (stmt->getNumOperands() <= 2); 1883 if (LLVM_UNLIKELY(stmt->getNumOperands() < 2)) { 1884 if (LLVM_UNLIKELY(stmt->getNumOperands() == 0)) { 1885 stmt = stmt->eraseFromParent(true); 1886 } else { 1887 stmt = stmt->replaceWith(stmt->getOperand(0), true, true); 1888 } 1889 continue; 1890 } 1891 } 1892 stmt = stmt->getNextNode(); 1893 } 390 1894 } 391 1895 … … 395 1899 void FactorizeDFG::transform(PabloFunction & function) { 396 1900 FactorizeDFG ldfg; 1901 // ldfg.deMorgansExpansion(function.getEntryBlock()); 1902 // #ifndef NDEBUG 1903 // PabloVerifier::verify(function, "post-demorgans-expansion"); 1904 // #endif 397 1905 ldfg.enumerateScopeDepth(function); 398 ldfg. cse(function.getEntryBlock());1906 ldfg.factor(function); 399 1907 #ifndef NDEBUG 400 PabloVerifier::verify(function, "post- cse");1908 PabloVerifier::verify(function, "post-factoring"); 401 1909 #endif 402 Simplifier::optimize(function); 403 ldfg.lower(function.getEntryBlock()); 1910 ldfg.lower(function); 404 1911 #ifndef NDEBUG 405 1912 PabloVerifier::verify(function, "post-lowering"); 406 #endif 407 } 408 409 } 1913 #endif 1914 ldfg.ensureLegality(function.getEntryBlock()); 1915 } 1916 1917 } -
icGREP/icgrep-devel/icgrep/pablo/passes/factorizedfg.h
r4899 r4919 3 3 4 4 #include <boost/container/flat_map.hpp> 5 #include <boost/circular_buffer.hpp> 5 6 #include <vector> 7 #include <set> 6 8 7 9 namespace pablo { … … 17 19 using ScopeDepth = boost::container::flat_map<const PabloBlock *, unsigned>; 18 20 using ScopeSet = std::vector<PabloBlock *>; 19 using ASTVector = std::vector<PabloAST *>; 20 struct OrderingMap { 21 unsigned of(const PabloAST * const expr) const { 22 auto f = mMap.find(expr); 23 if (f == mMap.end()) { 24 return mParent ? mParent->of(expr) : 0; 25 } 26 return f->second; 27 } 28 inline void enumerate(const PabloAST * const expr) { 29 mMap.emplace(expr, ++mOrderingCount); 30 } 31 OrderingMap() : mParent(nullptr), mOrderingCount(0) {} 32 OrderingMap(OrderingMap * const parent) : mParent(parent), mOrderingCount(parent->mOrderingCount) {} 33 ~OrderingMap() { if (mParent) { mParent->mOrderingCount = mOrderingCount; } } 34 private: 35 OrderingMap * const mParent; 36 unsigned mOrderingCount; 37 boost::container::flat_map<const PabloAST *, unsigned> mMap; 38 }; 21 using NodeSet = std::vector<PabloAST *>; 22 // using Variadics = std::vector<Variadic *>; 23 using Variadics = boost::circular_buffer<Variadic *>; 39 24 25 using VertexSet = std::vector<PabloAST *>; 26 using Biclique = std::pair<VertexSet, VertexSet>; // [{Operands}, {Users}] 27 // using BicliqueSet = boost::container::flat_set<Biclique>; 28 using BicliqueSet = std::vector<Biclique>; 29 using CheckSet = boost::container::flat_set<PabloAST *>; 40 30 public: 41 31 static void transform(PabloFunction & function); … … 43 33 void enumerateScopeDepth(const PabloFunction & function); 44 34 void enumerateScopeDepth(const PabloBlock * const block, const unsigned depth); 45 void cse(PabloBlock * const block); 46 void cse(Variadic * const var); 47 PabloBlock * chooseInsertionScope(const ASTVector & users); 48 void findInsertionPoint(const ASTVector & operands, PabloBlock * const scope); 49 void lower(PabloFunction & function); 50 // void lower(PabloBlock * const block, OrderingMap & parent); 51 // PabloAST * lower(Variadic * const var, PabloBlock * block, OrderingMap & order); 52 void lower(PabloBlock * const block); 53 Statement * lower(Variadic * const var, PabloBlock * block); 54 FactorizeDFG() = default; 35 static void deMorgansExpansion(Not * const var, PabloBlock * const block); 36 static void deMorgansExpansion(PabloBlock * const block); 37 void factor(PabloFunction & function) const; 38 // static void factor(PabloBlock * const block, BicliqueSet & bicliques); 39 // static void enumerateBicliques(Variadic * const var, BicliqueSet & bicliques); 40 static BicliqueSet findAllFactoringsOf(Variadic * const var); 41 static void independentCliqueSets(BicliqueSet & bicliques); 42 void processBicliques(BicliqueSet & bicliques) const; 43 // void factor(PabloBlock * const block, BicliqueSet & vars) const; 44 void factor(PabloBlock * const block, Variadics & vars) const; 45 void factor(Variadic * const var, Variadics & Q) const; 46 PabloBlock * findInsertionScope(const NodeSet & users) const; 47 CheckSet makeCheckSet(PabloBlock * const scope, const NodeSet & values) const; 48 Statement * firstIn(PabloBlock * const scope, Statement * stmt, const NodeSet & operands) const; 49 Statement * lastIn(PabloBlock * const scope, Statement * stmt, const NodeSet & users) const; 50 PabloBlock * findInsertionPoint(const NodeSet & operands, const NodeSet & users) const; 51 void lower(PabloFunction & function) const; 52 void lower(PabloBlock * const block) const; 53 Statement * lower(Variadic * const var, PabloBlock * block) const; 54 unsigned scopeDepthOf(const PabloBlock * const block) const; 55 unsigned scopeDepthOf(const PabloAST * const expr) const; 56 static void ensureLegality(PabloBlock * const block); 57 FactorizeDFG() : mNumOfVariadics(0) {} 55 58 private: 56 ScopeDepth mScopeDepth; 59 unsigned mNumOfVariadics; 60 ScopeDepth mScopeDepth; 57 61 }; 58 62 -
icGREP/icgrep-devel/icgrep/pablo/passes/flattenassociativedfg.cpp
r4899 r4919 18 18 * @brief coalesce 19 19 ** ------------------------------------------------------------------------------------------------------------- */ 20 inline void FlattenAssociativeDFG::coalesce(Variadic * const var) {20 inline Variadic * CoalesceDFG::coalesce(Variadic * const var) { 21 21 const TypeId typeId = var->getClassTypeId(); 22 22 for (unsigned i = 0; i < var->getNumOperands(); ) { … … 36 36 ++i; 37 37 } 38 return var; 38 39 } 39 40 … … 41 42 * @brief coalesce 42 43 ** ------------------------------------------------------------------------------------------------------------- */ 43 void FlattenAssociativeDFG::coalesce(PabloBlock * const block, const bool traverse) {44 void CoalesceDFG::coalesce(PabloBlock * const block, const bool traverse) { 44 45 Statement * stmt = block->front(); 45 46 while (stmt) { … … 49 50 } else if (isa<And>(stmt) || isa<Or>(stmt) || isa<Xor>(stmt)) { 50 51 coalesce(cast<Variadic>(stmt)); 51 } else if (isa<Not>(stmt)) {52 }/* else if (isa<Not>(stmt)) { 52 53 deMorgansExpansion(cast<Not>(stmt), block); 53 } 54 }*/ 54 55 stmt = next; 55 56 } … … 62 63 * thereby allowing the Simplifier to check for tautologies and contradictions. 63 64 ** ------------------------------------------------------------------------------------------------------------- */ 64 inline void FlattenAssociativeDFG::deMorgansExpansion(Not * const var, PabloBlock * const block) {65 inline void CoalesceDFG::deMorgansExpansion(Not * const var, PabloBlock * const block) { 65 66 PabloAST * const negatedVar = var->getOperand(0); 66 67 if (isa<And>(negatedVar) || isa<Or>(negatedVar)) { 67 Variadic * src = cast<Variadic>(negatedVar); 68 const unsigned operands = src->getNumOperands(); 69 Variadic * replacement = nullptr; 70 block->setInsertPoint(var->getPrevNode()); 71 if (isa<And>(negatedVar)) { 72 replacement = block->createOr(operands); 73 } else { 74 replacement = block->createAnd(operands); 75 } 76 block->setInsertPoint(replacement->getPrevNode()); 77 for (unsigned i = 0; i != operands; ++i) { 78 replacement->addOperand(block->createNot(src->getOperand(i))); 79 } 80 coalesce(replacement); 81 var->replaceWith(replacement, true, true); 68 const TypeId desiredTypeId = isa<And>(negatedVar) ? TypeId::Or : TypeId::And; 69 bool canApplyDeMorgans = true; 70 for (PabloAST * user : var->users()) { 71 if (desiredTypeId != user->getClassTypeId()) { 72 canApplyDeMorgans = false; 73 break; 74 } 75 } 76 if (canApplyDeMorgans) { 77 const unsigned operands = cast<Variadic>(negatedVar)->getNumOperands(); 78 PabloAST * negations[operands]; 79 block->setInsertPoint(var); 80 for (unsigned i = 0; i != operands; ++i) { 81 negations[i] = block->createNot(cast<Variadic>(negatedVar)->getOperand(i)); 82 } 83 const unsigned users = var->getNumUses(); 84 PabloAST * user[users]; 85 std::copy(var->user_begin(), var->user_end(), user); 86 for (unsigned i = 0; i != users; ++i) { 87 cast<Variadic>(user[i])->deleteOperand(var); 88 for (unsigned j = 0; j != operands; ++j) { 89 cast<Variadic>(user[i])->addOperand(negations[j]); 90 } 91 } 92 var->eraseFromParent(true); 93 } 82 94 } 83 95 } … … 86 98 * @brief deMorgansReduction 87 99 ** ------------------------------------------------------------------------------------------------------------- */ 88 inline void FlattenAssociativeDFG::deMorgansReduction(Variadic * const var, PabloBlock * const block) {100 inline void CoalesceDFG::deMorgansReduction(Variadic * const var, PabloBlock * const block) { 89 101 unsigned negations = 0; 90 102 for (unsigned i = 0; i < var->getNumOperands(); ++i) { … … 117 129 * @brief deMorgansReduction 118 130 ** ------------------------------------------------------------------------------------------------------------- */ 119 void FlattenAssociativeDFG::deMorgansReduction(PabloBlock * const block, const bool traverse) {131 void CoalesceDFG::deMorgansReduction(PabloBlock * const block, const bool traverse) { 120 132 for (Statement * stmt : *block) { 121 133 if (LLVM_UNLIKELY((isa<If>(stmt) || isa<While>(stmt)) && traverse)) { … … 134 146 explicit VertexData(TypeId typeId) : typeId(typeId) { } 135 147 }; 136 using Graph = adjacency_list<vecS, vecS, bidirectionalS, VertexData, Variadic *>;137 using Vertex = Graph::vertex_descriptor;148 using DependencyGraph = adjacency_list<vecS, vecS, bidirectionalS, VertexData, Variadic *>; 149 using Vertex = DependencyGraph::vertex_descriptor; 138 150 using SourceMap = flat_map<Assign *, Vertex>; 139 151 using SinkMap = flat_map<TypeId, Vertex>; … … 145 157 * @brief addToVariadicGraph 146 158 ** ------------------------------------------------------------------------------------------------------------- */ 147 static bool addToVariadicGraph(Assign * const def, Graph & G, SourceMap & A, SinkMap & B) {159 static bool addToVariadicGraph(Assign * const def, DependencyGraph & G, SourceMap & A, SinkMap & B) { 148 160 149 161 if (LLVM_LIKELY(A.count(def) == 0)) { … … 191 203 * bipartition A and their adjacencies to be in B. 192 204 ** ------------------------------------------------------------------------------------------------------------- */ 193 static BicliqueSet enumerateBicliques(const Graph & G, const VertexSet & A) {205 static BicliqueSet enumerateBicliques(const DependencyGraph & G, const VertexSet & A) { 194 206 using IntersectionSets = std::set<VertexSet>; 195 207 … … 273 285 auto first1 = A.begin(), last1 = A.end(); 274 286 auto first2 = B.begin(), last2 = B.end(); 287 assert (std::is_sorted(first1, last1)); 288 assert (std::is_sorted(first2, last2)); 275 289 while (first1 != last1 && first2 != last2) { 276 290 if (*first1 < *first2) { … … 289 303 ** ------------------------------------------------------------------------------------------------------------- */ 290 304 template <unsigned side> 291 inline static BicliqueSet independentCliqueSets(BicliqueSet && cliques, const unsigned minimum) {305 inline static BicliqueSet independentCliqueSets(BicliqueSet && bicliques, const unsigned minimum) { 292 306 using IndependentSetGraph = adjacency_list<hash_setS, vecS, undirectedS, unsigned>; 293 307 294 const auto l = cliques.size();308 const auto l = bicliques.size(); 295 309 IndependentSetGraph I(l); 296 310 297 // Initialize our weights 298 for (unsigned i = 0; i != l; ++i) { 299 I[i] = std::pow(std::get<side>(cliques[i]).size(), 2); 300 } 301 302 // Determine our constraints 303 for (unsigned i = 0; i != l; ++i) { 304 for (unsigned j = i + 1; j != l; ++j) { 305 if (intersects(std::get<side>(cliques[i]), std::get<side>(cliques[j]))) { 306 add_edge(i, j, I); 311 // Initialize our weights and determine the constraints 312 for (auto i = bicliques.begin(); i != bicliques.end(); ++i) { 313 I[std::distance(bicliques.begin(), i)] = std::pow(std::get<side>(*i).size(), 2); 314 for (auto j = i; ++j != bicliques.end(); ) { 315 if (intersects(i->second, j->second) && intersects(i->first, j->first)) { 316 add_edge(std::distance(bicliques.begin(), i), std::distance(bicliques.begin(), j), I); 307 317 } 308 318 } … … 330 340 // Sort the selected list and then remove the unselected cliques 331 341 std::sort(selected.begin(), selected.end(), std::greater<Vertex>()); 332 auto end = cliques.end();342 auto end = bicliques.end(); 333 343 for (const unsigned offset : selected) { 334 end = cliques.erase(cliques.begin() + offset + 1, end) - 1;335 } 336 cliques.erase(cliques.begin(), end);337 338 return std::move( cliques);344 end = bicliques.erase(bicliques.begin() + offset + 1, end) - 1; 345 } 346 bicliques.erase(bicliques.begin(), end); 347 348 return std::move(bicliques); 339 349 } 340 350 … … 342 352 * @brief tryToPartiallyExtractVariadic 343 353 ** ------------------------------------------------------------------------------------------------------------- */ 344 inline void FlattenAssociativeDFG::tryToPartiallyExtractVariadic(Variadic * const var) { 345 354 inline void CoalesceDFG::tryToPartiallyExtractVariadic(Variadic * const var) { 346 355 for (unsigned i = 0; i < var->getNumOperands(); ++i) { 347 356 PabloAST * const op = var->getOperand(i); 348 357 if (isa<Assign>(op)) { 349 350 // Have we found a variadic operation that can sunk into a nested scope?358 // Look for two Assign statements in this variadic; we don't want to generate a dependency graph unless 359 // we expect some optimization can occur. 351 360 for (unsigned j = i + 1; j != var->getNumOperands(); ++j) { 352 361 bool invalid = false; 353 362 if (LLVM_UNLIKELY(matches(op, var->getOperand(j)))) { 354 Graph G;363 DependencyGraph G; 355 364 SourceMap A; 356 365 SinkMap B; … … 375 384 376 385 const auto S = independentCliqueSets<0>(std::move(enumerateBicliques(G, H)), 2); 377 assert (S.size() > 0); 386 if (LLVM_UNLIKELY(S.empty())) { 387 break; 388 } 378 389 for (const Biclique & C : S) { 379 390 const VertexSet & sources = std::get<0>(C); … … 405 416 joiner->addOperand(def->getOperand(0)); 406 417 } 407 408 coalesce(joiner); 409 Assign * const joined = block->createAssign("m", joiner); 410 418 Assign * const joined = block->createAssign("m", coalesce(joiner)); 411 419 for (Assign * def : defs) { 412 420 def->replaceWith(joined); … … 430 438 * @brief tryToPartiallyExtractVariadic 431 439 ** ------------------------------------------------------------------------------------------------------------- */ 432 void FlattenAssociativeDFG::tryToPartiallyExtractVariadic(PabloBlock * const block) {440 void CoalesceDFG::tryToPartiallyExtractVariadic(PabloBlock * const block) { 433 441 for (Statement * stmt = block->back(); stmt; stmt = stmt->getPrevNode()) { 434 442 if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) { … … 573 581 * TODO: make this only iterate over the scope blocks and test the scope branch. 574 582 ** ------------------------------------------------------------------------------------------------------------- */ 575 inline void FlattenAssociativeDFG::removeFalseScopeDependencies(PabloFunction & function) {583 inline void CoalesceDFG::removeFalseScopeDependencies(PabloFunction & function) { 576 584 ScopeDependencyGraph G; 577 585 { … … 586 594 * @brief transform 587 595 ** ------------------------------------------------------------------------------------------------------------- */ 588 void FlattenAssociativeDFG::transform(PabloFunction & function) {589 590 FlattenAssociativeDFG::coalesce(function.getEntryBlock(), true);596 void CoalesceDFG::transform(PabloFunction & function) { 597 598 CoalesceDFG::coalesce(function.getEntryBlock(), true); 591 599 #ifndef NDEBUG 592 600 PabloVerifier::verify(function, "post-coalescence"); … … 595 603 Simplifier::optimize(function); 596 604 597 FlattenAssociativeDFG::deMorgansReduction(function.getEntryBlock(), true);598 #ifndef NDEBUG599 PabloVerifier::verify(function, "post-demorgans-reduction");600 #endif601 602 Simplifier::optimize(function);603 604 FlattenAssociativeDFG::removeFalseScopeDependencies(function);605 // CoalesceDFG::deMorgansReduction(function.getEntryBlock(), true); 606 // #ifndef NDEBUG 607 // PabloVerifier::verify(function, "post-demorgans-reduction"); 608 // #endif 609 610 // Simplifier::optimize(function); 611 612 CoalesceDFG::removeFalseScopeDependencies(function); 605 613 #ifndef NDEBUG 606 614 PabloVerifier::verify(function, "post-remove-false-scope-dependencies"); 607 615 #endif 608 616 609 FlattenAssociativeDFG::tryToPartiallyExtractVariadic(function.getEntryBlock()); 617 Simplifier::optimize(function); 618 619 CoalesceDFG::tryToPartiallyExtractVariadic(function.getEntryBlock()); 610 620 #ifndef NDEBUG 611 621 PabloVerifier::verify(function, "post-partial-variadic-extraction"); -
icGREP/icgrep-devel/icgrep/pablo/passes/flattenassociativedfg.h
r4899 r4919 11 11 class Assign; 12 12 13 class FlattenAssociativeDFG {13 class CoalesceDFG { 14 14 friend class DistributivePass; 15 15 friend class FactorizeDFG; … … 18 18 protected: 19 19 static void coalesce(PabloBlock * const block, const bool traverse); 20 static voidcoalesce(Variadic * const var);20 static Variadic *coalesce(Variadic * const var); 21 21 static void deMorgansExpansion(Not * const var, PabloBlock * const block); 22 22 static void deMorgansReduction(PabloBlock * const block, const bool traverse); … … 25 25 static void tryToPartiallyExtractVariadic(Variadic * const var); 26 26 static void removeFalseScopeDependencies(PabloFunction & function); 27 FlattenAssociativeDFG() = default;27 CoalesceDFG() = default; 28 28 }; 29 29 -
icGREP/icgrep-devel/icgrep/pablo/printer_pablos.cpp
r4876 r4919 35 35 out << "Next(" << next->getName() << ") = "; 36 36 print(next->getExpr(), out); 37 } else if (const If * if stmt= dyn_cast<const If>(stmt)) {38 out << " if ";39 print(if stmt->getCondition(), out);37 } else if (const If * ifNode = dyn_cast<const If>(stmt)) { 38 out << "If "; 39 print(ifNode->getCondition(), out); 40 40 if (expandNested) { 41 41 out << ":\n"; 42 print(if stmt->getBody(), out, true, indent + BlockIndenting);42 print(ifNode->getBody(), out, true, indent + BlockIndenting); 43 43 out.indent(indent); 44 out << " else:\n";45 print_vars(if stmt->getDefined(), out, indent + BlockIndenting);44 out << "Else:\n"; 45 print_vars(ifNode->getDefined(), out, indent + BlockIndenting); 46 46 } 47 47 } else if (const While * whileNode = dyn_cast<const While>(stmt)) { 48 out << " while ";48 out << "While "; 49 49 print(whileNode->getCondition(), out); 50 50 if (expandNested) { -
icGREP/icgrep-devel/icgrep/re/re_compiler.cpp
r4870 r4919 39 39 static cl::opt<bool> DisableLog2BoundedRepetition("disable-log2-bounded-repetition", cl::init(false), 40 40 cl::desc("disable log2 optimizations for bounded repetition of bytes"), cl::cat(fREcompilationOptions)); 41 static cl::opt<bool> DisableIfHierarchy("disable-if-hierarchy-strategy", cl::init(false), 42 cl::desc("disable nested if hierarchy for generated Unicode classes (not recommended)"), cl::cat(fREcompilationOptions)); 41 43 static cl::opt<int> IfInsertionGap("if-insertion-gap", cl::init(3), cl::desc("minimum number of nonempty elements between inserted if short-circuit tests"), cl::cat(fREcompilationOptions)); 42 44 static cl::opt<bool> DisableMatchStar("disable-matchstar", cl::init(false), … … 303 305 if (LLVM_LIKELY(nameMap.size() > 0)) { 304 306 UCD::UCDCompiler ucdCompiler(mCCCompiler); 305 ucdCompiler.generateWithDefaultIfHierarchy(nameMap, mPB); 307 if (LLVM_UNLIKELY(DisableIfHierarchy)) { 308 ucdCompiler.generateWithoutIfHierarchy(nameMap, mPB); 309 } else { 310 ucdCompiler.generateWithDefaultIfHierarchy(nameMap, mPB); 311 } 306 312 for (auto t : nameMap) { 307 313 if (t.second) { -
icGREP/icgrep-devel/icgrep/toolchain.cpp
r4909 r4919 22 22 #include <llvm/Support/TargetSelect.h> 23 23 #include <llvm/Support/Host.h> 24 #include <llvm/Support/FileSystem.h> 25 24 26 25 27 #include <IDISA/idisa_avx_builder.h> … … 53 55 #include <pablo/printer_pablos.h> 54 56 55 #include "do_grep.h" 57 #include <hrtime.h> 58 #include <do_grep.h> 56 59 57 60 using namespace pablo; … … 65 68 static cl::opt<bool> PrintUTF8REs("print-utf8-REs", cl::init(false), cl::desc("print out UTF-8 REs"), cl::cat(cRegexOutputOptions)); 66 69 static cl::opt<bool> PrintSimplifiedREs("print-simplified-REs", cl::init(false), cl::desc("print out final simplified REs"), cl::cat(cRegexOutputOptions)); 67 static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options", 68 "These options control printing of intermediate Pablo code."); 70 static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options", "These options control printing of intermediate Pablo code."); 69 71 70 72 static cl::opt<bool> PrintOptimizedREcode("print-pablo", cl::init(false), cl::desc("print final optimized Pablo code"), cl::cat(dPabloDumpOptions)); 71 73 static cl::opt<bool> PrintCompiledCCcode("print-CC-pablo", cl::init(false), cl::desc("print Pablo output from character class compiler"), cl::cat(dPabloDumpOptions)); 72 74 static cl::opt<bool> PrintCompiledREcode("print-RE-pablo", cl::init(false), cl::desc("print Pablo output from the regular expression compiler"), cl::cat(dPabloDumpOptions)); 75 static cl::opt<std::string> PabloOutputFilename("print-pablo-output", cl::init(""), cl::desc("output Pablo filename"), cl::cat(dPabloDumpOptions)); 73 76 74 77 static cl::OptionCategory cPabloOptimizationsOptions("Pablo Optimizations", "These options control Pablo optimization passes."); 75 78 76 static cl::opt<bool> Disable PabloCSE("disable-CSE", cl::init(false),77 cl::desc("Disable Pablo common subexpression elimination/dead code elimination"),79 static cl::opt<bool> DisableSimplification("disable-simplification", cl::init(false), 80 cl::desc("Disable Pablo Simplification pass (not recommended)"), 78 81 cl::cat(cPabloOptimizationsOptions)); 82 79 83 static cl::opt<bool> PabloSinkingPass("sinking", cl::init(false), 80 84 cl::desc("Moves all instructions into the innermost legal If-scope so that they are only executed when needed."), 81 85 cl::cat(cPabloOptimizationsOptions)); 82 86 87 static cl::OptionCategory cMachineCodeOptimization("Machine Code Optimizations", "These options control back-end compilier optimization levels."); 88 89 90 static cl::opt<char> OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O0')"), 91 cl::cat(cMachineCodeOptimization), cl::Prefix, cl::ZeroOrMore, cl::init('0')); 92 83 93 #ifdef ENABLE_MULTIPLEXING 84 94 static cl::opt<bool> PrintUnloweredCode("print-unlowered-pablo", cl::init(false), cl::desc("print Pablo output prior to lowering. "), cl::cat(dPabloDumpOptions)); … … 91 101 cl::desc("maximum size of any candidate multiplexing set."), 92 102 cl::cat(cPabloOptimizationsOptions)); 103 93 104 static cl::opt<unsigned> MultiplexingSelectionLimit("multiplexing-selection-limit", cl::init(100), 94 105 cl::desc("maximum number of selections from any partial candidate multiplexing set."), 95 106 cl::cat(cPabloOptimizationsOptions)); 107 96 108 static cl::opt<unsigned> MultiplexingWindowSize("multiplexing-window-size", cl::init(1), 97 109 cl::desc("maximum depth difference for computing mutual exclusion of Advance nodes."), … … 101 113 cl::desc("coalesce associative functions prior to optimization passes."), 102 114 cl::cat(cPabloOptimizationsOptions)); 115 103 116 static cl::opt<bool> EnablePreDistribution("pre-dist", cl::init(false), 104 cl::desc("apply distribution law optimization ."),117 cl::desc("apply distribution law optimization prior to multiplexing."), 105 118 cl::cat(cPabloOptimizationsOptions)); 119 106 120 static cl::opt<bool> EnablePostDistribution("post-dist", cl::init(false), 107 cl::desc("apply distribution law optimization."), 121 cl::desc("apply distribution law optimization after multiplexing."), 122 cl::cat(cPabloOptimizationsOptions)); 123 124 static cl::opt<bool> EnablePrePassScheduling("pre-pass-scheduling", cl::init(false), 125 cl::desc("apply pre-pass scheduling prior to LLVM IR generation."), 108 126 cl::cat(cPabloOptimizationsOptions)); 109 127 #endif … … 154 172 } 155 173 156 void pablo_function_passes(PabloFunction * function) { 174 #ifdef PRINT_TIMING_INFORMATION 175 #define READ_CYCLE_COUNTER(name) name = read_cycle_counter(); 176 #else 177 #define READ_CYCLE_COUNTER(name) 178 #endif 179 180 #ifdef PRINT_TIMING_INFORMATION 181 unsigned COUNT_STATEMENTS(const PabloFunction * const entry) { 182 std::stack<const Statement *> scope; 183 unsigned statements = 0; 184 // Scan through and collect all the advances, calls, scanthrus and matchstars ... 185 for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) { 186 while ( stmt ) { 187 ++statements; 188 if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) { 189 // Set the next statement to be the first statement of the inner scope and push the 190 // next statement of the current statement into the scope stack. 191 const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody(); 192 scope.push(stmt->getNextNode()); 193 stmt = nested->front(); 194 assert (stmt); 195 continue; 196 } 197 stmt = stmt->getNextNode(); 198 } 199 if (scope.empty()) { 200 break; 201 } 202 stmt = scope.top(); 203 scope.pop(); 204 } 205 return statements; 206 } 207 208 unsigned COUNT_ADVANCES(const PabloFunction * const entry) { 209 210 std::stack<const Statement *> scope; 211 unsigned advances = 0; 212 213 // Scan through and collect all the advances, calls, scanthrus and matchstars ... 214 for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) { 215 while ( stmt ) { 216 if (isa<Advance>(stmt)) { 217 ++advances; 218 } 219 else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) { 220 // Set the next statement to be the first statement of the inner scope and push the 221 // next statement of the current statement into the scope stack. 222 const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody(); 223 scope.push(stmt->getNextNode()); 224 stmt = nested->front(); 225 assert (stmt); 226 continue; 227 } 228 stmt = stmt->getNextNode(); 229 } 230 if (scope.empty()) { 231 break; 232 } 233 stmt = scope.top(); 234 scope.pop(); 235 } 236 return advances; 237 } 238 239 using DistributionMap = boost::container::flat_map<unsigned, unsigned>; 240 241 DistributionMap SUMMARIZE_VARIADIC_DISTRIBUTION(const PabloFunction * const entry) { 242 std::stack<const Statement *> scope; 243 DistributionMap distribution; 244 // Scan through and collect all the advances, calls, scanthrus and matchstars ... 245 for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) { 246 while ( stmt ) { 247 if (isa<Variadic>(stmt)) { 248 auto f = distribution.find(stmt->getNumOperands()); 249 if (f == distribution.end()) { 250 distribution.emplace(stmt->getNumOperands(), 1); 251 } else { 252 f->second += 1; 253 } 254 } 255 else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) { 256 // Set the next statement to be the first statement of the inner scope and push the 257 // next statement of the current statement into the scope stack. 258 const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody(); 259 scope.push(stmt->getNextNode()); 260 stmt = nested->front(); 261 assert (stmt); 262 continue; 263 } 264 stmt = stmt->getNextNode(); 265 } 266 if (scope.empty()) { 267 break; 268 } 269 stmt = scope.top(); 270 scope.pop(); 271 } 272 return distribution; 273 } 274 #endif 275 276 void pablo_function_passes(PabloFunction * function) { 157 277 // Scan through the pablo code and perform DCE and CSE 158 if (!DisablePabloCSE) { 278 279 #ifdef PRINT_TIMING_INFORMATION 280 timestamp_t simplification_start = 0, simplification_end = 0; 281 timestamp_t coalescing_start = 0, coalescing_end = 0; 282 timestamp_t sinking_start = 0, sinking_end = 0; 283 timestamp_t pre_distribution_start = 0, pre_distribution_end = 0; 284 timestamp_t multiplexing_start = 0, multiplexing_end = 0; 285 timestamp_t post_distribution_start = 0, post_distribution_end = 0; 286 timestamp_t lowering_start = 0, lowering_end = 0; 287 timestamp_t scheduling_start = 0, scheduling_end = 0; 288 DistributionMap distribution; 289 const timestamp_t optimization_start = read_cycle_counter(); 290 #endif 291 if (!DisableSimplification) { 292 READ_CYCLE_COUNTER(simplification_start); 159 293 Simplifier::optimize(*function); 294 READ_CYCLE_COUNTER(simplification_end); 160 295 } 161 296 #ifdef ENABLE_MULTIPLEXING 162 if (EnableLowering || EnablePreDistribution || EnablePostDistribution || EnableMultiplexing) { 163 FlattenAssociativeDFG::transform(*function); 297 if (EnableLowering || EnablePreDistribution || EnablePostDistribution) { 298 READ_CYCLE_COUNTER(coalescing_start); 299 CoalesceDFG::transform(*function); 300 READ_CYCLE_COUNTER(coalescing_end); 301 } 302 if (EnablePreDistribution) { 303 READ_CYCLE_COUNTER(pre_distribution_start); 304 DistributivePass::optimize(*function); 305 READ_CYCLE_COUNTER(pre_distribution_end); 306 } 307 if (EnableMultiplexing) { 308 READ_CYCLE_COUNTER(multiplexing_start); 309 MultiplexingPass::optimize(*function, MultiplexingSetLimit, MultiplexingSelectionLimit, MultiplexingWindowSize); 310 READ_CYCLE_COUNTER(multiplexing_end); 311 if (EnableLowering || EnablePreDistribution || EnablePostDistribution) { 312 CoalesceDFG::transform(*function); 313 } 314 } 315 if (EnablePostDistribution) { 316 READ_CYCLE_COUNTER(post_distribution_start); 317 DistributivePass::optimize(*function); 318 READ_CYCLE_COUNTER(post_distribution_end); 164 319 } 165 320 #endif 166 321 if (PabloSinkingPass) { 322 READ_CYCLE_COUNTER(sinking_start); 167 323 CodeMotionPass::optimize(*function); 168 } 169 #ifdef ENABLE_MULTIPLEXING 170 if (EnablePreDistribution) { 171 DistributivePass::optimize(*function); 172 } 173 if (EnableMultiplexing) { 174 MultiplexingPass::optimize(*function, MultiplexingSetLimit, MultiplexingSelectionLimit, MultiplexingWindowSize); 175 } 176 if (EnablePostDistribution) { 177 DistributivePass::optimize(*function); 178 } 179 SchedulingPrePass::optimize(*function); 324 READ_CYCLE_COUNTER(sinking_end); 325 } 326 #ifdef ENABLE_MULTIPLEXING 180 327 if (PrintUnloweredCode) { 181 328 //Print to the terminal the AST that was generated by the pararallel bit-stream compiler. … … 183 330 cerr << "Unlowered Pablo AST:\n"; 184 331 PabloPrinter::print(*function, cerr); 185 } 186 if (EnableLowering || EnablePreDistribution || EnablePostDistribution || EnableMultiplexing) { 332 } 333 #ifdef PRINT_TIMING_INFORMATION 334 distribution = SUMMARIZE_VARIADIC_DISTRIBUTION(function); 335 #endif 336 if (EnableLowering || EnablePreDistribution || EnablePostDistribution) { 337 READ_CYCLE_COUNTER(lowering_start); 187 338 FactorizeDFG::transform(*function); 188 } 339 READ_CYCLE_COUNTER(lowering_end); 340 } 341 if (EnablePrePassScheduling) { 342 READ_CYCLE_COUNTER(scheduling_start); 343 SchedulingPrePass::optimize(*function); 344 READ_CYCLE_COUNTER(scheduling_end); 345 } 346 #endif 347 #ifdef PRINT_TIMING_INFORMATION 348 const timestamp_t optimization_end = read_cycle_counter(); 189 349 #endif 190 350 if (PrintOptimizedREcode) { 191 PabloVerifier::verify(*function, "post-optimization"); 192 //Print to the terminal the AST that was generated by the pararallel bit-stream compiler. 193 llvm::raw_os_ostream cerr(std::cerr); 194 cerr << "Final Pablo AST:\n"; 195 PabloPrinter::print(*function, cerr); 196 } 351 if (PabloOutputFilename.empty()) { 352 //Print to the terminal the AST that was generated by the pararallel bit-stream compiler. 353 llvm::raw_os_ostream cerr(std::cerr); 354 cerr << "Final Pablo AST:\n"; 355 PabloPrinter::print(*function, cerr); 356 } else { 357 std::error_code error; 358 llvm::raw_fd_ostream out(PabloOutputFilename, error, sys::fs::OpenFlags::F_None); 359 PabloPrinter::print(*function, out); 360 } 361 } 362 #ifdef PRINT_TIMING_INFORMATION 363 std::cerr << "PABLO OPTIMIZATION TIME: " << (optimization_end - optimization_start) << std::endl; 364 std::cerr << " SIMPLIFICATION TIME: " << (simplification_end - simplification_start) << std::endl; 365 std::cerr << " COALESCING TIME: " << (coalescing_end - coalescing_start) << std::endl; 366 std::cerr << " SINKING TIME: " << (sinking_end - sinking_start) << std::endl; 367 std::cerr << " PRE-DISTRIBUTION TIME: " << (pre_distribution_end - pre_distribution_start) << std::endl; 368 std::cerr << " MULTIPLEXING TIME: " << (multiplexing_end - multiplexing_start) << std::endl; 369 std::cerr << " MULTIPLEXING SEED: " << MultiplexingPass::SEED << std::endl; 370 std::cerr << " MULTIPLEXING NODES USED: " << MultiplexingPass::NODES_USED << std::endl; 371 std::cerr << " MULTIPLEXING NODES ALLOCATED: " << MultiplexingPass::NODES_ALLOCATED << std::endl; 372 std::cerr << " LOWERING TIME: " << (lowering_end - lowering_start) << std::endl; 373 std::cerr << " POST-DISTRIBUTION TIME: " << (post_distribution_end - post_distribution_start) << std::endl; 374 std::cerr << " SCHEDULING TIME: " << (scheduling_end - scheduling_start) << std::endl; 375 std::cerr << "PABLO STATEMENTS: " << COUNT_STATEMENTS(function) << std::endl; 376 std::cerr << "PABLO ADVANCES: " << COUNT_ADVANCES(function) << std::endl; 377 std::cerr << "PRE-LOWERING VARIADIC DISTRIBUTION: "; 378 bool join = false; 379 for (auto dist : distribution) { 380 if (join) { 381 std::cerr << ';'; 382 } 383 std::cerr << dist.first << '|' << dist.second; 384 join = true; 385 } 386 std::cerr << std::endl; 387 #endif 197 388 } 198 389 … … 232 423 builder.setErrorStr(&errMessage); 233 424 builder.setMCPU(sys::getHostCPUName()); 234 builder.setOptLevel(CodeGenOpt::Level::None); 425 CodeGenOpt::Level optLevel = CodeGenOpt::Level::None; 426 switch (OptLevel) { 427 case '0': optLevel = CodeGenOpt::None; break; 428 case '1': optLevel = CodeGenOpt::Less; break; 429 case '2': optLevel = CodeGenOpt::Default; break; 430 case '3': optLevel = CodeGenOpt::Aggressive; break; 431 default: errs() << OptLevel << " is an invalid optimization level.\n"; 432 } 433 builder.setOptLevel(optLevel); 235 434 236 435 #if (BLOCK_SIZE == 256)
Note: See TracChangeset
for help on using the changeset viewer.