Changeset 5112
 Timestamp:
 Aug 1, 2016, 5:47:21 PM (3 years ago)
 Location:
 icGREP/icgrepdevel/icgrep/pablo/optimizers
 Files:

 2 edited
Legend:
 Unmodified
 Added
 Removed

icGREP/icgrepdevel/icgrep/pablo/optimizers/pablo_automultiplexing.cpp
r5082 r5112 14 14 #include <queue> 15 15 #include <unordered_set> 16 #include <bdd.h>17 16 #include <functional> 18 19 17 #include <llvm/Support/CommandLine.h> 20 18 … … 71 69 cl::cat(MultiplexingOptions)); 72 70 73 // #define PRINT_DEBUG_OUTPUT74 75 #if !defined(NDEBUG) && !defined(PRINT_DEBUG_OUTPUT)76 #define PRINT_DEBUG_OUTPUT77 #endif78 79 #ifdef PRINT_DEBUG_OUTPUT80 81 #include <iostream>82 83 using namespace pablo;84 typedef uint64_t timestamp_t;85 86 static inline timestamp_t read_cycle_counter() {87 #ifdef __GNUC__88 timestamp_t ts;89 #ifdef __x86_64__90 unsigned int eax, edx;91 asm volatile("rdtsc" : "=a" (eax), "=d" (edx));92 ts = ((timestamp_t) eax)  (((timestamp_t) edx) << 32);93 #else94 asm volatile("rdtsc\n" : "=A" (ts));95 #endif96 return(ts);97 #endif98 #ifdef _MSC_VER99 return __rdtsc();100 #endif101 }102 103 #define LOG(x) std::cerr << x << std::endl;104 #define RECORD_TIMESTAMP(Name) const timestamp_t Name = read_cycle_counter()105 #define LOG_GRAPH(Name, G) \106 LOG(Name << " V=" << num_vertices(G) << ", E=" << num_edges(G) << \107 " (" << (((double)num_edges(G)) / ((double)(num_vertices(G) * (num_vertices(G)  1) / 2))) << ')')108 109 unsigned __count_advances(const PabloBlock * const entry) {110 111 std::stack<const Statement *> scope;112 unsigned advances = 0;113 114 // Scan through and collect all the advances, calls, scanthrus and matchstars ...115 for (const Statement * stmt = entry>front(); ; ) {116 while ( stmt ) {117 if (isa<Advance>(stmt)) {118 ++advances;119 }120 else if (LLVM_UNLIKELY(isa<If>(stmt)  isa<While>(stmt))) {121 // Set the next statement to be the first statement of the inner scope and push the122 // next statement of the current statement into the scope stack.123 const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)>getBody() : cast<While>(stmt)>getBody();124 scope.push(stmt>getNextNode());125 stmt = nested>front();126 assert (stmt);127 continue;128 }129 stmt = stmt>getNextNode();130 }131 if (scope.empty()) {132 break;133 }134 stmt = scope.top();135 scope.pop();136 }137 return advances;138 }139 140 #define LOG_NUMBER_OF_ADVANCES(entry) LOG("Advances=" << __count_advances(entry))141 142 #else143 #define LOG(x)144 #define RECORD_TIMESTAMP(Name)145 #define LOG_GRAPH(Name, G)146 #define LOG_NUMBER_OF_ADVANCES(entry)147 #endif148 149 150 71 namespace pablo { 151 72 152 #ifdef PRINT_TIMING_INFORMATION153 MultiplexingPass::seed_t MultiplexingPass::SEED = 0;154 unsigned MultiplexingPass::NODES_ALLOCATED = 0;155 unsigned MultiplexingPass::NODES_USED = 0;156 #endif157 158 73 using TypeId = PabloAST::ClassTypeId; 159 74 160 template<typename Graph> 161 static Graph construct(PabloBlock * const block); 162 163 template<typename Graph, typename Map> 164 static Graph construct(PabloBlock * const block, Map & M); 165 166 bool MultiplexingPass::optimize(PabloFunction & function, const bool independent) { 75 /**  * 76 * @brief optimize 77 * @param function the function to optimize 78 **  */ 79 bool MultiplexingPass::optimize(PabloFunction & function) { 167 80 168 81 if (LLVM_UNLIKELY(Samples < 1)) { … … 170 83 } 171 84 172 173 LOG("Seed: " << Seed); 174 175 #ifdef PRINT_TIMING_INFORMATION 176 MultiplexingPass::SEED = Seed; 177 MultiplexingPass::NODES_ALLOCATED = 0; 178 MultiplexingPass::NODES_USED = 0; 179 #endif 180 181 MultiplexingPass mp(Seed); 182 RECORD_TIMESTAMP(start_initialize); 183 const unsigned advances = mp.initialize(function, independent); 184 RECORD_TIMESTAMP(end_initialize); 185 186 LOG("Initialize: " << (end_initialize  start_initialize)); 187 188 LOG_NUMBER_OF_ADVANCES(function.getEntryBlock()); 189 190 if (advances == 0) { 191 return false; 192 } 193 194 RECORD_TIMESTAMP(start_characterize); 195 mp.characterize(function.getEntryBlock()); 196 RECORD_TIMESTAMP(end_characterize); 197 198 LOG("Characterize: " << (end_characterize  start_characterize)); 199 200 LOG("Nodes in BDD: " << bdd_getnodenum() << " of " << bdd_getallocnum()); 201 202 #ifdef PRINT_TIMING_INFORMATION 203 MultiplexingPass::NODES_ALLOCATED = bdd_getallocnum(); 204 MultiplexingPass::NODES_USED = bdd_getnodenum(); 205 #endif 206 207 RECORD_TIMESTAMP(start_shutdown); 208 bdd_done(); 209 RECORD_TIMESTAMP(end_shutdown); 210 LOG("Shutdown: " << (end_shutdown  start_shutdown)); 211 212 RECORD_TIMESTAMP(start_create_multiplex_graph); 213 const bool multiplex = mp.generateCandidateSets(); 214 RECORD_TIMESTAMP(end_create_multiplex_graph); 215 LOG("GenerateCandidateSets: " << (end_create_multiplex_graph  start_create_multiplex_graph)); 216 217 if (multiplex) { 218 219 RECORD_TIMESTAMP(start_usage_weighting); 220 mp.generateUsageWeightingGraph(); 221 RECORD_TIMESTAMP(end_usage_weighting); 222 LOG("GenerateUsageWeighting: " << (end_usage_weighting  start_usage_weighting)); 223 224 RECORD_TIMESTAMP(start_select_multiplex_sets); 225 if (Strategy == SelectionStrategy::Greedy) { 226 mp.selectMultiplexSetsGreedy(); 227 } else if (Strategy == SelectionStrategy::WorkingSet) { 228 mp.selectMultiplexSetsWorkingSet(); 229 } 230 RECORD_TIMESTAMP(end_select_multiplex_sets); 231 LOG("SelectMultiplexSets: " << (end_select_multiplex_sets  start_select_multiplex_sets)); 232 233 RECORD_TIMESTAMP(start_subset_constraints); 234 mp.eliminateSubsetConstraints(); 235 RECORD_TIMESTAMP(end_subset_constraints); 236 LOG("ApplySubsetConstraints: " << (end_subset_constraints  start_subset_constraints)); 237 238 RECORD_TIMESTAMP(start_select_independent_sets); 239 mp.multiplexSelectedSets(function); 240 RECORD_TIMESTAMP(end_select_independent_sets); 241 LOG("MultiplexSelectedSets: " << (end_select_independent_sets  start_select_independent_sets)); 242 243 #ifndef NDEBUG 244 PabloVerifier::verify(function, "postmultiplexing"); 245 #endif 246 247 Simplifier::optimize(function); 248 } 249 250 LOG_NUMBER_OF_ADVANCES(function.getEntryBlock()); 251 252 return multiplex; 253 } 254 255 /**  * 256 * @brief initialize 85 PabloVerifier::verify(function, "premultiplexing"); 86 87 Z3_config cfg = Z3_mk_config(); 88 Z3_context ctx = Z3_mk_context_rc(cfg); 89 Z3_del_config(cfg); 90 Z3_solver solver = Z3_mk_solver(ctx); 91 Z3_solver_inc_ref(ctx, solver); 92 93 MultiplexingPass mp(function, Seed, ctx, solver); 94 95 mp.characterize(function); 96 97 Z3_solver_dec_ref(ctx, solver); 98 Z3_del_context(ctx); 99 100 PabloVerifier::verify(function, "postmultiplexing"); 101 102 return true; 103 } 104 105 /**  * 106 * @brief characterize 257 107 * @param function the function to optimize 258 * @returns true if there are fewer than three advances in this function 259 * 260 * Scan through the program to identify any advances and calls then initialize the BDD engine with 261 * the proper variable ordering. 262 **  */ 263 unsigned MultiplexingPass::initialize(PabloFunction & function, const bool independent) { 264 265 std::stack<Statement *> scope; 266 unsigned variableCount = 0; // number of statements that cannot always be categorized without generating a new variable 267 268 // Scan through and collect all the advances, calls, scanthrus and matchstars ... 269 unsigned statements = 0, advances = 0; 270 for (Statement * stmt = function.getEntryBlock()>front(); ; ) { 271 while ( stmt ) { 272 ++statements; 273 if (LLVM_UNLIKELY(isa<If>(stmt)  isa<While>(stmt))) { 274 scope.push(stmt>getNextNode()); 275 const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)>getBody() : cast<While>(stmt)>getBody(); 276 stmt = nested>front(); 277 assert (stmt); 278 continue; 279 } 280 switch (stmt>getClassTypeId()) { 281 case TypeId::Advance: 282 ++advances; 283 case TypeId::ScanThru: 284 case TypeId::Call: 285 case TypeId::MatchStar: 286 ++variableCount; 287 break; 288 default: 289 break; 290 } 291 stmt = stmt>getNextNode(); 292 } 293 if (scope.empty()) { 294 break; 295 } 296 stmt = scope.top(); 297 scope.pop(); 298 } 299 300 // If there are fewer than three Advances in this program, just abort. We cannot reduce it. 301 if (advances < 3) { 302 return 0; 303 } 304 305 initializeBaseConstraintGraph(function.getEntryBlock(), statements, advances); 306 307 mSubsetGraph = SubsetGraph(advances); 308 309 initializeAdvanceDepth(function.getEntryBlock(), advances); 310 311 // Initialize the BDD engine ... 312 bdd_init(10000000, 100000); 313 bdd_setvarnum(variableCount + function.getNumOfParameters()); 314 bdd_setcacheratio(64); 315 bdd_setmaxincrease(10000000); 316 bdd_autoreorder(BDD_REORDER_SIFT); // BDD_REORDER_SIFT 317 108 **  */ 109 void MultiplexingPass::characterize(PabloFunction & function) { 318 110 // Map the constants and input variables 319 mCharacterization[PabloBlock::createZeroes()] = bdd_zero(); 320 mCharacterization[PabloBlock::createOnes()] = bdd_one(); 321 mVariables = function.getNumOfParameters(); 322 323 // TODO: record information in the function to indicate which pairs of input variables are independent 324 if (independent) { 325 for (unsigned i = 0; i != mVariables; ++i) { 326 BDD Vi = bdd_ithvar(i); 327 BDD Ni = bdd_zero(); 328 for (unsigned j = 0; j != i; ++j) { 329 Ni = bdd_addref(bdd_or(Ni, bdd_ithvar(j))); 330 } 331 for (unsigned j = i + 1; j != mVariables; ++j) { 332 Ni = bdd_addref(bdd_or(Ni, bdd_ithvar(j))); 333 } 334 Ni = bdd_addref(bdd_not(Ni)); 335 mCharacterization[function.getParameter(i)] = bdd_addref(bdd_imp(Vi, Ni)); 336 } 337 } else { 338 for (unsigned i = 0; i != mVariables; ++i) { 339 mCharacterization[function.getParameter(i)] = bdd_ithvar(i); 340 } 341 } 342 343 return advances; 344 } 345 346 /**  * 347 * @brief initializeBaseConstraintGraph 348 **  */ 349 inline void MultiplexingPass::initializeBaseConstraintGraph(PabloBlock * const block, const unsigned statements, const unsigned advances) { 350 351 std::stack<Statement *> scope; 352 flat_map<const PabloAST *, unsigned> M; 353 M.reserve(statements); 354 matrix<bool> G(statements, advances, false); 355 for (unsigned i = 0; i != advances; ++i) { 356 G(i, i) = true; 357 } 358 359 unsigned n = advances; 360 unsigned k = 0; 361 for (const Statement * stmt = block>front();;) { 362 while ( stmt ) { 363 unsigned u = 0; 364 if (LLVM_UNLIKELY(isa<Advance>(stmt))) { 365 u = k++; 366 } else { 367 u = n++; 368 } 369 M.emplace(stmt, u); 370 for (unsigned i = 0; i != stmt>getNumOperands(); ++i) { 371 const PabloAST * const op = stmt>getOperand(i); 372 if (LLVM_LIKELY(isa<Statement>(op))) { 373 const unsigned v = M[op]; 374 for (unsigned w = 0; w != k; ++w) { 375 G(u, w) = G(v, w); 376 } 377 } 378 } 379 if (LLVM_UNLIKELY(isa<If>(stmt)  isa<While>(stmt))) { 380 scope.push(stmt>getNextNode()); 381 const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)>getBody() : cast<While>(stmt)>getBody(); 382 stmt = nested>front(); 383 assert (stmt); 384 continue; 385 } 386 stmt = stmt>getNextNode(); 387 } 388 if (scope.empty()) { 389 break; 390 } 391 stmt = scope.top(); 392 scope.pop(); 393 } 394 395 assert (k == advances); 396 397 // Initialize the base constraint graph by effectively transposing G and removing reflective loops 398 mConstraintGraph = ConstraintGraph(advances); 399 for (unsigned i = 0; i != advances; ++i) { 400 for (unsigned j = 0; j < i; ++j) { 401 if (G(i, j)) { 402 add_edge(j, i, true, mConstraintGraph); 403 } 404 } 405 for (unsigned j = i + 1; j < advances; ++j) { 406 if (G(i, j)) { 407 add_edge(j, i, true, mConstraintGraph); 408 } 409 } 410 } 411 412 } 413 414 /**  * 415 * @brief initializeAdvanceDepth 416 **  */ 417 inline void MultiplexingPass::initializeAdvanceDepth(PabloBlock * const entryBlock, const unsigned advances) { 418 419 std::stack<Statement *> scope; 420 unsigned k = 0; 421 int maxDepth = 0; 422 const PabloBlock * advanceScope[advances]; 423 mAdvance.resize(advances, nullptr); 424 mAdvanceRank.resize(advances, 0); 425 mAdvanceNegatedVariable.reserve(advances); 426 for (Statement * stmt = entryBlock>front(); ; ) { 427 while ( stmt ) { 428 if (LLVM_UNLIKELY(isa<If>(stmt)  isa<While>(stmt))) { 429 scope.push(stmt>getNextNode()); 430 const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)>getBody() : cast<While>(stmt)>getBody(); 431 stmt = nested>front(); 432 assert (stmt); 433 continue; 434 } else if (LLVM_UNLIKELY(isa<Advance>(stmt))) { 435 int depth = 0; 436 mAdvance[k] = cast<Advance>(stmt); 437 advanceScope[k] = cast<Advance>(stmt)>getParent(); 438 for (unsigned i = 0; i != k; ++i) { 439 if (edge(i, k, mConstraintGraph).second  (advanceScope[i] != advanceScope[k])) { 440 depth = std::max<int>(depth, mAdvanceRank[i]); 441 } 442 } 443 mAdvanceRank[k++] = ++depth; 444 maxDepth = std::max(maxDepth, depth); 445 } 446 stmt = stmt>getNextNode(); 447 } 448 if (scope.empty()) { 449 break; 450 } 451 stmt = scope.top(); 452 scope.pop(); 453 } 454 assert (k == advances); 455 456 LOG("Window Size / Max Depth: " << WindowSize << " of " << maxDepth); 111 Z3_sort boolTy = Z3_mk_bool_sort(mContext); 112 113 Z3_ast F = Z3_mk_const(mContext, Z3_mk_int_symbol(mContext, 0), boolTy); 114 Z3_inc_ref(mContext, F); 115 add(PabloBlock::createZeroes(), F); 116 117 Z3_ast T = Z3_mk_const(mContext, Z3_mk_int_symbol(mContext, 1), boolTy); 118 Z3_inc_ref(mContext, T); 119 add(PabloBlock::createOnes(), T); 120 121 for (unsigned i = 0; i < function.getNumOfParameters(); ++i) { 122 make(function.getParameter(i)); 123 } 124 125 characterize(function.getEntryBlock()); 457 126 } 458 127 459 128 /**  * 460 129 * @brief characterize 461 * @param vars the input vars for this program462 *463 * Scan through the program and iteratively compute the BDDs for each statement.464 130 **  */ 465 131 void MultiplexingPass::characterize(PabloBlock * const block) { 132 Statement * end = initialize(block>front()); 466 133 for (Statement * stmt : *block) { 467 if (LLVM_UNLIKELY(isa<If>(stmt))) { 468 characterize(cast<If>(stmt)>getBody()); 469 for (const Assign * def : cast<If>(stmt)>getDefined()) { 470 if (LLVM_LIKELY(escapes(def))) { 471 bdd_addref(get(def)); 472 } 473 } 474 } else if (LLVM_UNLIKELY(isa<While>(stmt))) { 475 characterize(cast<While>(stmt)>getBody()); 476 for (const Next * var : cast<While>(stmt)>getVariants()) { 477 if (LLVM_LIKELY(escapes(var))) { 478 BDD & initial = get(var>getInitial()); 479 BDD & escaped = get(var); 480 initial = bdd_addref(bdd_or(initial, escaped)); 481 escaped = initial; 482 } 483 } 134 if (LLVM_UNLIKELY(stmt == end)) { 135 Statement * const next = stmt>getNextNode(); 136 multiplex(block); 137 if (isa<If>(stmt)) { 138 characterize(cast<If>(stmt)>getBody()); 139 } else if (isa<While>(stmt)) { 140 for (const Next * var : cast<While>(stmt)>getVariants()) { 141 Z3_inc_ref(mContext, get(var>getInitial())); 142 } 143 characterize(cast<While>(stmt)>getBody()); 144 // since we cannot be certain that we'll always execute at least one iteration of a loop, we must 145 // assume that the variants could either be their initial value or their resulting value. 146 for (const Next * var : cast<While>(stmt)>getVariants()) { 147 Z3_ast v0 = get(var>getInitial()); 148 Z3_ast & v1 = get(var); 149 Z3_ast merge[2] = { v0, v1 }; 150 Z3_ast r = Z3_mk_or(mContext, 2, merge); 151 Z3_inc_ref(mContext, r); 152 Z3_dec_ref(mContext, v0); 153 Z3_dec_ref(mContext, v1); 154 v1 = r; 155 assert (get(var) == r); 156 } 157 } 158 end = initialize(next); 484 159 } else { 485 mCharacterization.insert(std::make_pair(stmt, characterize(stmt))); 486 } 487 } 488 } 489 490 /**  * 491 * @brief throwUnexpectedStatementTypeError 492 **  */ 493 static void throwUnexpectedStatementTypeError(const Statement * const stmt) { 160 characterize(stmt); 161 } 162 } 163 multiplex(block); 164 } 165 166 /**  * 167 * @brief multiplex 168 **  */ 169 void MultiplexingPass::multiplex(PabloBlock * const block) { 170 if (generateCandidateSets()) { 171 selectMultiplexSetsGreedy(); 172 eliminateSubsetConstraints(); 173 multiplexSelectedSets(block); 174 } 175 } 176 177 /**  * 178 * @brief equals 179 **  */ 180 inline bool MultiplexingPass::equals(Z3_ast a, Z3_ast b) { 181 Z3_solver_push(mContext, mSolver); 182 Z3_ast test = Z3_mk_eq(mContext, a, b); 183 Z3_inc_ref(mContext, test); 184 Z3_solver_assert(mContext, mSolver, test); 185 const auto r = Z3_solver_check(mContext, mSolver); 186 Z3_dec_ref(mContext, test); 187 Z3_solver_pop(mContext, mSolver, 1); 188 return (r == Z3_L_TRUE); 189 } 190 191 /**  * 192 * @brief handle_unexpected_statement 193 **  */ 194 static void handle_unexpected_statement(Statement * const stmt) { 494 195 std::string tmp; 495 196 raw_string_ostream err(tmp); 496 err << "Unexpected statement type ";197 err << "Unexpected statement type: "; 497 198 PabloPrinter::print(stmt, err); 498 199 throw std::runtime_error(err.str()); … … 502 203 * @brief characterize 503 204 **  */ 504 inline BDD MultiplexingPass::characterize(Statement * const stmt) { 505 assert (stmt>getNumOperands() > 0); 506 BDD bdd = get(stmt>getOperand(0)); 205 inline Z3_ast MultiplexingPass::characterize(Statement * const stmt) { 206 207 const size_t n = stmt>getNumOperands(); assert (n > 0); 208 Z3_ast operands[n] = {}; 209 for (size_t i = 0; i < n; ++i) { 210 PabloAST * op = stmt>getOperand(i); 211 if (LLVM_LIKELY(isa<Statement>(op)  isa<Var>(op))) { 212 operands[i] = get(op, true); 213 } 214 } 215 216 Z3_ast node = operands[0]; 507 217 switch (stmt>getClassTypeId()) { 508 218 case TypeId::Assign: 509 219 case TypeId::Next: 510 break; 220 case TypeId::AtEOF: 221 case TypeId::InFile: 222 node = operands[0]; break; 511 223 case TypeId::And: 512 for (unsigned i = 1; i != stmt>getNumOperands(); ++i) { 513 bdd = bdd_and(bdd, get(stmt>getOperand(i))); 514 } 515 break; 224 node = Z3_mk_and(mContext, n, operands); break; 516 225 case TypeId::Or: 517 for (unsigned i = 1; i != stmt>getNumOperands(); ++i) { 518 bdd = bdd_or(bdd, get(stmt>getOperand(i))); 519 } 520 break; 226 node = Z3_mk_or(mContext, n, operands); break; 521 227 case TypeId::Xor: 522 for (unsigned i = 1; i != stmt>getNumOperands(); ++i) { 523 bdd = bdd_xor(bdd, get(stmt>getOperand(i))); 524 } 525 break; 228 node = Z3_mk_xor(mContext, operands[0], operands[1]); 229 Z3_inc_ref(mContext, node); 230 for (unsigned i = 2; LLVM_UNLIKELY(i < n); ++i) { 231 Z3_ast temp = Z3_mk_xor(mContext, node, operands[i]); 232 Z3_inc_ref(mContext, temp); 233 Z3_dec_ref(mContext, node); 234 node = temp; 235 } 236 return add(stmt, node); 526 237 case TypeId::Not: 527 bdd = bdd_not(bdd);238 node = Z3_mk_not(mContext, node); 528 239 break; 529 240 case TypeId::Sel: 530 bdd = bdd_ite(bdd, get(stmt>getOperand(1)), get(stmt>getOperand(2)));241 node = Z3_mk_ite(mContext, operands[0], operands[1], operands[2]); 531 242 break; 243 case TypeId::Advance: 244 return characterize(cast<Advance>(stmt), operands[0]); 532 245 case TypeId::ScanThru: 533 246 // ScanThru(c, m) := (c + m) â§ Â¬m. Thus we can conservatively represent this statement using the BDD 534 247 // for Â¬m  provided no derivative of this statement is negated in any fashion. 535 248 case TypeId::MatchStar: 536 case TypeId::Call: 537 return bdd_ithvar(mVariables++); 538 case TypeId::Advance: 539 return characterize(cast<Advance>(stmt), bdd); 249 case TypeId::Count: 250 return make(stmt); 540 251 default: 541 throwUnexpectedStatementTypeError(stmt); 542 } 543 return bdd_addref(bdd); 544 } 252 handle_unexpected_statement(stmt); 253 } 254 Z3_inc_ref(mContext, node); 255 return add(stmt, node); 256 } 257 545 258 546 259 /**  * 547 260 * @brief characterize 548 261 **  */ 549 inline BDD MultiplexingPass::characterize(Advance * const adv, const BDDIk) {262 inline Z3_ast MultiplexingPass::characterize(Advance * const adv, Z3_ast Ik) { 550 263 const auto k = mAdvanceNegatedVariable.size(); 551 assert (mAdvance[k] == adv); 552 std::vector<bool> unconstrained(k , false); 553 for (unsigned i = 0; i != k; ++i) { 554 555 // Are we interested in testing these streams to see whether they are mutually exclusive? 556 if (exceedsWindowSize(i, k)) continue; 264 265 assert (adv); 266 assert (mConstraintGraph[k] == adv); 267 268 bool unconstrained[k] = {}; 269 270 Z3_solver_push(mContext, mSolver); 271 272 for (size_t i = 0; i < k; ++i) { 557 273 558 274 // Have we already proven that they are unconstrained by their subset relationship? … … 561 277 // If these Advances are mutually exclusive, in the same scope, transitively independent, and shift their 562 278 // values by the same amount, we can safely multiplex them. Otherwise mark the constraint in the graph. 563 const Advance * const ithAdv = mAdvance[i]; 564 if ((mTestConstrainedAdvances  independent(i, k)) && (ithAdv>getOperand(1) == adv>getOperand(1))) { 565 const BDD Ii = get(ithAdv>getOperand(0)); 566 const BDD IiIk = bdd_addref(bdd_and(Ii, Ik)); 279 const Advance * const ithAdv = mConstraintGraph[i]; 280 if (ithAdv>getOperand(1) == adv>getOperand(1)) { 281 282 Z3_ast Ii = get(ithAdv>getOperand(0)); 283 567 284 // Is there any satisfying truth assignment? If not, these streams are mutually exclusive. 568 if (bdd_satone(IiIk) == bdd_zero()) { 285 286 Z3_solver_push(mContext, mSolver); 287 Z3_ast conj[2] = { Ii, Ik }; 288 Z3_ast IiIk = Z3_mk_and(mContext, 2, conj); 289 Z3_inc_ref(mContext, IiIk); 290 Z3_solver_assert(mContext, mSolver, IiIk); 291 if (Z3_solver_check(mContext, mSolver) == Z3_L_FALSE) { 569 292 // If Ai â© Ak = â 570 293 and Aj â Ai, Aj â© Ak = â 571 294 . 572 295 for (auto e : make_iterator_range(in_edges(i, mSubsetGraph))) { 573 const auto j = source(e, mSubsetGraph); 574 if (mSubsetImplicationsAdhereToWindowingSizeConstraint && exceedsWindowSize(j, k)) { 575 continue; 576 } 577 unconstrained[j] = true; 296 unconstrained[source(e, mSubsetGraph)] = true; 578 297 } 579 298 unconstrained[i] = true; 580 } else if (Ii == IiIk) { 581 // If Ii = Ii â© Ik then Ii â Ik. Record this in the subset graph with the arc (i, k). 299 300 } else if (equals(Ii, IiIk)) { 301 // If Ii = Ii â© Ik then Ii â Ik. Record this in the subset graph with the arc (i, k). 582 302 // Note: the AST will be modified to make these mutually exclusive if Ai and Ak end up in 583 303 // the same multiplexing set. … … 587 307 const auto j = source(e, mSubsetGraph); 588 308 add_edge(j, k, mSubsetGraph); 589 if (mSubsetImplicationsAdhereToWindowingSizeConstraint && exceedsWindowSize(j, k)) {590 continue;591 }592 309 unconstrained[j] = true; 593 310 } 594 311 unconstrained[i] = true; 595 } else if (Ik == IiIk) { 596 // If Ik = Ii â© Ik then Ik â Ii. Record this in the subset graph with the arc (k, i). 312 313 } else if (equals(Ik, IiIk)) { 314 // If Ik = Ii â© Ik then Ik â Ii. Record this in the subset graph with the arc (k, i). 597 315 add_edge(k, i, mSubsetGraph); 598 316 // If Ak â Ai and Ai â Aj, Ak â Aj. … … 600 318 const auto j = target(e, mSubsetGraph); 601 319 add_edge(k, j, mSubsetGraph); 602 if (mSubsetImplicationsAdhereToWindowingSizeConstraint && exceedsWindowSize(j, k)) {603 continue;604 }605 320 unconstrained[j] = true; 606 321 } 607 322 unconstrained[i] = true; 608 323 } 609 bdd_delref(IiIk); 610 } 611 } 612 613 BDD Ak = bdd_ithvar(mVariables++); 614 const BDD Nk = bdd_addref(bdd_not(Ak)); 615 for (unsigned i = 0; i != k; ++i) { 324 325 Z3_dec_ref(mContext, IiIk); 326 Z3_solver_pop(mContext, mSolver, 1); 327 } 328 } 329 330 Z3_solver_pop(mContext, mSolver, 1); 331 332 Z3_ast Ak0 = make(adv); 333 Z3_inc_ref(mContext, Ak0); 334 Z3_ast Nk = Z3_mk_not(mContext, Ak0); 335 Z3_inc_ref(mContext, Nk); 336 337 Z3_ast vars[k + 1]; 338 vars[0] = Ak0; 339 340 unsigned m = 1; 341 for (unsigned i = 0; i < k; ++i) { 616 342 if (unconstrained[i]) { 617 // This algorithm deems two streams mutually exclusive if and only if the conjuntion of their BDDsis a contradiction.343 // This algorithm deems two streams mutually exclusive if and only if their conjuntion is a contradiction. 618 344 // To generate a contradiction when comparing Advances, the BDD of each Advance is represented by the conjunction of 619 345 // variables representing the kth Advance and the negation of all variables for the Advances whose inputs are mutually … … 624 350 // (assuming that the jth and kth Advance are not mutually exclusive.) 625 351 626 const BDD Ni = mAdvanceNegatedVariable[i]; 627 BDD & Ai = get(mAdvance[i]); 628 Ai = bdd_addref(bdd_and(Ai, Nk)); 629 Ak = bdd_addref(bdd_and(Ak, Ni)); 630 if (independent(i, k) && (adv>getParent() == mAdvance[i]>getParent())) { 631 continue; 632 } 633 } 634 add_edge(i, k, false, mConstraintGraph); 352 Z3_ast & Ai0 = get(mConstraintGraph[i]); 353 Z3_ast conj[2] = { Ai0, Nk }; 354 Z3_ast Ai = Z3_mk_and(mContext, 2, conj); 355 Z3_inc_ref(mContext, Ai); 356 Z3_dec_ref(mContext, Ai0); // if this doesn't work, we'll have to scan from the output variables. 357 Ai0 = Ai; 358 assert (get(mConstraintGraph[i]) == Ai); 359 360 vars[m++] = mAdvanceNegatedVariable[i]; 361 362 continue; // note: if these Advances aren't transtively independent, an edge will still exist. 363 } 364 add_edge(i, k, mConstraintGraph); 635 365 } 636 366 // To minimize the number of BDD computations, we store the negated variable instead of negating it each time. 637 367 mAdvanceNegatedVariable.emplace_back(Nk); 638 return Ak; 639 } 640 641 /**  * 642 * @brief independent 643 **  */ 644 inline bool MultiplexingPass::independent(const ConstraintVertex i, const ConstraintVertex j) const { 645 assert (i < num_vertices(mConstraintGraph) && j < num_vertices(mConstraintGraph)); 646 return mConstraintGraph.get_edge(i, j).first == false; 647 } 648 649 /**  * 650 * @brief exceedsWindowSize 651 **  */ 652 inline bool MultiplexingPass::exceedsWindowSize(const ConstraintVertex i, const ConstraintVertex j) const { 653 assert (i < mAdvanceRank.size() && j < mAdvanceRank.size()); 654 return (std::abs<int>(mAdvanceRank[i]  mAdvanceRank[j]) > WindowSize); 368 Z3_ast Ak = Z3_mk_and(mContext, m, vars); 369 if (LLVM_UNLIKELY(Ak != Ak0)) { 370 Z3_inc_ref(mContext, Ak); 371 Z3_dec_ref(mContext, Ak0); 372 } 373 return add(adv, Ak); 374 } 375 376 /**  * 377 * @brief initialize 378 **  */ 379 Statement * MultiplexingPass::initialize(Statement * const initial) { 380 381 // clean up any unneeded refs / characterizations. 382 for (auto i = mCharacterization.begin(); i != mCharacterization.end(); ) { 383 const CharacterizationRef & r = std::get<1>(*i); 384 if (LLVM_UNLIKELY(std::get<1>(r) == 0)) { 385 Z3_dec_ref(mContext, std::get<0>(r)); 386 auto j = i++; 387 mCharacterization.erase(j); 388 } else { 389 ++i; 390 } 391 } 392 393 for (Z3_ast var : mAdvanceNegatedVariable) { 394 Z3_dec_ref(mContext, var); 395 } 396 mAdvanceNegatedVariable.clear(); 397 398 // Scan through and count all the advances and statements ... 399 unsigned statements = 0, advances = 0; 400 Statement * last = nullptr; 401 for (Statement * stmt = initial; stmt; stmt = stmt>getNextNode()) { 402 if (LLVM_UNLIKELY(isa<If>(stmt)  isa<While>(stmt))) { 403 last = stmt; 404 break; 405 } else if (LLVM_UNLIKELY(isa<Advance>(stmt))) { 406 ++advances; 407 } 408 ++statements; 409 } 410 411 flat_map<const PabloAST *, unsigned> M; 412 M.reserve(statements); 413 matrix<bool> G(statements, advances, false); 414 for (unsigned i = 0; i != advances; ++i) { 415 G(i, i) = true; 416 } 417 418 mConstraintGraph = ConstraintGraph(advances); 419 unsigned n = advances; 420 unsigned k = 0; 421 for (Statement * stmt = initial; stmt != last; stmt = stmt>getNextNode()) { 422 assert (!isa<If>(stmt) && !isa<While>(stmt)); 423 unsigned u = 0; 424 if (LLVM_UNLIKELY(isa<Advance>(stmt))) { 425 mConstraintGraph[k] = cast<Advance>(stmt); 426 u = k++; 427 } else { 428 u = n++; 429 } 430 for (unsigned i = 0; i != stmt>getNumOperands(); ++i) { 431 const PabloAST * const op = stmt>getOperand(i); 432 if (LLVM_LIKELY(isa<Statement>(op))) { 433 auto f = M.find(op); 434 if (f != M.end()) { 435 const unsigned v = std::get<1>(*f); 436 for (unsigned w = 0; w != k; ++w) { 437 G(u, w) = G(v, w); 438 } 439 } 440 } 441 } 442 M.emplace(stmt, u); 443 } 444 445 assert (k == advances); 446 447 // Initialize the base constraint graph by transposing G and removing reflective loops 448 for (unsigned i = 0; i != advances; ++i) { 449 for (unsigned j = 0; j < i; ++j) { 450 if (G(i, j)) { 451 add_edge(j, i, mConstraintGraph); 452 } 453 } 454 for (unsigned j = i + 1; j < advances; ++j) { 455 if (G(i, j)) { 456 add_edge(j, i, mConstraintGraph); 457 } 458 } 459 } 460 461 mSubsetGraph = SubsetGraph(advances); 462 mAdvanceNegatedVariable.reserve(advances); 463 464 return last; 655 465 } 656 466 … … 664 474 665 475 /**  * 666 * @brief generateUsageWeightingGraph667 *668 * Prior to generating our candidate sets, scan through the constraint graph and generate a clique in the usage669 * weighting graph each set of constraintfree Advance nodes that have the same users. We may be able optimize670 * the demultiplexing calculations using range expressions.671 *672 * Note: it'd be preferable to contract vertices in the constraint graph prior to scanning through it but that673 * leaves us with a more difficult problem. Namely, Advance nodes may belong to more than one clique but it'd be674 * useless to compute a value twice; furthermore, we want to avoid generating a multiplexing set whose size is 2^n675 * for any n â â€* but don't want to needlessly limit the size of any clique. Look into this further later.676 **  */677 void MultiplexingPass::generateUsageWeightingGraph() {678 const auto n = num_vertices(mConstraintGraph); assert (n > 2);679 // Let G be the complement of the constraint graph âª the subset graph restricted to only the edges corresponding680 // to pairs of Advances with the same users.681 CliqueGraph G(n);682 for (unsigned i = 0; i != (n  1); ++i) {683 const Advance * const advI = mAdvance[i];684 for (unsigned j = i + 1; j != n; ++j) {685 const Advance * const advJ = mAdvance[j];686 if (LLVM_UNLIKELY(advI>getNumUses() == advJ>getNumUses()) && independent(i, j)) {687 if (LLVM_UNLIKELY(std::equal(advI>user_begin(), advI>user_end(), advJ>user_begin()))) {688 // INVESTIGATE: we should be able to ignore subset relations if these are going to become a689 // range expression. Look into making a proof for it once the range expression calculation690 // is finished.691 if (!(edge(i, j, mSubsetGraph).second  edge(j, i, mSubsetGraph).second)) {692 add_edge(i, j, G);693 }694 }695 }696 }697 }698 if (num_edges(G) > 0) {699 const CliqueSets S = findMaximalCliques(G);700 for (unsigned i = 0; i != n; ++i) {701 clear_vertex(i, G);702 }703 for (const std::vector<CliqueGraph::vertex_descriptor> & C : S) {704 const unsigned m = C.size(); assert (m > 1);705 for (unsigned i = 1; i != m; ++i) {706 for (unsigned j = 0; j != i; ++j) {707 add_edge(C[j], C[i], G);708 }709 }710 }711 }712 mUsageGraph = G;713 }714 715 /**  *716 476 * @brief generateCandidateSets 717 477 **  */ 718 478 bool MultiplexingPass::generateCandidateSets() { 719 479 480 const auto n = mAdvanceNegatedVariable.size(); 481 if (n < 3) { 482 return false; 483 } 484 assert (num_vertices(mConstraintGraph) == n); 485 720 486 Constraints S; 721 487 722 ConstraintGraph::degree_size_type D[n um_vertices(mConstraintGraph)];723 724 mCandidateGraph = CandidateGraph(n um_vertices(mConstraintGraph));488 ConstraintGraph::degree_size_type D[n]; 489 490 mCandidateGraph = CandidateGraph(n); 725 491 726 492 for (unsigned r = Samples; r; r) { … … 773 539 } 774 540 775 #ifdef PRINT_DEBUG_OUTPUT776 const auto n = num_vertices(mConstraintGraph);777 const auto m = num_vertices(mCandidateGraph);778 unsigned sets = 0;779 for (auto i = n; i < m; ++i) {780 if (degree(i, mCandidateGraph) > 0) {781 ++sets;782 }783 }784 LOG("Unique Candidate Sets: " << (sets));785 #endif786 787 541 return num_vertices(mCandidateGraph) > num_vertices(mConstraintGraph); 788 542 } … … 929 683 const size_t n = num_vertices(mCandidateGraph)  m; 930 684 931 std::vector<bool> chosen(n, false);685 bool chosen[n] = {}; 932 686 933 687 for (;;) { … … 942 696 if (LLVM_LIKELY(r >= 3)) { // if this set has at least 3 elements. 943 697 r *= r; 944 AdjIterator begin, end;945 std::tie(begin, end) = adjacent_vertices(t, mCandidateGraph);946 for (auto ei = begin; ei != end; ++ei) {947 for (auto ej = ei; ++ej != end; ) {948 if (edge(*ei, *ej, mUsageGraph).second) {949 ++r;950 }951 }952 }953 698 if (w < r) { 954 699 u = t; … … 989 734 } 990 735 991 if (Samples > 1) {992 removePotentialCycles(u);993 }994 736 } 995 737 … … 1015 757 // we may be able to reduce register pressure. 1016 758 1017 // using Graph = adjacency_list<hash_setS, vecS, bidirectionalS, Statement *, unsigned>; 1018 // using Map = flat_map<const Statement *, typename Graph::vertex_descriptor>; 1019 1020 // const size_t m = num_vertices(mConstraintGraph); 1021 // const size_t n = num_vertices(mMultiplexSetGraph)  m; 1022 1023 // for (unsigned i = 0; i != n; ++i) { 1024 1025 // Map M; 1026 // Graph G = construct<Graph>(block, M); 1027 1028 1029 1030 1031 1032 1033 1034 // } 1035 1036 1037 1038 1039 1040 1041 } 1042 1043 /**  * 1044 * @brief removePotentialCycles 1045 * 1046 * If Samples > 1, our candidate sets were generated by more than one traversal through the constraint DAG. 1047 * Multiplexing disjoint sets generated by differing traversals can induce a cycle in the AST. For example, 1048 * suppose sets {A,B} and {C,D} and A is dependent on C and D on B; multiplexing both will result in a cycle. 1049 * 1050 * Eliminating all potential cycles will likely lead to the removal of many candidate sets. Instead we "fix" 1051 * the candidate sets after the selection of a particular candidate set. 1052 **  */ 1053 void MultiplexingPass::removePotentialCycles(const CandidateGraph::vertex_descriptor i) { 1054 1055 using AdjIterator = graph_traits<CandidateGraph>::adjacency_iterator; 1056 1057 const auto m = num_vertices(mConstraintGraph); 1058 const auto n = num_vertices(mCandidateGraph); 1059 1060 // Suppose we construct a graph G that indicates whether selecting candidate set V will induce a cycle, given 1061 // that we've already chosen candidate set U. This can occur here only because some elements of V are dependent 1062 // on U and vice versa. 1063 1064 // We want the minimal minimum weight feedback arc set of G; however, we also know any edge will either have 1065 // 1066 1067 for (auto j = m; j < n; ++j) { 1068 if (LLVM_UNLIKELY(i == j)) continue; 1069 AdjIterator begin, end; 1070 std::tie(begin, end) = adjacent_vertices(j, mCandidateGraph); 1071 for (auto ui = begin; ui != end; ) { 1072 const auto u = *ui++; 1073 unsigned outgoing = 0; 1074 unsigned incoming = 0; 1075 for (auto v : make_iterator_range(adjacent_vertices(i, mCandidateGraph))) { 1076 if (dependent(u, v)) { 1077 ++outgoing; 1078 } else if (dependent(v, u)) { 1079 ++incoming; 1080 } 1081 } 1082 if (LLVM_UNLIKELY(outgoing > 0 && incoming > 0)) { 1083 remove_edge(j, u, mCandidateGraph); 1084 } 1085 } 1086 } 1087 } 1088 1089 /**  * 1090 * @brief dependent 1091 **  */ 1092 inline bool MultiplexingPass::dependent(const ConstraintVertex i, const ConstraintVertex j) const { 1093 const auto e = mConstraintGraph.get_edge(i, j); 1094 return (e.second && e.first); 759 1095 760 } 1096 761 … … 1129 794 // Afterwards modify the AST to ensure that multiplexing algorithm can ignore any subset constraints 1130 795 for (auto e : make_iterator_range(edges(mSubsetGraph))) { 1131 Advance * const adv1 = m Advance[source(e, mSubsetGraph)];1132 Advance * const adv2 = m Advance[target(e, mSubsetGraph)];796 Advance * const adv1 = mConstraintGraph[source(e, mSubsetGraph)]; 797 Advance * const adv2 = mConstraintGraph[target(e, mSubsetGraph)]; 1133 798 assert (adv1>getParent() == adv2>getParent()); 1134 799 PabloBlock * const pb = adv1>getParent(); … … 1143 808 1144 809 /**  * 810 * @brief dominates 811 * 812 * does Statement a dominate Statement b? 813 **  */ 814 bool dominates(const Statement * const a, const Statement * const b) { 815 816 if (LLVM_UNLIKELY(b == nullptr)) { 817 return true; 818 } else if (LLVM_UNLIKELY(a == nullptr)) { 819 return false; 820 } 821 822 assert (a>getParent()); 823 assert (b>getParent()); 824 825 const PabloBlock * const parent = a>getParent(); 826 if (LLVM_LIKELY(parent == b>getParent())) { 827 for (const Statement * t : *parent) { 828 if (t == a) { 829 return true; 830 } else if (t == b) { 831 break; 832 } 833 } 834 return false; 835 } else { 836 const PabloBlock * block = b>getParent(); 837 for (;;) { 838 Statement * br = block>getBranch(); 839 if (br == nullptr) { 840 return dominates(parent>getBranch(), b); 841 } 842 block = br>getParent(); 843 if (block == parent) { 844 return dominates(a, br); 845 } 846 } 847 } 848 } 849 850 /**  * 1145 851 * @brief multiplexSelectedSets 1146 852 **  */ 1147 void MultiplexingPass::multiplexSelectedSets(PabloFunction & function) { 1148 flat_set<PabloBlock *> modified; 853 inline void MultiplexingPass::multiplexSelectedSets(PabloBlock * const block) { 854 855 856 // Z3_config cfg = Z3_mk_config(); 857 // Z3_context ctx = Z3_mk_context_rc(cfg); 858 // Z3_del_config(cfg); 859 // Z3_solver solver = Z3_mk_solver(ctx); 860 // Z3_solver_inc_ref(ctx, solver); 861 862 1149 863 const auto first_set = num_vertices(mConstraintGraph); 1150 864 const auto last_set = num_vertices(mCandidateGraph); … … 1159 873 // The multiplex set graph is a DAG with edges denoting the set relationships of our independent sets. 1160 874 unsigned i = 0; 1161 for (const auto u : orderMultiplexSet(idx)) {1162 input[i++] = m Advance[u];875 for (const auto u : make_iterator_range(adjacent_vertices(idx, mCandidateGraph))) { // orderMultiplexSet(idx)) { 876 input[i++] = mConstraintGraph[u]; 1163 877 } 1164 878 Advance * const adv = input[0]; 1165 PabloBlock * const block = adv>getParent(); assert (block); 1166 modified.insert(block); 879 assert (block == adv>getParent()); 1167 880 1168 881 circular_buffer<PabloAST *> Q(n); … … 1207 920 } 1208 921 } 1209 for (PabloBlock * block : modified) { 1210 rewriteAST(block); 1211 } 1212 } 1213 1214 /**  * 1215 * @brief orderMultiplexSet 1216 **  */ 1217 inline MultiplexingPass::Candidates MultiplexingPass::orderMultiplexSet(const CandidateGraph::vertex_descriptor u) { 1218 Candidates set; 1219 set.reserve(degree(u, mCandidateGraph)); 1220 for (const auto e : make_iterator_range(adjacent_vertices(u, mCandidateGraph))) { 1221 set.push_back(e); 1222 } 1223 std::sort(set.begin(), set.end()); 1224 Candidates clique; 1225 Candidates result; 1226 result.reserve(degree(u, mCandidateGraph)); 1227 while (set.size() > 0) { 1228 const auto v = *set.begin(); 1229 clique.push_back(v); 1230 set.erase(set.begin()); 1231 for (const auto w : make_iterator_range(adjacent_vertices(v, mUsageGraph))) { 1232 auto f = std::lower_bound(set.begin(), set.end(), w); 1233 // Is w in our multiplexing set? 1234 if (f == set.end()  *f != w) { 1235 continue; 1236 } 1237 // Is our subgraph still a clique after adding w to it? 1238 bool valid = true; 1239 for (const auto y : clique) { 1240 if (!edge(w, y, mUsageGraph).second) { 1241 valid = false; 1242 break; 1243 } 1244 } 1245 if (valid) { 1246 clique.push_back(w); 1247 set.erase(f); 1248 } 1249 } 1250 result.insert(result.end(), clique.begin(), clique.end()); 1251 clique.clear(); 1252 } 1253 return result; 1254 } 1255 1256 1257 1258 1259 /**  * 1260 * @brief rewriteAST 1261 * 1262 * Multiplexing ignores defuse ordering when muxing and demuxing the Advances; this will likely lead to an illegal 1263 * ordering but, by virtue of the multiplexing algorithm, some ordering of the IR must be legal. However, an 1264 * arbritary topological ordering will likely lead to poor performance due to excessive register spills; this 1265 * algorithm attempts to mitigate this by using a simple bottomup ordering scheme. 1266 **  */ 1267 void MultiplexingPass::rewriteAST(PabloBlock * const block) { 1268 1269 using Graph = adjacency_list<hash_setS, vecS, bidirectionalS, Statement *>; 1270 using Vertex = Graph::vertex_descriptor; 1271 using ReadySet = std::vector<Vertex>; 1272 using TypeId = PabloAST::ClassTypeId; 1273 1274 Graph G = construct<Graph>(block); 1275 1276 1277 std::vector<unsigned> rank(num_vertices(G), 0); 1278 1279 { 1280 circular_buffer<Vertex> Q(num_vertices(G)); 1281 // Compute the rank of each statement 1282 for (const Vertex u : make_iterator_range(vertices(G))) { 1283 if (out_degree(u, G) == 0 && rank[u] == 0) { 1284 Q.push_back(u); 1285 } 1286 } 1287 1288 while (Q.size() > 0) { 1289 1290 const Vertex u = Q.front(); 1291 Q.pop_front(); 1292 1293 assert (rank[u] == 0); 1294 1295 unsigned work = 0; 1296 switch (G[u]>getClassTypeId()) { 1297 case TypeId::And: 1298 case TypeId::Or: 1299 case TypeId::Xor: 1300 work = 2; 1301 break; 1302 case TypeId::Not: 1303 case TypeId::Assign: 1304 case TypeId::Next: 1305 work = 1; 1306 break; 1307 case TypeId::Sel: 1308 work = 6; 1309 break; 1310 case TypeId::Advance: 1311 case TypeId::ScanThru: 1312 work = 33; 1313 break; 1314 case TypeId::MatchStar: 1315 work = 51; 1316 break; 1317 case TypeId::If: 1318 case TypeId::While: 1319 case TypeId::Call: 1320 work = 10000; // < try to push If, While and Call nodes as high as possible 1321 break; 1322 default: break; 1323 } 1324 1325 unsigned r = 0; 1326 for (const auto e : make_iterator_range(out_edges(u, G))) { 1327 r = std::max(r, rank[target(e, G)]); 1328 } 1329 1330 rank[u] = work + r; 1331 1332 for (const auto ei : make_iterator_range(in_edges(u, G))) { 1333 const auto v = source(ei, G); 1334 assert (rank[v] == 0); 1335 bool ready = true; 1336 for (const auto ej : make_iterator_range(out_edges(v, G))) { 1337 if (rank[target(ej, G)] == 0) { 1338 ready = false; 922 923 flat_set<PabloAST *> encountered; 924 for (Statement * stmt = block>front(); stmt; ) { 925 926 assert (stmt>getParent() == block); 927 Statement * const next = stmt>getNextNode(); 928 929 bool unmoved = true; 930 for (unsigned i = 0; i != stmt>getNumOperands(); ++i) { 931 PabloAST * const op = stmt>getOperand(i); 932 if (isa<Statement>(op)) { 933 Statement * ip = cast<Statement>(op); 934 if (ip>getParent() != block) { 935 // If we haven't already encountered the Assign or Next node, it must come from a If or 936 // While node that we haven't processed yet. Scan ahead and try to locate it. 937 if (isa<Assign>(op)) { 938 for (PabloAST * user : cast<Assign>(op)>users()) { 939 if (isa<If>(user) && cast<If>(user)>getParent() == block) { 940 const auto & defs = cast<If>(user)>getDefined(); 941 if (LLVM_LIKELY(std::find(defs.begin(), defs.end(), op) != defs.end())) { 942 ip = cast<If>(user); 943 break; 944 } 945 } 946 } 947 } else if (isa<Next>(op)) { 948 for (PabloAST * user : cast<Next>(op)>users()) { 949 if (isa<While>(user) && cast<While>(user)>getParent() == block) { 950 const auto & vars = cast<While>(user)>getVariants(); 951 if (LLVM_LIKELY(std::find(vars.begin(), vars.end(), op) != vars.end())) { 952 ip = cast<While>(user); 953 break; 954 } 955 } 956 } 957 } 958 } 959 if (encountered.count(ip) == 0) { 960 if (dominates(ip, stmt)) { 961 encountered.insert(ip); 962 } else { 963 assert (ip>getParent() == block); 964 stmt>insertAfter(ip); 965 unmoved = false; 1339 966 break; 1340 967 } 1341 968 } 1342 if (ready) { 1343 Q.push_back(v); 1344 } 1345 } 1346 1347 } 1348 } 1349 1350 // Compute the initial ready set 1351 ReadySet readySet; 1352 for (const Vertex u : make_iterator_range(vertices(G))) { 1353 if (in_degree(u, G) == 0) { 1354 readySet.emplace_back(u); 1355 } 1356 } 1357 1358 auto by_nonincreasing_rank = [&rank](const Vertex u, const Vertex v) > bool { 1359 return rank[u] > rank[v]; 1360 }; 1361 1362 std::sort(readySet.begin(), readySet.end(), by_nonincreasing_rank); 1363 1364 block>setInsertPoint(nullptr); 1365 // Rewrite the AST using the bottomup ordering 1366 while (readySet.size() > 0) { 1367 // Scan through the ready set to determine which one 'kills' the greatest number of inputs 1368 double best = 0.0; 1369 auto rk = readySet.begin(); 1370 for (auto ri = readySet.begin(); ri != readySet.end(); ++ri) { 1371 double p = 0.0; 1372 assert (rank[*ri] != 0); 1373 for (auto ei : make_iterator_range(in_edges(*ri, G))) { 1374 const auto v = source(ei, G); 1375 unsigned unscheduled = 0; 1376 for (auto ej : make_iterator_range(out_edges(v, G))) { 1377 if (rank[target(ej, G)] != 0) { // if this edge leads to an unscheduled statement 1378 ++unscheduled; 1379 } 1380 } 1381 assert (unscheduled > 0); 1382 assert (unscheduled <= out_degree(v, G)); 1383 const double uses = out_degree(v, G); 1384 p += std::pow((uses  (double)(unscheduled  1)) / uses, 2); 1385 } 1386 if (p > best) { 1387 rk = ri; 1388 best = p; 1389 } 1390 } 1391 const auto u = *rk; 1392 readySet.erase(rk); 1393 // Write the statement back to the AST ... 1394 block>insert(G[u]); 1395 // ... and mark it as written 1396 rank[u] = 0; 1397 // Now check whether any new statements are ready 1398 for (auto ei : make_iterator_range(out_edges(u, G))) { 1399 bool ready = true; 1400 const auto v = target(ei, G); 1401 assert (rank[v] != 0); 1402 for (auto ej : make_iterator_range(in_edges(v, G))) { 1403 if (rank[source(ej, G)] != 0) { 1404 ready = false; 1405 break; 1406 } 1407 } 1408 if (ready) { 1409 readySet.insert(std::lower_bound(readySet.begin(), readySet.end(), v, by_nonincreasing_rank), v); 1410 assert (std::is_sorted(readySet.cbegin(), readySet.cend(), by_nonincreasing_rank)); 1411 } 1412 } 1413 } 969 } 970 } 971 if (unmoved) { 972 encountered.insert(stmt); 973 } 974 stmt = next; 975 } 976 977 // Z3_solver_dec_ref(ctx, solver); 978 // Z3_del_context(ctx); 979 1414 980 1415 981 } … … 1446 1012 } 1447 1013 1448 1449 /**  *1450 * @brief findMaximalCliques1451 *1452 * Adaptation of the BronKerbosch algorithm.1453 **  */1454 inline MultiplexingPass::CliqueSets MultiplexingPass::findMaximalCliques(const CliqueGraph & G) {1455 CliqueSets S;1456 const auto n = num_vertices(G);1457 std::vector<CliqueGraph::vertex_descriptor> ordering;1458 ordering.reserve(n);1459 for (auto u : make_iterator_range(vertices(G))) {1460 if (degree(u, G)) {1461 ordering.push_back(u);1462 }1463 }1464 CliqueSet R;1465 CliqueSet P(ordering.begin(), ordering.end());1466 CliqueSet X;1467 X.reserve(ordering.size());1468 // compute a degeneracy ordering of G1469 std::sort(ordering.begin(), ordering.end(), [&G](const CliqueGraph::vertex_descriptor i, const CliqueGraph::vertex_descriptor j){ return degree(i, G) < degree(j, G); });1470 for (auto v : ordering) {1471 R.insert(v);1472 CliqueSet PN, XN;1473 for (const auto u : make_iterator_range(adjacent_vertices(v, G))) {1474 if (P.count(u)) PN.insert(u);1475 if (X.count(u)) XN.insert(u);1476 }1477 findMaximalCliques(G, R, std::move(PN), std::move(XN), S); // ({v}, P â© N(v), X â© N(v))1478 R.clear();1479 P.erase(v);1480 X.insert(v);1481 }1482 return S;1483 }1484 1485 /**  *1486 * @brief findMaximalCliques1487 **  */1488 void MultiplexingPass::findMaximalCliques(const CliqueGraph & G, CliqueSet & R, CliqueSet && P, CliqueSet && X, CliqueSets & S) {1489 if (LLVM_UNLIKELY(P.empty() && X.empty())) { // Report R as a maximal clique1490 S.emplace(R.begin(), R.end());1491 } else {1492 // choose the pivot vertex u in P âª X as the vertex with the highest number of neighbors in P (Tomita et al. 2006.)1493 CliqueSet N;1494 CliqueGraph::degree_size_type size = 0;1495 for (const CliqueGraph::vertex_descriptor u : P) {1496 if (degree(u, G) >= size) {1497 CliqueGraph::degree_size_type neighbours = 0;1498 for (const CliqueGraph::vertex_descriptor v : make_iterator_range(adjacent_vertices(u, G))) {1499 neighbours += P.count(v);1500 }1501 if (size <= neighbours) {1502 if (size < neighbours) {1503 size = neighbours;1504 N.clear();1505 }1506 N.insert(u);1507 }1508 }1509 }1510 for (const CliqueGraph::vertex_descriptor u : X) {1511 if (degree(u, G) >= size) {1512 CliqueGraph::degree_size_type neighbours = 0;1513 for (const CliqueGraph::vertex_descriptor v : make_iterator_range(adjacent_vertices(u, G))) {1514 neighbours += P.count(v);1515 }1516 if (size <= neighbours) {1517 if (size < neighbours) {1518 size = neighbours;1519 N.clear();1520 }1521 N.insert(u);1522 }1523 }1524 }1525 const CliqueGraph::vertex_descriptor u = *(N.nth(IntDistribution(0, N.size()  1)(mRNG)));1526 // for each vertex v in P \ N(u):1527 for (auto v = P.begin(); v != P.end(); v = P.erase(v)) {1528 if (edge(u, *v, G).second) continue;1529 const bool added = R.insert(*v).second;1530 CliqueSet PN, XN;1531 for (const CliqueGraph::vertex_descriptor u : make_iterator_range(adjacent_vertices(*v, G))) {1532 if (P.count(u)) PN.insert(u);1533 if (X.count(u)) XN.insert(u);1534 }1535 findMaximalCliques(G, R, std::move(PN), std::move(XN), S); // (R âª {v}, P â© N(v), X â© N(v))1536 if (LLVM_LIKELY(added)) R.erase(*v);1537 X.insert(*v);1538 }1539 }1540 }1541 1542 1014 /**  * 1543 1015 * @brief get 1544 1016 **  */ 1545 inline BDD & MultiplexingPass::get(const PabloAST * const expr) {1017 inline Z3_ast & MultiplexingPass::get(const PabloAST * const expr, const bool deref) { 1546 1018 assert (expr); 1547 1019 auto f = mCharacterization.find(expr); 1548 1020 assert (f != mCharacterization.end()); 1549 return f>second; 1550 } 1551 1552 /**  * 1553 * @brief computeDAG 1554 **  */ 1555 template<typename Graph, typename Map> 1556 Graph construct(PabloBlock * const block, Map & M) { 1557 1558 using Vertex = typename Graph::vertex_descriptor; 1559 1560 const auto size = std::distance(block>begin(), block>end()); 1561 1562 Graph G(size); 1563 M.reserve(size); 1564 1565 Vertex u = 0; 1566 for (Statement * stmt : *block ) { 1567 G[u] = stmt; 1568 M.emplace(stmt, u); 1569 if (LLVM_UNLIKELY(isa<If>(stmt))) { 1570 for (Assign * def : cast<If>(stmt)>getDefined()) { 1571 M.emplace(def, u); 1572 } 1573 } else if (LLVM_UNLIKELY(isa<While>(stmt))) { 1574 for (Next * var : cast<While>(stmt)>getVariants()) { 1575 M.emplace(var, u); 1576 } 1577 } 1578 ++u; 1579 } 1580 1581 /// The following is a lamda function that adds any users of "stmt" to the graph after resolving 1582 /// which vertex it maps to w.r.t. the current block. 1583 auto addUsers = [&](const Vertex u, const Statement * const stmt) > void { 1584 for (const PabloAST * user : stmt>users()) { 1585 if (LLVM_LIKELY(isa<Statement>(user))) { 1586 const Statement * use = cast<Statement>(user); 1587 auto f = M.find(use); 1588 if (LLVM_UNLIKELY(f == M.end())) { 1589 const PabloBlock * parent = use>getParent(); 1590 for (;;) { 1591 if (parent == block) { 1592 break; 1593 } 1594 use = parent>getBranch(); 1595 parent = parent>getParent(); 1596 if (parent == nullptr) { 1597 return; 1598 } 1599 } 1600 f = M.find(use); 1601 assert (f != M.end()); 1602 M.emplace(use, f>second); 1603 } 1604 const auto v = f>second; 1605 if (LLVM_UNLIKELY(u != v)) { 1606 add_edge(u, v, G); 1607 } 1608 } 1609 } 1610 }; 1611 1612 u = 0; 1613 for (Statement * stmt : *block ) { 1614 1615 for (unsigned i = 0; i != stmt>getNumOperands(); ++i) { 1616 PabloAST * const op = stmt>getOperand(i); 1617 if (isa<Statement>(op)) { 1618 auto f = M.find(cast<Statement>(op)); 1619 if (f != M.end()) { 1620 add_edge(f>second, u, G); 1621 } 1622 } 1623 } 1624 1625 if (LLVM_UNLIKELY(isa<If>(stmt))) { 1626 for (Assign * def : cast<If>(stmt)>getDefined()) { 1627 addUsers(u, def); 1628 } 1629 } else if (LLVM_UNLIKELY(isa<While>(stmt))) { 1630 for (Next * var : cast<While>(stmt)>getVariants()) { 1631 addUsers(u, var); 1632 } 1633 } else { 1634 addUsers(u, stmt); 1635 } 1636 1637 ++u; 1638 } 1639 1640 #ifndef NDEBUG 1641 std::vector<Vertex> nothing; 1642 topological_sort(G, std::back_inserter(nothing)); 1643 #endif 1644 1645 return G; 1646 } 1647 1648 1649 /**  * 1650 * @brief computeDAG 1651 **  */ 1652 template<typename Graph> 1653 Graph construct(PabloBlock * const block) { 1654 using Map = flat_map<const Statement *, typename Graph::vertex_descriptor>; 1655 Map M; 1656 return construct<Graph, Map>(block, M); 1657 } 1021 auto & val = f>second; 1022 if (deref) { 1023 unsigned & refs = std::get<1>(val); 1024 assert (refs > 0); 1025 refs; 1026 } 1027 return std::get<0>(val); 1028 } 1029 1030 /**  * 1031 * @brief make 1032 **  */ 1033 inline Z3_ast MultiplexingPass::make(const PabloAST * const expr) { 1034 assert (expr); 1035 Z3_sort ty = Z3_mk_bool_sort(mContext); 1036 Z3_symbol s = Z3_mk_string_symbol(mContext, nullptr); // expr>getName()>to_string().c_str() 1037 Z3_ast node = Z3_mk_const(mContext, s, ty); 1038 Z3_inc_ref(mContext, node); 1039 return add(expr, node); 1040 } 1041 1042 /**  * 1043 * @brief add 1044 **  */ 1045 inline Z3_ast MultiplexingPass::add(const PabloAST * const expr, Z3_ast node) { 1046 mCharacterization.insert(std::make_pair(expr, std::make_pair(node, expr>getNumUses()))); 1047 return node; 1048 } 1049 1050 /**  * 1051 * @brief constructor 1052 **  */ 1053 inline MultiplexingPass::MultiplexingPass(PabloFunction & f, const RNG::result_type seed, Z3_context context, Z3_solver solver) 1054 : mContext(context) 1055 , mSolver(solver) 1056 , mFunction(f) 1057 , mRNG(seed) 1058 , mConstraintGraph(0) 1059 { 1060 1061 } 1062 1658 1063 1659 1064 } // end of namespace pablo 
icGREP/icgrepdevel/icgrep/pablo/optimizers/pablo_automultiplexing.hpp
r4983 r5112 5 5 #include <util/slab_allocator.h> 6 6 #include <boost/graph/adjacency_list.hpp> 7 #include <boost/type_traits/ice.hpp> 7 8 #include <boost/graph/adjacency_matrix.hpp> 8 9 #include <boost/container/flat_set.hpp> … … 13 14 #include <llvm/ADT/DenseMap.h> 14 15 #include <llvm/ADT/DenseSet.h> 15 16 typedef int BDD; 16 #include <llvm/ADT/SmallVector.h> 17 #include <z3.h> 18 #include <stack> 17 19 18 20 namespace pablo { … … 23 25 class MultiplexingPass { 24 26 25 using CharacterizationMap = llvm::DenseMap<const PabloAST *, BDD>; 27 using CharacterizationRef = std::pair<Z3_ast, unsigned>; 28 using CharacterizationMap = llvm::DenseMap<const PabloAST *, CharacterizationRef>; 26 29 27 using ConstraintGraph = boost::adjacency_matrix<boost::directedS, boost::no_property, bool>;30 using ConstraintGraph = boost::adjacency_matrix<boost::directedS, Advance *>; 28 31 using ConstraintVertex = ConstraintGraph::vertex_descriptor; 29 32 using Constraints = std::vector<ConstraintVertex>; 33 using ConstraintMap = boost::container::flat_map<Advance *, ConstraintVertex>; 30 34 31 35 using RNG = std::mt19937; … … 43 47 using AdvanceVector = std::vector<Advance *>; 44 48 using AdvanceRank = std::vector<int>; 45 using AdvanceVariable = std::vector< BDD>;49 using AdvanceVariable = std::vector<Z3_ast>; 46 50 47 51 public: 48 52 49 static bool optimize(PabloFunction & function, const bool independent = false); 50 #ifdef PRINT_TIMING_INFORMATION 51 using seed_t = RNG::result_type; 52 static seed_t SEED; 53 static unsigned NODES_ALLOCATED; 54 static unsigned NODES_USED; 55 #endif 53 static bool optimize(PabloFunction & function); 54 56 55 protected: 57 56 58 unsigned initialize(PabloFunction & function, const bool independent); 59 void initializeBaseConstraintGraph(PabloBlock * const block, const unsigned statements, const unsigned advances); 60 void initializeAdvanceDepth(PabloBlock * const block, const unsigned advances) ; 57 unsigned initialize(PabloFunction & function); 58 Statement * initialize(Statement * const initial); 61 59 60 void reset(); 61 62 void characterize(PabloFunction & function); 62 63 void characterize(PabloBlock * const block); 63 BDD characterize(Statement * const stmt); 64 BDD characterize(Advance * const adv, const BDD Ik); 65 bool independent(const ConstraintVertex i, const ConstraintVertex j) const; 66 bool exceedsWindowSize(const ConstraintVertex i, const ConstraintVertex j) const; 67 68 void generateUsageWeightingGraph(); 69 CliqueSets findMaximalCliques(const CliqueGraph & G); 70 void findMaximalCliques(const CliqueGraph & G, CliqueSet & R, CliqueSet && P, CliqueSet && X, CliqueSets & S); 64 Z3_ast characterize(Statement * const stmt); 65 Z3_ast characterize(Advance * const adv, Z3_ast Ik); 66 void multiplex(PabloBlock * const block); 71 67 72 68 bool generateCandidateSets(); … … 78 74 void selectMultiplexSetsWorkingSet(); 79 75 80 void removePotentialCycles(const CandidateGraph::vertex_descriptor u);81 bool dependent(const ConstraintVertex i, const ConstraintVertex j) const;82 83 76 void eliminateSubsetConstraints(); 84 77 void doTransitiveReductionOfSubsetGraph(); 85 78 86 Candidates orderMultiplexSet(const CandidateGraph::vertex_descriptor u); 87 void multiplexSelectedSets(PabloFunction & function); 79 void multiplexSelectedSets(PabloBlock * const block); 88 80 89 static void rewriteAST(PabloBlock * const block);90 81 91 BDD & get(const PabloAST * const expr); 82 Z3_ast make(const PabloAST * const expr); 83 Z3_ast add(const PabloAST * const expr, Z3_ast node); 84 Z3_ast & get(const PabloAST * const expr, const bool deref = false); 85 bool equals(Z3_ast a, Z3_ast b); 92 86 93 inline MultiplexingPass(const RNG::result_type seed) 94 : mTestConstrainedAdvances(true) 95 , mSubsetImplicationsAdhereToWindowingSizeConstraint(false) 96 , mVariables(0) 97 , mRNG(seed) 98 , mConstraintGraph(0) 99 , mAdvance(0, nullptr) 100 , mAdvanceRank(0, 0) 101 , mAdvanceNegatedVariable(0, 0) 102 { 103 104 } 87 MultiplexingPass(PabloFunction & f, const RNG::result_type seed, Z3_context context, Z3_solver solver); 105 88 106 89 private: 107 const bool mTestConstrainedAdvances; 108 const bool mSubsetImplicationsAdhereToWindowingSizeConstraint; 109 unsigned mVariables; 90 91 Z3_context mContext; 92 Z3_solver mSolver; 93 PabloFunction & mFunction; 110 94 RNG mRNG; 95 111 96 CharacterizationMap mCharacterization; 112 ConstraintGraph mConstraintGraph; 113 AdvanceVector mAdvance; 114 AdvanceRank mAdvanceRank; 97 ConstraintGraph mConstraintGraph; 98 115 99 AdvanceVariable mAdvanceNegatedVariable; 116 100 SubsetGraph mSubsetGraph; 117 CliqueGraph mUsageGraph;118 101 CandidateGraph mCandidateGraph; 119 102 };
Note: See TracChangeset
for help on using the changeset viewer.