Ignore:
Timestamp:
Mar 7, 2016, 3:37:30 PM (3 years ago)
Author:
nmedfort
Message:

Initial modifications to Pablo Compiler and Kernel Builder to support circular buffers for Lookahead.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r4954 r4959  
    7878static cl::opt<bool> PrintUTF8REs("print-utf8-REs", cl::init(false), cl::desc("print out UTF-8 REs"), cl::cat(cRegexOutputOptions));
    7979static cl::opt<bool> PrintSimplifiedREs("print-simplified-REs", cl::init(false), cl::desc("print out final simplified REs"), cl::cat(cRegexOutputOptions));
    80 static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options",
    81                                             "These options control printing of intermediate Pablo code.");
     80
     81static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options", "These options control printing of intermediate Pablo code.");
    8282
    8383static cl::opt<bool> PrintOptimizedREcode("print-pablo", cl::init(false), cl::desc("print final optimized Pablo code"), cl::cat(dPabloDumpOptions));
    8484static cl::opt<bool> PrintCompiledCCcode("print-CC-pablo", cl::init(false), cl::desc("print Pablo output from character class compiler"), cl::cat(dPabloDumpOptions));
    8585static cl::opt<bool> PrintCompiledREcode("print-RE-pablo", cl::init(false), cl::desc("print Pablo output from the regular expression compiler"), cl::cat(dPabloDumpOptions));
     86static cl::opt<std::string> PabloOutputFilename("print-pablo-output", cl::init(""), cl::desc("output Pablo filename"), cl::cat(dPabloDumpOptions));
     87
     88static cl::OptionCategory cMachineCodeOptimization("Machine Code Optimizations", "These options control back-end compilier optimization levels.");
     89
     90
     91static cl::opt<char> OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O0')"),
     92                              cl::cat(cMachineCodeOptimization), cl::Prefix, cl::ZeroOrMore, cl::init('0'));
    8693
    8794static cl::OptionCategory cPabloOptimizationsOptions("Pablo Optimizations", "These options control Pablo optimization passes.");
    8895
    89 static cl::opt<bool> DisablePabloCSE("disable-CSE", cl::init(false),
    90                                      cl::desc("Disable Pablo common subexpression elimination/dead code elimination"),
     96static cl::opt<bool> DisableSimplification("disable-simplification", cl::init(false),
     97                                     cl::desc("Disable Pablo Simplification pass (not recommended)"),
    9198                                     cl::cat(cPabloOptimizationsOptions));
     99
    92100static cl::opt<bool> PabloSinkingPass("sinking", cl::init(false),
    93101                                      cl::desc("Moves all instructions into the innermost legal If-scope so that they are only executed when needed."),
     
    104112                                         cl::desc("coalesce associative functions prior to optimization passes."),
    105113                                         cl::cat(cPabloOptimizationsOptions));
     114
    106115static cl::opt<bool> EnablePreDistribution("pre-dist", cl::init(false),
    107                                          cl::desc("apply distribution law optimization."),
     116                                         cl::desc("apply distribution law optimization prior to multiplexing."),
    108117                                         cl::cat(cPabloOptimizationsOptions));
     118
    109119static cl::opt<bool> EnablePostDistribution("post-dist", cl::init(false),
    110                                          cl::desc("apply distribution law optimization."),
     120                                         cl::desc("apply distribution law optimization after multiplexing."),
     121                                         cl::cat(cPabloOptimizationsOptions));
     122
     123static cl::opt<bool> EnablePrePassScheduling("pre-pass-scheduling", cl::init(false),
     124                                         cl::desc("apply pre-pass scheduling prior to LLVM IR generation."),
    111125                                         cl::cat(cPabloOptimizationsOptions));
    112126#endif
     
    162176}
    163177
     178#ifdef PRINT_TIMING_INFORMATION
     179#define READ_CYCLE_COUNTER(name) name = read_cycle_counter();
     180#else
     181#define READ_CYCLE_COUNTER(name)
     182#endif
     183
     184#ifdef PRINT_TIMING_INFORMATION
     185unsigned COUNT_STATEMENTS(const PabloFunction * const entry) {
     186    std::stack<const Statement *> scope;
     187    unsigned statements = 0;
     188    // Walk every statement of the function, descending into If/While bodies, and count each one (the If/While statements themselves included).
     189    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
     190        while ( stmt ) {
     191            ++statements;
     192            if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     193                // Set the next statement to be the first statement of the inner scope and push the
     194                // next statement of the current statement into the scope stack.
     195                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
     196                scope.push(stmt->getNextNode());
     197                stmt = nested->front();
     198                assert (stmt);
     199                continue;
     200            }
     201            stmt = stmt->getNextNode();
     202        }
     203        if (scope.empty()) {
     204            break;
     205        }
     206        stmt = scope.top();
     207        scope.pop();
     208    }
     209    return statements;
     210}
     211
     212unsigned COUNT_ADVANCES(const PabloFunction * const entry) {
     213
     214    std::stack<const Statement *> scope;
     215    unsigned advances = 0;
     216
     217    // Walk every statement of the function, descending into If/While bodies, and count only the Advance statements.
     218    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
     219        while ( stmt ) {
     220            if (isa<Advance>(stmt)) {
     221                ++advances;
     222            }
     223            else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     224                // Set the next statement to be the first statement of the inner scope and push the
     225                // next statement of the current statement into the scope stack.
     226                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
     227                scope.push(stmt->getNextNode());
     228                stmt = nested->front();
     229                assert (stmt);
     230                continue;
     231            }
     232            stmt = stmt->getNextNode();
     233        }
     234        if (scope.empty()) {
     235            break;
     236        }
     237        stmt = scope.top();
     238        scope.pop();
     239    }
     240    return advances;
     241}
     242
     243using DistributionMap = boost::container::flat_map<unsigned, unsigned>;
     244
     245DistributionMap SUMMARIZE_VARIADIC_DISTRIBUTION(const PabloFunction * const entry) {
     246    std::stack<const Statement *> scope;
     247    DistributionMap distribution;
     248    // Walk every statement of the function, descending into If/While bodies, and tally Variadic statements by operand count (operand count -> number of statements).
     249    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
     250        while ( stmt ) {
     251            if (isa<Variadic>(stmt)) {
     252                auto f = distribution.find(stmt->getNumOperands());
     253                if (f == distribution.end()) {
     254                    distribution.emplace(stmt->getNumOperands(), 1);
     255                } else {
     256                    f->second += 1;
     257                }
     258            }
     259            else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     260                // Set the next statement to be the first statement of the inner scope and push the
     261                // next statement of the current statement into the scope stack.
     262                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
     263                scope.push(stmt->getNextNode());
     264                stmt = nested->front();
     265                assert (stmt);
     266                continue;
     267            }
     268            stmt = stmt->getNextNode();
     269        }
     270        if (scope.empty()) {
     271            break;
     272        }
     273        stmt = scope.top();
     274        scope.pop();
     275    }
     276    return distribution;
     277}
     278#endif
     279
    164280void pablo_function_passes(PabloFunction * function) {
    165281    // Scan through the pablo code and perform DCE and CSE
    166     if (!DisablePabloCSE) {
     282
     283#ifdef PRINT_TIMING_INFORMATION
     284    timestamp_t simplification_start = 0, simplification_end = 0;
     285    timestamp_t coalescing_start = 0, coalescing_end = 0;
     286    timestamp_t sinking_start = 0, sinking_end = 0;
     287    timestamp_t pre_distribution_start = 0, pre_distribution_end = 0;
     288    timestamp_t multiplexing_start = 0, multiplexing_end = 0;
     289    timestamp_t post_distribution_start = 0, post_distribution_end = 0;
     290    timestamp_t lowering_start = 0, lowering_end = 0;
     291    timestamp_t scheduling_start = 0, scheduling_end = 0;
     292    DistributionMap distribution;
     293    const timestamp_t optimization_start = read_cycle_counter();
     294#endif
     295    if (!DisableSimplification) {
     296        READ_CYCLE_COUNTER(simplification_start);
    167297        Simplifier::optimize(*function);
     298        READ_CYCLE_COUNTER(simplification_end);
    168299    }
    169300#ifdef ENABLE_MULTIPLEXING
    170     if (EnableLowering || EnablePreDistribution || EnablePostDistribution || EnableMultiplexing) {
    171         FlattenAssociativeDFG::transform(*function);
     301    if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
     302        READ_CYCLE_COUNTER(coalescing_start);
     303        CanonicalizeDFG::transform(*function);
     304        READ_CYCLE_COUNTER(coalescing_end);
     305    }
     306    if (EnablePreDistribution) {
     307        READ_CYCLE_COUNTER(pre_distribution_start);
     308        DistributivePass::optimize(*function);
     309        READ_CYCLE_COUNTER(pre_distribution_end);
     310    }
     311    if (EnableMultiplexing) {
     312        READ_CYCLE_COUNTER(multiplexing_start);
     313        MultiplexingPass::optimize(*function);
     314        READ_CYCLE_COUNTER(multiplexing_end);
     315        if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
     316            CanonicalizeDFG::transform(*function);
     317        }
     318    }
     319    if (EnablePostDistribution) {
     320        READ_CYCLE_COUNTER(post_distribution_start);
     321        DistributivePass::optimize(*function);
     322        READ_CYCLE_COUNTER(post_distribution_end);
    172323    }
    173324#endif
    174325    if (PabloSinkingPass) {
     326        READ_CYCLE_COUNTER(sinking_start);
    175327        CodeMotionPass::optimize(*function);
    176     }
    177 #ifdef ENABLE_MULTIPLEXING   
    178     if (EnablePreDistribution) {
    179         DistributivePass::optimize(*function);
    180     }
    181     if (EnableMultiplexing) {
    182         MultiplexingPass::optimize(*function);
    183     }
    184     if (EnablePostDistribution) {
    185         DistributivePass::optimize(*function);
    186     }
    187     SchedulingPrePass::optimize(*function);
     328        READ_CYCLE_COUNTER(sinking_end);
     329    }
     330#ifdef ENABLE_MULTIPLEXING
    188331    if (PrintUnloweredCode) {
    189332        //Print to the terminal the AST that was generated by the parallel bit-stream compiler.
     
    192335        PabloPrinter::print(*function, cerr);
    193336    }
    194     if (EnableLowering || EnablePreDistribution || EnablePostDistribution || EnableMultiplexing) {
     337    #ifdef PRINT_TIMING_INFORMATION
     338    distribution = SUMMARIZE_VARIADIC_DISTRIBUTION(function);
     339    #endif
     340    if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
     341        READ_CYCLE_COUNTER(lowering_start);
    195342        FactorizeDFG::transform(*function);
    196     }
     343        READ_CYCLE_COUNTER(lowering_end);
     344    }
     345    if (EnablePrePassScheduling) {
     346        READ_CYCLE_COUNTER(scheduling_start);
     347        SchedulingPrePass::optimize(*function);
     348        READ_CYCLE_COUNTER(scheduling_end);
     349    }
     350#endif
     351#ifdef PRINT_TIMING_INFORMATION
     352    const timestamp_t optimization_end = read_cycle_counter();
    197353#endif
    198354    if (PrintOptimizedREcode) {
    199         PabloVerifier::verify(*function, "post-optimization");
    200         //Print to the terminal the AST that was generated by the parallel bit-stream compiler.
    201         llvm::raw_os_ostream cerr(std::cerr);
    202         cerr << "Final Pablo AST:\n";
    203         PabloPrinter::print(*function, cerr);
    204     }
     355        if (PabloOutputFilename.empty()) {
     356            //Print to the terminal the AST that was generated by the parallel bit-stream compiler.
     357            llvm::raw_os_ostream cerr(std::cerr);
     358            cerr << "Final Pablo AST:\n";
     359            PabloPrinter::print(*function, cerr);
     360        } else {
     361            std::error_code error;
     362            llvm::raw_fd_ostream out(PabloOutputFilename, error, sys::fs::OpenFlags::F_None);
     363            PabloPrinter::print(*function, out);
     364        }
     365    }
     366#ifdef PRINT_TIMING_INFORMATION
     367    std::cerr << "PABLO OPTIMIZATION TIME: " << (optimization_end - optimization_start) << std::endl;
     368    std::cerr << "  SIMPLIFICATION TIME: " << (simplification_end - simplification_start) << std::endl;
     369    std::cerr << "  COALESCING TIME: " << (coalescing_end - coalescing_start) << std::endl;
     370    std::cerr << "  SINKING TIME: " << (sinking_end - sinking_start) << std::endl;
     371    std::cerr << "  PRE-DISTRIBUTION TIME: " << (pre_distribution_end - pre_distribution_start) << std::endl;
     372    std::cerr << "  MULTIPLEXING TIME: " << (multiplexing_end - multiplexing_start) << std::endl;
     373    std::cerr << "  MULTIPLEXING SEED: " << MultiplexingPass::SEED << std::endl;
     374    std::cerr << "  MULTIPLEXING NODES USED: " << MultiplexingPass::NODES_USED << std::endl;
     375    std::cerr << "  MULTIPLEXING NODES ALLOCATED: " << MultiplexingPass::NODES_ALLOCATED << std::endl;
     376    std::cerr << "  LOWERING TIME: " << (lowering_end - lowering_start) << std::endl;
     377    std::cerr << "  POST-DISTRIBUTION TIME: " << (post_distribution_end - post_distribution_start) << std::endl;
     378    std::cerr << "  SCHEDULING TIME: " << (scheduling_end - scheduling_start) << std::endl;
     379    std::cerr << "PABLO STATEMENTS: " << COUNT_STATEMENTS(function) << std::endl;
     380    std::cerr << "PABLO ADVANCES: " << COUNT_ADVANCES(function) << std::endl;
     381    std::cerr << "PRE-LOWERING VARIADIC DISTRIBUTION: ";
     382    bool join = false;
     383    for (auto dist : distribution) {
     384        if (join) {
     385            std::cerr << ';';
     386        }
     387        std::cerr << dist.first << '|' << dist.second;
     388        join = true;
     389    }
     390    std::cerr << std::endl;
     391#endif
    205392}
    206393
     
    242429    builder.setErrorStr(&errMessage);
    243430    builder.setMCPU(sys::getHostCPUName());
    244     builder.setOptLevel(CodeGenOpt::Level::None);
     431    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
     432    switch (OptLevel) {
     433        case '0': optLevel = CodeGenOpt::None; break;
     434        case '1': optLevel = CodeGenOpt::Less; break;
     435        case '2': optLevel = CodeGenOpt::Default; break;
     436        case '3': optLevel = CodeGenOpt::Aggressive; break;
     437        default: errs() << OptLevel << " is an invalid optimization level.\n";
     438    }
     439    builder.setOptLevel(optLevel);
    245440
    246441    if (!DisableAVX2 && (strncmp(lGetSystemISA(), "avx2", 4) == 0)) {
Note: See TracChangeset for help on using the changeset viewer.