Ignore:
Timestamp:
Jan 21, 2016, 5:15:33 PM (3 years ago)
Author:
nmedfort
Message:

Work on lowering + some timing and papi information that will be cleaned up later.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r4909 r4919  
    2222#include <llvm/Support/TargetSelect.h>
    2323#include <llvm/Support/Host.h>
     24#include <llvm/Support/FileSystem.h>
     25
    2426
    2527#include <IDISA/idisa_avx_builder.h>
     
    5355#include <pablo/printer_pablos.h>
    5456
    55 #include "do_grep.h"
     57#include <hrtime.h>
     58#include <do_grep.h>
    5659
    5760using namespace pablo;
     
    6568static cl::opt<bool> PrintUTF8REs("print-utf8-REs", cl::init(false), cl::desc("print out UTF-8 REs"), cl::cat(cRegexOutputOptions));
    6669static cl::opt<bool> PrintSimplifiedREs("print-simplified-REs", cl::init(false), cl::desc("print out final simplified REs"), cl::cat(cRegexOutputOptions));
    67 static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options",
    68                                             "These options control printing of intermediate Pablo code.");
     70static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options", "These options control printing of intermediate Pablo code.");
    6971
    7072static cl::opt<bool> PrintOptimizedREcode("print-pablo", cl::init(false), cl::desc("print final optimized Pablo code"), cl::cat(dPabloDumpOptions));
    7173static cl::opt<bool> PrintCompiledCCcode("print-CC-pablo", cl::init(false), cl::desc("print Pablo output from character class compiler"), cl::cat(dPabloDumpOptions));
    7274static cl::opt<bool> PrintCompiledREcode("print-RE-pablo", cl::init(false), cl::desc("print Pablo output from the regular expression compiler"), cl::cat(dPabloDumpOptions));
     75static cl::opt<std::string> PabloOutputFilename("print-pablo-output", cl::init(""), cl::desc("output Pablo filename"), cl::cat(dPabloDumpOptions));
    7376
    7477static cl::OptionCategory cPabloOptimizationsOptions("Pablo Optimizations", "These options control Pablo optimization passes.");
    7578
    76 static cl::opt<bool> DisablePabloCSE("disable-CSE", cl::init(false),
    77                                      cl::desc("Disable Pablo common subexpression elimination/dead code elimination"),
     79static cl::opt<bool> DisableSimplification("disable-simplification", cl::init(false),
     80                                     cl::desc("Disable Pablo Simplification pass (not recommended)"),
    7881                                     cl::cat(cPabloOptimizationsOptions));
     82
    7983static cl::opt<bool> PabloSinkingPass("sinking", cl::init(false),
    8084                                      cl::desc("Moves all instructions into the innermost legal If-scope so that they are only executed when needed."),
    8185                                      cl::cat(cPabloOptimizationsOptions));
    8286
     87static cl::OptionCategory cMachineCodeOptimization("Machine Code Optimizations", "These options control back-end compilier optimization levels.");
     88
     89
     90static cl::opt<char> OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O0')"),
     91                              cl::cat(cMachineCodeOptimization), cl::Prefix, cl::ZeroOrMore, cl::init('0'));
     92
    8393#ifdef ENABLE_MULTIPLEXING
    8494static cl::opt<bool> PrintUnloweredCode("print-unlowered-pablo", cl::init(false), cl::desc("print Pablo output prior to lowering. "), cl::cat(dPabloDumpOptions));
     
    91101                                        cl::desc("maximum size of any candidate multiplexing set."),
    92102                                        cl::cat(cPabloOptimizationsOptions));
     103
    93104static cl::opt<unsigned> MultiplexingSelectionLimit("multiplexing-selection-limit", cl::init(100),
    94105                                        cl::desc("maximum number of selections from any partial candidate multiplexing set."),
    95106                                        cl::cat(cPabloOptimizationsOptions));
     107
    96108static cl::opt<unsigned> MultiplexingWindowSize("multiplexing-window-size", cl::init(1),
    97109                                        cl::desc("maximum depth difference for computing mutual exclusion of Advance nodes."),
     
    101113                                         cl::desc("coalesce associative functions prior to optimization passes."),
    102114                                         cl::cat(cPabloOptimizationsOptions));
     115
    103116static cl::opt<bool> EnablePreDistribution("pre-dist", cl::init(false),
    104                                          cl::desc("apply distribution law optimization."),
     117                                         cl::desc("apply distribution law optimization prior to multiplexing."),
    105118                                         cl::cat(cPabloOptimizationsOptions));
     119
    106120static cl::opt<bool> EnablePostDistribution("post-dist", cl::init(false),
    107                                          cl::desc("apply distribution law optimization."),
     121                                         cl::desc("apply distribution law optimization after multiplexing."),
     122                                         cl::cat(cPabloOptimizationsOptions));
     123
     124static cl::opt<bool> EnablePrePassScheduling("pre-pass-scheduling", cl::init(false),
     125                                         cl::desc("apply pre-pass scheduling prior to LLVM IR generation."),
    108126                                         cl::cat(cPabloOptimizationsOptions));
    109127#endif
     
    154172}
    155173
    156 void pablo_function_passes(PabloFunction * function) {
     174#ifdef PRINT_TIMING_INFORMATION
        // With timing enabled, READ_CYCLE_COUNTER(name) stores the current read_cycle_counter() value into `name`.
        // The expansion already ends in ';', so a call site written as READ_CYCLE_COUNTER(x); leaves a harmless empty statement.
     175#define READ_CYCLE_COUNTER(name) name = read_cycle_counter();
     176#else
        // With timing disabled the macro expands to nothing, so the timestamp variables need not exist.
     177#define READ_CYCLE_COUNTER(name)
     178#endif
     179
     180#ifdef PRINT_TIMING_INFORMATION
     181unsigned COUNT_STATEMENTS(const PabloFunction * const entry) {
     182    std::stack<const Statement *> scope;
     183    unsigned statements = 0;
     184    // Walk every statement reachable from the entry block, descending into If and While bodies, and count each one (the If/While statements themselves are counted too). Returns the total.
     185    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
     186        while ( stmt ) {
     187            ++statements;
     188            if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     189                // Set the next statement to be the first statement of the inner scope and push the
     190                // next statement of the current statement into the scope stack.
     191                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
     192                scope.push(stmt->getNextNode());
     193                stmt = nested->front();
                        // Nested bodies are expected to be non-empty.
     194                assert (stmt);
                        // Skip the getNextNode() step below so the first nested statement is itself visited.
     195                continue;
     196            }
     197            stmt = stmt->getNextNode();
     198        }
                // Current scope exhausted: stop if no enclosing scope is pending, otherwise resume at the
                // statement that was saved when the nested scope was entered.
     199        if (scope.empty()) {
     200            break;
     201        }
     202        stmt = scope.top();
     203        scope.pop();
     204    }
     205    return statements;
     206}
     207
     208unsigned COUNT_ADVANCES(const PabloFunction * const entry) {
     209
     210    std::stack<const Statement *> scope;
     211    unsigned advances = 0;
     212
     213    // Walk every statement reachable from the entry block, descending into If and While bodies, and count only the Advance statements. Returns that count.
     214    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
     215        while ( stmt ) {
     216            if (isa<Advance>(stmt)) {
     217                ++advances;
     218            }
     219            else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     220                // Set the next statement to be the first statement of the inner scope and push the
     221                // next statement of the current statement into the scope stack.
     222                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
     223                scope.push(stmt->getNextNode());
     224                stmt = nested->front();
                        // Nested bodies are expected to be non-empty.
     225                assert (stmt);
                        // Skip the getNextNode() step below so the first nested statement is itself visited.
     226                continue;
     227            }
     228            stmt = stmt->getNextNode();
     229        }
                // Current scope exhausted: stop if no enclosing scope is pending, otherwise resume at the
                // statement that was saved when the nested scope was entered.
     230        if (scope.empty()) {
     231            break;
     232        }
     233        stmt = scope.top();
     234        scope.pop();
     235    }
     236    return advances;
     237}
     238
     239using DistributionMap = boost::container::flat_map<unsigned, unsigned>;
     240
     241DistributionMap SUMMARIZE_VARIADIC_DISTRIBUTION(const PabloFunction * const entry) {
     242    std::stack<const Statement *> scope;
     243    DistributionMap distribution;
     244    // Walk every statement reachable from the entry block, descending into If and While bodies, and build a histogram of Variadic statements: key = operand count, value = number of Variadic statements with that many operands.
     245    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
     246        while ( stmt ) {
     247            if (isa<Variadic>(stmt)) {
                        // First time this operand count is seen: start its bucket at 1; otherwise bump the existing bucket.
     248                auto f = distribution.find(stmt->getNumOperands());
     249                if (f == distribution.end()) {
     250                    distribution.emplace(stmt->getNumOperands(), 1);
     251                } else {
     252                    f->second += 1;
     253                }
     254            }
     255            else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
     256                // Set the next statement to be the first statement of the inner scope and push the
     257                // next statement of the current statement into the scope stack.
     258                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
     259                scope.push(stmt->getNextNode());
     260                stmt = nested->front();
                        // Nested bodies are expected to be non-empty.
     261                assert (stmt);
                        // Skip the getNextNode() step below so the first nested statement is itself visited.
     262                continue;
     263            }
     264            stmt = stmt->getNextNode();
     265        }
                // Current scope exhausted: stop if no enclosing scope is pending, otherwise resume at the
                // statement that was saved when the nested scope was entered.
     266        if (scope.empty()) {
     267            break;
     268        }
     269        stmt = scope.top();
     270        scope.pop();
     271    }
     272    return distribution;
     273}
     274#endif
     275
     276void pablo_function_passes(PabloFunction * function) {   
            // Runs the Pablo optimization passes on `function`, each gated by its command-line flag.
            // Order on the new (r4919) side of this diff: simplification; then, in ENABLE_MULTIPLEXING builds,
            // coalescing, pre-distribution, multiplexing (followed by a re-coalesce) and post-distribution;
            // then sinking; then, again in ENABLE_MULTIPLEXING builds, the optional unlowered dump, lowering
            // (FactorizeDFG) and pre-pass scheduling. Afterwards the final Pablo code is optionally printed and,
            // when PRINT_TIMING_INFORMATION is defined, per-pass cycle counts and statement statistics go to std::cerr.
    157277    // Scan through the pablo code and perform DCE and CSE
    158     if (!DisablePabloCSE) {
     278
     279#ifdef PRINT_TIMING_INFORMATION
            // Start/end cycle stamps for each pass. All start at 0, so a pass that is skipped reports a time of 0.
     280    timestamp_t simplification_start = 0, simplification_end = 0;
     281    timestamp_t coalescing_start = 0, coalescing_end = 0;
     282    timestamp_t sinking_start = 0, sinking_end = 0;
     283    timestamp_t pre_distribution_start = 0, pre_distribution_end = 0;
     284    timestamp_t multiplexing_start = 0, multiplexing_end = 0;
     285    timestamp_t post_distribution_start = 0, post_distribution_end = 0;
     286    timestamp_t lowering_start = 0, lowering_end = 0;
     287    timestamp_t scheduling_start = 0, scheduling_end = 0;
     288    DistributionMap distribution;
     289    const timestamp_t optimization_start = read_cycle_counter();
     290#endif
     291    if (!DisableSimplification) {
     292        READ_CYCLE_COUNTER(simplification_start);
    159293        Simplifier::optimize(*function);
     294        READ_CYCLE_COUNTER(simplification_end);
    160295    }
    161296#ifdef ENABLE_MULTIPLEXING
    162     if (EnableLowering || EnablePreDistribution || EnablePostDistribution || EnableMultiplexing) {
    163         FlattenAssociativeDFG::transform(*function);
     297    if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
     298        READ_CYCLE_COUNTER(coalescing_start);
     299        CoalesceDFG::transform(*function);
     300        READ_CYCLE_COUNTER(coalescing_end);
     301    }
     302    if (EnablePreDistribution) {
     303        READ_CYCLE_COUNTER(pre_distribution_start);
     304        DistributivePass::optimize(*function);
     305        READ_CYCLE_COUNTER(pre_distribution_end);
     306    }
     307    if (EnableMultiplexing) {
     308        READ_CYCLE_COUNTER(multiplexing_start);
     309        MultiplexingPass::optimize(*function, MultiplexingSetLimit, MultiplexingSelectionLimit, MultiplexingWindowSize);
     310        READ_CYCLE_COUNTER(multiplexing_end);
                // Coalesce again after multiplexing. This second run sits outside both timed regions, so it is
                // counted in neither MULTIPLEXING TIME nor COALESCING TIME (only in the overall total).
     311        if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
     312            CoalesceDFG::transform(*function);
     313        }
     314    }
     315    if (EnablePostDistribution) {
     316        READ_CYCLE_COUNTER(post_distribution_start);
     317        DistributivePass::optimize(*function);
     318        READ_CYCLE_COUNTER(post_distribution_end);
    164319    }
    165320#endif
    166321    if (PabloSinkingPass) {
     322        READ_CYCLE_COUNTER(sinking_start);
    167323        CodeMotionPass::optimize(*function);
    168     }
    169 #ifdef ENABLE_MULTIPLEXING   
    170     if (EnablePreDistribution) {
    171         DistributivePass::optimize(*function);
    172     }
    173     if (EnableMultiplexing) {
    174         MultiplexingPass::optimize(*function, MultiplexingSetLimit, MultiplexingSelectionLimit, MultiplexingWindowSize);
    175     }
    176     if (EnablePostDistribution) {
    177         DistributivePass::optimize(*function);
    178     }
    179     SchedulingPrePass::optimize(*function);
     324        READ_CYCLE_COUNTER(sinking_end);
     325    }
     326#ifdef ENABLE_MULTIPLEXING
    180327    if (PrintUnloweredCode) {
    181328        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
     
    183330        cerr << "Unlowered Pablo AST:\n";
    184331        PabloPrinter::print(*function, cerr);
    185     }
    186     if (EnableLowering || EnablePreDistribution || EnablePostDistribution || EnableMultiplexing) {
     332    }   
            // Snapshot the variadic operand-count histogram before FactorizeDFG runs; it is reported below
            // as "PRE-LOWERING VARIADIC DISTRIBUTION".
     333    #ifdef PRINT_TIMING_INFORMATION
     334    distribution = SUMMARIZE_VARIADIC_DISTRIBUTION(function);
     335    #endif
     336    if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
     337        READ_CYCLE_COUNTER(lowering_start);
    187338        FactorizeDFG::transform(*function);
    188     }
     339        READ_CYCLE_COUNTER(lowering_end);
     340    }
     341    if (EnablePrePassScheduling) {
     342        READ_CYCLE_COUNTER(scheduling_start);
     343        SchedulingPrePass::optimize(*function);
     344        READ_CYCLE_COUNTER(scheduling_end);
     345    }
     346#endif
     347#ifdef PRINT_TIMING_INFORMATION
     348    const timestamp_t optimization_end = read_cycle_counter();
    189349#endif
    190350    if (PrintOptimizedREcode) {
    191         PabloVerifier::verify(*function, "post-optimization");
    192         //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
    193         llvm::raw_os_ostream cerr(std::cerr);
    194         cerr << "Final Pablo AST:\n";
    195         PabloPrinter::print(*function, cerr);
    196     }
                // With no output filename the final Pablo code goes to stderr; otherwise it is written to that file.
     351        if (PabloOutputFilename.empty()) {
     352            //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
     353            llvm::raw_os_ostream cerr(std::cerr);
     354            cerr << "Final Pablo AST:\n";
     355            PabloPrinter::print(*function, cerr);
     356        } else {
                    // NOTE(review): `error` is never inspected after the stream is constructed, so a failure
                    // to open the file is not reported here.
     357            std::error_code error;
     358            llvm::raw_fd_ostream out(PabloOutputFilename, error, sys::fs::OpenFlags::F_None);
     359            PabloPrinter::print(*function, out);
     360        }
     361    }
     362#ifdef PRINT_TIMING_INFORMATION
            // NOTE(review): this report is guarded only by PRINT_TIMING_INFORMATION, yet it reads MultiplexingPass
            // statics and `distribution`, which is only filled inside the ENABLE_MULTIPLEXING block above.
            // Confirm that timing builds always define ENABLE_MULTIPLEXING as well.
     363    std::cerr << "PABLO OPTIMIZATION TIME: " << (optimization_end - optimization_start) << std::endl;
     364    std::cerr << "  SIMPLIFICATION TIME: " << (simplification_end - simplification_start) << std::endl;
     365    std::cerr << "  COALESCING TIME: " << (coalescing_end - coalescing_start) << std::endl;
     366    std::cerr << "  SINKING TIME: " << (sinking_end - sinking_start) << std::endl;
     367    std::cerr << "  PRE-DISTRIBUTION TIME: " << (pre_distribution_end - pre_distribution_start) << std::endl;
     368    std::cerr << "  MULTIPLEXING TIME: " << (multiplexing_end - multiplexing_start) << std::endl;
     369    std::cerr << "  MULTIPLEXING SEED: " << MultiplexingPass::SEED << std::endl;
     370    std::cerr << "  MULTIPLEXING NODES USED: " << MultiplexingPass::NODES_USED << std::endl;
     371    std::cerr << "  MULTIPLEXING NODES ALLOCATED: " << MultiplexingPass::NODES_ALLOCATED << std::endl;
     372    std::cerr << "  LOWERING TIME: " << (lowering_end - lowering_start) << std::endl;
     373    std::cerr << "  POST-DISTRIBUTION TIME: " << (post_distribution_end - post_distribution_start) << std::endl;
     374    std::cerr << "  SCHEDULING TIME: " << (scheduling_end - scheduling_start) << std::endl;
     375    std::cerr << "PABLO STATEMENTS: " << COUNT_STATEMENTS(function) << std::endl;
     376    std::cerr << "PABLO ADVANCES: " << COUNT_ADVANCES(function) << std::endl;
     377    std::cerr << "PRE-LOWERING VARIADIC DISTRIBUTION: ";
            // Emit the histogram as "operands|count" pairs separated by ';'.
     378    bool join = false;
     379    for (auto dist : distribution) {
     380        if (join) {
     381            std::cerr << ';';
     382        }
     383        std::cerr << dist.first << '|' << dist.second;
     384        join = true;
     385    }
     386    std::cerr << std::endl;
     387#endif
    197388}
    198389
     
    232423    builder.setErrorStr(&errMessage);
    233424    builder.setMCPU(sys::getHostCPUName());
    234     builder.setOptLevel(CodeGenOpt::Level::None);
     425    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
     426    switch (OptLevel) {
     427        case '0': optLevel = CodeGenOpt::None; break;
     428        case '1': optLevel = CodeGenOpt::Less; break;
     429        case '2': optLevel = CodeGenOpt::Default; break;
     430        case '3': optLevel = CodeGenOpt::Aggressive; break;
     431        default: errs() << OptLevel << " is an invalid optimization level.\n";
     432    }
     433    builder.setOptLevel(optLevel);
    235434
    236435#if (BLOCK_SIZE == 256)
Note: See TracChangeset for help on using the changeset viewer.