source: icGREP/icgrep-devel/icgrep/toolchain.cpp @ 4973

Last change on this file since 4973 was 4973, checked in by lindanl, 3 years ago

Bug fixed for large file.

File size: 25.9 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <fstream>
10
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/ExecutionEngine/ExecutionEngine.h>
14#include <llvm/ExecutionEngine/MCJIT.h>
15#include <llvm/IRReader/IRReader.h>
16#include <llvm/Support/CommandLine.h>
17#include <llvm/CodeGen/CommandFlags.h>
18#include <llvm/Support/SourceMgr.h>
19#include <llvm/Support/TargetSelect.h>
20#include <llvm/Support/Host.h>
21
22#include <IDISA/idisa_avx_builder.h>
23#include <IDISA/idisa_sse_builder.h>
24#include <IDISA/idisa_i64_builder.h>
25#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
26#include <UCD/precompiled_properties.h>
27#endif
28#include <re/re_cc.h>
29#include <re/re_nullable.h>
30#include <re/re_simplifier.h>
31#include <re/re_alt.h>
32#include <re/parsefailure.h>
33#include <re/re_parser.h>
34#include <re/re_compiler.h>
35#include <utf8_encoder.h>
36#include <cc/cc_compiler.h>
37#include <pablo/pablo_compiler.h>
38#include <pablo/optimizers/pablo_simplifier.hpp>
39#include <pablo/optimizers/codemotionpass.h>
40#include <pablo/passes/flattenassociativedfg.h>
41#include <pablo/passes/factorizedfg.h>
42#ifdef ENABLE_MULTIPLEXING
43#include <pablo/optimizers/pablo_automultiplexing.hpp>
44#include <pablo/optimizers/pablo_bddminimization.h>
45#include <pablo/optimizers/distributivepass.h>
46#include <pablo/optimizers/schedulingprepass.h>
47#endif
48#include <pablo/function.h>
49#include <pablo/analysis/pabloverifier.hpp>
50#include <re/printer_re.h>
51#include <pablo/printer_pablos.h>
52#include <object_cache.h>
53// Dynamic processor detection
54#define ISPC_LLVM_VERSION ISPC_LLVM_3_6
55#include "ispc.cpp"
56#include <sstream>
57
58using namespace pablo;
59
60static cl::OptionCategory bGrepOutputOptions("Output Options",
61                                      "These options control the output.");
62
63static cl::opt<bool> CountOnly("c", cl::desc("Count and display the matching lines per file only."), cl::cat(bGrepOutputOptions));
64static cl::alias CountOnlyLong("count", cl::desc("Alias for -c"), cl::aliasopt(CountOnly));
65static cl::opt<bool> NormalizeLineBreaks("normalize-line-breaks", cl::desc("Normalize line breaks to std::endl."), cl::init(false),  cl::cat(bGrepOutputOptions));
66
67static cl::opt<bool> ShowFileNames("H", cl::desc("Show the file name with each matching line."), cl::cat(bGrepOutputOptions));
68static cl::alias ShowFileNamesLong("with-filename", cl::desc("Alias for -H"), cl::aliasopt(ShowFileNames));
69
70static cl::opt<bool> ShowLineNumbers("n", cl::desc("Show the line number with each matching line."), cl::cat(bGrepOutputOptions));
71static cl::alias ShowLineNumbersLong("line-number", cl::desc("Alias for -n"), cl::aliasopt(ShowLineNumbers));
72
73
74static cl::OptionCategory cRegexOutputOptions("Regex Dump Options",
75                                              "These options control printing of intermediate regular expression structures.");
76static cl::opt<bool> PrintAllREs("print-REs", cl::init(false), cl::desc("print regular expression passes"), cl::cat(cRegexOutputOptions));
77static cl::opt<bool> PrintParsedREs("print-parsed-REs", cl::init(false), cl::desc("print out parsed regular expressions"), cl::cat(cRegexOutputOptions));
78static cl::opt<bool> PrintStrippedREs("print-stripped-REs", cl::init(false), cl::desc("print out REs with nullable prefixes/suffixes removed"), cl::cat(cRegexOutputOptions));
79static cl::opt<bool> PrintNamedREs("print-named-REs", cl::init(false), cl::desc("print out named REs"), cl::cat(cRegexOutputOptions));
80static cl::opt<bool> PrintUTF8REs("print-utf8-REs", cl::init(false), cl::desc("print out UTF-8 REs"), cl::cat(cRegexOutputOptions));
81static cl::opt<bool> PrintSimplifiedREs("print-simplified-REs", cl::init(false), cl::desc("print out final simplified REs"), cl::cat(cRegexOutputOptions));
82
83static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options", "These options control printing of intermediate Pablo code.");
84
85static cl::opt<bool> PrintOptimizedREcode("print-pablo", cl::init(false), cl::desc("print final optimized Pablo code"), cl::cat(dPabloDumpOptions));
86static cl::opt<bool> PrintCompiledCCcode("print-CC-pablo", cl::init(false), cl::desc("print Pablo output from character class compiler"), cl::cat(dPabloDumpOptions));
87static cl::opt<bool> PrintCompiledREcode("print-RE-pablo", cl::init(false), cl::desc("print Pablo output from the regular expression compiler"), cl::cat(dPabloDumpOptions));
88static cl::opt<std::string> PabloOutputFilename("print-pablo-output", cl::init(""), cl::desc("output Pablo filename"), cl::cat(dPabloDumpOptions));
89
90static cl::OptionCategory cMachineCodeOptimization("Machine Code Optimizations", "These options control back-end compilier optimization levels.");
91
92
93static cl::opt<char> OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O0')"),
94                              cl::cat(cMachineCodeOptimization), cl::Prefix, cl::ZeroOrMore, cl::init('0'));
95
96static cl::OptionCategory cPabloOptimizationsOptions("Pablo Optimizations", "These options control Pablo optimization passes.");
97
98static cl::opt<bool> DisableSimplification("disable-simplification", cl::init(false),
99                                     cl::desc("Disable Pablo Simplification pass (not recommended)"),
100                                     cl::cat(cPabloOptimizationsOptions));
101
102static cl::opt<bool> PabloSinkingPass("sinking", cl::init(false),
103                                      cl::desc("Moves all instructions into the innermost legal If-scope so that they are only executed when needed."),
104                                      cl::cat(cPabloOptimizationsOptions));
105
106#ifdef ENABLE_MULTIPLEXING
107static cl::opt<bool> PrintUnloweredCode("print-unlowered-pablo", cl::init(false), cl::desc("print Pablo output prior to lowering. "), cl::cat(dPabloDumpOptions));
108
109static cl::opt<bool> EnableMultiplexing("multiplexing", cl::init(false),
110                                        cl::desc("combine Advances whose inputs are mutual exclusive into the fewest number of advances possible (expensive)."),
111                                        cl::cat(cPabloOptimizationsOptions));
112
113static cl::opt<bool> EnableLowering("lowering", cl::init(false),
114                                         cl::desc("coalesce associative functions prior to optimization passes."),
115                                         cl::cat(cPabloOptimizationsOptions));
116
117static cl::opt<bool> EnablePreDistribution("pre-dist", cl::init(false),
118                                         cl::desc("apply distribution law optimization prior to multiplexing."),
119                                         cl::cat(cPabloOptimizationsOptions));
120
121static cl::opt<bool> EnablePostDistribution("post-dist", cl::init(false),
122                                         cl::desc("apply distribution law optimization after multiplexing."),
123                                         cl::cat(cPabloOptimizationsOptions));
124
125static cl::opt<bool> EnablePrePassScheduling("pre-pass-scheduling", cl::init(false),
126                                         cl::desc("apply pre-pass scheduling prior to LLVM IR generation."),
127                                         cl::cat(cPabloOptimizationsOptions));
128#endif
129
130static cl::OptionCategory dCodeGenOptions("Code Generation Options", "These options control code generation.");
131
132static cl::opt<bool> DisableAVX2("disable-AVX2", cl::init(false), cl::desc("disable AVX2 instruction set."), cl::cat(dCodeGenOptions));
133
134static cl::opt<int> BlockSize("BlockSize", cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(dCodeGenOptions));
135
136
137static cl::OptionCategory cObjectCache("Object Caching", "These options control back-end object caching behaviours.");
138
139static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(false), cl::desc("Enable object caching"), cl::cat(cObjectCache));
140
141static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(cObjectCache));
142
143re::RE * regular_expression_passes(const Encoding encoding, re::RE * re_ast)  {
144    if (PrintAllREs || PrintParsedREs) {
145        std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
146    }
147
148    //Optimization passes to simplify the AST.
149    re_ast = re::RE_Nullable::removeNullablePrefix(re_ast);
150    if (PrintAllREs || PrintStrippedREs) {
151        std::cerr << "RemoveNullablePrefix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
152    }
153    re_ast = re::RE_Nullable::removeNullableSuffix(re_ast);
154    if (PrintAllREs || PrintStrippedREs) {
155        std::cerr << "RemoveNullableSuffix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
156    }
157
158    re_ast = re::RE_Simplifier::simplify(re_ast);
159    if (PrintAllREs || PrintSimplifiedREs) {
160        //Print to the terminal the AST that was generated by the simplifier.
161        std::cerr << "Simplifier:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
162    }
163    return re_ast;
164}
165   
166PabloFunction * re2pablo_compiler(const Encoding encoding, re::RE * re_ast) {
167    PabloFunction * function = PabloFunction::Create("process_block", 8, 2);
168    cc::CC_Compiler cc_compiler(*function, encoding);
169    re::RE_Compiler re_compiler(*function, cc_compiler);
170    re_compiler.initializeRequiredStreams();
171    re_compiler.compileUnicodeNames(re_ast);
172    re_compiler.finalizeMatchResult(re_compiler.compile(re_ast));
173
174    if (PrintCompiledREcode) {
175        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
176        llvm::raw_os_ostream cerr(std::cerr);
177        cerr << "Initial Pablo AST:\n";
178        PabloPrinter::print(*function, cerr);
179    }
180    #ifndef NDEBUG
181    PabloVerifier::verify(*function, "creation");
182    #endif
183    return function;
184}
185
// READ_CYCLE_COUNTER(name) stores the current cycle counter into `name` when
// timing output is compiled in, and expands to nothing otherwise.
#ifndef PRINT_TIMING_INFORMATION
#define READ_CYCLE_COUNTER(name)
#else
#define READ_CYCLE_COUNTER(name) name = read_cycle_counter();
#endif
191
192#ifdef PRINT_TIMING_INFORMATION
193unsigned COUNT_STATEMENTS(const PabloFunction * const entry) {
194    std::stack<const Statement *> scope;
195    unsigned statements = 0;
196    // Scan through and collect all the advances, calls, scanthrus and matchstars ...
197    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
198        while ( stmt ) {
199            ++statements;
200            if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
201                // Set the next statement to be the first statement of the inner scope and push the
202                // next statement of the current statement into the scope stack.
203                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
204                scope.push(stmt->getNextNode());
205                stmt = nested->front();
206                assert (stmt);
207                continue;
208            }
209            stmt = stmt->getNextNode();
210        }
211        if (scope.empty()) {
212            break;
213        }
214        stmt = scope.top();
215        scope.pop();
216    }
217    return statements;
218}
219
220unsigned COUNT_ADVANCES(const PabloFunction * const entry) {
221
222    std::stack<const Statement *> scope;
223    unsigned advances = 0;
224
225    // Scan through and collect all the advances, calls, scanthrus and matchstars ...
226    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
227        while ( stmt ) {
228            if (isa<Advance>(stmt)) {
229                ++advances;
230            }
231            else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
232                // Set the next statement to be the first statement of the inner scope and push the
233                // next statement of the current statement into the scope stack.
234                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
235                scope.push(stmt->getNextNode());
236                stmt = nested->front();
237                assert (stmt);
238                continue;
239            }
240            stmt = stmt->getNextNode();
241        }
242        if (scope.empty()) {
243            break;
244        }
245        stmt = scope.top();
246        scope.pop();
247    }
248    return advances;
249}
250
251using DistributionMap = boost::container::flat_map<unsigned, unsigned>;
252
253DistributionMap SUMMARIZE_VARIADIC_DISTRIBUTION(const PabloFunction * const entry) {
254    std::stack<const Statement *> scope;
255    DistributionMap distribution;
256    // Scan through and collect all the advances, calls, scanthrus and matchstars ...
257    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
258        while ( stmt ) {
259            if (isa<Variadic>(stmt)) {
260                auto f = distribution.find(stmt->getNumOperands());
261                if (f == distribution.end()) {
262                    distribution.emplace(stmt->getNumOperands(), 1);
263                } else {
264                    f->second += 1;
265                }
266            }
267            else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
268                // Set the next statement to be the first statement of the inner scope and push the
269                // next statement of the current statement into the scope stack.
270                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
271                scope.push(stmt->getNextNode());
272                stmt = nested->front();
273                assert (stmt);
274                continue;
275            }
276            stmt = stmt->getNextNode();
277        }
278        if (scope.empty()) {
279            break;
280        }
281        stmt = scope.top();
282        scope.pop();
283    }
284    return distribution;
285}
286#endif
287
288void pablo_function_passes(PabloFunction * function) {
289    // Scan through the pablo code and perform DCE and CSE
290
291#ifdef PRINT_TIMING_INFORMATION
292    timestamp_t simplification_start = 0, simplification_end = 0;
293    timestamp_t coalescing_start = 0, coalescing_end = 0;
294    timestamp_t sinking_start = 0, sinking_end = 0;
295    timestamp_t pre_distribution_start = 0, pre_distribution_end = 0;
296    timestamp_t multiplexing_start = 0, multiplexing_end = 0;
297    timestamp_t post_distribution_start = 0, post_distribution_end = 0;
298    timestamp_t lowering_start = 0, lowering_end = 0;
299    timestamp_t scheduling_start = 0, scheduling_end = 0;
300    DistributionMap distribution;
301    const timestamp_t optimization_start = read_cycle_counter();
302#endif
303    if (!DisableSimplification) {
304        READ_CYCLE_COUNTER(simplification_start);
305        Simplifier::optimize(*function);
306        READ_CYCLE_COUNTER(simplification_end);
307    }
308#ifdef ENABLE_MULTIPLEXING
309    if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
310        READ_CYCLE_COUNTER(coalescing_start);
311        CanonicalizeDFG::transform(*function);
312        READ_CYCLE_COUNTER(coalescing_end);
313    }
314    if (EnablePreDistribution) {
315        READ_CYCLE_COUNTER(pre_distribution_start);
316        DistributivePass::optimize(*function);
317        READ_CYCLE_COUNTER(pre_distribution_end);
318    }
319    if (EnableMultiplexing) {
320        READ_CYCLE_COUNTER(multiplexing_start);
321        MultiplexingPass::optimize(*function);
322        READ_CYCLE_COUNTER(multiplexing_end);
323        if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
324            CanonicalizeDFG::transform(*function);
325        }
326    }
327    if (EnablePostDistribution) {
328        READ_CYCLE_COUNTER(post_distribution_start);
329        DistributivePass::optimize(*function);
330        READ_CYCLE_COUNTER(post_distribution_end);
331    }
332#endif
333    if (PabloSinkingPass) {
334        READ_CYCLE_COUNTER(sinking_start);
335        CodeMotionPass::optimize(*function);
336        READ_CYCLE_COUNTER(sinking_end);
337    }
338#ifdef ENABLE_MULTIPLEXING
339    if (PrintUnloweredCode) {
340        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
341        llvm::raw_os_ostream cerr(std::cerr);
342        cerr << "Unlowered Pablo AST:\n";
343        PabloPrinter::print(*function, cerr);
344    }
345    #ifdef PRINT_TIMING_INFORMATION
346    distribution = SUMMARIZE_VARIADIC_DISTRIBUTION(function);
347    #endif
348    if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
349        READ_CYCLE_COUNTER(lowering_start);
350        FactorizeDFG::transform(*function);
351        READ_CYCLE_COUNTER(lowering_end);
352    }
353    if (EnablePrePassScheduling) {
354        READ_CYCLE_COUNTER(scheduling_start);
355        SchedulingPrePass::optimize(*function);
356        READ_CYCLE_COUNTER(scheduling_end);
357    }
358#endif
359#ifdef PRINT_TIMING_INFORMATION
360    const timestamp_t optimization_end = read_cycle_counter();
361#endif
362    if (PrintOptimizedREcode) {
363        if (PabloOutputFilename.empty()) {
364            //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
365            llvm::raw_os_ostream cerr(std::cerr);
366            cerr << "Final Pablo AST:\n";
367            PabloPrinter::print(*function, cerr);
368        } else {
369            std::error_code error;
370            llvm::raw_fd_ostream out(PabloOutputFilename, error, sys::fs::OpenFlags::F_None);
371            PabloPrinter::print(*function, out);
372        }
373    }
374#ifdef PRINT_TIMING_INFORMATION
375    std::cerr << "PABLO OPTIMIZATION TIME: " << (optimization_end - optimization_start) << std::endl;
376    std::cerr << "  SIMPLIFICATION TIME: " << (simplification_end - simplification_start) << std::endl;
377    std::cerr << "  COALESCING TIME: " << (coalescing_end - coalescing_start) << std::endl;
378    std::cerr << "  SINKING TIME: " << (sinking_end - sinking_start) << std::endl;
379    std::cerr << "  PRE-DISTRIBUTION TIME: " << (pre_distribution_end - pre_distribution_start) << std::endl;
380    std::cerr << "  MULTIPLEXING TIME: " << (multiplexing_end - multiplexing_start) << std::endl;
381    std::cerr << "  MULTIPLEXING SEED: " << MultiplexingPass::SEED << std::endl;
382    std::cerr << "  MULTIPLEXING NODES USED: " << MultiplexingPass::NODES_USED << std::endl;
383    std::cerr << "  MULTIPLEXING NODES ALLOCATED: " << MultiplexingPass::NODES_ALLOCATED << std::endl;
384    std::cerr << "  LOWERING TIME: " << (lowering_end - lowering_start) << std::endl;
385    std::cerr << "  POST-DISTRIBUTION TIME: " << (post_distribution_end - post_distribution_start) << std::endl;
386    std::cerr << "  SCHEDULING TIME: " << (scheduling_end - scheduling_start) << std::endl;
387    std::cerr << "PABLO STATEMENTS: " << COUNT_STATEMENTS(function) << std::endl;
388    std::cerr << "PABLO ADVANCES: " << COUNT_ADVANCES(function) << std::endl;
389    std::cerr << "PRE-LOWERING VARIADIC DISTRIBUTION: ";
390    bool join = false;
391    for (auto dist : distribution) {
392        if (join) {
393            std::cerr << ';';
394        }
395        std::cerr << dist.first << '|' << dist.second;
396        join = true;
397    }
398    std::cerr << std::endl;
399#endif
400}
401
402
403IDISA::IDISA_Builder * GetIDISA_Builder(Module * mod) {
404    bool hasAVX2 = (strncmp(lGetSystemISA(), "avx2", 4) == 0);
405   
406    unsigned theBlockSize = BlockSize;  // from command line
407   
408    if (theBlockSize == 0) {  // No BlockSize override: use processor SIMD width
409        theBlockSize = hasAVX2 ? 256 : 128;
410    }
411    Type * bitBlockType = VectorType::get(IntegerType::get(getGlobalContext(), 64), theBlockSize/64);
412   
413    int blockSize = bitBlockType->isIntegerTy() ? cast<IntegerType>(bitBlockType)->getIntegerBitWidth() : cast<VectorType>(bitBlockType)->getBitWidth();
414    if (blockSize >= 256) {
415        if (hasAVX2) {
416            return new IDISA::IDISA_AVX2_Builder(mod, bitBlockType);
417        }
418        else{
419            return new IDISA::IDISA_SSE2_Builder(mod, bitBlockType);
420        }
421    }
422    else if (blockSize == 64)
423        return new IDISA::IDISA_I64_Builder(mod, bitBlockType); 
424    return new IDISA::IDISA_SSE2_Builder(mod, bitBlockType);
425}
426
427
428
429ExecutionEngine * JIT_to_ExecutionEngine (Module * m) {
430
431    InitializeNativeTarget();
432    InitializeNativeTargetAsmPrinter();
433    InitializeNativeTargetAsmParser();
434
435    PassRegistry * Registry = PassRegistry::getPassRegistry();
436    initializeCore(*Registry);
437    initializeCodeGen(*Registry);
438    initializeLowerIntrinsicsPass(*Registry);
439
440    std::string errMessage;
441    EngineBuilder builder(std::move(std::unique_ptr<Module>(m)));
442    builder.setErrorStr(&errMessage);
443    builder.setMCPU(sys::getHostCPUName());
444    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
445    switch (OptLevel) {
446        case '0': optLevel = CodeGenOpt::None; break;
447        case '1': optLevel = CodeGenOpt::Less; break;
448        case '2': optLevel = CodeGenOpt::Default; break;
449        case '3': optLevel = CodeGenOpt::Aggressive; break;
450        default: errs() << OptLevel << " is an invalid optimization level.\n";
451    }
452    builder.setOptLevel(optLevel);
453
454    if (!DisableAVX2 && (strncmp(lGetSystemISA(), "avx2", 4) == 0)) {
455            std::vector<std::string> attrs;
456            attrs.push_back("avx2");
457            builder.setMAttrs(attrs);
458    }
459
460    // builder.selectTarget();
461
462    //builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
463    ExecutionEngine * engine = builder.create();
464    ICGrepObjectCache * cache = nullptr;
465    if (engine == nullptr) {
466        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
467    }
468    if (EnableObjectCache) {
469        if (ObjectCacheDir.empty())
470            // Default is $HOME/.cache/icgrep
471            cache = new ICGrepObjectCache();
472        else
473            cache = new ICGrepObjectCache(ObjectCacheDir);
474        engine->setObjectCache(cache);
475    }
476    return engine;
477}
478
479
480
// Per-file result state shared by wrapped_report_match() and PrintResult().
// Slot k (0-based) of total_count / resultStrs belongs to inputFiles[k + 1].
static int * total_count;
static std::stringstream * resultStrs = nullptr;
static std::vector<std::string> inputFiles;

/*
 * (Re)initializes the per-file result state.
 *
 * filenames : the list later searched by wrapped_report_match(); entries from
 *             index 1 onward are matched against reported file names.
 * n         : number of result slots to allocate.
 *
 * All `n` counters start at zero and all `n` output buffers start empty.
 * Storage from a previous call is released first.
 */
void initResult(std::vector<std::string> filenames, const int n){

    // Both pointers are null before the first call, so this is safe then too.
    delete [] resultStrs;
    delete [] total_count;

    inputFiles = std::move(filenames);
    resultStrs = new std::stringstream[n];
    // Value-initialize so every slot is zero regardless of inputFiles.size().
    total_count = new int[n]();

}
495
496extern "C" {
497    void wrapped_report_match(uint64_t lineNum, uint64_t line_start, uint64_t line_end, const char * buffer, uint64_t filesize, char * filename) {
498
499        int idx = 0;
500        for (int i=1; i<inputFiles.size(); i++){
501            if (inputFiles[i] == filename){
502                idx = i-1;
503                break;
504            }
505        }
506
507        if(CountOnly){
508            total_count[idx]++;
509            return;
510        }
511
512        if (ShowFileNames) {
513            resultStrs[idx] << filename << ':';
514        }
515        if (ShowLineNumbers) {
516            resultStrs[idx] << lineNum << ":";
517        }
518
519        if ((buffer[line_start] == 0xA) && (line_start != line_end)) {
520            // The line "starts" on the LF of a CRLF.  Really the end of the last line.
521            line_start++;
522        }
523        if (line_end == filesize) {
524            // The match position is at end-of-file.   We have a final unterminated line.
525            resultStrs[idx].write(&buffer[line_start], line_end - line_start);
526            if (NormalizeLineBreaks) {
527                resultStrs[idx] << '\n';  // terminate it
528            }
529            return;
530        }
531        unsigned char end_byte = (unsigned char)buffer[line_end]; 
532        if (NormalizeLineBreaks) {
533            if (end_byte == 0x85) {
534                // Line terminated with NEL, on the second byte.  Back up 1.
535                line_end--;
536            } else if (end_byte > 0xD) {
537                // Line terminated with PS or LS, on the third byte.  Back up 2.
538                line_end -= 2;
539            }
540            resultStrs[idx].write(&buffer[line_start], line_end - line_start);
541            resultStrs[idx] << '\n';
542        }
543        else{   
544            if (end_byte == 0x0D) {
545                // Check for line_end on first byte of CRLF;  note that we don't
546                // want to access past the end of buffer.
547                if ((line_end + 1 < filesize) && (buffer[line_end + 1] == 0x0A)) {
548                    // Found CRLF; preserve both bytes.
549                    line_end++;
550                }
551            }
552            resultStrs[idx].write(&buffer[line_start], line_end - line_start + 1);
553        }
554    }
555}
556
557void PrintResult(){
558    if(CountOnly){
559        for (int i=1; i<inputFiles.size(); i++){
560            std::cout << total_count[i-1] << std::endl;
561        }
562        return;
563    }
564
565    std::string out;
566    for (int i=1; i<inputFiles.size(); i++){
567        std::cout << resultStrs[i-1].str();
568    }
569}
570
571re::CC * parsedCodePointSet;
572
573extern "C" {
574    void insert_codepoints(uint64_t lineNum, uint64_t line_start, uint64_t line_end, const char * buffer) {
575       re::codepoint_t c = 0;
576        ssize_t line_pos = line_start;
577        while (isxdigit(buffer[line_pos])) {
578            if (isdigit(buffer[line_pos])) {
579                c = (c << 4) | (buffer[line_pos] - '0');
580            }
581            else {
582                c = (c << 4) | (tolower(buffer[line_pos]) - 'a' + 10);
583            }
584            line_pos++;
585        }
586        assert(((line_pos - line_start) >= 4) && ((line_pos - line_start) <= 6)); // UCD format 4 to 6 hex digits.       
587        parsedCodePointSet->insert(c);
588    }
589}
590
591void setParsedCodePointSet(){
592    parsedCodePointSet = re::makeCC();
593}
594
595re::CC * getParsedCodePointSet(){
596    return parsedCodePointSet;
597}
598
599// extern "C" {
600//   void wrapped_print_register(char * regName, BitBlock bit_block) {
601//       print_register<BitBlock>(regName, bit_block);
602//   }
603// }
604
605void icgrep_Linking(Module * m, ExecutionEngine * e) {
606    Module::FunctionListType & fns = m->getFunctionList();
607    for (Module::FunctionListType::iterator it = fns.begin(), it_end = fns.end(); it != it_end; ++it) {
608        std::string fnName = it->getName().str();
609        if (fnName == "s2p_block") continue;
610        if (fnName == "process_block") continue;
611        if (fnName == "process_block_initialize_carries") continue;
612       
613        // if (fnName == "wrapped_print_register") {
614        //     e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_print_register);
615        // }
616        if (fnName == "wrapped_report_match") {
617            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_report_match);
618        }
619        if (fnName == "insert_codepoints") {
620            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&insert_codepoints);
621        }
622#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
623        else {
624            const UCD::ExternalProperty & ep = UCD::resolveExternalProperty(fnName);
625            e->addGlobalMapping(cast<GlobalValue>(it), std::get<0>(ep));
626        }
627#endif
628    }
629}
Note: See TracBrowser for help on using the repository browser.