Changeset 5033


Ignore:
Timestamp:
May 6, 2016, 3:28:07 PM (17 months ago)
Author:
cameron
Message:

Refactor: move grep-specific code out of toolchain

Location:
icGREP/icgrep-devel/icgrep
Files:
1 added
15 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5032 r5033  
    66# The version number.
    77set (icgrep_VERSION_MAJOR 1)
    8 set (icgrep_VERSION_MINOR 0)
     8set (icgrep_VERSION_MINOR 5)
    99
    1010option(ENABLE_MULTIPLEXING "Compiling the Multiplexing Module")
     
    5555SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_BOOST")
    5656
     57SET(IDISA_SRC IDISA/idisa_builder.cpp IDISA/idisa_avx_builder.cpp IDISA/idisa_i64_builder.cpp IDISA/idisa_sse_builder.cpp IDISA/idisa_target.cpp)
     58
    5759SET(PABLO_SRC pablo/pabloAST.cpp pablo/ps_if.cpp pablo/ps_while.cpp pablo/function.cpp pablo/codegenstate.cpp pablo/builder.cpp pablo/symbol_generator.cpp pablo/printer_pablos.cpp pablo/pablo_toolchain.cpp pablo/passes/flattenif.cpp)
    58 SET(PABLO_SRC ${PABLO_SRC} pablo/pablo_compiler.cpp pablo/carry_manager.cpp pablo/carry_data.cpp IDISA/idisa_builder.cpp IDISA/idisa_avx_builder.cpp IDISA/idisa_i64_builder.cpp IDISA/idisa_sse_builder.cpp IDISA/idisa_target.cpp)
     60SET(PABLO_SRC ${PABLO_SRC} pablo/pablo_compiler.cpp pablo/carry_manager.cpp pablo/carry_data.cpp )
    5961SET(PABLO_SRC ${PABLO_SRC} kernels/s2p_kernel.cpp kernels/kernel.cpp kernels/instance.cpp)
    6062SET(PABLO_SRC ${PABLO_SRC} pablo/analysis/pabloverifier.cpp)
     
    7173add_library(UCDlib UCD/unicode_set.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp UCD/UnicodeNameData.cpp)
    7274
     75add_library(CodeGen object_cache.cpp util/ispc.cpp ${IDISA_SRC})
     76
    7377# add the executable
    74 target_link_libraries (PabloADT ${REQ_LLVM_LIBRARIES})
     78target_link_libraries (PabloADT CodeGen ${REQ_LLVM_LIBRARIES})
    7579target_link_libraries (CCADT PabloADT)
    7680target_link_libraries (UCDlib RegExpADT PabloADT CCADT)
     
    108112ENDIF()
    109113
    110 add_executable(icgrep icgrep.cpp toolchain.cpp grep_engine.cpp object_cache.cpp kernels/pipeline.cpp kernels/scanmatchgen.cpp ${PRECOMPILED_FILES})
    111 add_executable(casefold casefold.cpp kernels/p2s_kernel.cpp kernels/stdout_kernel.cpp kernels/casefold_pipeline.cpp)
    112 add_executable(u8u16 u8u16.cpp kernels/p2s_kernel.cpp kernels/stdout_kernel.cpp kernels/u8u16_pipeline.cpp kernels/deletion.cpp)
    113 add_executable(symtbl symboltable.cpp grep_engine.cpp kernels/pipeline.cpp kernels/scanmatchgen.cpp kernels/symboltablepipeline.cpp kernels/stdout_kernel.cpp toolchain.cpp grep_engine.cpp object_cache.cpp ${PRECOMPILED_FILES})
    114 add_executable(wc wc.cpp ${PRECOMPILED_FILES})
     114add_executable(icgrep icgrep.cpp toolchain.cpp grep_engine.cpp kernels/pipeline.cpp kernels/scanmatchgen.cpp ${PRECOMPILED_FILES})
     115add_executable(casefold casefold.cpp toolchain.cpp kernels/p2s_kernel.cpp kernels/stdout_kernel.cpp kernels/casefold_pipeline.cpp)
     116add_executable(u8u16 u8u16.cpp toolchain.cpp kernels/p2s_kernel.cpp kernels/stdout_kernel.cpp kernels/u8u16_pipeline.cpp kernels/deletion.cpp)
     117add_executable(symtbl symboltable.cpp toolchain.cpp kernels/symboltablepipeline.cpp kernels/stdout_kernel.cpp grep_engine.cpp kernels/pipeline.cpp kernels/scanmatchgen.cpp ${PRECOMPILED_FILES})
     118add_executable(wc wc.cpp toolchain.cpp)
    115119
    116120IF(ENABLE_PREGENERATED_UCD_FUNCTIONS)
     
    130134target_link_libraries(wc ${Boost_LIBRARIES})
    131135
    132 target_link_libraries (icgrep UCDlib PabloADT RegExpCompiler CCADT ${REQ_LLVM_LIBRARIES})
    133 target_link_libraries (symtbl UCDlib PabloADT RegExpCompiler CCADT ${REQ_LLVM_LIBRARIES})
    134 target_link_libraries (casefold UCDlib PabloADT RegExpCompiler CCADT ${REQ_LLVM_LIBRARIES})
    135 target_link_libraries (u8u16 UCDlib PabloADT RegExpCompiler CCADT ${REQ_LLVM_LIBRARIES})
    136 target_link_libraries (wc UCDlib PabloADT RegExpCompiler CCADT ${REQ_LLVM_LIBRARIES})
     136target_link_libraries (icgrep UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES})
     137target_link_libraries (symtbl UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES})
     138target_link_libraries (casefold UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES})
     139target_link_libraries (u8u16 UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES})
     140target_link_libraries (wc UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES})
    137141
    138142IF(ENABLE_MULTIPLEXING)
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_target.cpp

    r5001 r5033  
    44 */
    55
    6 #include <llvm/Support/CommandLine.h>
    7 
     6#include <toolchain.h>
    87#include <IDISA/idisa_avx_builder.h>
    98#include <IDISA/idisa_sse_builder.h>
    109#include <IDISA/idisa_i64_builder.h>
    1110
     11
    1212// Dynamic processor detection
    13 #define ISPC_LLVM_VERSION ISPC_LLVM_3_6
    14 #include <util/ispc.cpp>
     13//#define ISPC_LLVM_VERSION ISPC_LLVM_3_6
     14#include <util/ispc.h>
    1515
    16 static cl::OptionCategory dCodeGenOptions("Code Generation Options", "These options control code generation.");
    17 
    18 static cl::opt<int> BlockSize("BlockSize", cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(dCodeGenOptions));
     16namespace IDISA {
     17   
    1918
    2019
    21 IDISA::IDISA_Builder * GetIDISA_Builder(Module * mod) {
     20IDISA_Builder * GetIDISA_Builder(Module * mod) {
    2221    bool hasAVX2 = (strncmp(lGetSystemISA(), "avx2", 4) == 0);
    2322   
    24     unsigned theBlockSize = BlockSize;  // from command line
     23    unsigned theBlockSize = codegen::BlockSize;  // from command line
    2524   
    2625    if (theBlockSize == 0) {  // No BlockSize override: use processor SIMD width
     
    3231    if (blockSize >= 256) {
    3332        if (hasAVX2) {
    34             return new IDISA::IDISA_AVX2_Builder(mod, bitBlockType);
     33            return new IDISA_AVX2_Builder(mod, bitBlockType);
    3534        }
    3635        else{
    37             return new IDISA::IDISA_SSE2_Builder(mod, bitBlockType);
     36            return new IDISA_SSE2_Builder(mod, bitBlockType);
    3837        }
    3938    }
    4039    else if (blockSize == 64)
    41         return new IDISA::IDISA_I64_Builder(mod, bitBlockType);
    42     return new IDISA::IDISA_SSE2_Builder(mod, bitBlockType);
     40        return new IDISA_I64_Builder(mod, bitBlockType);
     41    return new IDISA_SSE2_Builder(mod, bitBlockType);
    4342}
    4443
     44}
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_target.h

    r4984 r5033  
    1111#include <IDISA/idisa_builder.h>
    1212
     13namespace IDISA {
     14   
    1315IDISA::IDISA_Builder * GetIDISA_Builder(Module * m);
    1416
     17}
     18
    1519#endif
  • icGREP/icgrep-devel/icgrep/casefold.cpp

    r5026 r5033  
    1010#include <sstream>
    1111
     12#include <toolchain.h>
    1213#include <llvm/IR/Function.h>
    1314#include <llvm/IR/Module.h>
    1415#include <llvm/ExecutionEngine/ExecutionEngine.h>
    15 #include <llvm/ExecutionEngine/MCJIT.h>
    16 #include <llvm/IRReader/IRReader.h>
    1716#include <llvm/Support/CommandLine.h>
    18 #include <llvm/CodeGen/CommandFlags.h>
    19 #include <llvm/Support/SourceMgr.h>
    20 #include <llvm/Support/TargetSelect.h>
    21 #include <llvm/Support/Host.h>
    22 #include <llvm/Support/raw_ostream.h>
    23 
    2417#include <re/re_cc.h>
    2518#include <cc/cc_compiler.h>
     
    2821#include <IDISA/idisa_target.h>
    2922#include <kernels/casefold_pipeline.h>
    30 
    31 // Dynamic processor detection
    32 #define ISPC_LLVM_VERSION ISPC_LLVM_3_6
    33 #include <util/ispc.cpp>
    3423
    3524#include <utf_encoding.h>
     
    4534static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore);
    4635
    47 
    48 static cl::OptionCategory cMachineCodeOptimization("Machine Code Optimizations", "These options control back-end compilier optimization levels.");
    49 
    50 static cl::opt<char> OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O0')"),
    51                               cl::cat(cMachineCodeOptimization), cl::Prefix, cl::ZeroOrMore, cl::init('0'));
    52 
    53 
    54 //
    55 //  Functions taken from toolchain.cpp and modified for casefold
    56 //  JIT_t_ExecutionEngine : remove object cache
    57 //  icgrep_Linking:   unneeded?
    58 //  all others: definitely unneeded
    59 //
    60 
    61 ExecutionEngine * JIT_to_ExecutionEngine (Module * m) {
    62 
    63     InitializeNativeTarget();
    64     InitializeNativeTargetAsmPrinter();
    65     InitializeNativeTargetAsmParser();
    66 
    67     PassRegistry * Registry = PassRegistry::getPassRegistry();
    68     initializeCore(*Registry);
    69     initializeCodeGen(*Registry);
    70     initializeLowerIntrinsicsPass(*Registry);
    71 
    72     std::string errMessage;
    73     EngineBuilder builder(std::move(std::unique_ptr<Module>(m)));
    74     builder.setErrorStr(&errMessage);
    75     builder.setMCPU(sys::getHostCPUName());
    76     CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
    77     switch (OptLevel) {
    78         case '0': optLevel = CodeGenOpt::None; break;
    79         case '1': optLevel = CodeGenOpt::Less; break;
    80         case '2': optLevel = CodeGenOpt::Default; break;
    81         case '3': optLevel = CodeGenOpt::Aggressive; break;
    82         default: errs() << OptLevel << " is an invalid optimization level.\n";
    83     }
    84     builder.setOptLevel(optLevel);
    85 
    86     if ((strncmp(lGetSystemISA(), "avx2", 4) == 0)) {
    87             std::vector<std::string> attrs;
    88             attrs.push_back("avx2");
    89             builder.setMAttrs(attrs);
    90     }
    91 
    92     // builder.selectTarget();
    93 
    94     //builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
    95     ExecutionEngine * engine = builder.create();
    96     if (engine == nullptr) {
    97         throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
    98     }
    99     return engine;
    100 }
    10136
    10237
     
    13267    Module * M = new Module("casefold", getGlobalContext());
    13368   
    134     IDISA::IDISA_Builder * idb = GetIDISA_Builder(M);
     69    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
    13570
    13671    kernel::PipelineBuilder pipelineBuilder(M, idb);
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5030 r5033  
    88#include <IDISA/idisa_builder.h>
    99#include <IDISA/idisa_target.h>
     10#include <llvm/Support/CommandLine.h>
    1011#include <re/re_toolchain.h>
     12#include <re/re_cc.h>
     13
    1114#include <pablo/pablo_toolchain.h>
    1215#include <toolchain.h>
     
    5053
    5154#include <kernels/kernel.h>
     55
     56static cl::OptionCategory bGrepOutputOptions("Output Options",
     57                                             "These options control the output.");
     58
     59static cl::opt<bool> NormalizeLineBreaks("normalize-line-breaks", cl::desc("Normalize line breaks to std::endl."), cl::init(false),  cl::cat(bGrepOutputOptions));
     60
     61static cl::opt<bool> ShowFileNames("H", cl::desc("Show the file name with each matching line."), cl::cat(bGrepOutputOptions));
     62static cl::alias ShowFileNamesLong("with-filename", cl::desc("Alias for -H"), cl::aliasopt(ShowFileNames));
     63
     64static cl::opt<bool> ShowLineNumbers("n", cl::desc("Show the line number with each matching line."), cl::cat(bGrepOutputOptions));
     65static cl::alias ShowLineNumbersLong("line-number", cl::desc("Alias for -n"), cl::aliasopt(ShowLineNumbers));
     66
    5267
    5368
     
    111126    Module * M = new Module(moduleName, getGlobalContext());
    112127   
    113     IDISA::IDISA_Builder * idb = GetIDISA_Builder(M);
     128    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
    114129
    115130    kernel::PipelineBuilder pipelineBuilder(M, idb);
     
    126141
    127142    mEngine = JIT_to_ExecutionEngine(M);
    128    
     143    ApplyObjectCache(mEngine);
    129144    icgrep_Linking(M, mEngine);
     145   
    130146    #ifndef NDEBUG
    131147    verifyModule(*M, &dbgs());
     
    143159}
    144160
     161
     162
    145163re::CC *  GrepEngine::grepCodepoints() {
    146164
     
    161179    delete mEngine;
    162180}
     181
     182
     183static int * total_count;
     184static std::stringstream * resultStrs = nullptr;
     185static std::vector<std::string> inputFiles;
     186
     187void initResult(std::vector<std::string> filenames){
     188    const int n = filenames.size();
     189    if (n > 1) {
     190        ShowFileNames = true;
     191    }
     192    inputFiles = filenames;
     193    resultStrs = new std::stringstream[n];
     194    total_count = new int[n];
     195    for (int i=0; i<inputFiles.size(); i++){
     196        total_count[i] = 0;
     197    }
     198   
     199}
     200
     201extern "C" {
     202    void wrapped_report_match(uint64_t lineNum, uint64_t line_start, uint64_t line_end, const char * buffer, uint64_t filesize, int fileIdx) {
     203       
     204        int idx = fileIdx;
     205       
     206        if (ShowFileNames) {
     207            resultStrs[idx] << inputFiles[idx] << ':';
     208        }
     209        if (ShowLineNumbers) {
     210            resultStrs[idx] << lineNum << ":";
     211        }
     212       
     213        if ((buffer[line_start] == 0xA) && (line_start != line_end)) {
     214            // The line "starts" on the LF of a CRLF.  Really the end of the last line.
     215            line_start++;
     216        }
     217        if (line_end == filesize) {
     218            // The match position is at end-of-file.   We have a final unterminated line.
     219            resultStrs[idx].write(&buffer[line_start], line_end - line_start);
     220            if (NormalizeLineBreaks) {
     221                resultStrs[idx] << '\n';  // terminate it
     222            }
     223            return;
     224        }
     225        unsigned char end_byte = (unsigned char)buffer[line_end];
     226        if (NormalizeLineBreaks) {
     227            if (end_byte == 0x85) {
     228                // Line terminated with NEL, on the second byte.  Back up 1.
     229                line_end--;
     230            } else if (end_byte > 0xD) {
     231                // Line terminated with PS or LS, on the third byte.  Back up 2.
     232                line_end -= 2;
     233            }
     234            resultStrs[idx].write(&buffer[line_start], line_end - line_start);
     235            resultStrs[idx] << '\n';
     236        }
     237        else{   
     238            if (end_byte == 0x0D) {
     239                // Check for line_end on first byte of CRLF;  note that we don't
     240                // want to access past the end of buffer.
     241                if ((line_end + 1 < filesize) && (buffer[line_end + 1] == 0x0A)) {
     242                    // Found CRLF; preserve both bytes.
     243                    line_end++;
     244                }
     245            }
     246            resultStrs[idx].write(&buffer[line_start], line_end - line_start + 1);
     247        }
     248    }
     249}
     250
     251void PrintResult(bool CountOnly, std::vector<int> & total_CountOnly){
     252    if(CountOnly){
     253        if (!ShowFileNames) {
     254            for (int i=0; i<inputFiles.size(); i++){
     255                std::cout << total_CountOnly[i] << std::endl;
     256            }
     257        }
     258        else {
     259            for (int i=0; i<inputFiles.size(); i++){
     260                std::cout << inputFiles[i] << ':' << total_CountOnly[i] << std::endl;
     261            };
     262        }
     263        return;
     264    }
     265   
     266    std::string out;
     267    for (int i=0; i<inputFiles.size(); i++){
     268        std::cout << resultStrs[i].str();
     269    }
     270}
     271
     272re::CC * parsedCodePointSet;
     273
     274extern "C" {
     275    void insert_codepoints(uint64_t lineNum, uint64_t line_start, uint64_t line_end, const char * buffer) {
     276        re::codepoint_t c = 0;
     277        ssize_t line_pos = line_start;
     278        while (isxdigit(buffer[line_pos])) {
     279            if (isdigit(buffer[line_pos])) {
     280                c = (c << 4) | (buffer[line_pos] - '0');
     281            }
     282            else {
     283                c = (c << 4) | (tolower(buffer[line_pos]) - 'a' + 10);
     284            }
     285            line_pos++;
     286        }
     287        assert(((line_pos - line_start) >= 4) && ((line_pos - line_start) <= 6)); // UCD format 4 to 6 hex digits.       
     288        parsedCodePointSet->insert(c);
     289    }
     290}
     291
     292void setParsedCodePointSet(){
     293    parsedCodePointSet = re::makeCC();
     294}
     295
     296re::CC * getParsedCodePointSet(){
     297    return parsedCodePointSet;
     298}
     299
     300
     301
     302
     303void icgrep_Linking(Module * m, ExecutionEngine * e) {
     304    Module::FunctionListType & fns = m->getFunctionList();
     305    for (Module::FunctionListType::iterator it = fns.begin(), it_end = fns.end(); it != it_end; ++it) {
     306        std::string fnName = it->getName().str();
     307        if (fnName == "s2p_block") continue;
     308        if (fnName == "process_block") continue;
     309        if (fnName == "process_block_initialize_carries") continue;
     310       
     311        if (fnName == "wrapped_report_match") {
     312            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_report_match);
     313        }
     314        if (fnName == "insert_codepoints") {
     315            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&insert_codepoints);
     316        }
     317#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
     318        else {
     319            const UCD::ExternalProperty & ep = UCD::resolveExternalProperty(fnName);
     320            e->addGlobalMapping(cast<GlobalValue>(it), std::get<0>(ep));
     321        }
     322#endif
     323    }
     324}
     325
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5025 r5033  
    1414
    1515namespace llvm { class raw_ostream; }
     16
     17
    1618
    1719class GrepEngine {
     
    4143};
    4244
     45void icgrep_Linking(Module * m, ExecutionEngine * e);
     46
     47
     48re::CC * getParsedCodePointSet();
     49void setParsedCodePointSet();
     50
     51void initResult(std::vector<std::string> filenames);
     52void PrintResult(bool CountOnly, std::vector<int> & total_CountOnly);
    4353
    4454#endif
  • icGREP/icgrep-devel/icgrep/kernels/casefold_pipeline.cpp

    r5014 r5033  
    44 */
    55
     6#include <toolchain.h>
    67#include <kernels/casefold_pipeline.h>
    78#include <utf_encoding.h>
     
    1516#include <pablo/pablo_compiler.h>
    1617#include <pablo/pablo_toolchain.h>
    17 
    18 static cl::opt<unsigned> SegmentSize("segment-size", cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
    1918
    2019
     
    3837
    3938void PipelineBuilder::CreateKernels(PabloFunction * function){
    40     mS2PKernel = new KernelBuilder(iBuilder, "s2p", SegmentSize);
    41     mP2SKernel = new KernelBuilder(iBuilder, "p2s", SegmentSize);
    42     mCaseFoldKernel = new KernelBuilder(iBuilder, "casefold", SegmentSize);
    43     mStdOutKernel = new KernelBuilder(iBuilder, "stddout", SegmentSize);
     39    mS2PKernel = new KernelBuilder(iBuilder, "s2p", codegen::SegmentSize);
     40    mP2SKernel = new KernelBuilder(iBuilder, "p2s", codegen::SegmentSize);
     41    mCaseFoldKernel = new KernelBuilder(iBuilder, "casefold", codegen::SegmentSize);
     42    mStdOutKernel = new KernelBuilder(iBuilder, "stddout", codegen::SegmentSize);
    4443
    4544    generateS2PKernel(mMod, iBuilder, mS2PKernel);
     
    8382    BasicBlock * segmentCondBlock = nullptr;
    8483    BasicBlock * segmentBodyBlock = nullptr;
    85     const unsigned segmentSize = SegmentSize;
     84    const unsigned segmentSize = codegen::SegmentSize;
    8685    if (segmentSize > 1) {
    8786        segmentCondBlock = BasicBlock::Create(mMod->getContext(), "segmentCond", main, 0);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5025 r5033  
    44 */
    55
     6
     7#include <toolchain.h>
    68#include "pipeline.h"
    79#include "utf_encoding.h"
     
    1517#include <pablo/pablo_toolchain.h>
    1618
    17 #include <llvm/Support/CommandLine.h>
    1819#include <llvm/IR/Intrinsics.h>
    19 
    20 static cl::opt<unsigned> SegmentSize("segment-size", cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
    2120
    2221using namespace pablo;
     
    3837
    3938void PipelineBuilder::CreateKernels(PabloFunction * function, bool isNameExpression){
    40     mS2PKernel = new KernelBuilder(iBuilder, "s2p", SegmentSize);
    41     mICgrepKernel = new KernelBuilder(iBuilder, "icgrep", SegmentSize);
    42     mScanMatchKernel = new KernelBuilder(iBuilder, "scanMatch", SegmentSize);
     39    mS2PKernel = new KernelBuilder(iBuilder, "s2p", codegen::SegmentSize);
     40    mICgrepKernel = new KernelBuilder(iBuilder, "icgrep", codegen::SegmentSize);
     41    mScanMatchKernel = new KernelBuilder(iBuilder, "scanMatch", codegen::SegmentSize);
    4342    generateS2PKernel(mMod, iBuilder, mS2PKernel);
    4443    generateScanMatch(mMod, iBuilder, 64, mScanMatchKernel, isNameExpression);
     
    9796    BasicBlock * segmentCondBlock = nullptr;
    9897    BasicBlock * segmentBodyBlock = nullptr;
    99     const unsigned segmentSize = SegmentSize;
     98    const unsigned segmentSize = codegen::SegmentSize;
    10099    if (segmentSize > 1) {
    101100        segmentCondBlock = BasicBlock::Create(mMod->getContext(), "segmentCond", main, 0);
  • icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp

    r5030 r5033  
    4444                              clEnumVal(DisableUnicodeMatchStar, "disable Unicode MatchStar optimization"),
    4545                              clEnumVal(DisableUnicodeLineBreak, "disable Unicode line breaks - use LF only"),
    46                               clEnumVal(InvertMatches, "select non-matching lines"),
     46                              clEnumValN(InvertMatches, "v", "select non-matching lines"),
    4747#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
    4848                              clEnumVal(UsePregeneratedUnicode, "use fixed pregenerated Unicode character class sets instead"),
     
    5050                              clEnumValEnd),
    5151                   cl::cat(RegexOptions));
    52    
     52
    5353bool AlgorithmOptionIsSet(RE_AlgorithmFlags flag) {
    5454    return AlgorithmOptions.isSet(flag);
  • icGREP/icgrep-devel/icgrep/symboltable.cpp

    r5026 r5033  
    5757    Module * M = new Module("symboltable", getGlobalContext());
    5858
    59     IDISA::IDISA_Builder * idb = GetIDISA_Builder(M);
     59    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
    6060
    6161    kernel::SymbolTableBuilder pipelineBuilder(M, idb);
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r5025 r5033  
    11/*
    2  *  Copyright (c) 2015 International Characters.
     2 *  Copyright (c) 2016 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 *  icgrep is a trademark of International Characters.
     
    1010#include <sstream>
    1111
     12#include <toolchain.h>
    1213#include <llvm/IR/Function.h>
    1314#include <llvm/IR/Module.h>
     
    2223#include <llvm/Support/raw_ostream.h>
    2324
    24 // Dynamic processor detection
    25 #define ISPC_LLVM_VERSION ISPC_LLVM_3_6
    26 #include <util/ispc.cpp>
     25#include <util/ispc.h>
    2726
    28 #include <re/re_cc.h>
    2927#include <object_cache.h>
    3028
    31 static cl::OptionCategory bGrepOutputOptions("Output Options",
    32                                       "These options control the output.");
     29using namespace llvm;
    3330
    34 static cl::opt<bool> NormalizeLineBreaks("normalize-line-breaks", cl::desc("Normalize line breaks to std::endl."), cl::init(false),  cl::cat(bGrepOutputOptions));
     31namespace codegen {
    3532
    36 static cl::opt<bool> ShowFileNames("H", cl::desc("Show the file name with each matching line."), cl::cat(bGrepOutputOptions));
    37 static cl::alias ShowFileNamesLong("with-filename", cl::desc("Alias for -H"), cl::aliasopt(ShowFileNames));
    38 
    39 static cl::opt<bool> ShowLineNumbers("n", cl::desc("Show the line number with each matching line."), cl::cat(bGrepOutputOptions));
    40 static cl::alias ShowLineNumbersLong("line-number", cl::desc("Alias for -n"), cl::aliasopt(ShowLineNumbers));
    41 
    42 static cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
    43 static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("Print LLVM IR generated by Pablo Compiler."), cl::cat(eIRDumpOptions));
    44 static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(eIRDumpOptions));
     33static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
    4534
    4635
    47 static cl::OptionCategory cMachineCodeOptimization("Machine Code Optimizations", "These options control back-end compilier optimization levels.");
     36static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("Print LLVM IR generated by Pablo Compiler."), cl::cat(CodeGenOptions));
     37static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
     38
     39char OptLevel;
     40static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
     41                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
    4842
    4943
    50 static cl::opt<char> OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"),
    51                               cl::cat(cMachineCodeOptimization), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
     44static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(false), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
     45
     46static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
    5247
    5348
    54 static cl::OptionCategory cObjectCache("Object Caching", "These options control back-end object caching behaviours.");
     49int BlockSize;
     50int SegmentSize;
    5551
    56 static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(false), cl::desc("Enable object caching"), cl::cat(cObjectCache));
     52static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
     53static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
    5754
    58 static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(cObjectCache));
     55const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
    5956
    60 
     57}
    6158
    6259ExecutionEngine * JIT_to_ExecutionEngine (Module * m) {
     
    8178    builder.setTargetOptions(opts);
    8279    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
    83     switch (OptLevel) {
     80    switch (codegen::OptLevel) {
    8481        case '0': optLevel = CodeGenOpt::None; break;
    8582        case '1': optLevel = CodeGenOpt::Less; break;
    8683        case '2': optLevel = CodeGenOpt::Default; break;
    8784        case '3': optLevel = CodeGenOpt::Aggressive; break;
    88         default: errs() << OptLevel << " is an invalid optimization level.\n";
     85        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
    8986    }
    9087    builder.setOptLevel(optLevel);
     
    9794    // builder.selectTarget();
    9895
    99     if (LLVM_UNLIKELY(DumpGeneratedIR)) {
    100         if (IROutputFilename.empty()) {
     96    if (LLVM_UNLIKELY(codegen::DumpGeneratedIR)) {
     97        if (codegen::IROutputFilename.empty()) {
    10198            m->dump();
    10299        } else {
    103100            std::error_code error;
    104             llvm::raw_fd_ostream out(IROutputFilename, error, sys::fs::OpenFlags::F_None);
     101            llvm::raw_fd_ostream out(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
    105102            m->print(out, nullptr);
    106103        }
     
    108105
    109106    ExecutionEngine * engine = builder.create();
    110     ICGrepObjectCache * cache = nullptr;
    111107    if (engine == nullptr) {
    112108        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
    113     }
    114     if (EnableObjectCache) {
    115         if (ObjectCacheDir.empty())
     109    }   
     110    return engine;
     111}
     112
     113void ApplyObjectCache(ExecutionEngine * e) {
     114    ICGrepObjectCache * cache = nullptr;
     115    if (codegen::EnableObjectCache) {
     116        if (codegen::ObjectCacheDir.empty())
    116117            // Default is $HOME/.cache/icgrep
    117118            cache = new ICGrepObjectCache();
    118119        else
    119             cache = new ICGrepObjectCache(ObjectCacheDir);
    120         engine->setObjectCache(cache);
    121     }
    122     return engine;
     120            cache = new ICGrepObjectCache(codegen::ObjectCacheDir);
     121        e->setObjectCache(cache);
     122    }   
    123123}
    124 
    125 
    126 
    127 static int * total_count;
    128 static std::stringstream * resultStrs = nullptr;
    129 static std::vector<std::string> inputFiles;
    130 
    131 void initResult(std::vector<std::string> filenames){
    132     const int n = filenames.size();
    133     if (n > 1) {
    134         ShowFileNames = true;
    135     }
    136     inputFiles = filenames;
    137     resultStrs = new std::stringstream[n];
    138     total_count = new int[n];
    139     for (int i=0; i<inputFiles.size(); i++){
    140         total_count[i] = 0;
    141     }
    142 
    143 }
    144 
    145 extern "C" {
    146     void wrapped_report_match(uint64_t lineNum, uint64_t line_start, uint64_t line_end, const char * buffer, uint64_t filesize, int fileIdx) {
    147 
    148         int idx = fileIdx;
    149 
    150         if (ShowFileNames) {
    151             resultStrs[idx] << inputFiles[idx] << ':';
    152         }
    153         if (ShowLineNumbers) {
    154             resultStrs[idx] << lineNum << ":";
    155         }
    156 
    157         if ((buffer[line_start] == 0xA) && (line_start != line_end)) {
    158             // The line "starts" on the LF of a CRLF.  Really the end of the last line.
    159             line_start++;
    160         }
    161         if (line_end == filesize) {
    162             // The match position is at end-of-file.   We have a final unterminated line.
    163             resultStrs[idx].write(&buffer[line_start], line_end - line_start);
    164             if (NormalizeLineBreaks) {
    165                 resultStrs[idx] << '\n';  // terminate it
    166             }
    167             return;
    168         }
    169         unsigned char end_byte = (unsigned char)buffer[line_end];
    170         if (NormalizeLineBreaks) {
    171             if (end_byte == 0x85) {
    172                 // Line terminated with NEL, on the second byte.  Back up 1.
    173                 line_end--;
    174             } else if (end_byte > 0xD) {
    175                 // Line terminated with PS or LS, on the third byte.  Back up 2.
    176                 line_end -= 2;
    177             }
    178             resultStrs[idx].write(&buffer[line_start], line_end - line_start);
    179             resultStrs[idx] << '\n';
    180         }
    181         else{   
    182             if (end_byte == 0x0D) {
    183                 // Check for line_end on first byte of CRLF;  note that we don't
    184                 // want to access past the end of buffer.
    185                 if ((line_end + 1 < filesize) && (buffer[line_end + 1] == 0x0A)) {
    186                     // Found CRLF; preserve both bytes.
    187                     line_end++;
    188                 }
    189             }
    190             resultStrs[idx].write(&buffer[line_start], line_end - line_start + 1);
    191         }
    192     }
    193 }
    194 
    195 void PrintResult(bool CountOnly, std::vector<int> & total_CountOnly){
    196     if(CountOnly){
    197         if (!ShowFileNames) {
    198            for (int i=0; i<inputFiles.size(); i++){
    199                std::cout << total_CountOnly[i] << std::endl;
    200            }
    201         }
    202         else {
    203             for (int i=0; i<inputFiles.size(); i++){
    204                 std::cout << inputFiles[i] << ':' << total_CountOnly[i] << std::endl;
    205             };
    206         }
    207         return;
    208     }
    209 
    210     std::string out;
    211     for (int i=0; i<inputFiles.size(); i++){
    212         std::cout << resultStrs[i].str();
    213     }
    214 }
    215 
    216 re::CC * parsedCodePointSet;
    217 
    218 extern "C" {
    219     void insert_codepoints(uint64_t lineNum, uint64_t line_start, uint64_t line_end, const char * buffer) {
    220        re::codepoint_t c = 0;
    221         ssize_t line_pos = line_start;
    222         while (isxdigit(buffer[line_pos])) {
    223             if (isdigit(buffer[line_pos])) {
    224                 c = (c << 4) | (buffer[line_pos] - '0');
    225             }
    226             else {
    227                 c = (c << 4) | (tolower(buffer[line_pos]) - 'a' + 10);
    228             }
    229             line_pos++;
    230         }
    231         assert(((line_pos - line_start) >= 4) && ((line_pos - line_start) <= 6)); // UCD format 4 to 6 hex digits.       
    232         parsedCodePointSet->insert(c);
    233     }
    234 }
    235 
    // Point the file-level parsedCodePointSet at the object returned by
    // re::makeCC().  Any pointer stored previously is overwritten and is not
    // released here.
    236 void setParsedCodePointSet(){
    237     parsedCodePointSet = re::makeCC();
    238 }
    239 
    // Return the current parsedCodePointSet pointer, i.e. the set that
    // insert_codepoints inserts into.  No copy is made.
    240 re::CC * getParsedCodePointSet(){
    241     return parsedCodePointSet;
    242 }
    243 
    244 void icgrep_Linking(Module * m, ExecutionEngine * e) {
    245     Module::FunctionListType & fns = m->getFunctionList();
    246     for (Module::FunctionListType::iterator it = fns.begin(), it_end = fns.end(); it != it_end; ++it) {
    247         std::string fnName = it->getName().str();
    248         if (fnName == "s2p_block") continue;
    249         if (fnName == "process_block") continue;
    250         if (fnName == "process_block_initialize_carries") continue;
    251        
    252         if (fnName == "wrapped_report_match") {
    253             e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_report_match);
    254         }
    255         if (fnName == "insert_codepoints") {
    256             e->addGlobalMapping(cast<GlobalValue>(it), (void *)&insert_codepoints);
    257         }
    258 #ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
    259         else {
    260             const UCD::ExternalProperty & ep = UCD::resolveExternalProperty(fnName);
    261             e->addGlobalMapping(cast<GlobalValue>(it), std::get<0>(ep));
    262         }
    263 #endif
    264     }
    265 }
  • icGREP/icgrep-devel/icgrep/toolchain.h

    r5025 r5033  
    88#define TOOLCHAIN_H
    99
    10 #include <re/re_re.h>
    1110#include <llvm/IR/Module.h>
    1211#include <llvm/ExecutionEngine/ExecutionEngine.h>
     12#include <llvm/Support/CommandLine.h>
    1313
     14namespace codegen {
     15const llvm::cl::OptionCategory * codegen_flags();
    1416
    15 Module * grepIRGen(std::string moduleName, re::RE * re_ast, bool isNameExpression = false);
     17// Command Parameters
     18extern char OptLevel;  // set from command line
     19extern int BlockSize;  // set from command line
     20extern int SegmentSize;  // set from command line
    1621
    17 ExecutionEngine * JIT_to_ExecutionEngine (Module * m);
     22}
    1823
    19 void icgrep_Linking(Module * m, ExecutionEngine * e);
     24llvm::ExecutionEngine * JIT_to_ExecutionEngine (llvm::Module * m);
    2025
    21 re::CC * getParsedCodePointSet();
    22 void setParsedCodePointSet();
    23 
    24 void initResult(std::vector<std::string> filenames);
    25 void PrintResult(bool CountOnly, std::vector<int> & total_CountOnly);
     26void ApplyObjectCache(llvm::ExecutionEngine * e);
    2627
    2728#endif
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r5026 r5033  
    1919
    2020#include <llvm/Support/CommandLine.h>
    21 #include <llvm/CodeGen/CommandFlags.h>
    22 #include <llvm/Support/SourceMgr.h>
    23 #include <llvm/Support/TargetSelect.h>
    24 #include <llvm/Support/Host.h>
    25 #include <llvm/Support/raw_ostream.h>
    26 
     21
     22#include <toolchain.h>
    2723#include <re/re_cc.h>
    2824#include <cc/cc_compiler.h>
     
    3228#include <kernels/u8u16_pipeline.h>
    3329
    34 // Dynamic processor detection
    35 #define ISPC_LLVM_VERSION ISPC_LLVM_3_6
    36 #include <util/ispc.cpp>
    37 
    3830#include <utf_encoding.h>
    3931
     
    4840static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore);
    4941
    50 static cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
    51 static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("Print LLVM IR generated by Pablo Compiler."), cl::cat(eIRDumpOptions));
    52 
    53 static cl::OptionCategory cMachineCodeOptimization("Machine Code Optimizations", "These options control back-end compilier optimization levels.");
    54 
    55 static cl::opt<char> OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O0')"),
    56                               cl::cat(cMachineCodeOptimization), cl::Prefix, cl::ZeroOrMore, cl::init('0'));
    57 
    58 
    59 //
    60 //  Functions taken from toolchain.cpp and modified for casefold
    61 //  JIT_to_ExecutionEngine : remove object cache
    62 //  icgrep_Linking:   unneeded?
    63 //  all others: definitely unneeded
    64 //
    65 
    66 ExecutionEngine * JIT_to_ExecutionEngine (Module * m) {
    67 
    68     InitializeNativeTarget();
    69     InitializeNativeTargetAsmPrinter();
    70     InitializeNativeTargetAsmParser();
    71 
    72     PassRegistry * Registry = PassRegistry::getPassRegistry();
    73     initializeCore(*Registry);
    74     initializeCodeGen(*Registry);
    75     initializeLowerIntrinsicsPass(*Registry);
    76 
    77     std::string errMessage;
    78     EngineBuilder builder(std::move(std::unique_ptr<Module>(m)));
    79     builder.setErrorStr(&errMessage);
    80     builder.setMCPU(sys::getHostCPUName());
    81     CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
    82     switch (OptLevel) {
    83         case '0': optLevel = CodeGenOpt::None; break;
    84         case '1': optLevel = CodeGenOpt::Less; break;
    85         case '2': optLevel = CodeGenOpt::Default; break;
    86         case '3': optLevel = CodeGenOpt::Aggressive; break;
    87         default: errs() << OptLevel << " is an invalid optimization level.\n";
    88     }
    89     builder.setOptLevel(optLevel);
    90 
    91     if ((strncmp(lGetSystemISA(), "avx2", 4) == 0)) {
    92             std::vector<std::string> attrs;
    93             attrs.push_back("avx2");
    94             builder.setMAttrs(attrs);
    95     }
    96 
    97     // builder.selectTarget();
    98 
    99     //builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
    100     ExecutionEngine * engine = builder.create();
    101     if (engine == nullptr) {
    102         throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
    103     }
    104     return engine;
    105 }
    10642
    10743
     
    283219    Module * M = new Module("u8u16", getGlobalContext());
    284220   
    285     IDISA::IDISA_Builder * idb = GetIDISA_Builder(M);
     221    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
    286222
    287223    kernel::PipelineBuilder pipelineBuilder(M, idb);
     
    295231
    296232    llvm::Function * main_IR = pipelineBuilder.ExecuteKernels();
    297    
    298     if (DumpGeneratedIR) {
    299         M->dump();
    300     }
    301233   
    302234    verifyModule(*M, &dbgs());
  • icGREP/icgrep-devel/icgrep/util/ispc.cpp

    r4983 r5033  
    11/*
    2   Copyright (c) 2010-2015, Intel Corporation
     2  Copyright (c) 2010-2016, Intel Corporation
    33  All rights reserved.
    44
     
    3636*/
    3737
    38 #if 0
    3938#include "ispc.h"
    40 #include "module.h"
    41 #include "util.h"
    42 #include "llvmutil.h"
    43 #include <stdio.h>
    44 #include <sstream>
    45 #include <stdarg.h>     /* va_list, va_start, va_arg, va_end */
    46 #ifdef ISPC_IS_WINDOWS
    47   #include <windows.h>
    48   #include <direct.h>
    49   #define strcasecmp stricmp
    50 #else
    51   #include <sys/types.h>
    52   #include <unistd.h>
    53 #endif
    54 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
    55   #include <llvm/LLVMContext.h>
    56   #include <llvm/Module.h>
    57   #include <llvm/Instructions.h>
    58 #else /* 3.3+ */
    59   #include <llvm/IR/LLVMContext.h>
    60   #include <llvm/IR/Module.h>
    61   #include <llvm/IR/Instructions.h>
    62 #endif
    63 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
    64   #include <llvm/Target/TargetSubtargetInfo.h>
    65   #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
    66     #include <llvm/Target/TargetLowering.h>
    67   #endif
    68 #endif
    69 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // LLVM 3.5+
    70   #include <llvm/IR/DebugInfo.h>
    71   #include <llvm/IR/DIBuilder.h>
    72 #else // LLVM 3.2, 3.3, 3.4
    73   #include <llvm/DebugInfo.h>
    74   #include <llvm/DIBuilder.h>
    75 #endif
    76 #include <llvm/Support/Dwarf.h>
    77 #include <llvm/Target/TargetMachine.h>
    78 #include <llvm/Target/TargetOptions.h>
    79 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
    80   #include <llvm/DataLayout.h>
    81 #else // LLVM 3.3+
    82   #include <llvm/IR/DataLayout.h>
    83   #include <llvm/IR/Attributes.h>
    84 #endif
    85 #include <llvm/Support/TargetRegistry.h>
    86 #include <llvm/Support/TargetSelect.h>
    87 #include <llvm/Support/Host.h>
    88 
    89 Globals *g;
    90 Module *m;
    91 */
    92 #endif
     39#include <stdlib.h>
    9340
    9441///////////////////////////////////////////////////////////////////////////
     
    11360
    11461#if !defined(__arm__)
    115 static bool __os_has_avx_support() {
     62bool __os_has_avx_support() {
    11663#if defined(ISPC_IS_WINDOWS)
    11764    // Check if the OS will save the YMM registers
     
    12875}
    12976
    130 static bool __os_has_avx512_support() {
     77bool __os_has_avx512_support() {
    13178#if defined(ISPC_IS_WINDOWS)
    13279    // Check if the OS saves the XMM, YMM and ZMM registers, i.e. it supports AVX2 and AVX512.
     
    14592#endif // !__arm__
    14693
    147 static const char *
     94const char *
    14895lGetSystemISA() {
    14996#ifdef __arm__
     
    169116            (info2[1] & (1 << 30)) != 0 && // AVX512 BW
    170117            (info2[1] & (1 << 31)) != 0) { // AVX512 VL
    171             return "skx";
     118            return "avx512skx-i32x16";
    172119        }
    173120        else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
     
    201148        return "sse2-i32x4";
    202149    else {
    203         fprintf(stderr, "Unable to detect supported SSE/AVX ISA.  Exiting.\n");
     150        //Error(SourcePos(), "Unable to detect supported SSE/AVX ISA.  Exiting.");
     151       
    204152        exit(1);
    205153    }
     
    207155}
    208156
    209 #if 0
    210 typedef enum {
    211     // Special value, indicates that no CPU is present.
    212     CPU_None = 0,
    213157
    214     // 'Generic' CPU without any hardware SIMD capabilities.
    215     CPU_Generic = 1,
    216 
    217     // Early Atom CPU. Supports SSSE3.
    218     CPU_Bonnell,
    219 
    220     // Generic Core2-like. Supports SSSE3. Isn`t quite compatible with Bonnell,
    221     // but for ISPC the difference is negligible; ISPC doesn`t make use of it.
    222     CPU_Core2,
    223 
    224     // Core2 Solo/Duo/Quad/Extreme. Supports SSE 4.1 (but not 4.2).
    225     CPU_Penryn,
    226 
    227     // Late Core2-like. Supports SSE 4.2 + POPCNT/LZCNT.
    228     CPU_Nehalem,
    229 
    230     // Sandy Bridge. Supports AVX 1.
    231     CPU_SandyBridge,
    232 
    233     // Ivy Bridge. Supports AVX 1 + RDRAND.
    234     CPU_IvyBridge,
    235 
    236     // Haswell. Supports AVX 2.
    237     CPU_Haswell,
    238 
    239 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
    240     // Broadwell. Supports AVX 2 + ADX/RDSEED/SMAP.
    241     CPU_Broadwell,
    242 #endif
    243 
    244 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
    245     // KNL. Supports AVX512.
    246     CPU_KNL,
    247 #endif
    248 
    249 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
    250     // Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
    251     CPU_Silvermont,
    252 #endif
    253 
    254     // FIXME: LLVM supports a ton of different ARM CPU variants--not just
    255     // cortex-a9 and a15.  We should be able to handle any of them that also
    256     // have NEON support.
    257 #ifdef ISPC_ARM_ENABLED
    258     // ARM Cortex A15. Supports NEON VFPv4.
    259     CPU_CortexA15,
    260 
    261     // ARM Cortex A9. Supports NEON VFPv3.
    262     CPU_CortexA9,
    263 #endif
    264 
    265 #ifdef ISPC_NVPTX_ENABLED
    266     // NVidia CUDA-compatible SM-35 architecture.
    267     CPU_SM35,
    268 #endif
    269 
    270     sizeofCPUtype
    271 } CPUtype;
    272 
    273 
    274 class AllCPUs {
    275 private:
    276     std::vector<std::vector<std::string> > names;
    277     std::vector<std::set<CPUtype> > compat;
    278 
    279     std::set<CPUtype> Set(CPUtype type, ...) {
    280         std::set<CPUtype> retn;
    281         va_list args;
    282 
    283         retn.insert(type);
    284         va_start(args, type);
    285         while ((type = (CPUtype)va_arg(args, int)) != CPU_None)
    286             retn.insert(type);
    287         va_end(args);
    288 
    289         return retn;
    290     }
    291 
    292 public:
    293     AllCPUs() {
    294         names = std::vector<std::vector<std::string> >(sizeofCPUtype);
    295         compat = std::vector<std::set<CPUtype> >(sizeofCPUtype);
    296 
    297         names[CPU_None].push_back("");
    298 
    299         names[CPU_Generic].push_back("generic");
    300 
    301         names[CPU_Bonnell].push_back("atom");
    302         names[CPU_Bonnell].push_back("bonnell");
    303 
    304         names[CPU_Core2].push_back("core2");
    305 
    306         names[CPU_Penryn].push_back("penryn");
    307 
    308 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
    309         names[CPU_Silvermont].push_back("slm");
    310         names[CPU_Silvermont].push_back("silvermont");
    311 #endif
    312 
    313         names[CPU_Nehalem].push_back("corei7");
    314         names[CPU_Nehalem].push_back("nehalem");
    315 
    316         names[CPU_SandyBridge].push_back("corei7-avx");
    317         names[CPU_SandyBridge].push_back("sandybridge");
    318 
    319         names[CPU_IvyBridge].push_back("core-avx-i");
    320         names[CPU_IvyBridge].push_back("ivybridge");
    321 
    322         names[CPU_Haswell].push_back("core-avx2");
    323         names[CPU_Haswell].push_back("haswell");
    324 
    325 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
    326         names[CPU_Broadwell].push_back("broadwell");
    327 #endif
    328 
    329 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
    330          names[CPU_KNL].push_back("knl");
    331 #endif
    332 
    333 #ifdef ISPC_ARM_ENABLED
    334         names[CPU_CortexA15].push_back("cortex-a15");
    335 
    336         names[CPU_CortexA9].push_back("cortex-a9");
    337 #endif
    338 
    339 #ifdef ISPC_NVPTX_ENABLED
    340         names[CPU_SM35].push_back("sm_35");
    341 #endif
    342 
    343 
    344 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 // LLVM 3.2 or 3.3
    345         #define CPU_Silvermont CPU_Nehalem
    346 #else /* LLVM 3.4+ */
    347         compat[CPU_Silvermont]  = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
    348                                       CPU_Core2, CPU_Nehalem, CPU_Silvermont,
    349                                       CPU_None);
    350 #endif
    351 
    352 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
    353         compat[CPU_KNL]         = Set(CPU_KNL, CPU_Generic, CPU_Bonnell, CPU_Penryn,
    354                                       CPU_Core2, CPU_Nehalem, CPU_Silvermont,
    355                                       CPU_SandyBridge, CPU_IvyBridge,
    356                                       CPU_Haswell, CPU_Broadwell, CPU_None);
    357 #endif
    358 
    359 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // LLVM 3.2, 3.3, 3.4 or 3.5
    360         #define CPU_Broadwell CPU_Haswell
    361 #else /* LLVM 3.6+ */
    362         compat[CPU_Broadwell]   = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
    363                                       CPU_Core2, CPU_Nehalem, CPU_Silvermont,
    364                                       CPU_SandyBridge, CPU_IvyBridge,
    365                                       CPU_Haswell, CPU_Broadwell, CPU_None);
    366 #endif
    367         compat[CPU_Haswell]     = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
    368                                       CPU_Core2, CPU_Nehalem, CPU_Silvermont,
    369                                       CPU_SandyBridge, CPU_IvyBridge,
    370                                       CPU_Haswell, CPU_Broadwell, CPU_None);
    371         compat[CPU_IvyBridge]   = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
    372                                       CPU_Core2, CPU_Nehalem, CPU_Silvermont,
    373                                       CPU_SandyBridge, CPU_IvyBridge,
    374                                       CPU_None);
    375         compat[CPU_SandyBridge] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
    376                                       CPU_Core2, CPU_Nehalem, CPU_Silvermont,
    377                                       CPU_SandyBridge, CPU_None);
    378         compat[CPU_Nehalem]     = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
    379                                       CPU_Core2, CPU_Nehalem, CPU_Silvermont,
    380                                       CPU_None);
    381         compat[CPU_Penryn]      = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
    382                                       CPU_Core2, CPU_Nehalem, CPU_Silvermont,
    383                                       CPU_None);
    384         compat[CPU_Core2]       = Set(CPU_Generic, CPU_Bonnell, CPU_Core2,
    385                                       CPU_None);
    386         compat[CPU_Bonnell]     = Set(CPU_Generic, CPU_Bonnell, CPU_Core2,
    387                                       CPU_None);
    388         compat[CPU_Generic]     = Set(CPU_Generic, CPU_None);
    389 
    390 #ifdef ISPC_ARM_ENABLED
    391         compat[CPU_CortexA15]   = Set(CPU_Generic, CPU_CortexA9, CPU_CortexA15,
    392                                       CPU_None);
    393         compat[CPU_CortexA9]    = Set(CPU_Generic, CPU_CortexA9, CPU_None);
    394 #endif
    395 
    396 #ifdef ISPC_NVPTX_ENABLED
    397         compat[CPU_SM35]        = Set(CPU_Generic, CPU_SM35, CPU_None);
    398 #endif
    399     }
    400 
    401     std::string HumanReadableListOfNames() {
    402         std::stringstream CPUs;
    403         for (int i = CPU_Generic; i < sizeofCPUtype; i++) {
    404             CPUs << names[i][0];
    405             if (names[i].size() > 1) {
    406                 CPUs << " (synonyms: " << names[i][1];
    407                 for (int j = 2, je = names[i].size(); j < je; j++)
    408                     CPUs << ", " << names[i][j];
    409                 CPUs << ")";
    410             }
    411             if (i < sizeofCPUtype - 1)
    412                 CPUs << ", ";
    413         }
    414         return CPUs.str();
    415     }
    416 
    417     std::string &GetDefaultNameFromType(CPUtype type) {
    418         Assert((type >= CPU_None) && (type < sizeofCPUtype));
    419         return names[type][0];
    420     }
    421 
    422     CPUtype GetTypeFromName(std::string name) {
    423         CPUtype retn = CPU_None;
    424 
    425         for (int i = 1; (retn == CPU_None) && (i < sizeofCPUtype); i++)
    426             for (int j = 0, je = names[i].size();
    427                 (retn == CPU_None) && (j < je); j++)
    428                 if (!name.compare(names[i][j]))
    429                     retn = (CPUtype)i;
    430         return retn;
    431     }
    432 
    433     bool BackwardCompatible(CPUtype what, CPUtype with) {
    434         Assert((what > CPU_None) && (what < sizeofCPUtype));
    435         Assert((with > CPU_None) && (with < sizeofCPUtype));
    436         return compat[what].find(with) != compat[what].end();
    437     }
    438 };
    439 
    440 
    441 Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, bool printTarget, std::string genericAsSmth) :
    442     m_target(nullptr),
    443     m_targetMachine(nullptr),
    444     m_dataLayout(nullptr),
    445     m_valid(false),
    446     m_isa(SSE2),
    447     m_treatGenericAsSmth(genericAsSmth),
    448     m_arch(""),
    449     m_is32Bit(true),
    450     m_cpu(""),
    451     m_attributes(""),
    452 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
    453     m_tf_attributes(nullptr),
    454 #endif
    455     m_nativeVectorWidth(-1),
    456     m_nativeVectorAlignment(-1),
    457     m_dataTypeWidth(-1),
    458     m_vectorWidth(-1),
    459     m_generatePIC(pic),
    460     m_maskingIsFree(false),
    461     m_maskBitCount(-1),
    462     m_hasHalf(false),
    463     m_hasRand(false),
    464     m_hasGather(false),
    465     m_hasScatter(false),
    466     m_hasTranscendentals(false),
    467     m_hasTrigonometry(false),
    468     m_hasRsqrtd(false),
    469     m_hasRcpd(false),
    470     m_hasVecPrefetch(false)
    471 {
    472     CPUtype CPUID = CPU_None, CPUfromISA = CPU_None;
    473     AllCPUs a;
    474 
    475     if (cpu) {
    476         CPUID = a.GetTypeFromName(cpu);
    477         if (CPUID == CPU_None) {
    478             Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported"
    479                   " CPUs: %s.", cpu, a.HumanReadableListOfNames().c_str());
    480             return;
    481         }
    482     }
    483 
    484     if (isa == nullptr) {
    485         // If a CPU was specified explicitly, try to pick the best
    486         // possible ISA based on that.
    487         switch (CPUID) {
    488             case CPU_None:
    489                 // No CPU and no ISA, so use system info to figure out
    490                 // what this CPU supports.
    491                 isa = lGetSystemISA();
    492                 Warning(SourcePos(), "No --target specified on command-line."
    493                         " Using default system target \"%s\".", isa);
    494                 break;
    495 
    496             case CPU_Generic:
    497                 isa = "generic-1";
    498                 break;
    499 
    500 #ifdef ISPC_NVPTX_ENABLED
    501             case CPU_SM35:
    502                 isa = "nvptx";
    503                 break;
    504 #endif
    505 
    506 #ifdef ISPC_ARM_ENABLED
    507             case CPU_CortexA9:
    508             case CPU_CortexA15:
    509                 isa = "neon-i32x4";
    510                 break;
    511 #endif
    512 
    513 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
    514             case CPU_KNL:
    515                 isa = "avx512knl-i32x16";
    516                 break;
    517 #endif
    518 
    519 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6
    520             case CPU_Broadwell:
    521 #endif
    522             case CPU_Haswell:
    523                 isa = "avx2-i32x8";
    524                 break;
    525 
    526             case CPU_IvyBridge:
    527                 isa = "avx1.1-i32x8";
    528                 break;
    529 
    530             case CPU_SandyBridge:
    531                 isa = "avx1-i32x8";
    532                 break;
    533 
    534             // Penryn is here because ISPC does not use SSE 4.2
    535             case CPU_Penryn:
    536             case CPU_Nehalem:
    537 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4
    538             case CPU_Silvermont:
    539 #endif
    540                 isa = "sse4-i32x4";
    541                 break;
    542 
    543             default:
    544                 isa = "sse2-i32x4";
    545                 break;
    546         }
    547         if (CPUID != CPU_None)
    548             Warning(SourcePos(), "No --target specified on command-line."
    549                     " Using ISA \"%s\" based on specified CPU \"%s\".",
    550                     isa, cpu);
    551     }
    552 
    553     if (!strcasecmp(isa, "host")) {
    554         isa = lGetSystemISA();
    555     }
    556 
    557     if (arch == nullptr) {
    558 #ifdef ISPC_ARM_ENABLED
    559         if (!strncmp(isa, "neon", 4))
    560             arch = "arm";
    561         else
    562 #endif
    563 #ifdef ISPC_NVPTX_ENABLED
    564          if(!strncmp(isa, "nvptx", 5))
    565            arch = "nvptx64";
    566          else
    567 #endif /* ISPC_NVPTX_ENABLED */
    568             arch = "x86-64";
    569     }
    570 
    571     // Define arch alias
    572     if (std::string(arch) == "x86_64")
    573         arch = "x86-64";
    574 
    575     bool error = false;
    576 
    577     // Make sure the target architecture is a known one; print an error
    578     // with the valid ones otherwise.
    579 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
    580     for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::targets().begin();
    581          iter != llvm::TargetRegistry::targets().end(); ++iter) {
    582 #else
    583     for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::begin();
    584          iter != llvm::TargetRegistry::end(); ++iter) {
    585 #endif
    586         if (std::string(arch) == iter->getName()) {
    587             this->m_target = &*iter;
    588             break;
    589         }
    590     }
    591     if (this->m_target == nullptr) {
    592         fprintf(stderr, "Invalid architecture \"%s\"\nOptions: ", arch);
    593         llvm::TargetRegistry::iterator iter;
    594 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
    595         for (iter = llvm::TargetRegistry::targets().begin();
    596              iter != llvm::TargetRegistry::targets().end(); ++iter)
    597 #else
    598         for (iter = llvm::TargetRegistry::begin();
    599              iter != llvm::TargetRegistry::end(); ++iter)
    600 #endif
    601             fprintf(stderr, "%s ", iter->getName());
    602         fprintf(stderr, "\n");
    603         error = true;
    604     }
    605     else {
    606         this->m_arch = arch;
    607     }
    608 
    609     // Check default LLVM generated targets
    610     if (!strcasecmp(isa, "sse2") ||
    611         !strcasecmp(isa, "sse2-i32x4")) {
    612         this->m_isa = Target::SSE2;
    613         this->m_nativeVectorWidth = 4;
    614         this->m_nativeVectorAlignment = 16;
    615         this->m_dataTypeWidth = 32;
    616         this->m_vectorWidth = 4;
    617         this->m_maskingIsFree = false;
    618         this->m_maskBitCount = 32;
    619         CPUfromISA = CPU_Core2;
    620     }
    621     else if (!strcasecmp(isa, "sse2-x2") ||
    622              !strcasecmp(isa, "sse2-i32x8")) {
    623         this->m_isa = Target::SSE2;
    624         this->m_nativeVectorWidth = 4;
    625         this->m_nativeVectorAlignment = 16;
    626         this->m_dataTypeWidth = 32;
    627         this->m_vectorWidth = 8;
    628         this->m_maskingIsFree = false;
    629         this->m_maskBitCount = 32;
    630         CPUfromISA = CPU_Core2;
    631     }
    632     else if (!strcasecmp(isa, "sse4") ||
    633              !strcasecmp(isa, "sse4-i32x4")) {
    634         this->m_isa = Target::SSE4;
    635         this->m_nativeVectorWidth = 4;
    636         this->m_nativeVectorAlignment = 16;
    637         this->m_dataTypeWidth = 32;
    638         this->m_vectorWidth = 4;
    639         this->m_maskingIsFree = false;
    640         this->m_maskBitCount = 32;
    641         CPUfromISA = CPU_Nehalem;
    642     }
    643     else if (!strcasecmp(isa, "sse4x2") ||
    644              !strcasecmp(isa, "sse4-x2") ||
    645              !strcasecmp(isa, "sse4-i32x8")) {
    646         this->m_isa = Target::SSE4;
    647         this->m_nativeVectorWidth = 4;
    648         this->m_nativeVectorAlignment = 16;
    649         this->m_dataTypeWidth = 32;
    650         this->m_vectorWidth = 8;
    651         this->m_maskingIsFree = false;
    652         this->m_maskBitCount = 32;
    653         CPUfromISA = CPU_Nehalem;
    654     }
    655     else if (!strcasecmp(isa, "sse4-i8x16")) {
    656         this->m_isa = Target::SSE4;
    657         this->m_nativeVectorWidth = 16;
    658         this->m_nativeVectorAlignment = 16;
    659         this->m_dataTypeWidth = 8;
    660         this->m_vectorWidth = 16;
    661         this->m_maskingIsFree = false;
    662         this->m_maskBitCount = 8;
    663         CPUfromISA = CPU_Nehalem;
    664     }
    665     else if (!strcasecmp(isa, "sse4-i16x8")) {
    666         this->m_isa = Target::SSE4;
    667         this->m_nativeVectorWidth = 8;
    668         this->m_nativeVectorAlignment = 16;
    669         this->m_dataTypeWidth = 16;
    670         this->m_vectorWidth = 8;
    671         this->m_maskingIsFree = false;
    672         this->m_maskBitCount = 16;
    673         CPUfromISA = CPU_Nehalem;
    674     }
    675     else if (!strcasecmp(isa, "generic-4") ||
    676              !strcasecmp(isa, "generic-x4")) {
    677         this->m_isa = Target::GENERIC;
    678         this->m_nativeVectorWidth = 4;
    679         this->m_nativeVectorAlignment = 16;
    680         this->m_vectorWidth = 4;
    681         this->m_maskingIsFree = true;
    682         this->m_maskBitCount = 1;
    683         this->m_hasHalf = true;
    684         this->m_hasTranscendentals = true;
    685         this->m_hasTrigonometry = true;
    686         this->m_hasGather = this->m_hasScatter = true;
    687         this->m_hasRsqrtd = this->m_hasRcpd = true;
    688         CPUfromISA = CPU_Generic;
    689     }
    690     else if (!strcasecmp(isa, "generic-8") ||
    691              !strcasecmp(isa, "generic-x8")) {
    692         this->m_isa = Target::GENERIC;
    693         this->m_nativeVectorWidth = 8;
    694         this->m_nativeVectorAlignment = 32;
    695         this->m_vectorWidth = 8;
    696         this->m_maskingIsFree = true;
    697         this->m_maskBitCount = 1;
    698         this->m_hasHalf = true;
    699         this->m_hasTranscendentals = true;
    700         this->m_hasTrigonometry = true;
    701         this->m_hasGather = this->m_hasScatter = true;
    702         this->m_hasRsqrtd = this->m_hasRcpd = true;
    703         CPUfromISA = CPU_Generic;
    704     }
    705     else if (!strcasecmp(isa, "generic-16") ||
    706              !strcasecmp(isa, "generic-x16") ||
    707              // We treat *-generic-16 as generic-16, but with special name mangling
    708              strstr(isa, "-generic-16") ||
    709              strstr(isa, "-generic-x16")) {
    710         this->m_isa = Target::GENERIC;
    711         if (strstr(isa, "-generic-16") ||
    712             strstr(isa, "-generic-x16")) {
    713             // It is used for appropriate name mangling and dispatch function during multitarget compilation
    714             this->m_treatGenericAsSmth = isa;
    715             // We need to create appropriate name for mangling.
    716             // Remove "-x16" or "-16" and replace "-" with "_".
    717             this->m_treatGenericAsSmth = this->m_treatGenericAsSmth.substr(0, this->m_treatGenericAsSmth.find_last_of("-"));
    718             std::replace(this->m_treatGenericAsSmth.begin(), this->m_treatGenericAsSmth.end(), '-', '_');
    719         }
    720         this->m_nativeVectorWidth = 16;
    721         this->m_nativeVectorAlignment = 64;
    722         this->m_vectorWidth = 16;
    723         this->m_maskingIsFree = true;
    724         this->m_maskBitCount = 1;
    725         this->m_hasHalf = true;
    726         this->m_hasTranscendentals = true;
    727         // It's set to false, because stdlib implementation of math functions
    728         // is faster on MIC, than "native" implementation provided by the
    729         // icc compiler.
    730         this->m_hasTrigonometry = false;
    731         this->m_hasGather = this->m_hasScatter = true;
    732         this->m_hasRsqrtd = this->m_hasRcpd = true;
    733         // It's set to true, because MIC has hardware vector prefetch instruction
    734         this->m_hasVecPrefetch = true;
    735         CPUfromISA = CPU_Generic;
    736     }
    737     else if (!strcasecmp(isa, "generic-32") ||
    738              !strcasecmp(isa, "generic-x32")) {
    739         this->m_isa = Target::GENERIC;
    740         this->m_nativeVectorWidth = 32;
    741         this->m_nativeVectorAlignment = 64;
    742         this->m_vectorWidth = 32;
    743         this->m_maskingIsFree = true;
    744         this->m_maskBitCount = 1;
    745         this->m_hasHalf = true;
    746         this->m_hasTranscendentals = true;
    747         this->m_hasTrigonometry = true;
    748         this->m_hasGather = this->m_hasScatter = true;
    749         this->m_hasRsqrtd = this->m_hasRcpd = true;
    750         CPUfromISA = CPU_Generic;
    751     }
    752     else if (!strcasecmp(isa, "generic-64") ||
    753              !strcasecmp(isa, "generic-x64")) {
    754         this->m_isa = Target::GENERIC;
    755         this->m_nativeVectorWidth = 64;
    756         this->m_nativeVectorAlignment = 64;
    757         this->m_vectorWidth = 64;
    758         this->m_maskingIsFree = true;
    759         this->m_maskBitCount = 1;
    760         this->m_hasHalf = true;
    761         this->m_hasTranscendentals = true;
    762         this->m_hasTrigonometry = true;
    763         this->m_hasGather = this->m_hasScatter = true;
    764         this->m_hasRsqrtd = this->m_hasRcpd = true;
    765         CPUfromISA = CPU_Generic;
    766     }
    767     else if (!strcasecmp(isa, "generic-1") ||
    768              !strcasecmp(isa, "generic-x1")) {
    769         this->m_isa = Target::GENERIC;
    770         this->m_nativeVectorWidth = 1;
    771         this->m_nativeVectorAlignment = 16;
    772         this->m_vectorWidth = 1;
    773         this->m_maskingIsFree = false;
    774         this->m_maskBitCount = 32;
    775         CPUfromISA = CPU_Generic;
    776     }
    777     else if (!strcasecmp(isa, "avx1-i32x4")) {
    778         this->m_isa = Target::AVX;
    779         this->m_nativeVectorWidth = 8;
    780         this->m_nativeVectorAlignment = 32;
    781         this->m_dataTypeWidth = 32;
    782         this->m_vectorWidth = 4;
    783         this->m_maskingIsFree = false;
    784         this->m_maskBitCount = 32;
    785         CPUfromISA = CPU_SandyBridge;
    786     }
    787     else if (!strcasecmp(isa, "avx") ||
    788              !strcasecmp(isa, "avx1") ||
    789              !strcasecmp(isa, "avx1-i32x8")) {
    790         this->m_isa = Target::AVX;
    791         this->m_nativeVectorWidth = 8;
    792         this->m_nativeVectorAlignment = 32;
    793         this->m_dataTypeWidth = 32;
    794         this->m_vectorWidth = 8;
    795         this->m_maskingIsFree = false;
    796         this->m_maskBitCount = 32;
    797         CPUfromISA = CPU_SandyBridge;
    798     }
    799     else if (!strcasecmp(isa, "avx-i64x4") ||
    800              !strcasecmp(isa, "avx1-i64x4")) {
    801         this->m_isa = Target::AVX;
    802         this->m_nativeVectorWidth = 8;  /* native vector width in terms of floats */
    803         this->m_nativeVectorAlignment = 32;
    804         this->m_dataTypeWidth = 64;
    805         this->m_vectorWidth = 4;
    806         this->m_maskingIsFree = false;
    807         this->m_maskBitCount = 64;
    808         CPUfromISA = CPU_SandyBridge;
    809     }
    810     else if (!strcasecmp(isa, "avx-x2") ||
    811              !strcasecmp(isa, "avx1-x2") ||
    812              !strcasecmp(isa, "avx1-i32x16")) {
    813         this->m_isa = Target::AVX;
    814         this->m_nativeVectorWidth = 8;
    815         this->m_nativeVectorAlignment = 32;
    816         this->m_dataTypeWidth = 32;
    817         this->m_vectorWidth = 16;
    818         this->m_maskingIsFree = false;
    819         this->m_maskBitCount = 32;
    820         CPUfromISA = CPU_SandyBridge;
    821     }
    822     else if (!strcasecmp(isa, "avx1.1") ||
    823              !strcasecmp(isa, "avx1.1-i32x8")) {
    824         this->m_isa = Target::AVX11;
    825         this->m_nativeVectorWidth = 8;
    826         this->m_nativeVectorAlignment = 32;
    827         this->m_dataTypeWidth = 32;
    828         this->m_vectorWidth = 8;
    829         this->m_maskingIsFree = false;
    830         this->m_maskBitCount = 32;
    831         this->m_hasHalf = true;
    832         this->m_hasRand = true;
    833         CPUfromISA = CPU_IvyBridge;
    834     }
    835     else if (!strcasecmp(isa, "avx1.1-x2") ||
    836              !strcasecmp(isa, "avx1.1-i32x16")) {
    837         this->m_isa = Target::AVX11;
    838         this->m_nativeVectorWidth = 8;
    839         this->m_nativeVectorAlignment = 32;
    840         this->m_dataTypeWidth = 32;
    841         this->m_vectorWidth = 16;
    842         this->m_maskingIsFree = false;
    843         this->m_maskBitCount = 32;
    844         this->m_hasHalf = true;
    845         this->m_hasRand = true;
    846         CPUfromISA = CPU_IvyBridge;
    847     }
    848     else if (!strcasecmp(isa, "avx1.1-i64x4")) {
    849         this->m_isa = Target::AVX11;
    850         this->m_nativeVectorWidth = 8;  /* native vector width in terms of floats */
    851         this->m_nativeVectorAlignment = 32;
    852         this->m_dataTypeWidth = 64;
    853         this->m_vectorWidth = 4;
    854         this->m_maskingIsFree = false;
    855         this->m_maskBitCount = 64;
    856         this->m_hasHalf = true;
    857         this->m_hasRand = true;
    858         CPUfromISA = CPU_IvyBridge;
    859     }
    860     else if (!strcasecmp(isa, "avx2") ||
    861              !strcasecmp(isa, "avx2-i32x8")) {
    862         this->m_isa = Target::AVX2;
    863         this->m_nativeVectorWidth = 8;
    864         this->m_nativeVectorAlignment = 32;
    865         this->m_dataTypeWidth = 32;
    866         this->m_vectorWidth = 8;
    867         this->m_maskingIsFree = false;
    868         this->m_maskBitCount = 32;
    869         this->m_hasHalf = true;
    870         this->m_hasRand = true;
    871         this->m_hasGather = true;
    872         CPUfromISA = CPU_Haswell;
    873     }
    874     else if (!strcasecmp(isa, "avx2-x2") ||
    875              !strcasecmp(isa, "avx2-i32x16")) {
    876         this->m_isa = Target::AVX2;
    877         this->m_nativeVectorWidth = 16;
    878         this->m_nativeVectorAlignment = 32;
    879         this->m_dataTypeWidth = 32;
    880         this->m_vectorWidth = 16;
    881         this->m_maskingIsFree = false;
    882         this->m_maskBitCount = 32;
    883         this->m_hasHalf = true;
    884         this->m_hasRand = true;
    885         this->m_hasGather = true;
    886         CPUfromISA = CPU_Haswell;
    887     }
    888     else if (!strcasecmp(isa, "avx2-i64x4")) {
    889         this->m_isa = Target::AVX2;
    890         this->m_nativeVectorWidth = 8;  /* native vector width in terms of floats */
    891         this->m_nativeVectorAlignment = 32;
    892         this->m_dataTypeWidth = 64;
    893         this->m_vectorWidth = 4;
    894         this->m_maskingIsFree = false;
    895         this->m_maskBitCount = 64;
    896         this->m_hasHalf = true;
    897         this->m_hasRand = true;
    898         this->m_hasGather = true;
    899         CPUfromISA = CPU_Haswell;
    900     }
    901 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
    902     else if (!strcasecmp(isa, "avx512knl-i32x16")) {
    903         this->m_isa = Target::KNL_AVX512;
    904         this->m_nativeVectorWidth = 16;
    905         this->m_nativeVectorAlignment = 64;
    906         // ?? this->m_dataTypeWidth = 32;
    907         this->m_vectorWidth = 16;
    908         this->m_maskingIsFree = true;
    909         this->m_maskBitCount = 1;
    910         this->m_hasHalf = true;
    911         this->m_hasRand = true;
    912         this->m_hasGather = this->m_hasScatter = true;
    913         this->m_hasTranscendentals = false;
    914         // For MIC it is set to true due to performance reasons. The option should be tested.
    915         this->m_hasTrigonometry = false;
    916         this->m_hasRsqrtd = this->m_hasRcpd = false;
    917         this->m_hasVecPrefetch = false;
    918         CPUfromISA = CPU_KNL;
    919     }
    920 #endif
    921 
    922 #ifdef ISPC_ARM_ENABLED
    923     else if (!strcasecmp(isa, "neon-i8x16")) {
    924         this->m_isa = Target::NEON8;
    925         this->m_nativeVectorWidth = 16;
    926         this->m_nativeVectorAlignment = 16;
    927         this->m_dataTypeWidth = 8;
    928         this->m_vectorWidth = 16;
    929         this->m_attributes = "+neon,+fp16";
    930         this->m_hasHalf = true; // ??
    931         this->m_maskingIsFree = false;
    932         this->m_maskBitCount = 8;
    933     }
    934     else if (!strcasecmp(isa, "neon-i16x8")) {
    935         this->m_isa = Target::NEON16;
    936         this->m_nativeVectorWidth = 8;
    937         this->m_nativeVectorAlignment = 16;
    938         this->m_dataTypeWidth = 16;
    939         this->m_vectorWidth = 8;
    940         this->m_attributes = "+neon,+fp16";
    941         this->m_hasHalf = true; // ??
    942         this->m_maskingIsFree = false;
    943         this->m_maskBitCount = 16;
    944     }
    945     else if (!strcasecmp(isa, "neon") ||
    946              !strcasecmp(isa, "neon-i32x4")) {
    947         this->m_isa = Target::NEON32;
    948         this->m_nativeVectorWidth = 4;
    949         this->m_nativeVectorAlignment = 16;
    950         this->m_dataTypeWidth = 32;
    951         this->m_vectorWidth = 4;
    952         this->m_attributes = "+neon,+fp16";
    953         this->m_hasHalf = true; // ??
    954         this->m_maskingIsFree = false;
    955         this->m_maskBitCount = 32;
    956     }
    957 #endif
    958 #ifdef ISPC_NVPTX_ENABLED
    959     else if (!strcasecmp(isa, "nvptx")) {
    960         this->m_isa = Target::NVPTX;
    961         this->m_cpu = "sm_35";
    962         this->m_nativeVectorWidth = 32;
    963         this->m_nativeVectorAlignment = 32;
    964         this->m_vectorWidth = 1;
    965         this->m_hasHalf = true;
    966         this->m_maskingIsFree = true;
    967         this->m_maskBitCount = 1;
    968         this->m_hasTranscendentals = true;
    969         this->m_hasTrigonometry = true;
    970         this->m_hasGather = this->m_hasScatter = false;
    971         CPUfromISA = CPU_SM35;
    972     }
    973 #endif /* ISPC_NVPTX_ENABLED */
    974     else {
    975         Error(SourcePos(), "Target \"%s\" is unknown.  Choices are: %s.",
    976               isa, SupportedTargets());
    977         error = true;
    978     }
    979 
    980 #if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
    981     if ((CPUID == CPU_None) && !strncmp(isa, "neon", 4))
    982         CPUID = CPU_CortexA9;
    983 #endif
    984 
    985     if (CPUID == CPU_None) {
    986 #ifndef ISPC_ARM_ENABLED
    987         if (isa == nullptr) {
    988 #endif
    989             std::string hostCPU = llvm::sys::getHostCPUName();
    990             if (hostCPU.size() > 0)
    991                 cpu = strdup(hostCPU.c_str());
    992             else {
    993                 Warning(SourcePos(), "Unable to determine host CPU!\n");
    994                 cpu = a.GetDefaultNameFromType(CPU_Generic).c_str();
    995             }
    996 #ifndef ISPC_ARM_ENABLED
    997         }
    998         else {
    999             cpu = a.GetDefaultNameFromType(CPUfromISA).c_str();
    1000         }
    1001 #endif
    1002     }
    1003     else {
    1004         if ((CPUfromISA != CPU_None) &&
    1005             !a.BackwardCompatible(CPUID, CPUfromISA)) {
    1006             Error(SourcePos(), "The requested CPU is incompatible"
    1007                   " with the CPU %s needs: %s vs. %s!\n",
    1008                   isa, cpu, a.GetDefaultNameFromType(CPUfromISA).c_str());
    1009             return;
    1010         }
    1011         cpu = a.GetDefaultNameFromType(CPUID).c_str();
    1012     }
    1013     this->m_cpu = cpu;
    1014 
    1015     if (!error) {
    1016         // Create TargetMachine
    1017         std::string triple = GetTripleString();
    1018 
    1019         llvm::Reloc::Model relocModel = m_generatePIC ? llvm::Reloc::PIC_ :
    1020             llvm::Reloc::Default;
    1021         std::string featuresString = m_attributes;
    1022         llvm::TargetOptions options;
    1023 #ifdef ISPC_ARM_ENABLED
    1024         if (m_isa == Target::NEON8 || m_isa == Target::NEON16 ||
    1025             m_isa == Target::NEON32)
    1026             options.FloatABIType = llvm::FloatABI::Hard;
    1027 #endif
    1028         if (g->opt.disableFMA == false)
    1029             options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
    1030 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
    1031 #ifdef ISPC_IS_WINDOWS
    1032         if (strcmp("x86", arch) == 0) {
    1033             // Workaround for issue #503 (LLVM issue 14646).
    1034             // It's Win32 specific.
    1035             options.NoFramePointerElim = true;
    1036         }
    1037 #endif
    1038 #endif
    1039         m_targetMachine =
    1040             m_target->createTargetMachine(triple, m_cpu, featuresString, options,
    1041                     relocModel);
    1042         Assert(m_targetMachine != nullptr);
    1043 
    1044 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
    1045         m_targetMachine->setAsmVerbosityDefault(true);
    1046 #else /* LLVM 3.7+ */
    1047         m_targetMachine->Options.MCOptions.AsmVerbose = true;
    1048 #endif
    1049         // Initialize TargetData/DataLayout in 3 steps.
    1050         // 1. Get default data layout first
    1051         std::string dl_string;
    1052 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6
    1053         dl_string = m_targetMachine->getSubtargetImpl()->getDataLayout()->getStringRepresentation();
    1054 #elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
    1055         dl_string = m_targetMachine->createDataLayout().getStringRepresentation();
    1056 #else // LLVM 3.5- or LLVM 3.7
    1057         dl_string = m_targetMachine->getDataLayout()->getStringRepresentation();
    1058 #endif
    1059         // 2. Adjust for generic
    1060         if (m_isa == Target::GENERIC) {
    1061             // <16 x i1> vectors only need 16 bit / 2 byte alignment, so add
    1062             // that to the regular datalayout string for IA..
    1063             // For generic-4 target we need to treat <4 x i1> as 128 bit value
    1064             // in terms of required memory storage and alignment, as this is
    1065             // translated to __m128 type.
    1066             dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
    1067                 "i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-"
    1068                 "f80:128:128-n8:16:32:64-S128-v16:16:16-v32:32:32-v4:128:128";
    1069         }
    1070 #ifdef ISPC_NVPTX_ENABLED
    1071         else if (m_isa == Target::NVPTX)
    1072         {
    1073           dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
    1074         }
    1075 #endif
    1076 
    1077         // 3. Finally set member data
    1078         m_dataLayout = new llvm::DataLayout(dl_string);
    1079 
    1080         // Set is32Bit
    1081         // This indicates if we are compiling for 32 bit platform
    1082         // and can assume 32 bit runtime.
    1083         // FIXME: all generic targets are handled as 64 bit, which is incorrect.
    1084 
    1085         this->m_is32Bit = (getDataLayout()->getPointerSize() == 4);
    1086 
    1087 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
    1088         // This is LLVM 3.3+ feature.
    1089         // Initialize target-specific "target-feature" attribute.
    1090         if (!m_attributes.empty()) {
    1091             llvm::AttrBuilder attrBuilder;
    1092 #ifdef ISPC_NVPTX_ENABLED
    1093             if (m_isa != Target::NVPTX)
    1094 #endif
    1095             attrBuilder.addAttribute("target-cpu", this->m_cpu);
    1096             attrBuilder.addAttribute("target-features", this->m_attributes);
    1097             this->m_tf_attributes = new llvm::AttributeSet(
    1098                 llvm::AttributeSet::get(
    1099                     *g->ctx,
    1100                     llvm::AttributeSet::FunctionIndex,
    1101                     attrBuilder));
    1102         }
    1103 #endif
    1104 
    1105         Assert(this->m_vectorWidth <= ISPC_MAX_NVEC);
    1106     }
    1107 
    1108     m_valid = !error;
    1109 
    1110     if (printTarget) {
    1111         printf("Target Triple: %s\n", m_targetMachine->getTargetTriple().str().c_str());
    1112         printf("Target CPU: %s\n", m_targetMachine->getTargetCPU().str().c_str());
    1113         printf("Target Feature String: %s\n", m_targetMachine->getTargetFeatureString().str().c_str());
    1114     }
    1115 
    1116     return;
    1117 }
    1118 
    1119 
         // Returns the human-readable list of CPU names, exactly as produced by
         // AllCPUs::HumanReadableListOfNames() on a freshly constructed AllCPUs.
    1120 std::string
    1121 Target::SupportedCPUs() {
    1122     AllCPUs a;
    1123     return a.HumanReadableListOfNames();
    1124 }
    1125 
    1126 
         // Returns the comma-separated list of architecture names as a string
         // literal: "x86, x86-64", prefixed with "arm, " when the build defines
         // ISPC_ARM_ENABLED.  The pieces are joined by adjacent-literal
         // concatenation, so the result has static storage duration.
    1127 const char *
    1128 Target::SupportedArchs() {
    1129     return
    1130 #ifdef ISPC_ARM_ENABLED
    1131         "arm, "
    1132 #endif
    1133         "x86, x86-64";
    1134 }
    1135 
    1136 
    1137 const char *
    1138 Target::SupportedTargets() {
    1139     return
    1140         "host, sse2-i32x4, sse2-i32x8, "
    1141         "sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, "
    1142         "avx1-i32x4, "
    1143         "avx1-i32x8, avx1-i32x16, avx1-i64x4, "
    1144         "avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4, "
    1145         "avx2-i32x8, avx2-i32x16, avx2-i64x4, "
    1146 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
    1147         "avx512knl-i32x16, "
    1148 #endif
    1149         "generic-x1, generic-x4, generic-x8, generic-x16, "
    1150         "generic-x32, generic-x64, *-generic-x16, "
    1151 #ifdef ISPC_ARM_ENABLED
    1152         ", neon-i8x16, neon-i16x8, neon-i32x4"
    1153 #endif
    1154 #ifdef ISPC_NVPTX_ENABLED
    1155         ", nvptx"
    1156 #endif
    1157 ;
    1158 
    1159 }
    1160 
    1161 
         // Builds the LLVM target-triple string for this target from m_arch.
         //
         //  - ARM builds with m_arch == "arm": the triple is fixed to "armv7-eabi".
         //  - Otherwise: start from the host's default triple and replace only the
         //    architecture component ("x86" -> "i386", "x86-64" -> "x86_64", any
         //    other value is passed through unchanged).  With NVPTX enabled,
         //    "nvptx64" replaces the whole triple with nvptx64-nvidia-cuda.
    1162 std::string
    1163 Target::GetTripleString() const {
    1164     llvm::Triple triple;
    1165 #ifdef ISPC_ARM_ENABLED
    1166     if (m_arch == "arm") {
    1167         triple.setTriple("armv7-eabi");
    1168     }
    1169     else
    1170 #endif
    1171     {
    1172         // Start with the host triple as the default
    1173         triple.setTriple(llvm::sys::getDefaultTargetTriple());
    1174 
    1175         // And override the arch in the host triple based on what the user
    1176         // specified.  Here we need to deal with the fact that LLVM uses one
    1177         // naming convention for targets in the TargetRegistry, but wants some
    1178         // slightly different ones for the triple.  TODO: is there a way to
    1179         // have it do this remapping, which would presumably be a bit less
    1180         // error prone?
    1181         if (m_arch == "x86")
    1182             triple.setArchName("i386");
    1183         else if (m_arch == "x86-64")
    1184             triple.setArchName("x86_64");
    1185 #ifdef ISPC_NVPTX_ENABLED
    1186         else if (m_arch == "nvptx64")
    1187           triple = llvm::Triple("nvptx64", "nvidia", "cuda");
    1188 #endif /* ISPC_NVPTX_ENABLED */
    1189         else
    1190             triple.setArchName(m_arch);
    1191     }
    1192     return triple.str();
    1193 }
    1194 
    1195 // This function returns the string representation of an ISA for the
    1196 // purpose of name mangling. It may return any unique string, preferably
    1197 // a short one such as sse4 or avx.
         // An ISA value without a case below is reported through FATAL().
    1198 const char *
    1199 Target::ISAToString(ISA isa) {
    1200     switch (isa) {
    1201 #ifdef ISPC_ARM_ENABLED
    1202     case Target::NEON8:
    1203         return "neon-8";
    1204     case Target::NEON16:
    1205         return "neon-16";
    1206     case Target::NEON32:
    1207         return "neon-32";
    1208 #endif
    1209     case Target::SSE2:
    1210         return "sse2";
    1211     case Target::SSE4:
    1212         return "sse4";
    1213     case Target::AVX:
    1214         return "avx";
    1215     case Target::AVX11:
    1216         return "avx11";
    1217     case Target::AVX2:
    1218         return "avx2";
    1219 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
    1220     case Target::KNL_AVX512:
    1221         return "avx512knl-i32x16";
    1222 #endif
    1223     case Target::SKX:
    1224         return "skx";
    1225     case Target::GENERIC:
    1226         return "generic";
    1227 #ifdef ISPC_NVPTX_ENABLED
    1228     case Target::NVPTX:
    1229         return "nvptx";
    1230 #endif /* ISPC_NVPTX_ENABLED */
    1231     default:
    1232         FATAL("Unhandled target in ISAToString()");
    1233     }
             // Fallback value after the switch; presumably only reached if FATAL()
             // returns -- TODO confirm.
    1234     return "";
    1235 }
    1236 
         // Returns the mangling name of this target's ISA, i.e.
         // ISAToString(m_isa).
    1237 const char *
    1238 Target::GetISAString() const {
    1239     return ISAToString(m_isa);
    1240 }
    1241 
    1242 
    1243 // This function returns string representation of default target corresponding
    1244 // to ISA. I.e. for SSE4 it's sse4-i32x4, for AVX11 it's avx1.1-i32x8. This
    1245 // string may be used to initialize Target.
    1246 const char *
    1247 Target::ISAToTargetString(ISA isa) {
    1248     switch (isa) {
    1249 #ifdef ISPC_ARM_ENABLED
    1250     case Target::NEON8:
    1251         return "neon-8";
    1252     case Target::NEON16:
    1253         return "neon-16";
    1254     case Target::NEON32:
    1255         return "neon-32";
    1256 #endif
    1257     case Target::SSE2:
    1258         return "sse2-i32x4";
    1259     case Target::SSE4:
    1260         return "sse4-i32x4";
    1261     case Target::AVX:
    1262         return "avx1-i32x8";
    1263     case Target::AVX11:
    1264         return "avx1.1-i32x8";
    1265     case Target::AVX2:
    1266         return "avx2-i32x8";
    1267 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
    1268     case Target::KNL_AVX512:
    1269         return "avx512knl-i32x16";
    1270 #endif
    1271     case Target::SKX:
    1272         return "avx2";
    1273     case Target::GENERIC:
    1274         return "generic-4";
    1275 #ifdef ISPC_NVPTX_ENABLED
    1276     case Target::NVPTX:
    1277         return "nvptx";
    1278 #endif /* ISPC_NVPTX_ENABLED */
    1279     default:
    1280         FATAL("Unhandled target in ISAToTargetString()");
    1281     }
    1282     return "";
    1283 }
    1284 
    1285 
         // Returns the default target name for this target's ISA (for example
         // "sse4-i32x4" for SSE4), i.e. ISAToTargetString(m_isa).  This is the
         // target-string counterpart of GetISAString(), which returns the short
         // mangling name; the two must not be confused.
    1286 const char *
    1287 Target::GetISATargetString() const {
    1288     return ISAToTargetString(m_isa);
    1289 }
    1290 
    1291 
         // Returns true if `type` is a vector type, or an array/struct that
         // (recursively) contains one; returns false for scalar, void, label,
         // metadata and pointer types.  Target::SizeOf() and
         // Target::StructOffset() use this on the GENERIC target to decide
         // whether to emit a GEP-based computation instead of querying the
         // DataLayout.
    1292 static bool
    1293 lGenericTypeLayoutIndeterminate(llvm::Type *type) {
    1294     if (type->isFloatingPointTy() || type->isX86_MMXTy() || type->isVoidTy() ||
    1295         type->isIntegerTy() || type->isLabelTy() || type->isMetadataTy())
    1296         return false;
    1297 
    1298     if (type == LLVMTypes::BoolVectorType ||
    1299         type == LLVMTypes::MaskType ||
    1300         type == LLVMTypes::Int1VectorType)
    1301         return true;
    1302 
             // An array is indeterminate exactly when its element type is.
    1303     llvm::ArrayType *at =
    1304         llvm::dyn_cast<llvm::ArrayType>(type);
    1305     if (at != nullptr)
    1306         return lGenericTypeLayoutIndeterminate(at->getElementType());
    1307 
             // Pointers are never indeterminate, regardless of the pointee type.
    1308     llvm::PointerType *pt =
    1309         llvm::dyn_cast<llvm::PointerType>(type);
    1310     if (pt != nullptr)
    1311         return false;
    1312 
             // A struct is indeterminate if any of its members is.
    1313     llvm::StructType *st =
    1314         llvm::dyn_cast<llvm::StructType>(type);
    1315     if (st != nullptr) {
    1316         for (int i = 0; i < (int)st->getNumElements(); ++i)
    1317             if (lGenericTypeLayoutIndeterminate(st->getElementType(i)))
    1318                 return true;
    1319         return false;
    1320     }
    1321 
             // Anything left is expected to be a vector type.
    1322     Assert(llvm::isa<llvm::VectorType>(type));
    1323     return true;
    1324 }
    1325 
    1326 
         // Returns an LLVM value holding the size of `type`.
         //
         // On the GENERIC target, when lGenericTypeLayoutIndeterminate(type) is
         // true, the size is not taken from the DataLayout.  Instead a GEP with
         // index 1 off a null pointer of type `type*` is appended to
         // `insertAtEnd` and converted to an integer with ptrtoint.  In every
         // other case the result is a constant built from
         // DataLayout::getTypeStoreSize().
         //
         // The result is 32 bits wide when m_is32Bit or
         // g->opt.force32BitAddressing is set, and 64 bits wide otherwise.
    1327 llvm::Value *
    1328 Target::SizeOf(llvm::Type *type,
    1329                llvm::BasicBlock *insertAtEnd) {
    1330     if (m_isa == Target::GENERIC &&
    1331         lGenericTypeLayoutIndeterminate(type)) {
    1332         llvm::Value *index[1] = { LLVMInt32(1) };
    1333         llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
                 // Despite its name, voidPtr is a null pointer of type `type*`.
    1334         llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
    1335         llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
    1336 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
    1337         llvm::Instruction *gep =
    1338             llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "sizeof_gep",
    1339                                             insertAtEnd);
    1340 #else /* LLVM 3.7+ */
                 // PTYPE(voidPtr) presumably yields the pointee type that the
                 // LLVM 3.7+ Create() overload takes first -- TODO confirm.
    1341         llvm::Instruction *gep =
    1342             llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr,
    1343                                             arrayRef, "sizeof_gep",
    1344                                             insertAtEnd);
    1345 #endif
    1346         if (m_is32Bit || g->opt.force32BitAddressing)
    1347             return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
    1348                                           "sizeof_int", insertAtEnd);
    1349         else
    1350             return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type,
    1351                                           "sizeof_int", insertAtEnd);
    1352     }
    1353 
    1354     uint64_t byteSize = getDataLayout()->getTypeStoreSize(type);
    1355     if (m_is32Bit || g->opt.force32BitAddressing)
                 // The 64-bit size is narrowed to 32 bits by the cast below.
    1356         return LLVMInt32((int32_t)byteSize);
    1357     else
    1358         return LLVMInt64(byteSize);
    1359 }
    1360 
    1361 
         // Returns an LLVM value holding the offset of member number `element`
         // within the struct type `type`.
         //
         // On the GENERIC target, when lGenericTypeLayoutIndeterminate(type) is
         // true, the offset is not taken from the DataLayout.  Instead a GEP
         // with indices {0, element} off a null pointer of type `type*` is
         // appended to `insertAtEnd` and converted to an integer with ptrtoint.
         // Otherwise the offset comes from the DataLayout's StructLayout and is
         // returned as a constant.
         //
         // Returns nullptr if `type` is not a sized struct type; that path
         // asserts that m->errorCount is already positive.  The result is 32
         // bits wide when m_is32Bit or g->opt.force32BitAddressing is set, and
         // 64 bits wide otherwise.
    1362 llvm::Value *
    1363 Target::StructOffset(llvm::Type *type, int element,
    1364                      llvm::BasicBlock *insertAtEnd) {
    1365     if (m_isa == Target::GENERIC &&
    1366         lGenericTypeLayoutIndeterminate(type) == true) {
    1367         llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(element) };
    1368         llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
                 // Despite its name, voidPtr is a null pointer of type `type*`.
    1369         llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
    1370         llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
    1371 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
    1372         llvm::Instruction *gep =
    1373             llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "offset_gep",
    1374                                             insertAtEnd);
    1375 #else /* LLVM 3.7+ */
                 // PTYPE(voidPtr) presumably yields the pointee type that the
                 // LLVM 3.7+ Create() overload takes first -- TODO confirm.
    1376         llvm::Instruction *gep =
    1377             llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr,
    1378                                             arrayRef, "offset_gep",
    1379                                             insertAtEnd);
    1380 #endif
    1381         if (m_is32Bit || g->opt.force32BitAddressing)
    1382             return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
    1383                                           "offset_int", insertAtEnd);
    1384         else
    1385             return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type,
    1386                                           "offset_int", insertAtEnd);
    1387     }
    1388 
    1389     llvm::StructType *structType =
    1390         llvm::dyn_cast<llvm::StructType>(type);
    1391     if (structType == nullptr || structType->isSized() == false) {
                 // Only expected after an error has already been counted.
    1392         Assert(m->errorCount > 0);
    1393         return nullptr;
    1394     }
    1395 
    1396     const llvm::StructLayout *sl = getDataLayout()->getStructLayout(structType);
    1397     Assert(sl != nullptr);
    1398 
    1399     uint64_t offset = sl->getElementOffset(element);
    1400     if (m_is32Bit || g->opt.force32BitAddressing)
                 // The 64-bit offset is narrowed to 32 bits by the cast below.
    1401         return LLVMInt32((int32_t)offset);
    1402     else
    1403         return LLVMInt64(offset);
    1404 }
    1405 
         // Adds the attribute set stored in m_tf_attributes to `func` at the
         // function attribute index.  Does nothing when m_tf_attributes is null,
         // and compiles to an empty function for LLVM versions older than 3.3.
    1406 void Target::markFuncWithTargetAttr(llvm::Function* func) {
    1407 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
    1408     if (m_tf_attributes) {
    1409         func->addAttributes(llvm::AttributeSet::FunctionIndex, *m_tf_attributes);
    1410     }
    1411 #endif
    1412 }
    1413 
    1414 
    1415 ///////////////////////////////////////////////////////////////////////////
    1416 // Opt
    1417 
    // Sets the default optimization options: level 1, 32-bit addressing
    // forced on, loop unrolling on. Every other switch below (fast math,
    // forced aligned memory, and all of the disable* flags) starts out false.
    1418 Opt::Opt() {
    1419     level = 1;
    1420     fastMath = false;
    1421     fastMaskedVload = false;
    1422     force32BitAddressing = true;
    1423     unrollLoops = true;
    1424     disableAsserts = false;
    1425     disableFMA = false;
    1426     forceAlignedMemory = false;
    1427     disableMaskAllOnOptimizations = false;
    1428     disableHandlePseudoMemoryOps = false;
    1429     disableBlendedMaskedStores = false;
    1430     disableCoherentControlFlow = false;
    1431     disableUniformControlFlow = false;
    1432     disableGatherScatterOptimizations = false;
    1433     disableMaskedStoreToStore = false;
    1434     disableGatherScatterFlattening = false;
    1435     disableUniformMemoryOptimizations = false;
    1436     disableCoalescing = false;
    1437 }
    1438 
    1439 ///////////////////////////////////////////////////////////////////////////
    1440 // Globals
    1441 
    // Initializes the global compiler state to its defaults, allocates the
    // LLVM context, and records the process's current working directory in
    // currentDirectory.
    1442 Globals::Globals() {
    1443     mathLib = Globals::Math_ISPC;
    1444 
    1445     includeStdlib = true;
    1446     runCPP = true;
    1447     debugPrint = false;
    1448     printTarget = false;
    // NOTE(review): -1 here (and for fuzzTestSeed / forceAlignment below)
    // looks like a "not set" sentinel -- confirm against the users of
    // these fields.
    1449     debugIR = -1;
    1450     disableWarnings = false;
    1451     warningsAsErrors = false;
    1452     quiet = false;
    1453     forceColoredOutput = false;
    1454     disableLineWrap = false;
    1455     emitPerfWarnings = true;
    1456     emitInstrumentation = false;
    1457     generateDebuggingSymbols = false;
    1458     enableFuzzTest = false;
    1459     fuzzTestSeed = -1;
    1460     mangleFunctionsWithTarget = false;
    1461 
    // Heap-allocated context; nothing in this constructor releases it.
    1462     ctx = new llvm::LLVMContext;
    1463 
    // NOTE(review): the Windows branch ignores the result of _getcwd, so a
    // failure would leave currentDirectory unset, whereas the other branch
    // aborts via FATAL when getcwd returns nullptr.
    1464 #ifdef ISPC_IS_WINDOWS
    1465     _getcwd(currentDirectory, sizeof(currentDirectory));
    1466 #else
    1467     if (getcwd(currentDirectory, sizeof(currentDirectory)) == nullptr)
    1468         FATAL("Current directory path too long!");
    1469 #endif
    1470     forceAlignment = -1;
    1471     dllExport = false;
    1472 }
    1473 
    1474 ///////////////////////////////////////////////////////////////////////////
    1475 // SourcePos
    1476 
    // Builds a source position spanning (fl, fc) through (ll, lc).
    //
    //   n      - name to report for the position; stored as a pointer, not
    //            copied. When null, the identifier of the current module is
    //            used if the global `m` is set, otherwise "(unknown)".
    //   fl, fc - first line and first column.
    //   ll, lc - last line and last column; a value of 0 means "same as the
    //            corresponding first line/column".
    1477 SourcePos::SourcePos(const char *n, int fl, int fc, int ll, int lc) {
    1478     name = n;
    1479     if (name == nullptr) {
    1480         if (m != nullptr)
    // NOTE(review): this keeps a pointer into the string returned by
    // getModuleIdentifier(); presumably the module outlives every
    // SourcePos that uses the fallback -- confirm.
    1481             name = m->module->getModuleIdentifier().c_str();
    1482         else
    1483             name = "(unknown)";
    1484     }
    1485     first_line = fl;
    1486     first_column = fc;
    1487     last_line = ll != 0 ? ll : fl;
    1488     last_column = lc != 0 ? lc : fc;
    1489 }
    1490 
    1491 
    // Creates the debug-info file descriptor for this position through the
    // current module's diBuilder. The return type depends on the LLVM
    // version: a DIFile value for LLVM <= 3.6, a DIFile pointer for 3.7+.
    1492 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
    1493 llvm::DIFile
    1494 #else /* LLVM 3.7+ */
    1495 llvm::DIFile*
    1496 //llvm::MDFile*
    1497 #endif
    1498 SourcePos::GetDIFile() const {
    1499     std::string directory, filename;
    // Derive the directory and file name from `name`, passing the recorded
    // current working directory to the helper.
    1500     GetDirectoryAndFileName(g->currentDirectory, name, &directory, &filename);
    1501 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
    1502     llvm::DIFile ret = m->diBuilder->createFile(filename, directory);
    // Only the pre-3.7 path verifies the descriptor it just created.
    1503     Assert(ret.Verify());
    1504 #else /* LLVM 3.7+ */
    1505     llvm::DIFile *ret = m->diBuilder->createFile(filename, directory);
    1506     //llvm::MDFile *ret = m->diBuilder->createFile(filename, directory);
    1507 #endif
    1508     return ret;
    1509 }
    1510 
    1511 
    // Writes this position to stdout in the form
    // " @ [name:first_line.first_column - last_line.last_column] ",
    // with no trailing newline.
    1512 void
    1513 SourcePos::Print() const {
    1514     printf(" @ [%s:%d.%d - %d.%d] ", name, first_line, first_column,
    1515            last_line, last_column);
    1516 }
    1517 
    1518 
    // Two positions are equal when their names compare equal as C strings
    // (by content, not by pointer) and all four line/column fields match.
    // NOTE(review): strcmp requires both names to be non-null; the
    // five-argument constructor guarantees that -- confirm no other code
    // path leaves name null.
    1519 bool
    1520 SourcePos::operator==(const SourcePos &p2) const {
    1521     return (!strcmp(name, p2.name) &&
    1522             first_line == p2.first_line &&
    1523             first_column == p2.first_column &&
    1524             last_line == p2.last_line &&
    1525             last_column == p2.last_column);
    1526 }
    1527 
    1528 
    // Returns a position covering both p1 and p2. When the two positions
    // have different names they cannot be merged and p1 is returned
    // unchanged.
    1529 SourcePos
    1530 Union(const SourcePos &p1, const SourcePos &p2) {
    1531     if (strcmp(p1.name, p2.name) != 0)
    1532         return p1;
    1533 
    1534     SourcePos ret;
    1535     ret.name = p1.name;
    // NOTE(review): lines and columns are min/maxed independently, so the
    // resulting first_column / last_column may come from a different input
    // than first_line / last_line.
    1536     ret.first_line = std::min(p1.first_line, p2.first_line);
    1537     ret.first_column = std::min(p1.first_column, p2.first_column);
    1538     ret.last_line = std::max(p1.last_line, p2.last_line);
    1539     ret.last_column = std::max(p1.last_column, p2.last_column);
    1540     return ret;
    1541 }
    1542 #endif
  • icGREP/icgrep-devel/icgrep/wc.cpp

    r5030 r5033  
    1111#include <sstream>
    1212
     13
     14#include <toolchain.h>
    1315#include <llvm/IR/Function.h>
    1416#include <llvm/IR/Module.h>
    1517#include <llvm/ExecutionEngine/ExecutionEngine.h>
    1618#include <llvm/ExecutionEngine/MCJIT.h>
    17 #include <llvm/IRReader/IRReader.h>
    18 #include <llvm/IR/Verifier.h>
    19 #include <llvm/Support/Debug.h>
    2019
    2120#include <llvm/Support/CommandLine.h>
    22 #include <llvm/CodeGen/CommandFlags.h>
    23 #include <llvm/Support/SourceMgr.h>
    24 #include <llvm/Support/TargetSelect.h>
    25 #include <llvm/Support/Host.h>
    2621#include <llvm/Support/raw_ostream.h>
    2722
     
    3934#include <pablo/pablo_toolchain.h>
    4035
    41 // Dynamic processor detection
    42 #define ISPC_LLVM_VERSION ISPC_LLVM_3_6
    43 #include <util/ispc.cpp>
    4436
    4537#include <utf_encoding.h>
     
    115107//
    116108
    // Builds an LLVM ExecutionEngine for module `m`, configured for the host
    // CPU. The module pointer is wrapped in a std::unique_ptr and handed to
    // the EngineBuilder, so the caller must not delete it afterwards.
    // Throws std::runtime_error when the engine cannot be created.
    // (These lines carry only an old-side line number in this changeset
    // view; the call site further down uses JIT_to_ExecutionEngine instead.)
    117 ExecutionEngine * wcJIT_to_ExecutionEngine (Module * m) {
    118 
    119     InitializeNativeTarget();
    120     InitializeNativeTargetAsmPrinter();
    121     InitializeNativeTargetAsmParser();
    122 
    123     PassRegistry * Registry = PassRegistry::getPassRegistry();
    124     initializeCore(*Registry);
    125     initializeCodeGen(*Registry);
    126     initializeLowerIntrinsicsPass(*Registry);
    127 
    128     std::string errMessage;
    129     EngineBuilder builder(std::move(std::unique_ptr<Module>(m)));
    130     builder.setErrorStr(&errMessage);
    131     builder.setMCPU(sys::getHostCPUName());
    // Map the character held in OptLevel (declared outside this function)
    // to a codegen optimization level.
    // NOTE(review): an unrecognised value is only reported on errs(); the
    // function then carries on with CodeGenOpt::None.
    132     CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
    133     switch (OptLevel) {
    134         case '0': optLevel = CodeGenOpt::None; break;
    135         case '1': optLevel = CodeGenOpt::Less; break;
    136         case '2': optLevel = CodeGenOpt::Default; break;
    137         case '3': optLevel = CodeGenOpt::Aggressive; break;
    138         default: errs() << OptLevel << " is an invalid optimization level.\n";
    139     }
    140     builder.setOptLevel(optLevel);
    141 
    // Request the "avx2" machine attribute when the ISA string returned by
    // lGetSystemISA() begins with "avx2".
    142     if ((strncmp(lGetSystemISA(), "avx2", 4) == 0)) {
    143             std::vector<std::string> attrs;
    144             attrs.push_back("avx2");
    145             builder.setMAttrs(attrs);
    146     }
    147 
    148     // builder.selectTarget();
    149 
    150     //builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
    151     ExecutionEngine * engine = builder.create();
    152     if (engine == nullptr) {
    153         throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
    154     }
    155     return engine;
    156 }
    157 
    158 
    159109pablo::PabloFunction * wc_gen(Encoding encoding) {
    160110    //  input: 8 basis bit streams
     
    382332    Module * M = new Module("wc", getGlobalContext());
    383333   
    384     IDISA::IDISA_Builder * idb = GetIDISA_Builder(M);
     334    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
    385335
    386336    wcPipelineBuilder pipelineBuilder(M, idb);
     
    401351    //verifyModule(*M, &dbgs());
    402352    //std::cerr << "ExecuteKernels(); done\n";
    403     wcEngine = wcJIT_to_ExecutionEngine(M);
     353    wcEngine = JIT_to_ExecutionEngine(M);
    404354   
    405355    wcEngine->finalizeObject();
Note: See TracChangeset for help on using the changeset viewer.