source: icGREP/icgrep-devel/icgrep/ispc.cpp @ 4960

Last change on this file since 4960 was 4937, checked in by nmedfort, 4 years ago

Check in of misc changes prior to symbol table work.

File size: 50.5 KB
Line 
1/*
2  Copyright (c) 2010-2015, Intel Corporation
3  All rights reserved.
4
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8
9    * Redistributions of source code must retain the above copyright
10      notice, this list of conditions and the following disclaimer.
11
12    * Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16    * Neither the name of Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20
21   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22   IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24   PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32*/
33
34/** @file ispc.cpp
35    @brief ispc global definitions
36*/
37
38#if 0
39#include "ispc.h"
40#include "module.h"
41#include "util.h"
42#include "llvmutil.h"
43#include <stdio.h>
44#include <sstream>
45#include <stdarg.h>     /* va_list, va_start, va_arg, va_end */
46#ifdef ISPC_IS_WINDOWS
47  #include <windows.h>
48  #include <direct.h>
49  #define strcasecmp stricmp
50#else
51  #include <sys/types.h>
52  #include <unistd.h>
53#endif
54#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
55  #include <llvm/LLVMContext.h>
56  #include <llvm/Module.h>
57  #include <llvm/Instructions.h>
58#else /* 3.3+ */
59  #include <llvm/IR/LLVMContext.h>
60  #include <llvm/IR/Module.h>
61  #include <llvm/IR/Instructions.h>
62#endif
63#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
64  #include <llvm/Target/TargetSubtargetInfo.h>
65  #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
66    #include <llvm/Target/TargetLowering.h>
67  #endif
68#endif
69#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // LLVM 3.5+
70  #include <llvm/IR/DebugInfo.h>
71  #include <llvm/IR/DIBuilder.h>
72#else // LLVM 3.2, 3.3, 3.4
73  #include <llvm/DebugInfo.h>
74  #include <llvm/DIBuilder.h>
75#endif
76#include <llvm/Support/Dwarf.h>
77#include <llvm/Target/TargetMachine.h>
78#include <llvm/Target/TargetOptions.h>
79#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
80  #include <llvm/DataLayout.h>
81#else // LLVM 3.3+
82  #include <llvm/IR/DataLayout.h>
83  #include <llvm/IR/Attributes.h>
84#endif
85#include <llvm/Support/TargetRegistry.h>
86#include <llvm/Support/TargetSelect.h>
87#include <llvm/Support/Host.h>
88
89Globals *g;
90Module *m;
91*/
92#endif
93
94///////////////////////////////////////////////////////////////////////////
95// Target
96
97#if !defined(ISPC_IS_WINDOWS) && !defined(__arm__)
/** Execute the CPUID instruction for leaf @p infoType and store the
    resulting EAX, EBX, ECX and EDX register values in info[0], info[1],
    info[2] and info[3] respectively.

    ECX is explicitly zeroed before the instruction executes, so leaves
    that take a sub-leaf index in ECX always report sub-leaf 0 instead of
    depending on whatever value the register happened to hold. */
static void __cpuid(int info[4], int infoType) {
    __asm__ __volatile__ ("cpuid"
                          : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
                          : "0" (infoType), "2" (0));
}
103
/* Execute CPUID for leaf `level`, sub-leaf `count`, and store the
   resulting EAX, EBX, ECX and EDX values in info[0..3].

   Save %ebx in case it's the PIC register: the register is swapped into
   a compiler-chosen scratch register around the instruction and swapped
   back afterwards, so it never appears as an asm output. */
static void __cpuidex(int info[4], int level, int count) {
#if defined(__x86_64__)
  // On x86-64 a 32-bit xchg writes %ebx and therefore zero-extends into
  // %rbx, wiping the upper half of a callee-saved register behind the
  // compiler's back.  Swap the full 64-bit register instead (%q1 prints
  // the 64-bit name of operand 1).
  __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1\n\t"
                        "cpuid\n\t"
                        "xchg{q}\t{%%}rbx, %q1\n\t"
                        : "=a" (info[0]), "=&r" (info[1]), "=c" (info[2]), "=d" (info[3])
                        : "0" (level), "2" (count));
#else
  __asm__ __volatile__ ("xchg{l}\t{%%}ebx, %1\n\t"
                        "cpuid\n\t"
                        "xchg{l}\t{%%}ebx, %1\n\t"
                        : "=a" (info[0]), "=r" (info[1]), "=c" (info[2]), "=d" (info[3])
                        : "0" (level), "2" (count));
#endif
}
112#endif // !ISPC_IS_WINDOWS && !__ARM__
113
114#if !defined(__arm__)
/** Report whether the operating system has enabled the extended register
    state needed for AVX, i.e. whether it will save the YMM registers.

    Reads extended control register 0 and requires bits 1 and 2 (mask 0x6)
    to both be set.  Callers are expected to have verified OSXSAVE first,
    since the read uses the xgetbv instruction. */
static bool __os_has_avx_support() {
    const int kAvxStateMask = 0x6;
#if defined(ISPC_IS_WINDOWS)
    // The compiler intrinsic returns the full 64-bit register.
    const unsigned long long enabledState = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    return (enabledState & kAvxStateMask) == kAvxStateMask;
#else // !defined(ISPC_IS_WINDOWS)
    // xgetbv is emitted as raw opcode bytes: older assemblers do not know
    // the mnemonic, and there is no easy way to conditionally compile
    // based on the assembler in use.  ECX = 0 selects the register to
    // read; the result comes back in EDX:EAX and only the low half is
    // needed here.
    int stateLow, stateHigh;
    __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0"
                          : "=a" (stateLow), "=d" (stateHigh)
                          : "c" (0));
    (void)stateHigh;
    return (stateLow & kAvxStateMask) == kAvxStateMask;
#endif // !defined(ISPC_IS_WINDOWS)
}
129
/** Report whether the operating system saves the XMM, YMM and ZMM
    registers, i.e. whether it supports both AVX2 and AVX512 state.

    Reads extended control register 0 and requires every bit of mask 0xE6
    to be set.  See section 2.1 of
    software.intel.com/sites/default/files/managed/0d/53/319433-022.pdf.
    Callers are expected to have verified OSXSAVE first, since the read
    uses the xgetbv instruction. */
static bool __os_has_avx512_support() {
    const int kAvx512StateMask = 0xE6;
#if defined(ISPC_IS_WINDOWS)
    // The compiler intrinsic returns the full 64-bit register.
    const unsigned long long enabledState = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    return (enabledState & kAvx512StateMask) == kAvx512StateMask;
#else // !defined(ISPC_IS_WINDOWS)
    // xgetbv is emitted as raw opcode bytes: older assemblers do not know
    // the mnemonic, and there is no easy way to conditionally compile
    // based on the assembler in use.  ECX = 0 selects the register to
    // read; the result comes back in EDX:EAX and only the low half is
    // needed here.
    int stateLow, stateHigh;
    __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0"
                          : "=a" (stateLow), "=d" (stateHigh)
                          : "c" (0));
    (void)stateHigh;
    return (stateLow & kAvx512StateMask) == kAvx512StateMask;
#endif // !defined(ISPC_IS_WINDOWS)
}
145#endif // !__arm__
146
147static const char *
148lGetSystemISA() {
149#ifdef __arm__
150    return "neon-i32x4";
151#else
152    int info[4];
153    __cpuid(info, 1);
154
155    int info2[4];
156    // Call cpuid with eax=7, ecx=0
157    __cpuidex(info2, 7, 0);
158
159    if ((info[2] & (1 << 27)) != 0 &&  // OSXSAVE
160        (info2[1] & (1 <<  5)) != 0 && // AVX2
161        (info2[1] & (1 << 16)) != 0 && // AVX512 F
162        __os_has_avx512_support()) {
163        // We need to verify that AVX2 is also available,
164        // as well as AVX512, because our targets are supposed
165        // to use both.
166
167        if ((info2[1] & (1 << 17)) != 0 && // AVX512 DQ
168            (info2[1] & (1 << 28)) != 0 && // AVX512 CDI
169            (info2[1] & (1 << 30)) != 0 && // AVX512 BW
170            (info2[1] & (1 << 31)) != 0) { // AVX512 VL
171            return "skx";
172        }
173        else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
174                 (info2[1] & (1 << 27)) != 0 && // AVX512 ER
175                 (info2[1] & (1 << 28)) != 0) { // AVX512 CDI
176            return "avx512knl-i32x16";
177        }
178        // If it's unknown AVX512 target, fall through and use AVX2
179        // or whatever is available in the machine.
180    }
181
182    if ((info[2] & (1 << 27)) != 0 && // OSXSAVE
183        (info[2] & (1 << 28)) != 0 &&
184         __os_has_avx_support()) {  // AVX
185        // AVX1 for sure....
186        // Ivy Bridge?
187        if ((info[2] & (1 << 29)) != 0 &&  // F16C
188            (info[2] & (1 << 30)) != 0) {  // RDRAND
189            // So far, so good.  AVX2?
190            if ((info2[1] & (1 << 5)) != 0)
191                return "avx2-i32x8";
192            else
193                return "avx1.1-i32x8";
194        }
195        // Regular AVX
196        return "avx1-i32x8";
197    }
198    else if ((info[2] & (1 << 19)) != 0)
199        return "sse4-i32x4";
200    else if ((info[3] & (1 << 26)) != 0)
201        return "sse2-i32x4";
202    else {
203        fprintf(stderr, "Unable to detect supported SSE/AVX ISA.  Exiting.\n");
204        exit(1);
205    }
206#endif
207}
208
209#if 0
/** Identifiers for the CPU models known to the compiler.  The enumerators
    are used as indices into per-CPU tables, so CPU_None must stay 0 and
    sizeofCPUtype must stay last. */
enum CPUtype {
    // Sentinel: no CPU present / no CPU selected.
    CPU_None = 0,

    // A 'generic' CPU with no hardware SIMD capabilities at all.
    CPU_Generic = 1,

    // Early Atom.  Supports SSSE3.
    CPU_Bonnell,

    // Generic Core2-like part.  Supports SSSE3.  It isn't quite compatible
    // with Bonnell, but the difference is negligible for ISPC, which
    // doesn't make use of it.
    CPU_Core2,

    // Core2 Solo/Duo/Quad/Extreme.  Supports SSE 4.1 (but not 4.2).
    CPU_Penryn,

    // Late Core2-like part.  Supports SSE 4.2 + POPCNT/LZCNT.
    CPU_Nehalem,

    // Sandy Bridge.  Supports AVX 1.
    CPU_SandyBridge,

    // Ivy Bridge.  Supports AVX 1 + RDRAND.
    CPU_IvyBridge,

    // Haswell.  Supports AVX 2.
    CPU_Haswell,

#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
    // Broadwell.  Supports AVX 2 + ADX/RDSEED/SMAP.
    CPU_Broadwell,
#endif

#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
    // KNL.  Supports AVX512.
    CPU_KNL,
#endif

#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
    // Late Atom-like design.  Supports SSE 4.2 + POPCNT/LZCNT.
    CPU_Silvermont,
#endif

    // FIXME: LLVM supports a ton of different ARM CPU variants--not just
    // cortex-a9 and a15.  We should be able to handle any of them that
    // also have NEON support.
#ifdef ISPC_ARM_ENABLED
    // ARM Cortex A15.  Supports NEON VFPv4.
    CPU_CortexA15,

    // ARM Cortex A9.  Supports NEON VFPv3.
    CPU_CortexA9,
#endif

#ifdef ISPC_NVPTX_ENABLED
    // NVidia CUDA-compatible SM-35 architecture.
    CPU_SM35,
#endif

    // Number of enumerators above; not a CPU in its own right.
    sizeofCPUtype
};
272
273
274class AllCPUs {
275private:
276    std::vector<std::vector<std::string> > names;
277    std::vector<std::set<CPUtype> > compat;
278
279    std::set<CPUtype> Set(CPUtype type, ...) {
280        std::set<CPUtype> retn;
281        va_list args;
282
283        retn.insert(type);
284        va_start(args, type);
285        while ((type = (CPUtype)va_arg(args, int)) != CPU_None)
286            retn.insert(type);
287        va_end(args);
288
289        return retn;
290    }
291
292public:
293    AllCPUs() {
294        names = std::vector<std::vector<std::string> >(sizeofCPUtype);
295        compat = std::vector<std::set<CPUtype> >(sizeofCPUtype);
296
297        names[CPU_None].push_back("");
298
299        names[CPU_Generic].push_back("generic");
300
301        names[CPU_Bonnell].push_back("atom");
302        names[CPU_Bonnell].push_back("bonnell");
303
304        names[CPU_Core2].push_back("core2");
305
306        names[CPU_Penryn].push_back("penryn");
307
308#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
309        names[CPU_Silvermont].push_back("slm");
310        names[CPU_Silvermont].push_back("silvermont");
311#endif
312
313        names[CPU_Nehalem].push_back("corei7");
314        names[CPU_Nehalem].push_back("nehalem");
315
316        names[CPU_SandyBridge].push_back("corei7-avx");
317        names[CPU_SandyBridge].push_back("sandybridge");
318
319        names[CPU_IvyBridge].push_back("core-avx-i");
320        names[CPU_IvyBridge].push_back("ivybridge");
321
322        names[CPU_Haswell].push_back("core-avx2");
323        names[CPU_Haswell].push_back("haswell");
324
325#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
326        names[CPU_Broadwell].push_back("broadwell");
327#endif
328
329#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
330         names[CPU_KNL].push_back("knl");
331#endif
332
333#ifdef ISPC_ARM_ENABLED
334        names[CPU_CortexA15].push_back("cortex-a15");
335
336        names[CPU_CortexA9].push_back("cortex-a9");
337#endif
338
339#ifdef ISPC_NVPTX_ENABLED
340        names[CPU_SM35].push_back("sm_35");
341#endif
342
343
344#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 // LLVM 3.2 or 3.3
345        #define CPU_Silvermont CPU_Nehalem
346#else /* LLVM 3.4+ */
347        compat[CPU_Silvermont]  = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
348                                      CPU_Core2, CPU_Nehalem, CPU_Silvermont,
349                                      CPU_None);
350#endif
351
352#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
353        compat[CPU_KNL]         = Set(CPU_KNL, CPU_Generic, CPU_Bonnell, CPU_Penryn,
354                                      CPU_Core2, CPU_Nehalem, CPU_Silvermont,
355                                      CPU_SandyBridge, CPU_IvyBridge,
356                                      CPU_Haswell, CPU_Broadwell, CPU_None);
357#endif
358
359#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // LLVM 3.2, 3.3, 3.4 or 3.5
360        #define CPU_Broadwell CPU_Haswell
361#else /* LLVM 3.6+ */
362        compat[CPU_Broadwell]   = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
363                                      CPU_Core2, CPU_Nehalem, CPU_Silvermont,
364                                      CPU_SandyBridge, CPU_IvyBridge,
365                                      CPU_Haswell, CPU_Broadwell, CPU_None);
366#endif
367        compat[CPU_Haswell]     = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
368                                      CPU_Core2, CPU_Nehalem, CPU_Silvermont,
369                                      CPU_SandyBridge, CPU_IvyBridge,
370                                      CPU_Haswell, CPU_Broadwell, CPU_None);
371        compat[CPU_IvyBridge]   = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
372                                      CPU_Core2, CPU_Nehalem, CPU_Silvermont,
373                                      CPU_SandyBridge, CPU_IvyBridge,
374                                      CPU_None);
375        compat[CPU_SandyBridge] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
376                                      CPU_Core2, CPU_Nehalem, CPU_Silvermont,
377                                      CPU_SandyBridge, CPU_None);
378        compat[CPU_Nehalem]     = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
379                                      CPU_Core2, CPU_Nehalem, CPU_Silvermont,
380                                      CPU_None);
381        compat[CPU_Penryn]      = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
382                                      CPU_Core2, CPU_Nehalem, CPU_Silvermont,
383                                      CPU_None);
384        compat[CPU_Core2]       = Set(CPU_Generic, CPU_Bonnell, CPU_Core2,
385                                      CPU_None);
386        compat[CPU_Bonnell]     = Set(CPU_Generic, CPU_Bonnell, CPU_Core2,
387                                      CPU_None);
388        compat[CPU_Generic]     = Set(CPU_Generic, CPU_None);
389
390#ifdef ISPC_ARM_ENABLED
391        compat[CPU_CortexA15]   = Set(CPU_Generic, CPU_CortexA9, CPU_CortexA15,
392                                      CPU_None);
393        compat[CPU_CortexA9]    = Set(CPU_Generic, CPU_CortexA9, CPU_None);
394#endif
395
396#ifdef ISPC_NVPTX_ENABLED
397        compat[CPU_SM35]        = Set(CPU_Generic, CPU_SM35, CPU_None);
398#endif
399    }
400
401    std::string HumanReadableListOfNames() {
402        std::stringstream CPUs;
403        for (int i = CPU_Generic; i < sizeofCPUtype; i++) {
404            CPUs << names[i][0];
405            if (names[i].size() > 1) {
406                CPUs << " (synonyms: " << names[i][1];
407                for (int j = 2, je = names[i].size(); j < je; j++)
408                    CPUs << ", " << names[i][j];
409                CPUs << ")";
410            }
411            if (i < sizeofCPUtype - 1)
412                CPUs << ", ";
413        }
414        return CPUs.str();
415    }
416
417    std::string &GetDefaultNameFromType(CPUtype type) {
418        Assert((type >= CPU_None) && (type < sizeofCPUtype));
419        return names[type][0];
420    }
421
422    CPUtype GetTypeFromName(std::string name) {
423        CPUtype retn = CPU_None;
424
425        for (int i = 1; (retn == CPU_None) && (i < sizeofCPUtype); i++)
426            for (int j = 0, je = names[i].size();
427                (retn == CPU_None) && (j < je); j++)
428                if (!name.compare(names[i][j]))
429                    retn = (CPUtype)i;
430        return retn;
431    }
432
433    bool BackwardCompatible(CPUtype what, CPUtype with) {
434        Assert((what > CPU_None) && (what < sizeofCPUtype));
435        Assert((with > CPU_None) && (with < sizeofCPUtype));
436        return compat[what].find(with) != compat[what].end();
437    }
438};
439
440
441Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, bool printTarget, std::string genericAsSmth) :
442    m_target(nullptr),
443    m_targetMachine(nullptr),
444    m_dataLayout(nullptr),
445    m_valid(false),
446    m_isa(SSE2),
447    m_treatGenericAsSmth(genericAsSmth),
448    m_arch(""),
449    m_is32Bit(true),
450    m_cpu(""),
451    m_attributes(""),
452#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
453    m_tf_attributes(nullptr),
454#endif
455    m_nativeVectorWidth(-1),
456    m_nativeVectorAlignment(-1),
457    m_dataTypeWidth(-1),
458    m_vectorWidth(-1),
459    m_generatePIC(pic),
460    m_maskingIsFree(false),
461    m_maskBitCount(-1),
462    m_hasHalf(false),
463    m_hasRand(false),
464    m_hasGather(false),
465    m_hasScatter(false),
466    m_hasTranscendentals(false),
467    m_hasTrigonometry(false),
468    m_hasRsqrtd(false),
469    m_hasRcpd(false),
470    m_hasVecPrefetch(false)
471{
472    CPUtype CPUID = CPU_None, CPUfromISA = CPU_None;
473    AllCPUs a;
474
475    if (cpu) {
476        CPUID = a.GetTypeFromName(cpu);
477        if (CPUID == CPU_None) {
478            Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported"
479                  " CPUs: %s.", cpu, a.HumanReadableListOfNames().c_str());
480            return;
481        }
482    }
483
484    if (isa == nullptr) {
485        // If a CPU was specified explicitly, try to pick the best
486        // possible ISA based on that.
487        switch (CPUID) {
488            case CPU_None:
489                // No CPU and no ISA, so use system info to figure out
490                // what this CPU supports.
491                isa = lGetSystemISA();
492                Warning(SourcePos(), "No --target specified on command-line."
493                        " Using default system target \"%s\".", isa);
494                break;
495
496            case CPU_Generic:
497                isa = "generic-1";
498                break;
499
500#ifdef ISPC_NVPTX_ENABLED
501            case CPU_SM35:
502                isa = "nvptx";
503                break;
504#endif
505
506#ifdef ISPC_ARM_ENABLED
507            case CPU_CortexA9:
508            case CPU_CortexA15:
509                isa = "neon-i32x4";
510                break;
511#endif
512
513#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
514            case CPU_KNL:
515                isa = "avx512knl-i32x16";
516                break;
517#endif
518
519#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6
520            case CPU_Broadwell:
521#endif
522            case CPU_Haswell:
523                isa = "avx2-i32x8";
524                break;
525
526            case CPU_IvyBridge:
527                isa = "avx1.1-i32x8";
528                break;
529
530            case CPU_SandyBridge:
531                isa = "avx1-i32x8";
532                break;
533
534            // Penryn is here because ISPC does not use SSE 4.2
535            case CPU_Penryn:
536            case CPU_Nehalem:
537#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4
538            case CPU_Silvermont:
539#endif
540                isa = "sse4-i32x4";
541                break;
542
543            default:
544                isa = "sse2-i32x4";
545                break;
546        }
547        if (CPUID != CPU_None)
548            Warning(SourcePos(), "No --target specified on command-line."
549                    " Using ISA \"%s\" based on specified CPU \"%s\".",
550                    isa, cpu);
551    }
552
553    if (!strcasecmp(isa, "host")) {
554        isa = lGetSystemISA();
555    }
556
557    if (arch == nullptr) {
558#ifdef ISPC_ARM_ENABLED
559        if (!strncmp(isa, "neon", 4))
560            arch = "arm";
561        else
562#endif
563#ifdef ISPC_NVPTX_ENABLED
564         if(!strncmp(isa, "nvptx", 5))
565           arch = "nvptx64";
566         else
567#endif /* ISPC_NVPTX_ENABLED */
568            arch = "x86-64";
569    }
570
571    // Define arch alias
572    if (std::string(arch) == "x86_64")
573        arch = "x86-64";
574
575    bool error = false;
576
577    // Make sure the target architecture is a known one; print an error
578    // with the valid ones otherwise.
579#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
580    for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::targets().begin();
581         iter != llvm::TargetRegistry::targets().end(); ++iter) {
582#else
583    for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::begin();
584         iter != llvm::TargetRegistry::end(); ++iter) {
585#endif
586        if (std::string(arch) == iter->getName()) {
587            this->m_target = &*iter;
588            break;
589        }
590    }
591    if (this->m_target == nullptr) {
592        fprintf(stderr, "Invalid architecture \"%s\"\nOptions: ", arch);
593        llvm::TargetRegistry::iterator iter;
594#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
595        for (iter = llvm::TargetRegistry::targets().begin();
596             iter != llvm::TargetRegistry::targets().end(); ++iter)
597#else
598        for (iter = llvm::TargetRegistry::begin();
599             iter != llvm::TargetRegistry::end(); ++iter)
600#endif
601            fprintf(stderr, "%s ", iter->getName());
602        fprintf(stderr, "\n");
603        error = true;
604    }
605    else {
606        this->m_arch = arch;
607    }
608
609    // Check default LLVM generated targets
610    if (!strcasecmp(isa, "sse2") ||
611        !strcasecmp(isa, "sse2-i32x4")) {
612        this->m_isa = Target::SSE2;
613        this->m_nativeVectorWidth = 4;
614        this->m_nativeVectorAlignment = 16;
615        this->m_dataTypeWidth = 32;
616        this->m_vectorWidth = 4;
617        this->m_maskingIsFree = false;
618        this->m_maskBitCount = 32;
619        CPUfromISA = CPU_Core2;
620    }
621    else if (!strcasecmp(isa, "sse2-x2") ||
622             !strcasecmp(isa, "sse2-i32x8")) {
623        this->m_isa = Target::SSE2;
624        this->m_nativeVectorWidth = 4;
625        this->m_nativeVectorAlignment = 16;
626        this->m_dataTypeWidth = 32;
627        this->m_vectorWidth = 8;
628        this->m_maskingIsFree = false;
629        this->m_maskBitCount = 32;
630        CPUfromISA = CPU_Core2;
631    }
632    else if (!strcasecmp(isa, "sse4") ||
633             !strcasecmp(isa, "sse4-i32x4")) {
634        this->m_isa = Target::SSE4;
635        this->m_nativeVectorWidth = 4;
636        this->m_nativeVectorAlignment = 16;
637        this->m_dataTypeWidth = 32;
638        this->m_vectorWidth = 4;
639        this->m_maskingIsFree = false;
640        this->m_maskBitCount = 32;
641        CPUfromISA = CPU_Nehalem;
642    }
643    else if (!strcasecmp(isa, "sse4x2") ||
644             !strcasecmp(isa, "sse4-x2") ||
645             !strcasecmp(isa, "sse4-i32x8")) {
646        this->m_isa = Target::SSE4;
647        this->m_nativeVectorWidth = 4;
648        this->m_nativeVectorAlignment = 16;
649        this->m_dataTypeWidth = 32;
650        this->m_vectorWidth = 8;
651        this->m_maskingIsFree = false;
652        this->m_maskBitCount = 32;
653        CPUfromISA = CPU_Nehalem;
654    }
655    else if (!strcasecmp(isa, "sse4-i8x16")) {
656        this->m_isa = Target::SSE4;
657        this->m_nativeVectorWidth = 16;
658        this->m_nativeVectorAlignment = 16;
659        this->m_dataTypeWidth = 8;
660        this->m_vectorWidth = 16;
661        this->m_maskingIsFree = false;
662        this->m_maskBitCount = 8;
663        CPUfromISA = CPU_Nehalem;
664    }
665    else if (!strcasecmp(isa, "sse4-i16x8")) {
666        this->m_isa = Target::SSE4;
667        this->m_nativeVectorWidth = 8;
668        this->m_nativeVectorAlignment = 16;
669        this->m_dataTypeWidth = 16;
670        this->m_vectorWidth = 8;
671        this->m_maskingIsFree = false;
672        this->m_maskBitCount = 16;
673        CPUfromISA = CPU_Nehalem;
674    }
675    else if (!strcasecmp(isa, "generic-4") ||
676             !strcasecmp(isa, "generic-x4")) {
677        this->m_isa = Target::GENERIC;
678        this->m_nativeVectorWidth = 4;
679        this->m_nativeVectorAlignment = 16;
680        this->m_vectorWidth = 4;
681        this->m_maskingIsFree = true;
682        this->m_maskBitCount = 1;
683        this->m_hasHalf = true;
684        this->m_hasTranscendentals = true;
685        this->m_hasTrigonometry = true;
686        this->m_hasGather = this->m_hasScatter = true;
687        this->m_hasRsqrtd = this->m_hasRcpd = true;
688        CPUfromISA = CPU_Generic;
689    }
690    else if (!strcasecmp(isa, "generic-8") ||
691             !strcasecmp(isa, "generic-x8")) {
692        this->m_isa = Target::GENERIC;
693        this->m_nativeVectorWidth = 8;
694        this->m_nativeVectorAlignment = 32;
695        this->m_vectorWidth = 8;
696        this->m_maskingIsFree = true;
697        this->m_maskBitCount = 1;
698        this->m_hasHalf = true;
699        this->m_hasTranscendentals = true;
700        this->m_hasTrigonometry = true;
701        this->m_hasGather = this->m_hasScatter = true;
702        this->m_hasRsqrtd = this->m_hasRcpd = true;
703        CPUfromISA = CPU_Generic;
704    }
705    else if (!strcasecmp(isa, "generic-16") ||
706             !strcasecmp(isa, "generic-x16") ||
707             // We treat *-generic-16 as generic-16, but with special name mangling
708             strstr(isa, "-generic-16") || 
709             strstr(isa, "-generic-x16")) {
710        this->m_isa = Target::GENERIC;
711        if (strstr(isa, "-generic-16") ||
712            strstr(isa, "-generic-x16")) {
713            // It is used for appropriate name mangling and dispatch function during multitarget compilation
714            this->m_treatGenericAsSmth = isa;
715            // We need to create appropriate name for mangling.
716            // Remove "-x16" or "-16" and replace "-" with "_".
717            this->m_treatGenericAsSmth = this->m_treatGenericAsSmth.substr(0, this->m_treatGenericAsSmth.find_last_of("-"));
718            std::replace(this->m_treatGenericAsSmth.begin(), this->m_treatGenericAsSmth.end(), '-', '_');
719        }
720        this->m_nativeVectorWidth = 16;
721        this->m_nativeVectorAlignment = 64;
722        this->m_vectorWidth = 16;
723        this->m_maskingIsFree = true;
724        this->m_maskBitCount = 1;
725        this->m_hasHalf = true;
726        this->m_hasTranscendentals = true;
727        // It's set to false, because stdlib implementation of math functions
728        // is faster on MIC, than "native" implementation provided by the
729        // icc compiler.
730        this->m_hasTrigonometry = false;
731        this->m_hasGather = this->m_hasScatter = true;
732        this->m_hasRsqrtd = this->m_hasRcpd = true;
733        // It's set to true, because MIC has hardware vector prefetch instruction
734        this->m_hasVecPrefetch = true;
735        CPUfromISA = CPU_Generic;
736    }
737    else if (!strcasecmp(isa, "generic-32") ||
738             !strcasecmp(isa, "generic-x32")) {
739        this->m_isa = Target::GENERIC;
740        this->m_nativeVectorWidth = 32;
741        this->m_nativeVectorAlignment = 64;
742        this->m_vectorWidth = 32;
743        this->m_maskingIsFree = true;
744        this->m_maskBitCount = 1;
745        this->m_hasHalf = true;
746        this->m_hasTranscendentals = true;
747        this->m_hasTrigonometry = true;
748        this->m_hasGather = this->m_hasScatter = true;
749        this->m_hasRsqrtd = this->m_hasRcpd = true;
750        CPUfromISA = CPU_Generic;
751    }
752    else if (!strcasecmp(isa, "generic-64") ||
753             !strcasecmp(isa, "generic-x64")) {
754        this->m_isa = Target::GENERIC;
755        this->m_nativeVectorWidth = 64;
756        this->m_nativeVectorAlignment = 64;
757        this->m_vectorWidth = 64;
758        this->m_maskingIsFree = true;
759        this->m_maskBitCount = 1;
760        this->m_hasHalf = true;
761        this->m_hasTranscendentals = true;
762        this->m_hasTrigonometry = true;
763        this->m_hasGather = this->m_hasScatter = true;
764        this->m_hasRsqrtd = this->m_hasRcpd = true;
765        CPUfromISA = CPU_Generic;
766    }
767    else if (!strcasecmp(isa, "generic-1") ||
768             !strcasecmp(isa, "generic-x1")) {
769        this->m_isa = Target::GENERIC;
770        this->m_nativeVectorWidth = 1;
771        this->m_nativeVectorAlignment = 16;
772        this->m_vectorWidth = 1;
773        this->m_maskingIsFree = false;
774        this->m_maskBitCount = 32;
775        CPUfromISA = CPU_Generic;
776    }
777    else if (!strcasecmp(isa, "avx1-i32x4")) {
778        this->m_isa = Target::AVX;
779        this->m_nativeVectorWidth = 8;
780        this->m_nativeVectorAlignment = 32;
781        this->m_dataTypeWidth = 32;
782        this->m_vectorWidth = 4;
783        this->m_maskingIsFree = false;
784        this->m_maskBitCount = 32;
785        CPUfromISA = CPU_SandyBridge;
786    }
787    else if (!strcasecmp(isa, "avx") ||
788             !strcasecmp(isa, "avx1") ||
789             !strcasecmp(isa, "avx1-i32x8")) {
790        this->m_isa = Target::AVX;
791        this->m_nativeVectorWidth = 8;
792        this->m_nativeVectorAlignment = 32;
793        this->m_dataTypeWidth = 32;
794        this->m_vectorWidth = 8;
795        this->m_maskingIsFree = false;
796        this->m_maskBitCount = 32;
797        CPUfromISA = CPU_SandyBridge;
798    }
799    else if (!strcasecmp(isa, "avx-i64x4") ||
800             !strcasecmp(isa, "avx1-i64x4")) {
801        this->m_isa = Target::AVX;
802        this->m_nativeVectorWidth = 8;  /* native vector width in terms of floats */
803        this->m_nativeVectorAlignment = 32;
804        this->m_dataTypeWidth = 64;
805        this->m_vectorWidth = 4;
806        this->m_maskingIsFree = false;
807        this->m_maskBitCount = 64;
808        CPUfromISA = CPU_SandyBridge;
809    }
810    else if (!strcasecmp(isa, "avx-x2") ||
811             !strcasecmp(isa, "avx1-x2") ||
812             !strcasecmp(isa, "avx1-i32x16")) {
813        this->m_isa = Target::AVX;
814        this->m_nativeVectorWidth = 8;
815        this->m_nativeVectorAlignment = 32;
816        this->m_dataTypeWidth = 32;
817        this->m_vectorWidth = 16;
818        this->m_maskingIsFree = false;
819        this->m_maskBitCount = 32;
820        CPUfromISA = CPU_SandyBridge;
821    }
822    else if (!strcasecmp(isa, "avx1.1") ||
823             !strcasecmp(isa, "avx1.1-i32x8")) {
824        this->m_isa = Target::AVX11;
825        this->m_nativeVectorWidth = 8;
826        this->m_nativeVectorAlignment = 32;
827        this->m_dataTypeWidth = 32;
828        this->m_vectorWidth = 8;
829        this->m_maskingIsFree = false;
830        this->m_maskBitCount = 32;
831        this->m_hasHalf = true;
832        this->m_hasRand = true;
833        CPUfromISA = CPU_IvyBridge;
834    }
835    else if (!strcasecmp(isa, "avx1.1-x2") ||
836             !strcasecmp(isa, "avx1.1-i32x16")) {
837        this->m_isa = Target::AVX11;
838        this->m_nativeVectorWidth = 8;
839        this->m_nativeVectorAlignment = 32;
840        this->m_dataTypeWidth = 32;
841        this->m_vectorWidth = 16;
842        this->m_maskingIsFree = false;
843        this->m_maskBitCount = 32;
844        this->m_hasHalf = true;
845        this->m_hasRand = true;
846        CPUfromISA = CPU_IvyBridge;
847    }
848    else if (!strcasecmp(isa, "avx1.1-i64x4")) {
849        this->m_isa = Target::AVX11;
850        this->m_nativeVectorWidth = 8;  /* native vector width in terms of floats */
851        this->m_nativeVectorAlignment = 32;
852        this->m_dataTypeWidth = 64;
853        this->m_vectorWidth = 4;
854        this->m_maskingIsFree = false;
855        this->m_maskBitCount = 64;
856        this->m_hasHalf = true;
857        this->m_hasRand = true;
858        CPUfromISA = CPU_IvyBridge;
859    }
860    else if (!strcasecmp(isa, "avx2") ||
861             !strcasecmp(isa, "avx2-i32x8")) {
862        this->m_isa = Target::AVX2;
863        this->m_nativeVectorWidth = 8;
864        this->m_nativeVectorAlignment = 32;
865        this->m_dataTypeWidth = 32;
866        this->m_vectorWidth = 8;
867        this->m_maskingIsFree = false;
868        this->m_maskBitCount = 32;
869        this->m_hasHalf = true;
870        this->m_hasRand = true;
871        this->m_hasGather = true;
872        CPUfromISA = CPU_Haswell;
873    }
874    else if (!strcasecmp(isa, "avx2-x2") ||
875             !strcasecmp(isa, "avx2-i32x16")) {
876        this->m_isa = Target::AVX2;
877        this->m_nativeVectorWidth = 16;
878        this->m_nativeVectorAlignment = 32;
879        this->m_dataTypeWidth = 32;
880        this->m_vectorWidth = 16;
881        this->m_maskingIsFree = false;
882        this->m_maskBitCount = 32;
883        this->m_hasHalf = true;
884        this->m_hasRand = true;
885        this->m_hasGather = true;
886        CPUfromISA = CPU_Haswell;
887    }
888    else if (!strcasecmp(isa, "avx2-i64x4")) {
889        this->m_isa = Target::AVX2;
890        this->m_nativeVectorWidth = 8;  /* native vector width in terms of floats */
891        this->m_nativeVectorAlignment = 32;
892        this->m_dataTypeWidth = 64;
893        this->m_vectorWidth = 4;
894        this->m_maskingIsFree = false;
895        this->m_maskBitCount = 64;
896        this->m_hasHalf = true;
897        this->m_hasRand = true;
898        this->m_hasGather = true;
899        CPUfromISA = CPU_Haswell;
900    }
901#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
902    else if (!strcasecmp(isa, "avx512knl-i32x16")) {
903        this->m_isa = Target::KNL_AVX512;
904        this->m_nativeVectorWidth = 16;
905        this->m_nativeVectorAlignment = 64;
906        // ?? this->m_dataTypeWidth = 32;
907        this->m_vectorWidth = 16;
908        this->m_maskingIsFree = true;
909        this->m_maskBitCount = 1;
910        this->m_hasHalf = true;
911        this->m_hasRand = true;
912        this->m_hasGather = this->m_hasScatter = true;
913        this->m_hasTranscendentals = false;
914        // For MIC it is set to true due to performance reasons. The option should be tested.
915        this->m_hasTrigonometry = false;
916        this->m_hasRsqrtd = this->m_hasRcpd = false;
917        this->m_hasVecPrefetch = false;
918        CPUfromISA = CPU_KNL;
919    }
920#endif
921
922#ifdef ISPC_ARM_ENABLED
923    else if (!strcasecmp(isa, "neon-i8x16")) {
924        this->m_isa = Target::NEON8;
925        this->m_nativeVectorWidth = 16;
926        this->m_nativeVectorAlignment = 16;
927        this->m_dataTypeWidth = 8;
928        this->m_vectorWidth = 16;
929        this->m_attributes = "+neon,+fp16";
930        this->m_hasHalf = true; // ??
931        this->m_maskingIsFree = false;
932        this->m_maskBitCount = 8;
933    }
934    else if (!strcasecmp(isa, "neon-i16x8")) {
935        this->m_isa = Target::NEON16;
936        this->m_nativeVectorWidth = 8;
937        this->m_nativeVectorAlignment = 16;
938        this->m_dataTypeWidth = 16;
939        this->m_vectorWidth = 8;
940        this->m_attributes = "+neon,+fp16";
941        this->m_hasHalf = true; // ??
942        this->m_maskingIsFree = false;
943        this->m_maskBitCount = 16;
944    }
945    else if (!strcasecmp(isa, "neon") ||
946             !strcasecmp(isa, "neon-i32x4")) {
947        this->m_isa = Target::NEON32;
948        this->m_nativeVectorWidth = 4;
949        this->m_nativeVectorAlignment = 16;
950        this->m_dataTypeWidth = 32;
951        this->m_vectorWidth = 4;
952        this->m_attributes = "+neon,+fp16";
953        this->m_hasHalf = true; // ??
954        this->m_maskingIsFree = false;
955        this->m_maskBitCount = 32;
956    }
957#endif
958#ifdef ISPC_NVPTX_ENABLED
959    else if (!strcasecmp(isa, "nvptx")) {
960        this->m_isa = Target::NVPTX;
961        this->m_cpu = "sm_35";
962        this->m_nativeVectorWidth = 32;
963        this->m_nativeVectorAlignment = 32;
964        this->m_vectorWidth = 1;
965        this->m_hasHalf = true;
966        this->m_maskingIsFree = true;
967        this->m_maskBitCount = 1;
968        this->m_hasTranscendentals = true;
969        this->m_hasTrigonometry = true;
970        this->m_hasGather = this->m_hasScatter = false;
971        CPUfromISA = CPU_SM35;
972    }
973#endif /* ISPC_NVPTX_ENABLED */
974    else {
975        Error(SourcePos(), "Target \"%s\" is unknown.  Choices are: %s.",
976              isa, SupportedTargets());
977        error = true;
978    }
979
980#if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
981    if ((CPUID == CPU_None) && !strncmp(isa, "neon", 4))
982        CPUID = CPU_CortexA9;
983#endif
984
985    if (CPUID == CPU_None) {
986#ifndef ISPC_ARM_ENABLED
987        if (isa == nullptr) {
988#endif
989            std::string hostCPU = llvm::sys::getHostCPUName();
990            if (hostCPU.size() > 0)
991                cpu = strdup(hostCPU.c_str());
992            else {
993                Warning(SourcePos(), "Unable to determine host CPU!\n");
994                cpu = a.GetDefaultNameFromType(CPU_Generic).c_str();
995            }
996#ifndef ISPC_ARM_ENABLED
997        }
998        else {
999            cpu = a.GetDefaultNameFromType(CPUfromISA).c_str();
1000        }
1001#endif
1002    }
1003    else {
1004        if ((CPUfromISA != CPU_None) &&
1005            !a.BackwardCompatible(CPUID, CPUfromISA)) {
1006            Error(SourcePos(), "The requested CPU is incompatible"
1007                  " with the CPU %s needs: %s vs. %s!\n",
1008                  isa, cpu, a.GetDefaultNameFromType(CPUfromISA).c_str());
1009            return;
1010        }
1011        cpu = a.GetDefaultNameFromType(CPUID).c_str();
1012    }
1013    this->m_cpu = cpu;
1014
1015    if (!error) {
1016        // Create TargetMachine
1017        std::string triple = GetTripleString();
1018
1019        llvm::Reloc::Model relocModel = m_generatePIC ? llvm::Reloc::PIC_ :
1020            llvm::Reloc::Default;
1021        std::string featuresString = m_attributes;
1022        llvm::TargetOptions options;
1023#ifdef ISPC_ARM_ENABLED
1024        if (m_isa == Target::NEON8 || m_isa == Target::NEON16 ||
1025            m_isa == Target::NEON32)
1026            options.FloatABIType = llvm::FloatABI::Hard;
1027#endif
1028        if (g->opt.disableFMA == false)
1029            options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
1030#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1031#ifdef ISPC_IS_WINDOWS
1032        if (strcmp("x86", arch) == 0) {
1033            // Workaround for issue #503 (LLVM issue 14646).
1034            // It's Win32 specific.
1035            options.NoFramePointerElim = true;
1036        }
1037#endif
1038#endif
1039        m_targetMachine =
1040            m_target->createTargetMachine(triple, m_cpu, featuresString, options,
1041                    relocModel);
1042        Assert(m_targetMachine != nullptr);
1043
1044#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1045        m_targetMachine->setAsmVerbosityDefault(true);
1046#else /* LLVM 3.7+ */
1047        m_targetMachine->Options.MCOptions.AsmVerbose = true;
1048#endif
1049        // Initialize TargetData/DataLayout in 3 steps.
1050        // 1. Get default data layout first
1051        std::string dl_string;
1052#if ISPC_LLVM_VERSION == ISPC_LLVM_3_6
1053        dl_string = m_targetMachine->getSubtargetImpl()->getDataLayout()->getStringRepresentation();
1054#elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1055        dl_string = m_targetMachine->createDataLayout().getStringRepresentation();
1056#else // LLVM 3.5- or LLVM 3.7
1057        dl_string = m_targetMachine->getDataLayout()->getStringRepresentation();
1058#endif
1059        // 2. Adjust for generic
1060        if (m_isa == Target::GENERIC) {
1061            // <16 x i1> vectors only need 16 bit / 2 byte alignment, so add
1062            // that to the regular datalayout string for IA..
1063            // For generic-4 target we need to treat <4 x i1> as 128 bit value
1064            // in terms of required memory storage and alignment, as this is
1065            // translated to __m128 type.
1066            dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
1067                "i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-"
1068                "f80:128:128-n8:16:32:64-S128-v16:16:16-v32:32:32-v4:128:128";
1069        }
1070#ifdef ISPC_NVPTX_ENABLED
1071        else if (m_isa == Target::NVPTX)
1072        {
1073          dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
1074        }
1075#endif
1076
1077        // 3. Finally set member data
1078        m_dataLayout = new llvm::DataLayout(dl_string);
1079
1080        // Set is32Bit
1081        // This indicates if we are compiling for 32 bit platform
1082        // and can assume 32 bit runtime.
1083        // FIXME: all generic targets are handled as 64 bit, which is incorrect.
1084
1085        this->m_is32Bit = (getDataLayout()->getPointerSize() == 4);
1086
1087#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
1088        // This is LLVM 3.3+ feature.
1089        // Initialize target-specific "target-feature" attribute.
1090        if (!m_attributes.empty()) {
1091            llvm::AttrBuilder attrBuilder;
1092#ifdef ISPC_NVPTX_ENABLED
1093            if (m_isa != Target::NVPTX)
1094#endif
1095            attrBuilder.addAttribute("target-cpu", this->m_cpu);
1096            attrBuilder.addAttribute("target-features", this->m_attributes);
1097            this->m_tf_attributes = new llvm::AttributeSet(
1098                llvm::AttributeSet::get(
1099                    *g->ctx,
1100                    llvm::AttributeSet::FunctionIndex,
1101                    attrBuilder));
1102        }
1103#endif
1104
1105        Assert(this->m_vectorWidth <= ISPC_MAX_NVEC);
1106    }
1107
1108    m_valid = !error;
1109
1110    if (printTarget) {
1111        printf("Target Triple: %s\n", m_targetMachine->getTargetTriple().str().c_str());
1112        printf("Target CPU: %s\n", m_targetMachine->getTargetCPU().str().c_str());
1113        printf("Target Feature String: %s\n", m_targetMachine->getTargetFeatureString().str().c_str());
1114    }
1115
1116    return;
1117}
1118
1119
1120std::string
1121Target::SupportedCPUs() {
1122    AllCPUs a;
1123    return a.HumanReadableListOfNames();
1124}
1125
1126
1127const char *
1128Target::SupportedArchs() {
1129    return
1130#ifdef ISPC_ARM_ENABLED
1131        "arm, "
1132#endif
1133        "x86, x86-64";
1134}
1135
1136
1137const char *
1138Target::SupportedTargets() {
1139    return
1140        "host, sse2-i32x4, sse2-i32x8, "
1141        "sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, "
1142        "avx1-i32x4, "
1143        "avx1-i32x8, avx1-i32x16, avx1-i64x4, "
1144        "avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4, "
1145        "avx2-i32x8, avx2-i32x16, avx2-i64x4, "
1146#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1147        "avx512knl-i32x16, "
1148#endif
1149        "generic-x1, generic-x4, generic-x8, generic-x16, "
1150        "generic-x32, generic-x64, *-generic-x16, "
1151#ifdef ISPC_ARM_ENABLED
1152        ", neon-i8x16, neon-i16x8, neon-i32x4"
1153#endif
1154#ifdef ISPC_NVPTX_ENABLED
1155        ", nvptx"
1156#endif
1157;
1158
1159}
1160
1161
1162std::string
1163Target::GetTripleString() const {
1164    llvm::Triple triple;
1165#ifdef ISPC_ARM_ENABLED
1166    if (m_arch == "arm") {
1167        triple.setTriple("armv7-eabi");
1168    }
1169    else
1170#endif
1171    {
1172        // Start with the host triple as the default
1173        triple.setTriple(llvm::sys::getDefaultTargetTriple());
1174
1175        // And override the arch in the host triple based on what the user
1176        // specified.  Here we need to deal with the fact that LLVM uses one
1177        // naming convention for targets TargetRegistry, but wants some
1178        // slightly different ones for the triple.  TODO: is there a way to
1179        // have it do this remapping, which would presumably be a bit less
1180        // error prone?
1181        if (m_arch == "x86")
1182            triple.setArchName("i386");
1183        else if (m_arch == "x86-64")
1184            triple.setArchName("x86_64");
1185#ifdef ISPC_NVPTX_ENABLED
1186        else if (m_arch == "nvptx64")
1187          triple = llvm::Triple("nvptx64", "nvidia", "cuda");
1188#endif /* ISPC_NVPTX_ENABLED */
1189        else
1190            triple.setArchName(m_arch);
1191    }
1192    return triple.str();
1193}
1194
1195// This function returns string representation of ISA for the purpose of
1196// mangling. And may return any unique string, preferably short, like
1197// sse4, avx and etc.
1198const char *
1199Target::ISAToString(ISA isa) {
1200    switch (isa) {
1201#ifdef ISPC_ARM_ENABLED
1202    case Target::NEON8:
1203        return "neon-8";
1204    case Target::NEON16:
1205        return "neon-16";
1206    case Target::NEON32:
1207        return "neon-32";
1208#endif
1209    case Target::SSE2:
1210        return "sse2";
1211    case Target::SSE4:
1212        return "sse4";
1213    case Target::AVX:
1214        return "avx";
1215    case Target::AVX11:
1216        return "avx11";
1217    case Target::AVX2:
1218        return "avx2";
1219#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1220    case Target::KNL_AVX512:
1221        return "avx512knl-i32x16";
1222#endif
1223    case Target::SKX:
1224        return "skx";
1225    case Target::GENERIC:
1226        return "generic";
1227#ifdef ISPC_NVPTX_ENABLED
1228    case Target::NVPTX:
1229        return "nvptx";
1230#endif /* ISPC_NVPTX_ENABLED */
1231    default:
1232        FATAL("Unhandled target in ISAToString()");
1233    }
1234    return "";
1235}
1236
1237const char *
1238Target::GetISAString() const {
1239    return ISAToString(m_isa);
1240}
1241
1242
1243// This function returns string representation of default target corresponding
1244// to ISA. I.e. for SSE4 it's sse4-i32x4, for AVX11 it's avx1.1-i32x8. This
1245// string may be used to initialize Target.
1246const char *
1247Target::ISAToTargetString(ISA isa) {
1248    switch (isa) {
1249#ifdef ISPC_ARM_ENABLED
1250    case Target::NEON8:
1251        return "neon-8";
1252    case Target::NEON16:
1253        return "neon-16";
1254    case Target::NEON32:
1255        return "neon-32";
1256#endif
1257    case Target::SSE2:
1258        return "sse2-i32x4";
1259    case Target::SSE4:
1260        return "sse4-i32x4";
1261    case Target::AVX:
1262        return "avx1-i32x8";
1263    case Target::AVX11:
1264        return "avx1.1-i32x8";
1265    case Target::AVX2:
1266        return "avx2-i32x8";
1267#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1268    case Target::KNL_AVX512:
1269        return "avx512knl-i32x16";
1270#endif
1271    case Target::SKX:
1272        return "avx2";
1273    case Target::GENERIC:
1274        return "generic-4";
1275#ifdef ISPC_NVPTX_ENABLED
1276    case Target::NVPTX:
1277        return "nvptx";
1278#endif /* ISPC_NVPTX_ENABLED */
1279    default:
1280        FATAL("Unhandled target in ISAToTargetString()");
1281    }
1282    return "";
1283}
1284
1285
1286const char *
1287Target::GetISATargetString() const {
1288    return ISAToString(m_isa);
1289}
1290
1291
1292static bool
1293lGenericTypeLayoutIndeterminate(llvm::Type *type) {
1294    if (type->isFloatingPointTy() || type->isX86_MMXTy() || type->isVoidTy() ||
1295        type->isIntegerTy() || type->isLabelTy() || type->isMetadataTy())
1296        return false;
1297
1298    if (type == LLVMTypes::BoolVectorType ||
1299        type == LLVMTypes::MaskType ||
1300        type == LLVMTypes::Int1VectorType)
1301        return true;
1302
1303    llvm::ArrayType *at =
1304        llvm::dyn_cast<llvm::ArrayType>(type);
1305    if (at != nullptr)
1306        return lGenericTypeLayoutIndeterminate(at->getElementType());
1307
1308    llvm::PointerType *pt =
1309        llvm::dyn_cast<llvm::PointerType>(type);
1310    if (pt != nullptr)
1311        return false;
1312
1313    llvm::StructType *st =
1314        llvm::dyn_cast<llvm::StructType>(type);
1315    if (st != nullptr) {
1316        for (int i = 0; i < (int)st->getNumElements(); ++i)
1317            if (lGenericTypeLayoutIndeterminate(st->getElementType(i)))
1318                return true;
1319        return false;
1320    }
1321
1322    Assert(llvm::isa<llvm::VectorType>(type));
1323    return true;
1324}
1325
1326
1327llvm::Value *
1328Target::SizeOf(llvm::Type *type,
1329               llvm::BasicBlock *insertAtEnd) {
1330    if (m_isa == Target::GENERIC &&
1331        lGenericTypeLayoutIndeterminate(type)) {
1332        llvm::Value *index[1] = { LLVMInt32(1) };
1333        llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1334        llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1335        llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
1336#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1337        llvm::Instruction *gep =
1338            llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "sizeof_gep",
1339                                            insertAtEnd);
1340#else /* LLVM 3.7+ */
1341        llvm::Instruction *gep =
1342            llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr,
1343                                            arrayRef, "sizeof_gep",
1344                                            insertAtEnd);
1345#endif
1346        if (m_is32Bit || g->opt.force32BitAddressing)
1347            return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
1348                                          "sizeof_int", insertAtEnd);
1349        else
1350            return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type,
1351                                          "sizeof_int", insertAtEnd);
1352    }
1353
1354    uint64_t byteSize = getDataLayout()->getTypeStoreSize(type);
1355    if (m_is32Bit || g->opt.force32BitAddressing)
1356        return LLVMInt32((int32_t)byteSize);
1357    else
1358        return LLVMInt64(byteSize);
1359}
1360
1361
1362llvm::Value *
1363Target::StructOffset(llvm::Type *type, int element,
1364                     llvm::BasicBlock *insertAtEnd) {
1365    if (m_isa == Target::GENERIC &&
1366        lGenericTypeLayoutIndeterminate(type) == true) {
1367        llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(element) };
1368        llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1369        llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1370        llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
1371#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1372        llvm::Instruction *gep =
1373            llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "offset_gep",
1374                                            insertAtEnd);
1375#else /* LLVM 3.7+ */
1376        llvm::Instruction *gep =
1377            llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr,
1378                                            arrayRef, "offset_gep",
1379                                            insertAtEnd);
1380#endif
1381        if (m_is32Bit || g->opt.force32BitAddressing)
1382            return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
1383                                          "offset_int", insertAtEnd);
1384        else
1385            return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type,
1386                                          "offset_int", insertAtEnd);
1387    }
1388
1389    llvm::StructType *structType =
1390        llvm::dyn_cast<llvm::StructType>(type);
1391    if (structType == nullptr || structType->isSized() == false) {
1392        Assert(m->errorCount > 0);
1393        return nullptr;
1394    }
1395
1396    const llvm::StructLayout *sl = getDataLayout()->getStructLayout(structType);
1397    Assert(sl != nullptr);
1398
1399    uint64_t offset = sl->getElementOffset(element);
1400    if (m_is32Bit || g->opt.force32BitAddressing)
1401        return LLVMInt32((int32_t)offset);
1402    else
1403        return LLVMInt64(offset);
1404}
1405
1406void Target::markFuncWithTargetAttr(llvm::Function* func) {
1407#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
1408    if (m_tf_attributes) {
1409        func->addAttributes(llvm::AttributeSet::FunctionIndex, *m_tf_attributes);
1410    }
1411#endif
1412}
1413
1414
1415///////////////////////////////////////////////////////////////////////////
1416// Opt
1417
1418Opt::Opt() {
1419    level = 1;
1420    fastMath = false;
1421    fastMaskedVload = false;
1422    force32BitAddressing = true;
1423    unrollLoops = true;
1424    disableAsserts = false;
1425    disableFMA = false;
1426    forceAlignedMemory = false;
1427    disableMaskAllOnOptimizations = false;
1428    disableHandlePseudoMemoryOps = false;
1429    disableBlendedMaskedStores = false;
1430    disableCoherentControlFlow = false;
1431    disableUniformControlFlow = false;
1432    disableGatherScatterOptimizations = false;
1433    disableMaskedStoreToStore = false;
1434    disableGatherScatterFlattening = false;
1435    disableUniformMemoryOptimizations = false;
1436    disableCoalescing = false;
1437}
1438
1439///////////////////////////////////////////////////////////////////////////
1440// Globals
1441
1442Globals::Globals() {
1443    mathLib = Globals::Math_ISPC;
1444
1445    includeStdlib = true;
1446    runCPP = true;
1447    debugPrint = false;
1448    printTarget = false;
1449    debugIR = -1;
1450    disableWarnings = false;
1451    warningsAsErrors = false;
1452    quiet = false;
1453    forceColoredOutput = false;
1454    disableLineWrap = false;
1455    emitPerfWarnings = true;
1456    emitInstrumentation = false;
1457    generateDebuggingSymbols = false;
1458    enableFuzzTest = false;
1459    fuzzTestSeed = -1;
1460    mangleFunctionsWithTarget = false;
1461
1462    ctx = new llvm::LLVMContext;
1463
1464#ifdef ISPC_IS_WINDOWS
1465    _getcwd(currentDirectory, sizeof(currentDirectory));
1466#else
1467    if (getcwd(currentDirectory, sizeof(currentDirectory)) == nullptr)
1468        FATAL("Current directory path too long!");
1469#endif
1470    forceAlignment = -1;
1471    dllExport = false;
1472}
1473
1474///////////////////////////////////////////////////////////////////////////
1475// SourcePos
1476
1477SourcePos::SourcePos(const char *n, int fl, int fc, int ll, int lc) {
1478    name = n;
1479    if (name == nullptr) {
1480        if (m != nullptr)
1481            name = m->module->getModuleIdentifier().c_str();
1482        else
1483            name = "(unknown)";
1484    }
1485    first_line = fl;
1486    first_column = fc;
1487    last_line = ll != 0 ? ll : fl;
1488    last_column = lc != 0 ? lc : fc;
1489}
1490
1491
1492#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1493llvm::DIFile
1494#else /* LLVM 3.7+ */
1495llvm::DIFile*
1496//llvm::MDFile*
1497#endif
1498SourcePos::GetDIFile() const {
1499    std::string directory, filename;
1500    GetDirectoryAndFileName(g->currentDirectory, name, &directory, &filename);
1501#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1502    llvm::DIFile ret = m->diBuilder->createFile(filename, directory);
1503    Assert(ret.Verify());
1504#else /* LLVM 3.7+ */
1505    llvm::DIFile *ret = m->diBuilder->createFile(filename, directory);
1506    //llvm::MDFile *ret = m->diBuilder->createFile(filename, directory);
1507#endif
1508    return ret;
1509}
1510
1511
1512void
1513SourcePos::Print() const {
1514    printf(" @ [%s:%d.%d - %d.%d] ", name, first_line, first_column,
1515           last_line, last_column);
1516}
1517
1518
1519bool
1520SourcePos::operator==(const SourcePos &p2) const {
1521    return (!strcmp(name, p2.name) &&
1522            first_line == p2.first_line &&
1523            first_column == p2.first_column &&
1524            last_line == p2.last_line &&
1525            last_column == p2.last_column);
1526}
1527
1528
1529SourcePos
1530Union(const SourcePos &p1, const SourcePos &p2) {
1531    if (strcmp(p1.name, p2.name) != 0)
1532        return p1;
1533
1534    SourcePos ret;
1535    ret.name = p1.name;
1536    ret.first_line = std::min(p1.first_line, p2.first_line);
1537    ret.first_column = std::min(p1.first_column, p2.first_column);
1538    ret.last_line = std::max(p1.last_line, p2.last_line);
1539    ret.last_column = std::max(p1.last_column, p2.last_column);
1540    return ret;
1541}
1542#endif
Note: See TracBrowser for help on using the repository browser.