Changeset 3586 for trunk


Ignore:
Timestamp:
Dec 17, 2013, 5:07:38 PM (5 years ago)
Author:
linmengl
Message:

compile llvm into native code, banned some invalid combinations of fw and ir

Location:
trunk/libgen
Files:
1 added
11 edited

Legend:

Unmodified
Added
Removed
  • trunk/libgen/Library_Generator/LLVMInstructions.py

    r3584 r3586  
    3535                "args_type":{"arg1":"SIMD_type", "arg2":"SIMD_type"},
    3636                "return_type":"SIMD_type",
    37                 "fws":[[2, 4, 8, 16, 32, 64, 128]],
     37                "fws":[[8, 16, 32, 64, 128]],
    3838        },
    3939        "simd_sub":\
     
    4242                "args_type":{"arg1":"SIMD_type", "arg2":"SIMD_type"},
    4343                "return_type":"SIMD_type",
    44                 "fws":[[2, 4, 8, 16, 32, 64, 128]],
     44                "fws":[[8, 16, 32, 64, 128]],
    4545        },
    46         "simd_umult":\
    47         {
    48                 "signature":["t llvm_mul_$fw$(t arg1, t arg2)"],
    49                 "args_type":{"arg1":"SIMD_type", "arg2":"SIMD_type"},
    50                 "return_type":"SIMD_type",
    51                 "fws":[[2, 4, 8, 16, 32, 64, 128]],
    52         },     
    5346        "simd_mult":\
    5447        {
     
    5649                "args_type":{"arg1":"SIMD_type", "arg2":"SIMD_type"},
    5750                "return_type":"SIMD_type",
    58                 "fws":[[2, 4, 8, 16, 32, 64, 128]],
     51                "fws":[[8, 16, 32, 64]],
    5952        },
    6053        "simd_eq":\
     
    6356                "args_type":{"arg1":"SIMD_type", "arg2":"SIMD_type"},
    6457                "return_type":"SIMD_type",
    65                 "fws":[[2, 4, 8, 16, 32, 64, 128]],
     58                "fws":[[8, 16, 32, 64, 128]],
    6659        },
    6760        "simd_gt":\
     
    7063                "args_type":{"arg1":"SIMD_type", "arg2":"SIMD_type"},
    7164                "return_type":"SIMD_type",
    72                 "fws":[[2, 4, 8, 16, 32, 64, 128]],
     65                "fws":[[8, 16, 32, 64, 128]],
    7366        },
    7467        "simd_ugt":\
     
    7770                "args_type":{"arg1":"SIMD_type", "arg2":"SIMD_type"},
    7871                "return_type":"SIMD_type",
    79                 "fws":[[2, 4, 8, 16, 32, 64, 128]],
     72                "fws":[[8, 16, 32, 64, 128]],
    8073        },
    8174        "simd_lt":\
     
    8477                "args_type":{"arg1":"SIMD_type", "arg2":"SIMD_type"},
    8578                "return_type":"SIMD_type",
    86                 "fws":[[2, 4, 8, 16, 32, 64, 128]],
     79                "fws":[[8, 16, 32, 64, 128]],
    8780        },
    8881        "simd_ult":\
     
    9184                "args_type":{"arg1":"SIMD_type", "arg2":"SIMD_type"},
    9285                "return_type":"SIMD_type",
    93                 "fws":[[2, 4, 8, 16, 32, 64, 128]],
     86                "fws":[[8, 16, 32, 64, 128]],
    9487        },     
    9588        "simd_vsrl":\
     
    9891                "args_type":{"arg1":"SIMD_type", "shift_mask":"__m128i"},
    9992                "return_type":"SIMD_type",
    100                 "fws":[[2, 4, 8, 16, 32, 64, 128]],
     93                "fws":[[8, 16, 32]],
    10194        },     
    10295        "simd_vsra":\
     
    10598                "args_type":{"arg1":"SIMD_type", "shift_mask":"__m128i"},
    10699                "return_type":"SIMD_type",
    107                 "fws":[[2, 4, 8, 16, 32, 64, 128]],
     100                "fws":[[8, 16, 32, 64]],
    108101        },
    109102        "simd_vsll":\
     
    112105                "args_type":{"arg1":"SIMD_type", "shift_mask":"__m128i"},
    113106                "return_type":"SIMD_type",
    114                 "fws":[[2, 4, 8, 16, 32, 64, 128]],
     107                "fws":[[8, 16, 32]],
    115108        },     
    116109        "bitblock_load_aligned":\
  • trunk/libgen/Library_Tester/CalculatingModules/simd_vsra.py

    r3584 r3586  
    88        (arg1, count) = (data[0], data[1])
    99        (i, sz, ans) = (0, len(arg1), "")
    10         count = int(count[-32:], 2)
    11         ans = simd_srai.GetResult(fw, count, [arg1])
     10       
     11        while i<sz:
     12                sh = int(count[i:i+fw], 2) if fw<=32 else int(count[i+fw-32:i+fw], 2)
     13                sh &= (fw - 1)
     14                ans += simd_srai.GetResult(fw, sh, [arg1[i:i+fw]])
     15                i += fw
     16       
    1217        return ans
  • trunk/libgen/Library_Tester/LibraryTester.py

    r3584 r3586  
    5252                return "g++ -mavx2 -o"
    5353        elif arch in configure.LLVM_SERIES:
    54                 return "./llvm_build.sh "
     54                return "make llvm_test # "
    5555
    5656def GetRunCommand(arch):
    57         if arch in configure.LLVM_SERIES:
    58                 return "lli optimized.bc"
    59         else:
    60                 return "./" + arch + "_test"
     57        return "./" + arch + "_test"
    6158
    6259def ReadContentAsOneLine(fileName):
  • trunk/libgen/Makefile

    r3584 r3586  
    195195        rm -f $(TESTER_DIR)/SS*_test
    196196        rm -f $(TESTER_DIR)/AVX*_test
     197        rm -f $(TESTER_DIR)/LLVM*_test
    197198        rm -f $(TESTER_DIR)/idisa*.*
    198199        rm -f $(TESTER_DIR)/CalculatingModules/*.pyc
     
    201202        rm -f $(TESTER_DIR)/output_temp/*
    202203        rm -f $(CONFIGURE_DIR)/*.pyc
     204        make -C Library_Tester clean
     205        make -C llvm_header clean
  • trunk/libgen/llvm_header/Makefile

    r3584 r3586  
    2222
    2323clean:
    24         rm all.* optimized.* playground.ll playground *.bc *.pyc tester.ll tester
     24        rm -f all.* optimized.* playground.ll playground
     25        rm -f *.bc *.pyc tester.* tester optimized
  • trunk/libgen/llvm_header/config.py

    r3584 r3586  
    9292impl_template['store_unaligned'] = '''\
    9393define void @llvm_store_unaligned(<{n} x i64> %a, <{n} x i64>* %addr) alwaysinline {{
     94entry:
    9495  store <{n} x i64> %a, <{n} x i64>* %addr, align 1
    9596  ret void
     
    103104impl_template['store_aligned'] = '''\
    104105define void @llvm_store_aligned(<{n} x i64> %a, <{n} x i64>* %addr) alwaysinline {{
     106entry:
    105107  store <{n} x i64> %a, <{n} x i64>* %addr, align 16
    106108  ret void
     
    108110'''
    109111
    110 fw_set = [2**i for i in range(1, 8)] # 2^1 ~ 2^7
     112fw_set = [2 ** i for i in range(3, 8)] # 2^3 ~ 2^7
    111113
    112114if register_bits > 128:
     
    115117c_type_fw = {'short': 16, 'int': 32, 'long long': 64}
    116118
    117 vertical_ir_set = ['add', 'sub', 'mul', 'and', 'or', 'xor',
    118                    'icmp eq', 'icmp sgt', 'icmp ugt', 'icmp slt', 'icmp ult',
    119                    'shl', 'lshr', 'ashr']
     119vertical_ir_set = ['add', 'sub', 'mul', 'and', 'or', 'xor', 'icmp eq',
     120                   'icmp sgt', 'icmp ugt', 'icmp slt', 'icmp ult', 'shl',
     121                   'lshr', 'ashr']
     122
     123banned_vertical_fw_ir_pairs = [(128, 'mul'), (64, 'shl'), (128, 'shl'),
     124                               (64, 'lshr'), (128, 'lshr'), (128, 'ashr')]
    120125
    121126minimal_test_cpp = '''\
     
    134139
    135140# Utility functions
     141
     142
    136143def get_llvm_func(fw, ir_func):
    137144    return "llvm_{ir_func}_{fw}".format(
     
    145152
    146153def get_vertical_decl(fw, ir_func):
    147     return decl_template['vertical'].format(llvm_func=get_llvm_func(fw, ir_func))
     154    return decl_template['vertical'].format(
     155        llvm_func=get_llvm_func(fw, ir_func))
    148156
    149157
     
    161169        llvm_func=get_llvm_func(fw, ir_func),
    162170        vec_type=get_vec_type(fw), impl=impl)
    163 
  • trunk/libgen/llvm_header/header.h

    r3584 r3586  
    77
    88extern "C" {
    9 SIMD_type llvm_add_2(SIMD_type a, SIMD_type b);
    10 SIMD_type llvm_add_4(SIMD_type a, SIMD_type b);
    119SIMD_type llvm_add_8(SIMD_type a, SIMD_type b);
    1210SIMD_type llvm_add_16(SIMD_type a, SIMD_type b);
     
    1412SIMD_type llvm_add_64(SIMD_type a, SIMD_type b);
    1513SIMD_type llvm_add_128(SIMD_type a, SIMD_type b);
    16 SIMD_type llvm_sub_2(SIMD_type a, SIMD_type b);
    17 SIMD_type llvm_sub_4(SIMD_type a, SIMD_type b);
    1814SIMD_type llvm_sub_8(SIMD_type a, SIMD_type b);
    1915SIMD_type llvm_sub_16(SIMD_type a, SIMD_type b);
     
    2117SIMD_type llvm_sub_64(SIMD_type a, SIMD_type b);
    2218SIMD_type llvm_sub_128(SIMD_type a, SIMD_type b);
    23 SIMD_type llvm_mul_2(SIMD_type a, SIMD_type b);
    24 SIMD_type llvm_mul_4(SIMD_type a, SIMD_type b);
    2519SIMD_type llvm_mul_8(SIMD_type a, SIMD_type b);
    2620SIMD_type llvm_mul_16(SIMD_type a, SIMD_type b);
    2721SIMD_type llvm_mul_32(SIMD_type a, SIMD_type b);
    2822SIMD_type llvm_mul_64(SIMD_type a, SIMD_type b);
    29 SIMD_type llvm_mul_128(SIMD_type a, SIMD_type b);
    30 SIMD_type llvm_and_2(SIMD_type a, SIMD_type b);
    31 SIMD_type llvm_and_4(SIMD_type a, SIMD_type b);
    3223SIMD_type llvm_and_8(SIMD_type a, SIMD_type b);
    3324SIMD_type llvm_and_16(SIMD_type a, SIMD_type b);
     
    3526SIMD_type llvm_and_64(SIMD_type a, SIMD_type b);
    3627SIMD_type llvm_and_128(SIMD_type a, SIMD_type b);
    37 SIMD_type llvm_or_2(SIMD_type a, SIMD_type b);
    38 SIMD_type llvm_or_4(SIMD_type a, SIMD_type b);
    3928SIMD_type llvm_or_8(SIMD_type a, SIMD_type b);
    4029SIMD_type llvm_or_16(SIMD_type a, SIMD_type b);
     
    4231SIMD_type llvm_or_64(SIMD_type a, SIMD_type b);
    4332SIMD_type llvm_or_128(SIMD_type a, SIMD_type b);
    44 SIMD_type llvm_xor_2(SIMD_type a, SIMD_type b);
    45 SIMD_type llvm_xor_4(SIMD_type a, SIMD_type b);
    4633SIMD_type llvm_xor_8(SIMD_type a, SIMD_type b);
    4734SIMD_type llvm_xor_16(SIMD_type a, SIMD_type b);
     
    4936SIMD_type llvm_xor_64(SIMD_type a, SIMD_type b);
    5037SIMD_type llvm_xor_128(SIMD_type a, SIMD_type b);
    51 SIMD_type llvm_icmp_eq_2(SIMD_type a, SIMD_type b);
    52 SIMD_type llvm_icmp_eq_4(SIMD_type a, SIMD_type b);
    5338SIMD_type llvm_icmp_eq_8(SIMD_type a, SIMD_type b);
    5439SIMD_type llvm_icmp_eq_16(SIMD_type a, SIMD_type b);
     
    5641SIMD_type llvm_icmp_eq_64(SIMD_type a, SIMD_type b);
    5742SIMD_type llvm_icmp_eq_128(SIMD_type a, SIMD_type b);
    58 SIMD_type llvm_icmp_sgt_2(SIMD_type a, SIMD_type b);
    59 SIMD_type llvm_icmp_sgt_4(SIMD_type a, SIMD_type b);
    6043SIMD_type llvm_icmp_sgt_8(SIMD_type a, SIMD_type b);
    6144SIMD_type llvm_icmp_sgt_16(SIMD_type a, SIMD_type b);
     
    6346SIMD_type llvm_icmp_sgt_64(SIMD_type a, SIMD_type b);
    6447SIMD_type llvm_icmp_sgt_128(SIMD_type a, SIMD_type b);
    65 SIMD_type llvm_icmp_ugt_2(SIMD_type a, SIMD_type b);
    66 SIMD_type llvm_icmp_ugt_4(SIMD_type a, SIMD_type b);
    6748SIMD_type llvm_icmp_ugt_8(SIMD_type a, SIMD_type b);
    6849SIMD_type llvm_icmp_ugt_16(SIMD_type a, SIMD_type b);
     
    7051SIMD_type llvm_icmp_ugt_64(SIMD_type a, SIMD_type b);
    7152SIMD_type llvm_icmp_ugt_128(SIMD_type a, SIMD_type b);
    72 SIMD_type llvm_icmp_slt_2(SIMD_type a, SIMD_type b);
    73 SIMD_type llvm_icmp_slt_4(SIMD_type a, SIMD_type b);
    7453SIMD_type llvm_icmp_slt_8(SIMD_type a, SIMD_type b);
    7554SIMD_type llvm_icmp_slt_16(SIMD_type a, SIMD_type b);
     
    7756SIMD_type llvm_icmp_slt_64(SIMD_type a, SIMD_type b);
    7857SIMD_type llvm_icmp_slt_128(SIMD_type a, SIMD_type b);
    79 SIMD_type llvm_icmp_ult_2(SIMD_type a, SIMD_type b);
    80 SIMD_type llvm_icmp_ult_4(SIMD_type a, SIMD_type b);
    8158SIMD_type llvm_icmp_ult_8(SIMD_type a, SIMD_type b);
    8259SIMD_type llvm_icmp_ult_16(SIMD_type a, SIMD_type b);
     
    8461SIMD_type llvm_icmp_ult_64(SIMD_type a, SIMD_type b);
    8562SIMD_type llvm_icmp_ult_128(SIMD_type a, SIMD_type b);
    86 SIMD_type llvm_shl_2(SIMD_type a, SIMD_type b);
    87 SIMD_type llvm_shl_4(SIMD_type a, SIMD_type b);
    8863SIMD_type llvm_shl_8(SIMD_type a, SIMD_type b);
    8964SIMD_type llvm_shl_16(SIMD_type a, SIMD_type b);
    9065SIMD_type llvm_shl_32(SIMD_type a, SIMD_type b);
    91 SIMD_type llvm_shl_64(SIMD_type a, SIMD_type b);
    92 SIMD_type llvm_shl_128(SIMD_type a, SIMD_type b);
    93 SIMD_type llvm_lshr_2(SIMD_type a, SIMD_type b);
    94 SIMD_type llvm_lshr_4(SIMD_type a, SIMD_type b);
    9566SIMD_type llvm_lshr_8(SIMD_type a, SIMD_type b);
    9667SIMD_type llvm_lshr_16(SIMD_type a, SIMD_type b);
    9768SIMD_type llvm_lshr_32(SIMD_type a, SIMD_type b);
    98 SIMD_type llvm_lshr_64(SIMD_type a, SIMD_type b);
    99 SIMD_type llvm_lshr_128(SIMD_type a, SIMD_type b);
    100 SIMD_type llvm_ashr_2(SIMD_type a, SIMD_type b);
    101 SIMD_type llvm_ashr_4(SIMD_type a, SIMD_type b);
    10269SIMD_type llvm_ashr_8(SIMD_type a, SIMD_type b);
    10370SIMD_type llvm_ashr_16(SIMD_type a, SIMD_type b);
    10471SIMD_type llvm_ashr_32(SIMD_type a, SIMD_type b);
    10572SIMD_type llvm_ashr_64(SIMD_type a, SIMD_type b);
    106 SIMD_type llvm_ashr_128(SIMD_type a, SIMD_type b);
    10773int llvm_extractelement_32(SIMD_type a, int idx);
    10874short llvm_extractelement_16(SIMD_type a, int idx);
     
    11581void llvm_store_aligned(SIMD_type a, SIMD_type *addr);
    11682void llvm_store_unaligned(SIMD_type a, SIMD_type *addr);
     83
     84SIMD_type llvm_constant_32(int val);
    11785}
    11886#endif //LLVM_HEADER_H
  • trunk/libgen/llvm_header/header.ll

    r3584 r3586  
    1 define <64 x i2> @llvm_add_2(<64 x i2> %a, <64 x i2> %b) alwaysinline {
    2 entry:
    3     %res = add <64 x i2> %a, %b
    4     ret <64 x i2> %res
    5 }
    6 define <32 x i4> @llvm_add_4(<32 x i4> %a, <32 x i4> %b) alwaysinline {
    7 entry:
    8     %res = add <32 x i4> %a, %b
    9     ret <32 x i4> %res
    10 }
    111define <16 x i8> @llvm_add_8(<16 x i8> %a, <16 x i8> %b) alwaysinline {
    122entry:
     
    3424    ret <1 x i128> %res
    3525}
    36 define <64 x i2> @llvm_sub_2(<64 x i2> %a, <64 x i2> %b) alwaysinline {
    37 entry:
    38     %res = sub <64 x i2> %a, %b
    39     ret <64 x i2> %res
    40 }
    41 define <32 x i4> @llvm_sub_4(<32 x i4> %a, <32 x i4> %b) alwaysinline {
    42 entry:
    43     %res = sub <32 x i4> %a, %b
    44     ret <32 x i4> %res
    45 }
    4626define <16 x i8> @llvm_sub_8(<16 x i8> %a, <16 x i8> %b) alwaysinline {
    4727entry:
     
    6949    ret <1 x i128> %res
    7050}
    71 define <64 x i2> @llvm_mul_2(<64 x i2> %a, <64 x i2> %b) alwaysinline {
    72 entry:
    73     %res = mul <64 x i2> %a, %b
    74     ret <64 x i2> %res
    75 }
    76 define <32 x i4> @llvm_mul_4(<32 x i4> %a, <32 x i4> %b) alwaysinline {
    77 entry:
    78     %res = mul <32 x i4> %a, %b
    79     ret <32 x i4> %res
    80 }
    8151define <16 x i8> @llvm_mul_8(<16 x i8> %a, <16 x i8> %b) alwaysinline {
    8252entry:
     
    9969    ret <2 x i64> %res
    10070}
    101 define <1 x i128> @llvm_mul_128(<1 x i128> %a, <1 x i128> %b) alwaysinline {
    102 entry:
    103     %res = mul <1 x i128> %a, %b
    104     ret <1 x i128> %res
    105 }
    106 define <64 x i2> @llvm_and_2(<64 x i2> %a, <64 x i2> %b) alwaysinline {
    107 entry:
    108     %res = and <64 x i2> %a, %b
    109     ret <64 x i2> %res
    110 }
    111 define <32 x i4> @llvm_and_4(<32 x i4> %a, <32 x i4> %b) alwaysinline {
    112 entry:
    113     %res = and <32 x i4> %a, %b
    114     ret <32 x i4> %res
    115 }
    11671define <16 x i8> @llvm_and_8(<16 x i8> %a, <16 x i8> %b) alwaysinline {
    11772entry:
     
    13994    ret <1 x i128> %res
    14095}
    141 define <64 x i2> @llvm_or_2(<64 x i2> %a, <64 x i2> %b) alwaysinline {
    142 entry:
    143     %res = or <64 x i2> %a, %b
    144     ret <64 x i2> %res
    145 }
    146 define <32 x i4> @llvm_or_4(<32 x i4> %a, <32 x i4> %b) alwaysinline {
    147 entry:
    148     %res = or <32 x i4> %a, %b
    149     ret <32 x i4> %res
    150 }
    15196define <16 x i8> @llvm_or_8(<16 x i8> %a, <16 x i8> %b) alwaysinline {
    15297entry:
     
    174119    ret <1 x i128> %res
    175120}
    176 define <64 x i2> @llvm_xor_2(<64 x i2> %a, <64 x i2> %b) alwaysinline {
    177 entry:
    178     %res = xor <64 x i2> %a, %b
    179     ret <64 x i2> %res
    180 }
    181 define <32 x i4> @llvm_xor_4(<32 x i4> %a, <32 x i4> %b) alwaysinline {
    182 entry:
    183     %res = xor <32 x i4> %a, %b
    184     ret <32 x i4> %res
    185 }
    186121define <16 x i8> @llvm_xor_8(<16 x i8> %a, <16 x i8> %b) alwaysinline {
    187122entry:
     
    208143    %res = xor <1 x i128> %a, %b
    209144    ret <1 x i128> %res
    210 }
    211 define <64 x i2> @llvm_icmp_eq_2(<64 x i2> %a, <64 x i2> %b) alwaysinline {
    212 entry:
    213     %cmp_res = icmp eq <64 x i2> %a, %b
    214         %res = sext <64 x i1> %cmp_res to <64 x i2>
    215     ret <64 x i2> %res
    216 }
    217 define <32 x i4> @llvm_icmp_eq_4(<32 x i4> %a, <32 x i4> %b) alwaysinline {
    218 entry:
    219     %cmp_res = icmp eq <32 x i4> %a, %b
    220         %res = sext <32 x i1> %cmp_res to <32 x i4>
    221     ret <32 x i4> %res
    222145}
    223146define <16 x i8> @llvm_icmp_eq_8(<16 x i8> %a, <16 x i8> %b) alwaysinline {
     
    251174    ret <1 x i128> %res
    252175}
    253 define <64 x i2> @llvm_icmp_sgt_2(<64 x i2> %a, <64 x i2> %b) alwaysinline {
    254 entry:
    255     %cmp_res = icmp sgt <64 x i2> %a, %b
    256         %res = sext <64 x i1> %cmp_res to <64 x i2>
    257     ret <64 x i2> %res
    258 }
    259 define <32 x i4> @llvm_icmp_sgt_4(<32 x i4> %a, <32 x i4> %b) alwaysinline {
    260 entry:
    261     %cmp_res = icmp sgt <32 x i4> %a, %b
    262         %res = sext <32 x i1> %cmp_res to <32 x i4>
    263     ret <32 x i4> %res
    264 }
    265176define <16 x i8> @llvm_icmp_sgt_8(<16 x i8> %a, <16 x i8> %b) alwaysinline {
    266177entry:
     
    293204    ret <1 x i128> %res
    294205}
    295 define <64 x i2> @llvm_icmp_ugt_2(<64 x i2> %a, <64 x i2> %b) alwaysinline {
    296 entry:
    297     %cmp_res = icmp ugt <64 x i2> %a, %b
    298         %res = sext <64 x i1> %cmp_res to <64 x i2>
    299     ret <64 x i2> %res
    300 }
    301 define <32 x i4> @llvm_icmp_ugt_4(<32 x i4> %a, <32 x i4> %b) alwaysinline {
    302 entry:
    303     %cmp_res = icmp ugt <32 x i4> %a, %b
    304         %res = sext <32 x i1> %cmp_res to <32 x i4>
    305     ret <32 x i4> %res
    306 }
    307206define <16 x i8> @llvm_icmp_ugt_8(<16 x i8> %a, <16 x i8> %b) alwaysinline {
    308207entry:
     
    335234    ret <1 x i128> %res
    336235}
    337 define <64 x i2> @llvm_icmp_slt_2(<64 x i2> %a, <64 x i2> %b) alwaysinline {
    338 entry:
    339     %cmp_res = icmp slt <64 x i2> %a, %b
    340         %res = sext <64 x i1> %cmp_res to <64 x i2>
    341     ret <64 x i2> %res
    342 }
    343 define <32 x i4> @llvm_icmp_slt_4(<32 x i4> %a, <32 x i4> %b) alwaysinline {
    344 entry:
    345     %cmp_res = icmp slt <32 x i4> %a, %b
    346         %res = sext <32 x i1> %cmp_res to <32 x i4>
    347     ret <32 x i4> %res
    348 }
    349236define <16 x i8> @llvm_icmp_slt_8(<16 x i8> %a, <16 x i8> %b) alwaysinline {
    350237entry:
     
    377264    ret <1 x i128> %res
    378265}
    379 define <64 x i2> @llvm_icmp_ult_2(<64 x i2> %a, <64 x i2> %b) alwaysinline {
    380 entry:
    381     %cmp_res = icmp ult <64 x i2> %a, %b
    382         %res = sext <64 x i1> %cmp_res to <64 x i2>
    383     ret <64 x i2> %res
    384 }
    385 define <32 x i4> @llvm_icmp_ult_4(<32 x i4> %a, <32 x i4> %b) alwaysinline {
    386 entry:
    387     %cmp_res = icmp ult <32 x i4> %a, %b
    388         %res = sext <32 x i1> %cmp_res to <32 x i4>
    389     ret <32 x i4> %res
    390 }
    391266define <16 x i8> @llvm_icmp_ult_8(<16 x i8> %a, <16 x i8> %b) alwaysinline {
    392267entry:
     
    419294    ret <1 x i128> %res
    420295}
    421 define <64 x i2> @llvm_shl_2(<64 x i2> %a, <64 x i2> %b) alwaysinline {
    422 entry:
    423     %res = shl <64 x i2> %a, %b
    424     ret <64 x i2> %res
    425 }
    426 define <32 x i4> @llvm_shl_4(<32 x i4> %a, <32 x i4> %b) alwaysinline {
    427 entry:
    428     %res = shl <32 x i4> %a, %b
    429     ret <32 x i4> %res
    430 }
    431296define <16 x i8> @llvm_shl_8(<16 x i8> %a, <16 x i8> %b) alwaysinline {
    432297entry:
     
    444309    ret <4 x i32> %res
    445310}
    446 define <2 x i64> @llvm_shl_64(<2 x i64> %a, <2 x i64> %b) alwaysinline {
    447 entry:
    448     %res = shl <2 x i64> %a, %b
    449     ret <2 x i64> %res
    450 }
    451 define <1 x i128> @llvm_shl_128(<1 x i128> %a, <1 x i128> %b) alwaysinline {
    452 entry:
    453     %res = shl <1 x i128> %a, %b
    454     ret <1 x i128> %res
    455 }
    456 define <64 x i2> @llvm_lshr_2(<64 x i2> %a, <64 x i2> %b) alwaysinline {
    457 entry:
    458     %res = lshr <64 x i2> %a, %b
    459     ret <64 x i2> %res
    460 }
    461 define <32 x i4> @llvm_lshr_4(<32 x i4> %a, <32 x i4> %b) alwaysinline {
    462 entry:
    463     %res = lshr <32 x i4> %a, %b
    464     ret <32 x i4> %res
    465 }
    466311define <16 x i8> @llvm_lshr_8(<16 x i8> %a, <16 x i8> %b) alwaysinline {
    467312entry:
     
    479324    ret <4 x i32> %res
    480325}
    481 define <2 x i64> @llvm_lshr_64(<2 x i64> %a, <2 x i64> %b) alwaysinline {
    482 entry:
    483     %res = lshr <2 x i64> %a, %b
    484     ret <2 x i64> %res
    485 }
    486 define <1 x i128> @llvm_lshr_128(<1 x i128> %a, <1 x i128> %b) alwaysinline {
    487 entry:
    488     %res = lshr <1 x i128> %a, %b
    489     ret <1 x i128> %res
    490 }
    491 define <64 x i2> @llvm_ashr_2(<64 x i2> %a, <64 x i2> %b) alwaysinline {
    492 entry:
    493     %res = ashr <64 x i2> %a, %b
    494     ret <64 x i2> %res
    495 }
    496 define <32 x i4> @llvm_ashr_4(<32 x i4> %a, <32 x i4> %b) alwaysinline {
    497 entry:
    498     %res = ashr <32 x i4> %a, %b
    499     ret <32 x i4> %res
    500 }
    501326define <16 x i8> @llvm_ashr_8(<16 x i8> %a, <16 x i8> %b) alwaysinline {
    502327entry:
     
    518343    %res = ashr <2 x i64> %a, %b
    519344    ret <2 x i64> %res
    520 }
    521 define <1 x i128> @llvm_ashr_128(<1 x i128> %a, <1 x i128> %b) alwaysinline {
    522 entry:
    523     %res = ashr <1 x i128> %a, %b
    524     ret <1 x i128> %res
    525345}
    526346define i32 @llvm_extractelement_32(<4 x i32> %a, i32 %idx) alwaysinline {
     
    565385}
    566386define void @llvm_store_aligned(<2 x i64> %a, <2 x i64>* %addr) alwaysinline {
     387entry:
    567388  store <2 x i64> %a, <2 x i64>* %addr, align 16
    568389  ret void
    569390}
    570391define void @llvm_store_unaligned(<2 x i64> %a, <2 x i64>* %addr) alwaysinline {
     392entry:
    571393  store <2 x i64> %a, <2 x i64>* %addr, align 1
    572394  ret void
    573395}
     396
     397define <4 x i32> @llvm_constant_32(i32 %val) alwaysinline {
     398entry:
     399  %0 = insertelement <4 x i32> undef, i32 %val, i32 0
     400  %1 = insertelement <4 x i32> %0, i32 %val, i32 1
     401  %2 = insertelement <4 x i32> %1, i32 %val, i32 2
     402  %3 = insertelement <4 x i32> %2, i32 %val, i32 3
     403  ret <4 x i32> %3
     404}
  • trunk/libgen/llvm_header/header_gen.py

    r3584 r3586  
    44
    55def append_vertical(doth, dotll, fw, ir_func):
     6    if (fw, ir_func) in config.banned_vertical_fw_ir_pairs:
     7        return
     8
    69    # Append doth(.h) declare
    710    doth.write(config.get_vertical_decl(fw, ir_func))
     
    1316    for c_type in config.c_type_fw:
    1417        fw = config.c_type_fw[c_type]
     18       
    1519        doth.write(config.decl_template[ir_func].format(
    1620            c_type=c_type, fw=fw))
    17 
    1821        dotll.write(config.impl_template[ir_func].format(
    1922            fw=fw, n=config.register_bits / fw))
  • trunk/libgen/llvm_header/op_tester.py

    r3584 r3586  
    3535for ir_func in config.vertical_ir_set:
    3636        for fw in config.fw_set:
    37                 generate_teseter_cpp(fw, ir_func)                               
     37                if (fw, ir_func) in config.banned_vertical_fw_ir_pairs:
     38                        continue
    3839
    39                 if os.system("make with_ir_header") != 0:
     40                generate_teseter_cpp(fw, ir_func)
     41
     42                if os.system("make with_ir_header >/dev/null") != 0:
    4043                        claim_make_fail(fw, ir_func)
    4144                        continue
    4245
    43                 if os.system("lli optimized.bc") != 0:
     46                if os.system("lli optimized.bc >/dev/null 2>>lli.log") != 0:
    4447                        claim_lli_fail(fw, ir_func)                     
    4548
     
    5053print failed
    5154
     55
    5256# [(2, 'add'), (4, 'add'), (2, 'sub'), (4, 'sub'), (2, 'mul'), (4, 'mul'),
    53 #  (2, 'and'), (4, 'and'), (2, 'or'), (4, 'or'), (2, 'xor'), (4, 'xor'),
    54 #  (2, 'icmp eq'), (4, 'icmp eq'), (2, 'icmp sgt'), (4, 'icmp sgt'),
    55 #  (2, 'icmp ugt'), (4, 'icmp ugt'), (2, 'icmp slt'), (4, 'icmp slt'),
    56 #  (2, 'icmp ult'), (4, 'icmp ult'), (2, 'shl'), (4, 'shl'), (128, 'shl'),
    57 #  (2, 'lshr'), (4, 'lshr'), (128, 'lshr'), (2, 'ashr'), (4, 'ashr'), (128, 'ashr')]
     57#  (128, 'mul'), (2, 'and'), (4, 'and'), (2, 'or'), (4, 'or'), (2, 'xor'),
     58#  (4, 'xor'), (2, 'icmp eq'), (4, 'icmp eq'), (2, 'icmp sgt'), (4, 'icmp sgt'),
     59#  (2, 'icmp ugt'), (4, 'icmp ugt'), (2, 'icmp slt'), (4, 'icmp slt'), (2, 'icmp ult'),
     60#  (4, 'icmp ult'), (2, 'shl'), (4, 'shl'), (64, 'shl'), (128, 'shl'), (2, 'lshr'),
     61#  (4, 'lshr'), (64, 'lshr'), (128, 'lshr'), (2, 'ashr'), (4, 'ashr'), (128, 'ashr')]
  • trunk/libgen/llvm_header/playground.cpp

    r3584 r3586  
    44int main()
    55{       
    6         SIMD_type a, b, c;     
    7         a = mvmd<32>::fill4(rand(), rand(), rand(), rand());
     6        // SIMD_type a, b, c;   
     7        SIMD_type c;
     8        // a = mvmd<32>::fill4(rand(), rand(), rand(), rand());
    89        // a = simd<32>::constant<2>();
    9         b = simd<32>::constant<2>();
     10        // b = simd<32>::constant<2>();
    1011       
    11         cout << "Playground running..." << endl;
     12        // cout << "Playground running..." << endl;
    1213       
    13         c = llvm_add_8(a, b);
     14        // c = llvm_constant_32(2);
     15        c = simd<32>::constant<2>();
     16        // cout << Store2String(b, 1) << endl;
    1417        cout << Store2String(c, 1) << endl;     
    1518
Note: See TracChangeset for help on using the changeset viewer.