Changes between Version 1 and Version 2 of I2Result


Ignore:
Timestamp:
Apr 2, 2014, 2:41:21 AM (3 years ago)
Author:
cameron
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • I2Result

    v1 v2  
    11= I2Result Demo =
     2
     3Here is a small program {{{pr.cpp}}}.
     4
     5{{{
     6int main(int argc, char * argv[]) {
     7  // Initialize a temporary bitblock value.
     8  BitBlock volatile temp1 = simd<8>::constant<0x33>();
     9  // print it out.
     10  print_register<BitBlock>("temp1", temp1);
     11  // And another.
     12  BitBlock volatile temp2 = simd<16>::constant<0x3344>();
     13  print_register<BitBlock>("temp2", temp2);
     14  //
     15  BitBlock rslt = simd<8>::eq(temp1, temp2);
     16  print_register("simd<8>::eq(temp1, temp2)", rslt);
     17  uint32_t msk = hsimd<64>::signmask(rslt);
     18  printf("  hsimd<64>::signmask(rslt) = %u\n", msk);
     19
     20  // Done.   Exit normally.
     21  return(0);
     22}
     23}}}
     24
     25We can compile it to LLVM IR.
     26
     27{{{
     28clang++ -msse2  -O3  -o pr.ll pr.cpp -I../lib/ -S -emit-llvm
     29}}}
     30
     31We get the following {{{main}}} function in {{{pr.ll}}}.
     32
     33{{{
     34define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
     35  %temp1 = alloca <2 x i64>, align 16
     36  %temp2 = alloca <2 x i64>, align 16
     37  store volatile <2 x i64> <i64 3689348814741910323, i64 3689348814741910323>, <2 x i64>* %temp1, align 16
     38  %1 = load volatile <2 x i64>* %temp1, align 16
     39  tail call void @_Z14print_registerIDv2_xEvPKcT_(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), <2 x i64> %1)
     40  store volatile <2 x i64> <i64 3694133962361549636, i64 3694133962361549636>, <2 x i64>* %temp2, align 16
     41  %2 = load volatile <2 x i64>* %temp2, align 16
     42  tail call void @_Z14print_registerIDv2_xEvPKcT_(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0), <2 x i64> %2)
     43  %3 = load volatile <2 x i64>* %temp1, align 16
     44  %4 = load volatile <2 x i64>* %temp2, align 16
     45  %5 = bitcast <2 x i64> %3 to <16 x i8>
     46  %6 = bitcast <2 x i64> %4 to <16 x i8>
     47  %7 = icmp eq <16 x i8> %5, %6
     48  %8 = sext <16 x i1> %7 to <16 x i8>
     49  %9 = bitcast <16 x i8> %8 to <2 x i64>
     50  tail call void @_Z14print_registerIDv2_xEvPKcT_(i8* getelementptr inbounds ([26 x i8]* @.str2, i64 0, i64 0), <2 x i64> %9)
     51  %10 = bitcast <16 x i8> %8 to <2 x double>
     52  %11 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %10) #3
     53  %12 = and i32 %11, 255
     54  %13 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([34 x i8]* @.str3, i64 0, i64 0), i32 %12)
     55  ret i32 0
     56}
     57}}}
     58
     59We can replace the call to {{{llvm.x86.sse2.movmsk.pd}}} with a call to our own
     60implementation {{{signmaskd}}}.   Call the resulting program {{{pr2.ll}}}.
     61
     62{{{
     63define i32 @signmaskd(<2 x double> %a) alwaysinline #5
     64{
     65        %bits = bitcast <2 x double> %a to <2 x i64>
     66        %b = icmp slt <2 x i64> %bits, zeroinitializer
     67        %c = bitcast <2 x i1> %b to i2
     68        %result = zext i2 %c to i32
     69        ret i32 %result
     70}
     71}}}
     72
     73But the two programs print different {{{signmask}}} values (3 versus 1).   Why?
     74{{{
     75$ ./pr
     76                                   temp1 = 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33
     77                                   temp2 = 33 44 33 44 33 44 33 44 33 44 33 44 33 44 33 44
     78               simd<8>::eq(temp1, temp2) = FF 00 FF 00 FF 00 FF 00 FF 00 FF 00 FF 00 FF 00
     79  hsimd<64>::signmask(rslt) = 3
     80$ ./pr2
     81                                   temp1 = 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33
     82                                   temp2 = 33 44 33 44 33 44 33 44 33 44 33 44 33 44 33 44
     83               simd<8>::eq(temp1, temp2) = FF 00 FF 00 FF 00 FF 00 FF 00 FF 00 FF 00 FF 00
     84  hsimd<64>::signmask(rslt) = 1
     85}}}
     86}}}