Changeset 3920 for trunk


Ignore:
Timestamp:
Jul 21, 2014, 7:00:26 PM (3 years ago)
Author:
linmengl
Message:

The pure-IR s2p_bytepack library now works for SSE2.

Location:
trunk/lib_ir
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib_ir/s2p.h

    r3919 r3920  
    33
    44#include "idisa.hpp"
    5 // use s2p(s0, s1, ..., s7, *p0, *p1, ..., *p7)
    6 // choose from s2p_bytepack and s2p_ideal
    7 #define s2p s2p_bytepack
    85
    96extern "C" {
    10   void s2p_ideal(BitBlock s0, BitBlock s1, BitBlock s2, BitBlock s3, BitBlock s4, BitBlock s5, BitBlock s6, BitBlock s7, BitBlock *p0, BitBlock *p1, BitBlock *p2, BitBlock *p3, BitBlock *p4, BitBlock *p5, BitBlock *p6, BitBlock *p7);
    11 
    12   void s2p_bytepack(BitBlock s0, BitBlock s1, BitBlock s2, BitBlock s3, BitBlock s4, BitBlock s5, BitBlock s6, BitBlock s7, BitBlock *p0, BitBlock *p1, BitBlock *p2, BitBlock *p3, BitBlock *p4, BitBlock *p5, BitBlock *p6, BitBlock *p7);
     7  //void s2p_ideal(BitBlock s0, BitBlock s1, BitBlock s2, BitBlock s3, BitBlock s4, BitBlock s5, BitBlock s6, BitBlock s7, BitBlock *p0, BitBlock *p1, BitBlock *p2, BitBlock *p3, BitBlock *p4, BitBlock *p5, BitBlock *p6, BitBlock *p7);
    138
    149  BitBlock packh_16(BitBlock a, BitBlock b);
     
    2419  BitBlock srli_16(BitBlock a, BitBlock shift_mask);
    2520  BitBlock slli_16(BitBlock a, BitBlock shift_mask);
     21
     22  BitBlock s2p_step_ir(BitBlock s0, BitBlock s1, BitBlock hi_mask, BitBlock shift_mask, BitBlock *p0, BitBlock *p1);
     23
     24  BitBlock const16_1();
     25  BitBlock const16_2();
     26  BitBlock const16_4();
     27
     28  BitBlock himask_2();
     29  BitBlock himask_4();
     30  BitBlock himask_8();
    2631}
    2732
     33#define s2p_bytepack(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
     34  do {\
     35        BitBlock bit00224466_0,bit00224466_1,bit00224466_2,bit00224466_3;\
     36        BitBlock bit11335577_0,bit11335577_1,bit11335577_2,bit11335577_3;\
     37        BitBlock bit00004444_0,bit22226666_0,bit00004444_1,bit22226666_1;\
     38        BitBlock bit11115555_0,bit33337777_0,bit11115555_1,bit33337777_1;\
     39        s2p_step_ir(s0,s1,himask_2(),const16_1(),&bit00224466_0,&bit11335577_0);\
     40        s2p_step_ir(s2,s3,himask_2(),const16_1(),&bit00224466_1,&bit11335577_1);\
     41        s2p_step_ir(s4,s5,himask_2(),const16_1(),&bit00224466_2,&bit11335577_2);\
     42        s2p_step_ir(s6,s7,himask_2(),const16_1(),&bit00224466_3,&bit11335577_3);\
     43        s2p_step_ir(bit00224466_0,bit00224466_1,himask_4(),const16_2(),&bit00004444_0,&bit22226666_0);\
     44        s2p_step_ir(bit00224466_2,bit00224466_3,himask_4(),const16_2(),&bit00004444_1,&bit22226666_1);\
     45        s2p_step_ir(bit11335577_0,bit11335577_1,himask_4(),const16_2(),&bit11115555_0,&bit33337777_0);\
     46        s2p_step_ir(bit11335577_2,bit11335577_3,himask_4(),const16_2(),&bit11115555_1,&bit33337777_1);\
     47        s2p_step_ir(bit00004444_0,bit00004444_1,himask_8(),const16_4(),&p0,&p4);\
     48        s2p_step_ir(bit11115555_0,bit11115555_1,himask_8(),const16_4(),&p1,&p5);\
     49        s2p_step_ir(bit22226666_0,bit22226666_1,himask_8(),const16_4(),&p2,&p6);\
     50        s2p_step_ir(bit33337777_0,bit33337777_1,himask_8(),const16_4(),&p3,&p7);\
     51  } while(0)
     52
    2853#endif
  • trunk/lib_ir/s2p.ll

    r3919 r3920  
    5959}
    6060
    61 define void @s2p_step(<4 x i32> %s0, <4 x i32> %s1, <4 x i32> %hi_mask, <8 x i16> %shift_mask, <4 x i32>* %p0, <4 x i32>* %p1) alwaysinline {
     61define void @s2p_step_ir(<4 x i32> %s0, <4 x i32> %s1, <4 x i32> %hi_mask, <8 x i16> %shift_mask, <4 x i32>* %p0, <4 x i32>* %p1) alwaysinline {
    6262entry:
    6363  %t0 = call <4 x i32> @packh_16(<4 x i32> %s0, <4 x i32> %s1)
     
    7575}
    7676
     77define <8 x i16> @const16_1() alwaysinline {
     78entry:
     79  ret <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
     80}
     81
     82define <8 x i16> @const16_2() alwaysinline {
     83entry:
     84  ret <8 x i16> <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
     85}
     86
     87define <8 x i16> @const16_4() alwaysinline {
     88entry:
     89  ret <8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
     90}
     91
     92define <4 x i32> @himask_2() alwaysinline {
     93entry:
     94  ret <4 x i32> <i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766>
     95}
     96
     97define <4 x i32> @himask_4() alwaysinline {
     98entry:
     99  ret <4 x i32> <i32 -858993460, i32 -858993460, i32 -858993460, i32 -858993460>
     100}
     101
     102define <4 x i32> @himask_8() alwaysinline {
     103entry:
     104  ret <4 x i32> <i32 -252645136, i32 -252645136, i32 -252645136, i32 -252645136>
     105}
     106
     107
     108;TODO: all the packh/packl functions below need to swap aa and bb because of endianness.
    77109;define <4 x i32> @packh_8(<4 x i32> %a, <4 x i32> %b) alwaysinline {
    78110;entry:
  • trunk/lib_ir/test_pack.cpp

    r3919 r3920  
    11#include "utility.h"
    22#include "s2p.h"
     3#include "s2p.hpp"
    34#include <iostream>
    45#include <cstdlib>
     
    8283  }
    8384
     85  //Test s2p_step
     86  SIMD_type hi_mask = simd<2>::himask(), p0, p1, q0, q1;
     87  a = mvmd<32>::fill((int)rand() % 10000);
     88  b = mvmd<32>::fill((int)rand() % 10000);
     89  c = mvmd<16>::fill(2);
     90  s2p_step(a, b, hi_mask, 2, p0, p1);
     91  s2p_step_ir(a, b, hi_mask, c, &q0, &q1);
     92  if (Store2String(p0,1) != Store2String(q0,1) ||
     93      Store2String(p1,1) != Store2String(q1,1)) {
     94    cout << "s2p_step failed." << endl;
     95    return 1;
     96  }
     97  a = mvmd<32>::fill((int)rand() % 10000);
     98  b = mvmd<32>::fill((int)rand() % 10000);
     99  c = mvmd<16>::fill(4);
     100  s2p_step(a, b, hi_mask, 4, p0, p1);
     101  s2p_step_ir(a, b, hi_mask, c, &q0, &q1);
     102  if (Store2String(p0,1) != Store2String(q0,1) ||
     103      Store2String(p1,1) != Store2String(q1,1)) {
     104    cout << "s2p_step failed." << endl;
     105    return 1;
     106  }
    84107
    85108  cout << "Test passed." << endl;
  • trunk/lib_ir/test_s2p.cpp

    r3909 r3920  
    1919  for (int i = 0; i < 8; ++i) s[i] = mvmd<32>::fill(x);
    2020
    21   s2p_ideal(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7],
    22       &p[0], &p[1], &p[2], &p[3], &p[4], &p[5], &p[6], &p[7]);
     21  s2p_bytepack(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7],
     22      p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]);
    2323
    2424  for (int i = 0; i < 8; ++i)
Note: See TracChangeset for help on using the changeset viewer.