Changeset 436


Ignore:
Timestamp:
Jul 4, 2010, 12:13:46 PM (9 years ago)
Author:
cameron
Message:

Updates for revised compiler.

Location:
proto/parabix2/compiled
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • proto/parabix2/compiled/sse_simd.h

    r424 r436  
    1919#endif
    2020#include <limits.h>
    21 
    2221#ifndef LONG_BIT
    23 #if ULONG_MAX == 0xFFFFFFFF
    24 #define LONG_BIT 32
     22#define LONG_BIT (8*__WORDSIZE)
    2523#endif
    26 #if ULONG_MAX == 0xFFFFFFFFFFFFFFFF
    27 #define LONG_BIT 64
    28 #endif
    29 #endif
    30 
    3124#include <emmintrin.h>
    3225#ifdef USE_LDDQU
     
    3629
    3730
    38 
    39 #ifdef SSSE3
    40 #include <tmmintrin.h>
    41 
    42 #define simd_permute(bytepack, indexes) _mm_shuffle_epi8(bytepack, indexes)
    43 #endif
     31#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
     32  __asm__  ("sahf\n\t" \
     33        "adc %[e1], %[z1]\n\t" \
     34        "adc %[e2], %[z2]\n\t" \
     35        "lahf\n\t" \
     36     : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
     37         : "[z1]" (x1), "[z2]" (x2), \
     38           [e1] "r" (y1), [e2] "r" (y2), \
     39           "[carryflag]" (carry) \
     40         : "cc")
     41
     42#define adc128(first, second, carry, sum) \
     43do\
     44{\
     45  union {__m128i bitblock;\
     46         uint64_t int64[2];} rslt;\
     47\
     48  union {__m128i bitblock;\
     49         uint64_t int64[2];} x;\
     50\
     51  union {__m128i bitblock;\
     52         uint64_t int64[2];} y;\
     53\
     54  x.bitblock = first;\
     55  y.bitblock = second;\
     56\
     57  double_int64_adc(x.int64[0], x.int64[1], y.int64[0], y.int64[1], rslt.int64[0], rslt.int64[1], carry);\
     58  sum = rslt.bitblock;\
     59}while(0)
     60
     61
     62
     63#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \
     64  __asm__  ("sahf\n\t" \
     65        "sbb %[e1], %[z1]\n\t" \
     66        "sbb %[e2], %[z2]\n\t" \
     67        "lahf\n\t" \
     68     : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
     69         : "[z1]" (x1), "[z2]" (x2), \
     70           [e1] "r" (y1), [e2] "r" (y2), \
     71           "[carryflag]" (carry) \
     72         : "cc")
     73
     74#define sbb128(first, second, carry, sum) \
     75do\
     76{ union {__m128i bitblock;\
     77         uint64_t int64[2];} rslt;\
     78\
     79  union {__m128i bitblock;\
     80         uint64_t int64[2];} x;\
     81\
     82  union {__m128i bitblock;\
     83         uint64_t int64[2];} y;\
     84\
     85  x.bitblock = first;\
     86  y.bitblock = second;\
     87\
     88  double_int64_sbb(x.int64[0], x.int64[1], y.int64[0], y.int64[1], \
     89                   rslt.int64[0], rslt.int64[1], carry);\
     90  sum = rslt.bitblock;\
     91}while(0)
     92
     93
     94
     95#define adc128_simd(x, y, carry,  sum) \
     96do{ \
     97  SIMD_type gen = simd_and(x, y); \
     98  SIMD_type prop = simd_or(x, y); \
     99  SIMD_type partial = simd_add_64(simd_add_64(x, y), carry); \
     100  SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64); \
     101  sum = simd_add_64(c1, partial); \
     102  carry = sisd_srli(simd_or(gen, simd_andc(prop, sum)), 127); \
     103} while(0)
     104
     105
     106#define sbb128_simd(x, y, borrow, difference) \
     107do {\
     108  SIMD_type gen = simd_andc(y, x); \
     109  SIMD_type prop = simd_not(simd_xor(x, y)); \
     110  SIMD_type partial = simd_sub_64(simd_sub_64(x, y), borrow); \
     111  SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64); \
     112  difference = simd_sub_64(partial, b1); \
     113  borrow = sisd_srli(simd_or(gen, simd_and(prop, difference)), 127); \
     114}while(0)
     115
     116
     117#define advance_with_carry(cursor, carry, rslt)\
     118do{\
     119  SIMD_type shift_out = simd_srli_64(cursor, 63);\
     120  SIMD_type low_bits = simd_mergel_64(shift_out, carry);\
     121  carry = sisd_srli(shift_out, 64);\
     122  rslt = simd_or(simd_add_64(cursor, cursor), low_bits);\
     123}while(0)
     124
     125
     126
    44127
    45128/*------------------------------------------------------------*/
     
    57140
    58141/*  Specific constants. */
    59 #define simd_himask_2 _mm_set1_epi32(0xAAAAAAAA)
    60 #define simd_himask_4 _mm_set1_epi32(0xCCCCCCCC)
    61 #define simd_himask_8 _mm_set1_epi32(0xF0F0F0F0)
     142#define simd_himask_2 _mm_set1_epi8(0xAA)
     143#define simd_himask_4 _mm_set1_epi8(0xCC)
     144#define simd_himask_8 _mm_set1_epi8(0xF0)
    62145/* Little-endian */
    63 #define simd_himask_16 _mm_set1_epi32(0xFF00FF00)
     146#define simd_himask_16 _mm_set1_epi16(0xFF00)
    64147#define simd_himask_32 _mm_set1_epi32(0xFFFF0000)
    65148#define simd_himask_64 _mm_set_epi32(-1,0,-1,0)
     
    88171#define simd_sll_64(r, shft_reg) _mm_sll_epi64(r, shft_reg)
    89172#define simd_srl_64(r, shft_reg) _mm_srl_epi64(r, shft_reg)
    90 #define simd_packus_16(a, b) _mm_packus_epi16(b, a)
    91173#define simd_pack_16(a, b) \
    92174  _mm_packus_epi16(simd_andc(b, simd_himask_16), simd_andc(a, simd_himask_16))
     
    148230#define simd_const_8(n) _mm_set1_epi8(n)
    149231#define simd_const_4(n) _mm_set1_epi8((n)<<4|(n))
    150 #define simd_const_2(n) simd_const_4((n)<<2|n)
     232#define simd_const_2(n) simd_const_4(n<<2|n)
    151233#define simd_const_1(n) \
    152234  (n==0 ? simd_const_8(0): simd_const_8(-1))
     
    183265         simd_and(simd_slli_32(r,sh),simd_const_4((15<<sh)&15))
    184266#define simd_slli_8(r, sh)\
    185          (sh == 1 ? simd_add_8(r,r):\
    186           simd_and(simd_slli_32(r,sh),simd_const_8((255<<sh) &255)))
     267         simd_and(simd_slli_32(r,sh),simd_const_8((255<<sh) &255))
    187268
    188269
     
    236317}
    237318
    238 /*  Packed test operation of SSE4. */
    239 static inline int sisd_ptest(SIMD_type v, SIMD_type mask) {
    240   return !simd_all_true_8(simd_eq_8(simd_and(v, mask), simd_const_8(0)));
    241 }
    242 
    243319
    244320
     
    667743
    668744
    669 void print_bit_block(char * var_name, SIMD_type v) {
     745void print_bit_block(const char * var_name, SIMD_type v) {
    670746  union {SIMD_type vec; unsigned char elems[8];} x;
    671747  x.vec = v;
     
    675751  for (i = 0; i < sizeof(SIMD_type); i++) {
    676752    c = x.elems[i];
    677      printf("%02X ", c); 
     753     printf("%02X ", c);
    678754  }
    679755  printf("\n");
     
    717793#endif
    718794#ifndef _MSC_VER
    719 #if LONG_BIT < 64
     795#if (LONG_BIT < 64)
    720796  else if (v.elems[2] != 0) return 2*LONG_BIT + cfzl(v.elems[2]);
    721797  else if (v.elems[3] != 0) return 3*LONG_BIT + cfzl(v.elems[3]);
     
    725801}
    726802
    727 #ifdef ADC_128_VIA_GEN_REG
    728 // 128-bit add with carry
    729 // (rslt, carryout) = x + y + carryin
    730 // where x = (x2, x1), y = (y2, y1), rslt = (rslt2, rslt1)
    731 #define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
    732   __asm__  ("sahf\n\t" \
    733             "adc %[e1], %[z1]\n\t" \
    734             "adc %[e2], %[z2]\n\t" \
    735             "lahf\n\t" \
    736          : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [carry] "=a" (carry) \
    737          : "[z1]" (x1), "[z2]" (x2), \
    738            [e1] "r" (y1), [e2] "r" (y2), \
    739            "[carry]" (carry) \
    740          : "cc")
    741 
    742 static inline SIMD_type adc128(SIMD_type first, SIMD_type second, int &carry)
    743 {
    744   union {__m128i bitblock;
    745          uint64_t int64[2];} rslt;
    746 
    747   union {__m128i bitblock;
    748          uint64_t int64[2];} x;
    749 
    750   union {__m128i bitblock;
    751          uint64_t int64[2];} y;
    752 
    753   x.bitblock = first;
    754   y.bitblock = second;
    755 
    756   double_int64_adc(x.int64[0], x.int64[1], y.int64[0], y.int64[1],
    757                    rslt.int64[0], rslt.int64[1], carry);
    758 
    759   return rslt.bitblock;
    760 }
    761 #endif
    762 
    763 #ifndef ADC_128_VIA_GEN_REG
    764 static inline void adc128(SIMD_type x, SIMD_type y, SIMD_type &carry, SIMD_type &sum)
    765 {
    766   /* Carries are always generated if both high bits are 1. */
    767   SIMD_type gen = simd_and(x, y);
    768   /* Carries may propagate if either high bit is 1. */
    769   SIMD_type prop = simd_or(x, y);
    770   /* Partial add without carry for high 64. */
    771   SIMD_type partial = simd_add_64(simd_add_64(x, y), carry);
    772   /* Carry for high 64 */
    773   SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64);
    774   /* Final sum */
    775   sum = simd_add_64(c1, partial);
    776   /* carry out */
    777   carry = sisd_srli(simd_or(gen, simd_andc(prop, sum)), 127);
    778 }
    779 #endif
    780 
    781 static inline SIMD_type advance_with_carry(SIMD_type cursor, SIMD_type &carry)
    782 {
    783   SIMD_type shift_out = simd_srli_64(cursor, 63);
    784   SIMD_type low_bits = simd_mergel_64(shift_out, carry);
    785   carry = sisd_srli(shift_out, 64);
    786   return simd_or(simd_add_64(cursor, cursor), low_bits);
    787 }
    788 
    789 
    790 
    791 
    792 #endif
    793 
     803
     804
     805#endif
     806
  • proto/parabix2/compiled/template.c

    r425 r436  
    178178  }
    179179 
     180  @stream_stmts
     181
    180182  while(chars_read>0){
    181183
     
    212214        array_bit__7_ &= EOF_mask;
    213215
    214         @stmts
     216        @block_stmts
    215217
    216218        if (bitblock_has_bit(error_mask)) {
     
    238240        array_bit__0_,array_bit__1_,array_bit__2_,array_bit__3_,array_bit__4_,array_bit__5_,array_bit__6_,array_bit__7_);
    239241
    240         @stmts
     242        @block_stmts
    241243       
    242244        if (bitblock_has_bit(error_mask)) {
Note: See TracChangeset for help on using the changeset viewer.