Changeset 928 for trunk/lib


Ignore:
Timestamp:
Mar 10, 2011, 6:28:29 PM (8 years ago)
Author:
cameron
Message:

Update SIMD_CARRY_Q with new functions; inline the carryQ_* macro bodies directly into the BitBlock_* functions

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/carryQ.h

    r925 r928  
    160160SIMD_type carryQ_co_mask = sisd_slli(carryQ_ci_mask, 127);
    161161
    162 #define carryQ_adc128_ci_co(x, y, carryQ,  sum) \
    163 do{ \
    164   SIMD_type gen = simd_and(x, y); \
    165   SIMD_type prop = simd_or(x, y); \
    166   SIMD_type partial = simd_add_64(simd_add_64(x, y), simd_and(carryQ, carryQ_ci_mask)); \
    167   carryQ = simd_srli_64(carryQ, 1); \
    168   SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64); \
    169   sum = simd_add_64(c1, partial); \
    170   carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_andc(prop, sum)), carryQ_co_mask)); \
    171 } while(0)
    172 
    173 #define carryQ_adc128_co(x, y, carryQ,  sum) \
    174 do{ \
    175   SIMD_type gen = simd_and(x, y); \
    176   SIMD_type prop = simd_or(x, y); \
    177   SIMD_type partial = simd_add_64(x, y); \
    178   carryQ = simd_srli_64(carryQ, 1); \
    179   SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64); \
    180   sum = simd_add_64(c1, partial); \
    181   carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_andc(prop, sum)), carryQ_co_mask)); \
    182 } while(0)
    183 
    184 #define carryQ_sbb128_ci_co(x, y, carryQ, difference) \
    185 do {\
    186   SIMD_type gen = simd_andc(y, x); \
    187   SIMD_type prop = simd_not(simd_xor(x, y)); \
    188   SIMD_type partial = simd_sub_64(simd_sub_64(x, y), simd_and(carryQ, carryQ_ci_mask)); \
    189   carryQ = simd_srli_64(carryQ, 1); \
    190   SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64); \
    191   difference = simd_sub_64(partial, b1); \
    192   carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_and(prop, difference)), carryQ_co_mask)); \
    193 }while(0)
    194 
    195 #define carryQ_sbb128_co(x, y, carryQ, difference) \
    196 do {\
    197   SIMD_type gen = simd_andc(y, x); \
    198   SIMD_type prop = simd_not(simd_xor(x, y)); \
    199   SIMD_type partial = simd_sub_64(x, y); \
    200   carryQ = simd_srli_64(carryQ, 1); \
    201   SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64); \
    202   difference = simd_sub_64(partial, b1); \
    203   carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_and(prop, difference)), carryQ_co_mask)); \
    204 }while(0)
    205 
    206 #define carryQ_advance_with_carry_ci_co(cursor, carryQ, rslt)\
    207 do {\
    208   SIMD_type carry_out = simd_and(cursor, carryQ_co_mask);\
    209   SIMD_type carry_in = simd_and(carryQ, carryQ_ci_mask);\
    210   carryQ = simd_or(simd_srli_64(carryQ, 1), carry_out); \
    211   SIMD_type shift_out = simd_srli_64(cursor, 63);\
    212   SIMD_type low_bits = simd_mergel_64(shift_out, carry_in);\
    213   rslt = simd_or(simd_add_64(cursor, cursor), low_bits);\
    214 } while(0)
    215 
    216 #define carryQ_advance_with_carry_co(cursor, carryQ, rslt)\
    217 do {\
    218   SIMD_type carry_out = simd_and(cursor, carryQ_co_mask);\
    219   carryQ = simd_or(simd_srli_64(carryQ, 1), carry_out); \
    220   SIMD_type shift_out = simd_srli_64(cursor, 63);\
    221   SIMD_type low_bits = simd_mergel_64(shift_out, simd_const_1(0));\
    222   rslt = simd_or(simd_add_64(cursor, cursor), low_bits);\
    223 } while(0)
    224 
    225 
    226162static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    227163
     
    264200static inline void CarryCombine(CarryQtype & cq, CarryQtype & local_cq, const int carryno, const int carry_count) __attribute__ ((always_inline));
    265201
    266 
    267 
    268 
    269 
    270 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & cq, const int carryno) {
    271  BitBlock rslt;
    272  carryQ_advance_with_carry_ci_co(strm, cq, rslt);
    273  return rslt;
    274 }
    275 
    276 static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype & cq, const int carryno) {
    277  BitBlock rslt;
    278  carryQ_advance_with_carry_co(strm, cq, rslt);
    279  return rslt;
    280 }
    281 
    282 static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) {
    283  BitBlock sum;
    284  carryQ_adc128_ci_co(strm1, strm2, cq, sum);
    285  return sum;
    286 }
    287 
    288 static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) {
    289  BitBlock sum;
    290  carryQ_adc128_co(strm1, strm2, cq, sum);
    291  return sum;
    292 }
    293 
    294 static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) {
    295  BitBlock diff;
    296  carryQ_sbb128_ci_co(strm1, strm2, cq, diff);
    297  return diff;
    298 }
    299 
    300 static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) {
    301  BitBlock diff;
    302  carryQ_sbb128_co(strm1, strm2, cq, diff);
    303  return diff;
     // Advance `cursor` with both carry-in and carry-out handled through carryQ.
     // carryQ is taken by reference and is rewritten; carryno is not referenced
     // in this body.
     // Returns simd_add_64(cursor, cursor) OR'ed with `low_bits`.
     // NOTE(review): presumably this is a one-bit shift of the whole block toward
     // the high end, with low_bits re-inserting the bit that leaves the low
     // 64-bit field plus the incoming carry -- confirm against the simd_* helpers.
     202static inline BitBlock BitBlock_advance_ci_co(BitBlock cursor, CarryQtype & carryQ, const int carryno) {
     // Bit of cursor selected by carryQ_co_mask becomes the recorded carry-out.
     203  SIMD_type carry_out = simd_and(cursor, carryQ_co_mask);
     // Carry-in is captured before carryQ is overwritten on the next line.
     204  SIMD_type carry_in = simd_and(carryQ, carryQ_ci_mask);
     // New queue value: simd_srli_64(carryQ, 1) combined with the carry-out bit.
     205  carryQ = simd_or(simd_srli_64(carryQ, 1), carry_out);
     206  SIMD_type shift_out = simd_srli_64(cursor, 63);
     // NOTE(review): simd_mergel_64 presumably pairs the low-field shift_out with
     // carry_in so each lands in bit 0 of the right field -- verify operand order.
     207  SIMD_type low_bits = simd_mergel_64(shift_out, carry_in);
     208  return simd_or(simd_add_64(cursor, cursor), low_bits);
     209}
     210
     // Advance `cursor` recording a carry-out in carryQ but consuming no carry-in:
     // simd_const_1(0) is merged in where the _ci_co variant merges carry_in.
     // carryQ is taken by reference and is rewritten; carryno is not referenced
     // in this body.
     // NOTE(review): presumably a one-bit shift of the block toward the high end;
     // confirm against the simd_* helper definitions.
     211static inline BitBlock BitBlock_advance_co(BitBlock cursor, CarryQtype & carryQ, const int carryno) {
     // Bit of cursor selected by carryQ_co_mask becomes the recorded carry-out.
     212  SIMD_type carry_out = simd_and(cursor, carryQ_co_mask);
     // The queue is still stepped with simd_srli_64 even though no carry-in is read.
     213  carryQ = simd_or(simd_srli_64(carryQ, 1), carry_out);
     214  SIMD_type shift_out = simd_srli_64(cursor, 63);
     215  SIMD_type low_bits = simd_mergel_64(shift_out, simd_const_1(0));
     216  return simd_or(simd_add_64(cursor, cursor), low_bits);
     217}
     218
     // Advance `cursor` consuming a carry-in from carryQ without recording a
     // carry-out: carryQ is replaced by simd_srli_64(carryQ, 1) with nothing
     // OR'ed in. carryno is not referenced in this body.
     // NOTE(review): presumably a one-bit shift of the block toward the high end;
     // confirm against the simd_* helper definitions.
     219static inline BitBlock BitBlock_advance_ci(BitBlock cursor, CarryQtype & carryQ, const int carryno) {
     // Carry-in is captured before carryQ is overwritten on the next line.
     220  SIMD_type carry_in = simd_and(carryQ, carryQ_ci_mask);
     221  carryQ = simd_srli_64(carryQ, 1);
     222  SIMD_type shift_out = simd_srli_64(cursor, 63);
     223  SIMD_type low_bits = simd_mergel_64(shift_out, carry_in);
     224  return simd_or(simd_add_64(cursor, cursor), low_bits);
     225}
     226
     227static inline BitBlock BitBlock_advance(BitBlock cursor) {
     228  return sisd_srli(cursor, 1);
     229}
     230
     231
     // Add x and y with a carry-in read from carryQ and a carry-out written back
     // to it. carryQ is taken by reference and is rewritten; carryno is not
     // referenced in this body. Returns `sum`.
     // NOTE(review): presumably a full-width add assembled from two 64-bit field
     // adds, with c1 carrying from the low field into the high field -- confirm
     // against the simd_*/sisd_* helper definitions.
     232static inline BitBlock BitBlock_add_ci_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {
     233  BitBlock sum;
     // gen/prop: terms combined below as gen | (prop & ~value) to detect a carry.
     234  SIMD_type gen = simd_and(x, y);
     235  SIMD_type prop = simd_or(x, y);
     // Carry-in (carryQ masked by carryQ_ci_mask) is folded into the field-wise sum
     // before carryQ is stepped on the next line.
     236  SIMD_type partial = simd_add_64(simd_add_64(x, y), simd_and(carryQ, carryQ_ci_mask));
     237  carryQ = simd_srli_64(carryQ, 1);
     // NOTE(review): srli_64 by 63 then sisd_slli by 64 presumably moves the low
     // field's carry flag to bit 0 of the high field -- verify.
     238  SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64);
     239  sum = simd_add_64(c1, partial);
     // Same carry test applied to the final sum; only the carryQ_co_mask bit is kept.
     240  carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_andc(prop, sum)), carryQ_co_mask));
     241  return sum;
     242}
     243
     // Add x and y with no carry-in, writing a carry-out into carryQ. carryQ is
     // taken by reference; it is stepped with simd_srli_64 and then has the
     // carry-out bit OR'ed in. carryno is not referenced in this body.
     // NOTE(review): presumably a full-width add assembled from two 64-bit field
     // adds -- confirm against the simd_*/sisd_* helper definitions.
     244static inline BitBlock BitBlock_add_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {
     245  BitBlock sum;
     // gen/prop: terms combined below as gen | (prop & ~value) to detect a carry.
     246  SIMD_type gen = simd_and(x, y);
     247  SIMD_type prop = simd_or(x, y);
     248  SIMD_type partial = simd_add_64(x, y);
     249  carryQ = simd_srli_64(carryQ, 1);
     // NOTE(review): presumably the low field's carry moved to bit 0 of the high field.
     250  SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64);
     251  sum = simd_add_64(c1, partial);
     // Only the bit selected by carryQ_co_mask is recorded as the carry-out.
     252  carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_andc(prop, sum)), carryQ_co_mask));
     253  return sum;
     254}
     255
     // Add x and y with a carry-in read from carryQ and no carry-out recorded:
     // carryQ is only stepped with simd_srli_64. carryno is not referenced in
     // this body. Returns `sum`.
     // NOTE(review): presumably a full-width add assembled from two 64-bit field
     // adds -- confirm against the simd_*/sisd_* helper definitions.
     256static inline BitBlock BitBlock_add_ci(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {
     257  BitBlock sum;
     258  SIMD_type gen = simd_and(x, y);
     259  SIMD_type prop = simd_or(x, y);
     // Carry-in is read here, before carryQ is overwritten on the next line.
     260  SIMD_type partial = simd_add_64(simd_add_64(x, y), simd_and(carryQ, carryQ_ci_mask));
     261  carryQ = simd_srli_64(carryQ, 1);
     // NOTE(review): presumably the low field's carry moved to bit 0 of the high field.
     262  SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64);
     263  sum = simd_add_64(c1, partial);
     264  return sum;
     265}
     266
     // Carry-free add: no carry queue is read or written; the result is whatever
     // simd_add_128(x, y) returns.
     267static inline BitBlock BitBlock_add(BitBlock x, BitBlock y) {
     268  return simd_add_128(x, y);
     269}
     270
     // Subtract y from x with a borrow-in read from carryQ and a borrow-out
     // written back to it. carryQ is taken by reference and is rewritten; carryno
     // is not referenced in this body. Returns `diff`.
     // NOTE(review): presumably a full-width subtract assembled from two 64-bit
     // field subtracts, with b1 borrowing from the high field on behalf of the
     // low field -- confirm against the simd_*/sisd_* helper definitions.
     271static inline BitBlock BitBlock_sub_ci_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {
     272  BitBlock diff;
     // gen/prop: terms combined below as gen | (prop & value) to detect a borrow.
     // NOTE(review): simd_andc(y, x) is presumably y AND NOT x -- verify operand order.
     273  SIMD_type gen = simd_andc(y, x);
     274  SIMD_type prop = simd_not(simd_xor(x, y));
     // Borrow-in (carryQ masked by carryQ_ci_mask) is subtracted before carryQ is
     // stepped on the next line.
     275  SIMD_type partial = simd_sub_64(simd_sub_64(x, y), simd_and(carryQ, carryQ_ci_mask));
     276  carryQ = simd_srli_64(carryQ, 1);
     277  SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64);
     278  diff = simd_sub_64(partial, b1);
     // Same borrow test applied to the final difference; only the carryQ_co_mask bit is kept.
     279  carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_and(prop, diff)), carryQ_co_mask));
     280  return diff;
     281}
     282
     // Subtract y from x with no borrow-in, writing a borrow-out into carryQ.
     // carryQ is taken by reference; it is stepped with simd_srli_64 and then has
     // the borrow-out bit OR'ed in. carryno is not referenced in this body.
     // NOTE(review): presumably a full-width subtract assembled from two 64-bit
     // field subtracts -- confirm against the simd_*/sisd_* helper definitions.
     283static inline BitBlock BitBlock_sub_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {
     284  BitBlock diff;
     // gen/prop: terms combined below as gen | (prop & value) to detect a borrow.
     285  SIMD_type gen = simd_andc(y, x);
     286  SIMD_type prop = simd_not(simd_xor(x, y));
     287  SIMD_type partial = simd_sub_64(x, y);
     288  carryQ = simd_srli_64(carryQ, 1);
     // NOTE(review): presumably the low field's borrow moved to bit 0 of the high field.
     289  SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64);
     290  diff = simd_sub_64(partial, b1);
     // Only the bit selected by carryQ_co_mask is recorded as the borrow-out.
     291  carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_and(prop, diff)), carryQ_co_mask));
     292  return diff;
     293}
     294
     // Subtract y from x with a borrow-in read from carryQ and no borrow-out
     // recorded: carryQ is only stepped with simd_srli_64. carryno is not
     // referenced in this body. Returns `diff`.
     // NOTE(review): presumably a full-width subtract assembled from two 64-bit
     // field subtracts -- confirm against the simd_*/sisd_* helper definitions.
     295static inline BitBlock BitBlock_sub_ci(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {
     296  BitBlock diff;
     297  SIMD_type gen = simd_andc(y, x);
     298  SIMD_type prop = simd_not(simd_xor(x, y));
     // Borrow-in is read here, before carryQ is overwritten on the next line.
     299  SIMD_type partial = simd_sub_64(simd_sub_64(x, y), simd_and(carryQ, carryQ_ci_mask));
     300  carryQ = simd_srli_64(carryQ, 1);
     // NOTE(review): presumably the low field's borrow moved to bit 0 of the high field.
     301  SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64);
     302  diff = simd_sub_64(partial, b1);
     303  return diff;
     304}
     305
     // Subtract y from x with no carry queue involved: nothing is read from or
     // written to a CarryQtype. Returns `diff`.
     // NOTE(review): presumably a full-width subtract assembled from two 64-bit
     // field subtracts, b1 being the borrow taken from the high field -- confirm
     // against the simd_*/sisd_* helper definitions.
     306static inline BitBlock BitBlock_sub(BitBlock x, BitBlock y) {
     307  BitBlock diff;
     // gen/prop: terms combined below as gen | (prop & partial) to detect a borrow.
     308  SIMD_type gen = simd_andc(y, x);
     309  SIMD_type prop = simd_not(simd_xor(x, y));
     310  SIMD_type partial = simd_sub_64(x, y);
     311  SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64);
     312  diff = simd_sub_64(partial, b1);
     313  return diff;
    304314}
    305315
    306316static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) {
    307  BitBlock markers1;
    308  carryQ_adc128_ci_co(markers0, charclass, cq, markers1);
    309  return simd_andc(markers1, charclass);
     317 return simd_andc(BitBlock_add_ci_co(markers0, charclass, cq, carryno), charclass);
    310318}
    311319
    312320static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) {
    313  BitBlock markers1;
    314  carryQ_adc128_co(markers0, charclass, cq, markers1);
    315  return simd_andc(markers1, charclass);
     321 return simd_andc(BitBlock_add_co(markers0, charclass, cq, carryno), charclass);
     322}
     323
     // Scan-thru with carry-in only: adds markers0 and charclass through
     // BitBlock_add_ci (which reads and steps cq), then applies
     // simd_andc(sum, charclass) to the result. cq and carryno are forwarded
     // unchanged to BitBlock_add_ci.
     // NOTE(review): simd_andc(a, b) is presumably a AND NOT b, i.e. the sum with
     // the charclass positions cleared -- verify operand order.
     324static inline BitBlock BitBlock_scanthru_ci(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) {
     325 return simd_andc(BitBlock_add_ci(markers0, charclass, cq, carryno), charclass);
     326}
     327
     // Scan-thru with no carry handling: adds markers0 and charclass through
     // BitBlock_add, then applies simd_andc(sum, charclass) to the result.
     // NOTE(review): simd_andc(a, b) is presumably a AND NOT b, i.e. the sum with
     // the charclass positions cleared -- verify operand order.
     328static inline BitBlock BitBlock_scanthru(BitBlock markers0, BitBlock charclass) {
     329 return simd_andc(BitBlock_add(markers0, charclass), charclass);
    316330}
    317331
     
    539553
    540554
    541 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & cq, const int carryno) {
    542         BitBlock rslt;
    543         carryQ_advance_with_carry_ci_co(strm, cq, rslt);
    544         return rslt;
    545 }
    546 
    547 static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype & cq, const int carryno) {
    548         BitBlock rslt;
    549         carryQ_advance_with_carry_co(strm, cq, rslt);
    550         return rslt;
    551 }
     // Inline-assembly advance with carry-in and carry-out. strm is copied into
     // x and read back as two 64-bit integers; the result is assembled in z and
     // returned as z.bitblock. carryQ is taken by reference and is both an input
     // and an output of the asm statement. carryno is not referenced in this body.
     // NOTE(review): BitBlock_int64 is presumably a union overlaying a BitBlock
     // with int64[2], int64[0] being the low half -- confirm its declaration.
     555static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & carryQ, const int carryno) {
     556  BitBlock_int64 x, z;
     557  x.bitblock = strm;
     // Instruction sequence (AT&T syntax, order matters because each step
     // consumes the carry flag left by the previous one):
     //   add cq, cq  - doubles carryQ; the bit shifted out lands in the carry flag
     //   adc z1, z1  - doubles the first half and adds that carry-in
     //   adc z2, z2  - doubles the second half and adds the carry out of z1
     //   adc $0, cq  - adds the carry out of z2 into carryQ
     // The "[z1]"/"[z2]"/"[cq]" input constraints tie each input to the
     // same-named output operand.
     558  __asm__  __volatile__ ("add %[cq], %[cq]\n\t"
     559        "adc %[z1], %[z1]\n\t"
     560        "adc %[z2], %[z2]\n\t"
     561        "adc $0, %[cq]\n\t"
     562         : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]), [cq] "=r" (carryQ)
     563         : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),
     564           "[cq]" (carryQ)
     565         : "cc");
     566  return z.bitblock;
     567}
     568
     // Inline-assembly advance that records a carry-out but consumes no carry-in.
     // strm is copied into x and read back as two 64-bit integers; the result is
     // assembled in z and returned as z.bitblock. carryQ is taken by reference
     // and is both an input and an output of the asm statement. carryno is not
     // referenced in this body.
     // NOTE(review): BitBlock_int64 is presumably a union overlaying a BitBlock
     // with int64[2], int64[0] being the low half -- confirm its declaration.
     569static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype & carryQ, const int carryno) {
     570  BitBlock_int64 x, z;
     571  x.bitblock = strm;
     // Instruction sequence (AT&T syntax):
     //   add cq, cq  - doubles carryQ; the carry it produces is not consumed,
     //                 because the next instruction is a plain add
     //   add z1, z1  - doubles the first half with no carry-in
     //   adc z2, z2  - doubles the second half and adds the carry out of z1
     //   adc $0, cq  - adds the carry out of z2 into carryQ
     572  __asm__  __volatile__ ("add %[cq], %[cq]\n\t"
     573        "add %[z1], %[z1]\n\t"
     574        "adc %[z2], %[z2]\n\t"
     575        "adc $0, %[cq]\n\t"
     576         : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]), [cq] "=r" (carryQ)
     577         : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),
     578           "[cq]" (carryQ)
     579         : "cc");
     580  return z.bitblock;
     581}
     582
     583static inline BitBlock BitBlock_advance_ci(BitBlock strm, CarryQtype & carryQ, const int carryno) {
     584  BitBlock_int64 x, z;
     585  x.bitblock = strm;
     586  __asm__  __volatile__ ("add %[cq], %[cq]\n\t"
     587        "adc %[z1], %[z1]\n\t"
     588        "add %[z2], %[z2]\n\t"
     589         : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]), [cq] "=r" (carryQ)
     590         : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),
     591           "[cq]" (carryQ)
     592         : "cc");
     593  return z.bitblock;
     594}
     595
     // Inline-assembly advance with no carry queue involved. strm is copied into
     // x and read back as two 64-bit integers; the result is assembled in z and
     // returned as z.bitblock.
     // NOTE(review): BitBlock_int64 is presumably a union overlaying a BitBlock
     // with int64[2], int64[0] being the low half -- confirm its declaration.
     596static inline BitBlock BitBlock_advance(BitBlock strm) {
     597  BitBlock_int64 x, z;
     598  x.bitblock = strm;
     // Instruction sequence (AT&T syntax):
     //   add z1, z1  - doubles the first half; the bit shifted out lands in the carry flag
     //   adc z2, z2  - doubles the second half and adds that carry
     599  __asm__  __volatile__ (
     600        "add %[z1], %[z1]\n\t"
     601        "adc %[z2], %[z2]\n\t"
     602         : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1])
     603         : "[z1]" (x.int64[0]), "[z2]" (x.int64[1])
     604         : "cc");
     605  return z.bitblock;
     606}
     607
    552608
    553609static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) {
Note: See TracChangeset for help on using the changeset viewer.