Ignore:
Timestamp:
Apr 8, 2011, 10:16:25 AM (8 years ago)
Author:
cameron
Message:

CarryQ changes for AVX/BitBlock_scantofirst

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/carryQ_avx.h

    r973 r1077  
    1 //
    2 // carryQ.h
    3 // Robert D. Cameron
    4 // Dec. 5, 2010 - first queuing implementation
    5 // November 29, 2010 - first version without actual queueing.
    6 //
    7 #ifndef CARRY_Q_H
    8 #define CARRY_Q_H
     1#define CARRYQ_AVX
     2#include "carryQ.h"
    93
    10 #ifdef SIMD_CARRY_Q
    11 #define CARRY_Q
    12 #define CarryQtype SIMD_type
    13 #endif
    14 
    15 #ifdef ADC64_CARRY_Q
    16 #define CARRY_Q
    17 #define CarryQtype uint64_t
    18 #endif
    19 
    20 #ifdef CARRY_Q
    21 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    22 
    23 static inline BitBlock BitBlock_advance_ci(BitBlock strm, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    24 
    25 static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    26 
    27 static inline BitBlock BitBlock_advance(BitBlock strm) __attribute__ ((always_inline));
    28 
    29 static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    30 
    31 static inline BitBlock BitBlock_add_ci(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    32 
    33 static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    34 
    35 static inline BitBlock BitBlock_add(BitBlock strm1, BitBlock strm2) __attribute__ ((always_inline));
    36 
    37 static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    38 
    39 static inline BitBlock BitBlock_sub_ci(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    40 
    41 static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    42 
    43 static inline BitBlock BitBlock_sub(BitBlock strm1, BitBlock strm2) __attribute__ ((always_inline));
    44 
    45 static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    46 
    47 static inline BitBlock BitBlock_scanthru_ci(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    48 
    49 static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    50 
    51 static inline BitBlock BitBlock_scanthru(BitBlock markers0, BitBlock charclass) __attribute__ ((always_inline));
    52 
    53 static inline BitBlock BitBlock_scanto_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    54 
    55 static inline BitBlock BitBlock_scanto_ci(BitBlock markers0, BitBlock charclass, BitBlock EOF_mask, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    56 
    57 static inline BitBlock BitBlock_scanto_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));
    58 
    59 static inline BitBlock BitBlock_scanto(BitBlock markers0, BitBlock charclass, BitBlock EOF_mask) __attribute__ ((always_inline));
    60 
    61 static inline bool CarryTest(CarryQtype & cq, const int carryno, const int carry_count) __attribute__ ((always_inline));
    62 
    63 static inline void CarryDequeueEnqueue(CarryQtype & cq, const int carryno, const int carry_count) __attribute__ ((always_inline));
    64 
    65 static inline void CarryQ_Adjust(CarryQtype & cq, const int carry_count) __attribute__ ((always_inline));
    66 
    67 static inline void CarryCombine(CarryQtype & cq, CarryQtype & local_cq, const int carryno, const int carry_count) __attribute__ ((always_inline));
    68 
    69 #endif
    70 
    71 
    72 #ifndef CARRY_Q
    73 #include "block_carry_avx.h"
    74 
    75 
    // Without a queue implementation selected, a carry "queue" is a plain
    // pointer to CarryType, indexed by carry number.
    #define CarryQtype CarryType *

    // Declare an array `name` of `count` carries.  The expansion already ends
    // with a semicolon.
    #define CarryDeclare(name, count)\
    CarryType name[count];

    // Set the first `count` entries of `name` to Carry0.  Both arguments are
    // parenthesized so that expression arguments (e.g. `a ? b : c`) expand
    // with the intended precedence.  The loop index is named `j`; do not pass
    // an expression that itself refers to a variable called `j`.
    #define CarryInit(name, count)\
    for (int j=0; j < (count); j++) (name)[j] = Carry0
    83 
    84 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype cq, const int carryno) __attribute__ ((always_inline));
    85 
    86 static inline BitBlock BitBlock_advance_ci(BitBlock strm, CarryQtype cq, const int carryno) __attribute__ ((always_inline));
    87 
    88 static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype cq, const int carryno) __attribute__ ((always_inline));
    89 
    90 static inline BitBlock BitBlock_advance(BitBlock strm) __attribute__ ((always_inline));
    91 
    92 static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) __attribute__ ((always_inline));
    93 
    94 static inline BitBlock BitBlock_add_ci(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) __attribute__ ((always_inline));
    95 
    96 static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) __attribute__ ((always_inline));
    97 
    98 static inline BitBlock BitBlock_add(BitBlock strm1, BitBlock strm2) __attribute__ ((always_inline));
    99 
    100 static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) __attribute__ ((always_inline));
    101 
    102 static inline BitBlock BitBlock_sub_ci(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) __attribute__ ((always_inline));
    103 
    104 static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) __attribute__ ((always_inline));
    105 
    106 static inline BitBlock BitBlock_sub(BitBlock strm1, BitBlock strm2) __attribute__ ((always_inline));
    107 
    108 static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) __attribute__ ((always_inline));
    109 
    110 static inline BitBlock BitBlock_scanthru_ci(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) __attribute__ ((always_inline));
    111 
    112 static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) __attribute__ ((always_inline));
    113 
    114 static inline BitBlock BitBlock_scanthru(BitBlock markers0, BitBlock charclass) __attribute__ ((always_inline));
    115 
    116 static inline BitBlock BitBlock_scanto_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) __attribute__ ((always_inline));
    117 
    118 static inline BitBlock BitBlock_scanto_ci(BitBlock markers0, BitBlock charclass, CarryQtype cq, BitBlock EOF_mask, const int carryno) __attribute__ ((always_inline));
    119 
    120 static inline BitBlock BitBlock_scanto_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) __attribute__ ((always_inline));
    121 
    122 static inline BitBlock BitBlock_scanto(BitBlock markers0, BitBlock charclass, BitBlock EOF_mask) __attribute__ ((always_inline));
    123 
    124 static inline bool CarryTest(CarryQtype cq, const int carryno, const int carry_count) __attribute__ ((always_inline));
    125 
    126 static inline void CarryDequeueEnqueue(CarryQtype cq, const int carryno, const int carry_count) __attribute__ ((always_inline));
    127 
    128 static inline void CarryQ_Adjust(CarryQtype cq, const int carry_count) __attribute__ ((always_inline));
    129 
    130 static inline void CarryCombine(CarryQtype cq, CarryQtype local_cq, const int carryno, const int carry_count) __attribute__ ((always_inline));
    131 
    132 
    133 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype cq, const int carryno) {
    134         BitBlock rslt;
    135         advance_with_carry256(strm, cq[carryno], rslt);
    136         return rslt;
    137 }
    138 
    139 static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype cq, const int carryno) {
    140         BitBlock rslt;
    141         cq[carryno] = Carry0;
    142         advance_with_carry256(strm, cq[carryno], rslt);
    143         return rslt;
    144 }
    145 
    146 static inline BitBlock BitBlock_advance_ci(BitBlock strm, CarryQtype cq, const int carryno) {
    147         BitBlock rslt;
    148         CarryType c = cq[carryno];
    149         advance_with_carry256(strm, c, rslt);
    150         return rslt;
    151 }
    152 
    153 static inline BitBlock BitBlock_advance(BitBlock strm) {
    154         return sisd_slli(strm, 1);
    155 }
    156 
    157 static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) {
    158         BitBlock sum;
    159         adc256(strm1, strm2, cq[carryno], sum);
    160         return sum;
    161 }
    162 
    163 static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) {
    164         BitBlock sum;
    165         cq[carryno] = Carry0;
    166         adc256(strm1, strm2, cq[carryno], sum);
    167         return sum;
    168 }
    169 
    170 static inline BitBlock BitBlock_add_ci(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) {
    171         BitBlock sum;
    172         CarryType c = cq[carryno];
    173         adc256(strm1, strm2, c, sum);
    174         return sum;
    175 }
    176 
    177 static inline BitBlock BitBlock_add(BitBlock strm1, BitBlock strm2) {
    178         BitBlock sum;
    179         CarryType c = Carry0;
    180         adc256(strm1, strm2, c, sum);
    181         return sum;
    182 }
    183 
    184 static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) {
    185         BitBlock diff;
    186         sbb256(strm1, strm2, cq[carryno], diff);
    187         return diff;
    188 }
    189 
    190 static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) {
    191         BitBlock diff;
    192         cq[carryno] = Carry0;
    193         sbb256(strm1, strm2, cq[carryno], diff);
    194         return diff;
    195 }
    196 
    197 static inline BitBlock BitBlock_sub_ci(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) {
    198         BitBlock diff;
    199         CarryType c = cq[carryno];
    200         sbb256(strm1, strm2, c, diff);
    201         return diff;
    202 }
    203 
    204 static inline BitBlock BitBlock_sub(BitBlock strm1, BitBlock strm2) {
    205         BitBlock diff;
    206         CarryType c = Carry0;
    207         sbb256(strm1, strm2, c, diff);
    208         return diff;
    209 }
    210 
    211 static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) {
    212         BitBlock markers1;
    213         adc256(markers0, charclass, cq[carryno], markers1);
    214         return simd_andc(markers1, charclass);
    215 }
    216 
    217 static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) {
    218         BitBlock markers1;
    219         cq[carryno] = Carry0;
    220         adc256(markers0, charclass, cq[carryno], markers1);
    221         return simd_andc(markers1, charclass);
    222 }
    223 
    224 static inline BitBlock BitBlock_scanthru_ci(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) {
    225         BitBlock markers1;
    226         CarryType c = cq[carryno];
    227         adc256(markers0, charclass, c, markers1);
    228         return simd_andc(markers1, charclass);
    229 }
    230 
    231 static inline BitBlock BitBlock_scanthru(BitBlock markers0, BitBlock charclass) {
    232         BitBlock markers1;
    233         CarryType c = Carry0;
    234         adc256(markers0, charclass, c, markers1);
    235         return simd_andc(markers1, charclass);
    236 }
    237 
    238 static inline BitBlock BitBlock_scanto_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) {
    239         BitBlock markers1;
    240         adc256(markers0, simd_not(charclass), cq[carryno], markers1);
    241         return simd_and(markers1, charclass);
    242 }
    243 
    244 static inline BitBlock BitBlock_scanto_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) {
    245         BitBlock markers1;
    246         cq[carryno] = Carry0;
    247         adc256(markers0, simd_not(charclass), cq[carryno], markers1);
    248         return simd_and(markers1, charclass);
    249 }
    250 
    251 static inline BitBlock BitBlock_scanto_ci(BitBlock markers0, BitBlock charclass, BitBlock EOF_mask, CarryQtype cq, const int carryno) {
    252         BitBlock markers1;
    253         CarryType c = cq[carryno];
    254         BitBlock scanclass = simd_andc(EOF_mask, charclass);
    255         adc256(markers0, scanclass, c, markers1);
    256         return simd_andc(markers1, scanclass);
    257 }
    258 
    259 static inline BitBlock BitBlock_scanto(BitBlock markers0, BitBlock charclass, BitBlock EOF_mask) {
    260         BitBlock markers1;
    261         CarryType c = Carry0;
    262         BitBlock scanclass = simd_andc(EOF_mask, charclass);
    263         adc256(markers0, scanclass, c, markers1);
    264         return simd_andc(markers1, scanclass);
    265 }
    266 
    267 static inline bool CarryTest(CarryQtype cq, const int carryno, const int carry_count) {
    268   CarryType c1 = cq[carryno];
    269   int i;
    270   for (i = carryno + 1; i < carryno + carry_count; i++) {
    271     c1 = carry_or(c1, cq[i]);
    272   }
    273   return test_carry(c1);
    274 }
    275 
    276 static inline void CarryDequeueEnqueue(CarryQtype cq, const int carryno, const int carry_count) {
    277   // Given carryin queue with carry_count carries starting from carryno are 0,
    278   // ensure that the carryout queue has carry_count carries starting from carryno set to 0.
    279   // Nothing to do when the queues are the same!
    280   return;
    281 }
    282 
    283 static inline void CarryQ_Adjust(CarryQtype cq, const int carry_count) {
    284   // Adjust the carryQ so that carries enqueued are readied for dequeiing.
    285   // Nothing to do with indexed queues.
    286   return;
    287 }
    288 
    289 static inline void CarryCombine(CarryQtype cq, CarryQtype local_cq, const int carryno, const int carry_count) {
    290   int i;
    291   for (i = 0; i < carry_count; i++) {
    292     cq[carryno+i] = carry_or(cq[carryno+i], local_cq[i]);
    293   }
    294 }
    295 #endif
    296 #ifdef SIMD_CARRY_Q
    297 
    // SIMD carry queue: one CarryQtype value holds the whole queue, so `count`
    // is accepted for interface compatibility but not used by either macro.

    // Declare the queue variable (no terminating semicolon supplied).
    #define CarryDeclare(name, count) CarryQtype name

    // Clear the queue by assigning simd_const_1(0).
    #define CarryInit(name, count) name = simd_const_1(0)
    303 
    304 SIMD_type carryQ_ci_mask = sisd_from_int(1);
    305 SIMD_type carryQ_co_mask = sisd_slli(carryQ_ci_mask, 127);
    306 
    307 static inline BitBlock BitBlock_advance_ci_co(BitBlock cursor, CarryQtype & carryQ, const int carryno) {
    308   SIMD_type carry_out = simd_and(cursor, carryQ_co_mask);
    309   SIMD_type carry_in = simd_and(carryQ, carryQ_ci_mask);
    310   carryQ = simd_or(simd_srli_64(carryQ, 1), carry_out);
    311   SIMD_type shift_out = simd_srli_64(cursor, 63);
    312   SIMD_type low_bits = simd_mergel_64(shift_out, carry_in);
    313   return simd_or(simd_add_64(cursor, cursor), low_bits);
    314 }
    315 
    316 static inline BitBlock BitBlock_advance_co(BitBlock cursor, CarryQtype & carryQ, const int carryno) {
    317   SIMD_type carry_out = simd_and(cursor, carryQ_co_mask);
    318   carryQ = simd_or(simd_srli_64(carryQ, 1), carry_out);
    319   SIMD_type shift_out = simd_srli_64(cursor, 63);
    320   SIMD_type low_bits = simd_mergel_64(shift_out, simd_const_1(0));
    321   return simd_or(simd_add_64(cursor, cursor), low_bits);
    322 }
    323 
    324 static inline BitBlock BitBlock_advance_ci(BitBlock cursor, CarryQtype & carryQ, const int carryno) {
    325   SIMD_type carry_in = simd_and(carryQ, carryQ_ci_mask);
    326   carryQ = simd_srli_64(carryQ, 1);
    327   SIMD_type shift_out = simd_srli_64(cursor, 63);
    328   SIMD_type low_bits = simd_mergel_64(shift_out, carry_in);
    329   return simd_or(simd_add_64(cursor, cursor), low_bits);
    330 }
    331 
    332 static inline BitBlock BitBlock_advance(BitBlock cursor) {
    333   return sisd_slli(cursor, 1);
    334 }
    335 
    336 
    337 static inline BitBlock BitBlock_add_ci_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {
    338   BitBlock sum;
    339   SIMD_type gen = simd_and(x, y);
    340   SIMD_type prop = simd_or(x, y);
    341   SIMD_type partial = simd_add_64(simd_add_64(x, y), simd_and(carryQ, carryQ_ci_mask));
    342   carryQ = simd_srli_64(carryQ, 1);
    343   SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64);
    344   sum = simd_add_64(c1, partial);
    345   carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_andc(prop, sum)), carryQ_co_mask));
    346   return sum;
    347 }
    348 
    349 static inline BitBlock BitBlock_add_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {
    350   BitBlock sum;
    351   SIMD_type gen = simd_and(x, y);
    352   SIMD_type prop = simd_or(x, y);
    353   SIMD_type partial = simd_add_64(x, y);
    354   carryQ = simd_srli_64(carryQ, 1);
    355   SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64);
    356   sum = simd_add_64(c1, partial);
    357   carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_andc(prop, sum)), carryQ_co_mask));
    358   return sum;
    359 }
    360 
    361 static inline BitBlock BitBlock_add_ci(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {
    362   BitBlock sum;
    363   SIMD_type gen = simd_and(x, y);
    364   SIMD_type prop = simd_or(x, y);
    365   SIMD_type partial = simd_add_64(simd_add_64(x, y), simd_and(carryQ, carryQ_ci_mask));
    366   carryQ = simd_srli_64(carryQ, 1);
    367   SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64);
    368   sum = simd_add_64(c1, partial);
    369   return sum;
    370 }
    371 
    372 static inline BitBlock BitBlock_add(BitBlock x, BitBlock y) {
    373   return simd_add_128(x, y);
    374 }
    375 
    376 static inline BitBlock BitBlock_sub_ci_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {
    377   BitBlock diff;
    378   SIMD_type gen = simd_andc(y, x);
    379   SIMD_type prop = simd_not(simd_xor(x, y));
    380   SIMD_type partial = simd_sub_64(simd_sub_64(x, y), simd_and(carryQ, carryQ_ci_mask));
    381   carryQ = simd_srli_64(carryQ, 1);
    382   SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64);
    383   diff = simd_sub_64(partial, b1);
    384   carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_and(prop, diff)), carryQ_co_mask));
    385   return diff;
    386 }
    387 
    388 static inline BitBlock BitBlock_sub_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {
    389   BitBlock diff;
    390   SIMD_type gen = simd_andc(y, x);
    391   SIMD_type prop = simd_not(simd_xor(x, y));
    392   SIMD_type partial = simd_sub_64(x, y);
    393   carryQ = simd_srli_64(carryQ, 1);
    394   SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64);
    395   diff = simd_sub_64(partial, b1);
    396   carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_and(prop, diff)), carryQ_co_mask));
    397   return diff;
    398 }
    399 
    400 static inline BitBlock BitBlock_sub_ci(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {
    401   BitBlock diff;
    402   SIMD_type gen = simd_andc(y, x);
    403   SIMD_type prop = simd_not(simd_xor(x, y));
    404   SIMD_type partial = simd_sub_64(simd_sub_64(x, y), simd_and(carryQ, carryQ_ci_mask));
    405   carryQ = simd_srli_64(carryQ, 1);
    406   SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64);
    407   diff = simd_sub_64(partial, b1);
    408   return diff;
    409 }
    410 
    411 static inline BitBlock BitBlock_sub(BitBlock x, BitBlock y) {
    412   BitBlock diff;
    413   SIMD_type gen = simd_andc(y, x);
    414   SIMD_type prop = simd_not(simd_xor(x, y));
    415   SIMD_type partial = simd_sub_64(x, y);
    416   SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64);
    417   diff = simd_sub_64(partial, b1);
    418   return diff;
    419 }
    420 
    421 typedef union {SIMD_type bitblock; uint64_t int64[2];} BitBlock_int64;
    422 
    423 static inline bool CarryTest(CarryQtype & cq, const int carryno, const int carry_count) {
    424   BitBlock_int64 t;
    425   t.bitblock = cq;
    426   uint64_t carryQ_top_N_mask = ((1 << carry_count) -1);
    427   return t.int64[0] & carryQ_top_N_mask;
    428 }
    429 
    430 static inline void CarryDequeueEnqueue(CarryQtype & cq, const int carryno, const int carry_count) {
    431   // Given carryin queue with carry_count carries starting from carryno are 0,
    432   // ensure that the carryout queue has carry_count carries starting from carryno set to 0.
    433   cq = sisd_srli(cq, carry_count);
    434 }
    435 
    436 static inline void CarryCombine(CarryQtype & cq, CarryQtype & local_cq, const int carryno, const int carry_count) {
    437   cq = simd_or(cq, local_cq);
    438 }
    439 
    440 static inline void CarryQ_Adjust(CarryQtype & cq, const int carry_count) {
    441   // Adjust the carryQ so that carries enqueued are readied for dequeiing.
    442   cq = sisd_srli(cq, (128-carry_count));
    443 }
    444 
    445 
    446 #endif
    447 
    448 #ifdef ADC64_CARRY_Q
    449 
    //
    // CarryQueue implementation using 64-bit integer queues.
    // A single 64-bit integer holds both the input and output
    // carries, with bits moving right-to-left.   Thus the
    // high bit in the queue is always the next carry to be
    // dequeued; a newly enqueued carry is always inserted as
    // the low bit.
    //
    // The two typical operations for dequeueing and enqueueing
    // carries from/to a CarryQueue cq are the following.
    // 1.  Dequeueing:  add(cq, cq)
    //     The high carry bit is dequeued and sets the processor
    //     carry flag to be used as a carry-in variable in the
    //     following bitblock operation.   This also shifts cq
    //     left one position, making room for enqueuing a new carry.
    // 2.  Enqueueing:  adc($0, cq)
    //     The carry out value of an operation as recorded in the
    //     processor carry flag is enqueued by adding it in to the
    //     low bit position of cq (this bit will have been cleared
    //     by the dequeue operation).
    469 //     by the dequeue operation.
    470 
    // ADC64 carry queue: the whole queue is one CarryQtype value, so `count`
    // is accepted for interface compatibility but not used by either macro.

    // Declare the queue variable (no terminating semicolon supplied).
    #define CarryDeclare(name, count) CarryQtype name

    // Clear the queue by assigning zero.
    #define CarryInit(name, count) name = 0
    476 
    477 typedef union {SIMD_type bitblock; uint64_t int64[2];} BitBlock_int64;
    478 
    479 
    480 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & carryQ, const int carryno) {
    481         BitBlock_int64 x, z;
    482         x.bitblock = strm;
    483         __asm__  __volatile__ ("add %[cq], %[cq]\n\t"
    484                                                    "adc %[z1], %[z1]\n\t"
    485                                                    "adc %[z2], %[z2]\n\t"
    486                                                    "adc $0, %[cq]\n\t"
    487                                                    : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]), [cq] "=r" (carryQ)
    488                                                    : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),
    489                                                    "[cq]" (carryQ)
    490                                                    : "cc");
    491         return z.bitblock;
    492 }
    493 
    494 static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype & carryQ, const int carryno) {
    495         BitBlock_int64 x, z;
    496         x.bitblock = strm;
    497         __asm__  __volatile__ ("add %[cq], %[cq]\n\t"
    498                                                    "add %[z1], %[z1]\n\t"
    499                                                    "adc %[z2], %[z2]\n\t"
    500                                                    "adc $0, %[cq]\n\t"
    501                                                    : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]), [cq] "=r" (carryQ)
    502                                                    : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),
    503                                                    "[cq]" (carryQ)
    504                                                    : "cc");
    505         return z.bitblock;
    506 }
    507 
    508 static inline BitBlock BitBlock_advance_ci(BitBlock strm, CarryQtype & carryQ, const int carryno) {
    509         BitBlock_int64 x, z;
    510         x.bitblock = strm;
    511         __asm__  __volatile__ ("add %[cq], %[cq]\n\t"
    512                                                    "adc %[z1], %[z1]\n\t"
    513                                                    "adc %[z2], %[z2]\n\t"
    514                                                    : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]), [cq] "=r" (carryQ)
    515                                                    : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),
    516                                                    "[cq]" (carryQ)
    517                                                    : "cc");
    518         return z.bitblock;
    519 }
    520 
    521 static inline BitBlock BitBlock_advance(BitBlock strm) {
    522         BitBlock_int64 x, z;
    523         x.bitblock = strm;
    524         __asm__  __volatile__ ("add %[z1], %[z1]\n\t"
    525                                                    "adc %[z2], %[z2]\n\t"
    526                                                    : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1])
    527                                                    : "[z1]" (x.int64[0]), "[z2]" (x.int64[1])
    528                                                    : "cc");
    529         return z.bitblock;
    530 }
    531 
    532 
    533 static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) {
    534         BitBlock_int64 rslt, x, y;
    535         x.bitblock = strm1;
    536         y.bitblock = strm2;
    537         __asm__ __volatile__ ("add %[cq], %[cq]\n\t"
    538                                                   "adc %[e1], %[z1]\n\t"
    539                                                   "adc %[e2], %[z2]\n\t"
    540                                                   "adc $0, %[cq]\n\t"
    541                                                   : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ)
    542                                                   : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),
    543                                                   [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]),
    544                                                   "[cq]" (carryQ)
    545                                                   : "cc");
    546         return rslt.bitblock;
    547 }
    548 
    549 static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) {
    550         BitBlock_int64 rslt, x, y;
    551         x.bitblock = strm1;
    552         y.bitblock = strm2;
    553         __asm__ __volatile__ ("add %[cq], %[cq]\n\t"
    554                                                   "add %[e1], %[z1]\n\t"
    555                                                   "adc %[e2], %[z2]\n\t"
    556                                                   "adc $0, %[cq]\n\t"
    557                                                   : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ)
    558                                                   : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),
    559                                                   [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]),
    560                                                   "[cq]" (carryQ)
    561                                                   : "cc");
    562         return rslt.bitblock;
    563 }
    564 
    565 static inline BitBlock BitBlock_add_ci(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) {
    566         BitBlock_int64 rslt, x, y;
    567         x.bitblock = strm1;
    568         y.bitblock = strm2;
    569         __asm__ __volatile__ ("add %[cq], %[cq]\n\t"
    570                                                   "adc %[e1], %[z1]\n\t"
    571                                                   "adc %[e2], %[z2]\n\t"
    572                                                   : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ)
    573                                                   : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),
    574                                                   [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]),
    575                                                   "[cq]" (carryQ)
    576                                                   : "cc");
    577         return rslt.bitblock;
    578 }
    579 
    580 static inline BitBlock BitBlock_add(BitBlock strm1, BitBlock strm2) {
    581         BitBlock_int64 rslt, x, y;
    582         x.bitblock = strm1;
    583         y.bitblock = strm2;
    584         __asm__ __volatile__ ("add %[e1], %[z1]\n\t"
    585                                                   "adc %[e2], %[z2]\n\t"
    586                                                   : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1])
    587                                                   : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),
    588                                                   [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1])
    589                                                   : "cc");
    590         return rslt.bitblock;
    591 }
    592 
    593 
    594 
    595 static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) {
    596         BitBlock_int64 rslt, x, y;
    597         x.bitblock = strm1;
    598         y.bitblock = strm2;
    599         __asm__ __volatile__ ("add %[cq], %[cq]\n\t"
    600                                                   "sbb %[e1], %[z1]\n\t"
    601                                                   "sbb %[e2], %[z2]\n\t"
    602                                                   "adc $0, %[cq]\n\t"
    603                                                   : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ)
    604                                                   : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),
    605                                                   [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]),
    606                                                   "[cq]" (carryQ)
    607                                                   : "cc");
    608         return rslt.bitblock;
    609 }
    610 
    611 
    612 static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) {
    613         BitBlock_int64 rslt, x, y;
    614         x.bitblock = strm1;
    615         y.bitblock = strm2;
    616         __asm__ __volatile__ ("add %[cq], %[cq]\n\t"
    617                                                   "sub %[e1], %[z1]\n\t"
    618                                                   "sbb %[e2], %[z2]\n\t"
    619                                                   "adc $0, %[cq]\n\t"
    620                                                   : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ)
    621                                                   : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),
    622                                                   [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]),
    623                                                   "[cq]" (carryQ)
    624                                                   : "cc");
    625         return rslt.bitblock;
    626 }
    627 
    628 
    629 static inline BitBlock BitBlock_sub_ci(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) {
    630         BitBlock_int64 rslt, x, y;
    631         x.bitblock = strm1;
    632         y.bitblock = strm2;
    633         __asm__ __volatile__ ("add %[cq], %[cq]\n\t"
    634                                                   "sbb %[e1], %[z1]\n\t"
    635                                                   "sbb %[e2], %[z2]\n\t"
    636                                                   : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ)
    637                                                   : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),
    638                                                   [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]),
    639                                                   "[cq]" (carryQ)
    640                                                   : "cc");
    641         return rslt.bitblock;
    642 }
    643 
    644 
    645 static inline BitBlock BitBlock_sub(BitBlock strm1, BitBlock strm2) {
    646         BitBlock_int64 rslt, x, y;
    647         x.bitblock = strm1;
    648         y.bitblock = strm2;
    649         __asm__ __volatile__ ("sub %[e1], %[z1]\n\t"
    650                                                   "sbb %[e2], %[z2]\n\t"
    651                                                   : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1])
    652                                                   : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),
    653                                                   [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1])
    654                                                   : "cc");
    655         return rslt.bitblock;
    656 }
    657 
    658 
    659 
    660 static inline bool CarryTest(CarryQtype & cq, const int carryno, const int carry_count) {
    661 //         print_general_register_64("cq", cq);
    662         uint64_t carryQ_top_N_mask = ~(0xFFFFFFFFFFFFFFFFULL >> carry_count);
    663         return (cq & carryQ_top_N_mask) != 0;
    664 }
    665 
    666 static inline void CarryDequeueEnqueue(CarryQtype & cq, const int carryno, const int carry_count) {
    667         // Given carryin queue with carry_count carries starting from carryno are 0,
    668         // ensure that the carryout queue has carry_count carries starting from carryno set to 0.
    669         cq <<= carry_count;
    670 }
    671 
    672 static inline void CarryCombine(CarryQtype & cq, CarryQtype & local_cq, const int carryno, const int carry_count) {
    673         cq |= local_cq;
    674 }
    675 
    676 static inline void CarryQ_Adjust(CarryQtype & cq, int total_carries) {
    677         // Adjust the carryQ so that carries enqueued are readied for dequeiing.
    678         cq <<= (64-total_carries);
    679 }
    680 
    681 
    682 #endif
    683 
    684 #ifdef CARRY_Q
    685 static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) {
    686         return simd_andc(BitBlock_add_ci_co(markers0, charclass, cq, carryno), charclass);
    687 }
    688 
    689 static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) {
    690         return simd_andc(BitBlock_add_co(markers0, charclass, cq, carryno), charclass);
    691 }
    692 
    693 static inline BitBlock BitBlock_scanthru_ci(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) {
    694         return simd_andc(BitBlock_add_ci(markers0, charclass, cq, carryno), charclass);
    695 }
    696 
    697 static inline BitBlock BitBlock_scanthru(BitBlock markers0, BitBlock charclass) {
    698         return simd_andc(BitBlock_add(markers0, charclass), charclass);
    699 }
    700 
    701 static inline BitBlock BitBlock_scanto_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) {
    702         return simd_and(BitBlock_add_ci_co(markers0, simd_not(charclass), cq, carryno), charclass);
    703 }
    704 
    705 static inline BitBlock BitBlock_scanto_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) {
    706         return simd_and(BitBlock_add_co(markers0, simd_not(charclass), cq, carryno), charclass);
    707 }
    708 
    709 static inline BitBlock BitBlock_scanto_ci(BitBlock markers0, BitBlock charclass, BitBlock EOF_mask, CarryQtype & cq, const int carryno) {
    710         BitBlock scanclass = simd_andc(EOF_mask, charclass);
    711         return simd_andc(BitBlock_add_ci(markers0, scanclass, cq, carryno), scanclass);
    712 }
    713 
    714 static inline BitBlock BitBlock_scanto(BitBlock markers0, BitBlock charclass, BitBlock EOF_mask) {
    715         BitBlock scanclass = simd_andc(EOF_mask, charclass);
    716         return simd_andc(BitBlock_add(markers0, scanclass), scanclass);
    717 }
    718 
    719 
    720 #endif
    721 
    722 #endif
    723 
Note: See TracChangeset for help on using the changeset viewer.