Changeset 1077 for trunk/lib/carryQ_avx.h
 Timestamp:
 Apr 8, 2011, 10:16:25 AM (8 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/carryQ_avx.h
r973 r1077 1 // 2 // carryQ.h 3 // Robert D. Cameron 4 // Dec. 5, 2010  first queuing implementation 5 // November 29, 2010  first version without actual queueing. 6 // 7 #ifndef CARRY_Q_H 8 #define CARRY_Q_H 1 #define CARRYQ_AVX 2 #include "carryQ.h" 9 3 10 #ifdef SIMD_CARRY_Q11 #define CARRY_Q12 #define CarryQtype SIMD_type13 #endif14 15 #ifdef ADC64_CARRY_Q16 #define CARRY_Q17 #define CarryQtype uint64_t18 #endif19 20 #ifdef CARRY_Q21 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));22 23 static inline BitBlock BitBlock_advance_ci(BitBlock strm, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));24 25 static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));26 27 static inline BitBlock BitBlock_advance(BitBlock strm) __attribute__ ((always_inline));28 29 static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));30 31 static inline BitBlock BitBlock_add_ci(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));32 33 static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));34 35 static inline BitBlock BitBlock_add(BitBlock strm1, BitBlock strm2) __attribute__ ((always_inline));36 37 static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));38 39 static inline BitBlock BitBlock_sub_ci(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));40 41 static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));42 43 static inline BitBlock BitBlock_sub(BitBlock strm1, BitBlock strm2) __attribute__ ((always_inline));44 45 static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));46 47 static inline BitBlock BitBlock_scanthru_ci(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));48 49 static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));50 51 static inline BitBlock BitBlock_scanthru(BitBlock markers0, BitBlock charclass) __attribute__ ((always_inline));52 53 static inline BitBlock BitBlock_scanto_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));54 55 static inline BitBlock BitBlock_scanto_ci(BitBlock markers0, BitBlock charclass, BitBlock EOF_mask, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));56 57 static inline BitBlock BitBlock_scanto_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) __attribute__ ((always_inline));58 59 static inline BitBlock BitBlock_scanto(BitBlock markers0, BitBlock charclass, BitBlock EOF_mask) __attribute__ ((always_inline));60 61 static inline bool CarryTest(CarryQtype & cq, const int carryno, const int carry_count) __attribute__ ((always_inline));62 63 static inline void CarryDequeueEnqueue(CarryQtype & cq, const int carryno, const int carry_count) __attribute__ ((always_inline));64 65 static inline void CarryQ_Adjust(CarryQtype & cq, const int carry_count) __attribute__ ((always_inline));66 67 static inline void CarryCombine(CarryQtype & cq, CarryQtype & local_cq, const int carryno, const int carry_count) __attribute__ ((always_inline));68 69 #endif70 71 72 #ifndef CARRY_Q73 #include "block_carry_avx.h"74 75 76 #define CarryQtype CarryType *77 78 #define CarryDeclare(name, count)\79 CarryType name[count];\80 81 #define CarryInit(name, count)\82 for (int j=0; j < count; j++) name[j] = Carry083 84 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype cq, const int carryno) __attribute__ ((always_inline));85 86 static inline BitBlock BitBlock_advance_ci(BitBlock strm, CarryQtype cq, const int carryno) __attribute__ ((always_inline));87 88 static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype cq, const int carryno) __attribute__ ((always_inline));89 90 static inline BitBlock BitBlock_advance(BitBlock strm) __attribute__ ((always_inline));91 92 static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) __attribute__ ((always_inline));93 94 static inline BitBlock BitBlock_add_ci(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) __attribute__ ((always_inline));95 96 static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) __attribute__ ((always_inline));97 98 static inline BitBlock BitBlock_add(BitBlock strm1, BitBlock strm2) __attribute__ ((always_inline));99 100 static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) __attribute__ ((always_inline));101 102 static inline BitBlock BitBlock_sub_ci(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) __attribute__ ((always_inline));103 104 static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) __attribute__ ((always_inline));105 106 static inline BitBlock BitBlock_sub(BitBlock strm1, BitBlock strm2) __attribute__ ((always_inline));107 108 static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) __attribute__ ((always_inline));109 110 static inline BitBlock BitBlock_scanthru_ci(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) __attribute__ ((always_inline));111 112 static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) __attribute__ ((always_inline));113 114 static inline BitBlock BitBlock_scanthru(BitBlock markers0, BitBlock charclass) __attribute__ ((always_inline));115 116 static inline BitBlock BitBlock_scanto_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) __attribute__ ((always_inline));117 118 static inline BitBlock BitBlock_scanto_ci(BitBlock markers0, BitBlock charclass, CarryQtype cq, BitBlock EOF_mask, const int carryno) __attribute__ ((always_inline));119 120 static inline BitBlock BitBlock_scanto_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) __attribute__ ((always_inline));121 122 static inline BitBlock BitBlock_scanto(BitBlock markers0, BitBlock charclass, BitBlock EOF_mask) __attribute__ ((always_inline));123 124 static inline bool CarryTest(CarryQtype cq, const int carryno, const int carry_count) __attribute__ ((always_inline));125 126 static inline void CarryDequeueEnqueue(CarryQtype cq, const int carryno, const int carry_count) __attribute__ ((always_inline));127 128 static inline void CarryQ_Adjust(CarryQtype cq, const int carry_count) __attribute__ ((always_inline));129 130 static inline void CarryCombine(CarryQtype cq, CarryQtype local_cq, const int carryno, const int carry_count) __attribute__ ((always_inline));131 132 133 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype cq, const int carryno) {134 BitBlock rslt;135 advance_with_carry256(strm, cq[carryno], rslt);136 return rslt;137 }138 139 static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype cq, const int carryno) {140 BitBlock rslt;141 cq[carryno] = Carry0;142 advance_with_carry256(strm, cq[carryno], rslt);143 return rslt;144 }145 146 static inline BitBlock BitBlock_advance_ci(BitBlock strm, CarryQtype cq, const int carryno) {147 BitBlock rslt;148 CarryType c = cq[carryno];149 advance_with_carry256(strm, c, rslt);150 return rslt;151 }152 153 static inline BitBlock BitBlock_advance(BitBlock strm) {154 return sisd_slli(strm, 1);155 }156 157 static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) {158 BitBlock sum;159 adc256(strm1, strm2, cq[carryno], sum);160 return sum;161 }162 163 static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) {164 BitBlock sum;165 cq[carryno] = Carry0;166 adc256(strm1, strm2, cq[carryno], sum);167 return sum;168 }169 170 static inline BitBlock BitBlock_add_ci(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) {171 BitBlock sum;172 CarryType c = cq[carryno];173 adc256(strm1, strm2, c, sum);174 return sum;175 }176 177 static inline BitBlock BitBlock_add(BitBlock strm1, BitBlock strm2) {178 BitBlock sum;179 CarryType c = Carry0;180 adc256(strm1, strm2, c, sum);181 return sum;182 }183 184 static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) {185 BitBlock diff;186 sbb256(strm1, strm2, cq[carryno], diff);187 return diff;188 }189 190 static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) {191 BitBlock diff;192 cq[carryno] = Carry0;193 sbb256(strm1, strm2, cq[carryno], diff);194 return diff;195 }196 197 static inline BitBlock BitBlock_sub_ci(BitBlock strm1, BitBlock strm2, CarryQtype cq, const int carryno) {198 BitBlock diff;199 CarryType c = cq[carryno];200 sbb256(strm1, strm2, c, diff);201 return diff;202 }203 204 static inline BitBlock BitBlock_sub(BitBlock strm1, BitBlock strm2) {205 BitBlock diff;206 CarryType c = Carry0;207 sbb256(strm1, strm2, c, diff);208 return diff;209 }210 211 static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) {212 BitBlock markers1;213 adc256(markers0, charclass, cq[carryno], markers1);214 return simd_andc(markers1, charclass);215 }216 217 static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) {218 BitBlock markers1;219 cq[carryno] = Carry0;220 adc256(markers0, charclass, cq[carryno], markers1);221 return simd_andc(markers1, charclass);222 }223 224 static inline BitBlock BitBlock_scanthru_ci(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) {225 BitBlock markers1;226 CarryType c = cq[carryno];227 adc256(markers0, charclass, c, markers1);228 return simd_andc(markers1, charclass);229 }230 231 static inline BitBlock BitBlock_scanthru(BitBlock markers0, BitBlock charclass) {232 BitBlock markers1;233 CarryType c = Carry0;234 adc256(markers0, charclass, c, markers1);235 return simd_andc(markers1, charclass);236 }237 238 static inline BitBlock BitBlock_scanto_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) {239 BitBlock markers1;240 adc256(markers0, simd_not(charclass), cq[carryno], markers1);241 return simd_and(markers1, charclass);242 }243 244 static inline BitBlock BitBlock_scanto_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, const int carryno) {245 BitBlock markers1;246 cq[carryno] = Carry0;247 adc256(markers0, simd_not(charclass), cq[carryno], markers1);248 return simd_and(markers1, charclass);249 }250 251 static inline BitBlock BitBlock_scanto_ci(BitBlock markers0, BitBlock charclass, BitBlock EOF_mask, CarryQtype cq, const int carryno) {252 BitBlock markers1;253 CarryType c = cq[carryno];254 BitBlock scanclass = simd_andc(EOF_mask, charclass);255 adc256(markers0, scanclass, c, markers1);256 return simd_andc(markers1, scanclass);257 }258 259 static inline BitBlock BitBlock_scanto(BitBlock markers0, BitBlock charclass, BitBlock EOF_mask) {260 BitBlock markers1;261 CarryType c = Carry0;262 BitBlock scanclass = simd_andc(EOF_mask, charclass);263 adc256(markers0, scanclass, c, markers1);264 return simd_andc(markers1, scanclass);265 }266 267 static inline bool CarryTest(CarryQtype cq, const int carryno, const int carry_count) {268 CarryType c1 = cq[carryno];269 int i;270 for (i = carryno + 1; i < carryno + carry_count; i++) {271 c1 = carry_or(c1, cq[i]);272 }273 return test_carry(c1);274 }275 276 static inline void CarryDequeueEnqueue(CarryQtype cq, const int carryno, const int carry_count) {277 // Given carryin queue with carry_count carries starting from carryno are 0,278 // ensure that the carryout queue has carry_count carries starting from carryno set to 0.279 // Nothing to do when the queues are the same!280 return;281 }282 283 static inline void CarryQ_Adjust(CarryQtype cq, const int carry_count) {284 // Adjust the carryQ so that carries enqueued are readied for dequeiing.285 // Nothing to do with indexed queues.286 return;287 }288 289 static inline void CarryCombine(CarryQtype cq, CarryQtype local_cq, const int carryno, const int carry_count) {290 int i;291 for (i = 0; i < carry_count; i++) {292 cq[carryno+i] = carry_or(cq[carryno+i], local_cq[i]);293 }294 }295 #endif296 #ifdef SIMD_CARRY_Q297 298 #define CarryDeclare(name, count)\299 CarryQtype name300 301 #define CarryInit(name, count)\302 name = simd_const_1(0)303 304 SIMD_type carryQ_ci_mask = sisd_from_int(1);305 SIMD_type carryQ_co_mask = sisd_slli(carryQ_ci_mask, 127);306 307 static inline BitBlock BitBlock_advance_ci_co(BitBlock cursor, CarryQtype & carryQ, const int carryno) {308 SIMD_type carry_out = simd_and(cursor, carryQ_co_mask);309 SIMD_type carry_in = simd_and(carryQ, carryQ_ci_mask);310 carryQ = simd_or(simd_srli_64(carryQ, 1), carry_out);311 SIMD_type shift_out = simd_srli_64(cursor, 63);312 SIMD_type low_bits = simd_mergel_64(shift_out, carry_in);313 return simd_or(simd_add_64(cursor, cursor), low_bits);314 }315 316 static inline BitBlock BitBlock_advance_co(BitBlock cursor, CarryQtype & carryQ, const int carryno) {317 SIMD_type carry_out = simd_and(cursor, carryQ_co_mask);318 carryQ = simd_or(simd_srli_64(carryQ, 1), carry_out);319 SIMD_type shift_out = simd_srli_64(cursor, 63);320 SIMD_type low_bits = simd_mergel_64(shift_out, simd_const_1(0));321 return simd_or(simd_add_64(cursor, cursor), low_bits);322 }323 324 static inline BitBlock BitBlock_advance_ci(BitBlock cursor, CarryQtype & carryQ, const int carryno) {325 SIMD_type carry_in = simd_and(carryQ, carryQ_ci_mask);326 carryQ = simd_srli_64(carryQ, 1);327 SIMD_type shift_out = simd_srli_64(cursor, 63);328 SIMD_type low_bits = simd_mergel_64(shift_out, carry_in);329 return simd_or(simd_add_64(cursor, cursor), low_bits);330 }331 332 static inline BitBlock BitBlock_advance(BitBlock cursor) {333 return sisd_slli(cursor, 1);334 }335 336 337 static inline BitBlock BitBlock_add_ci_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {338 BitBlock sum;339 SIMD_type gen = simd_and(x, y);340 SIMD_type prop = simd_or(x, y);341 SIMD_type partial = simd_add_64(simd_add_64(x, y), simd_and(carryQ, carryQ_ci_mask));342 carryQ = simd_srli_64(carryQ, 1);343 SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64);344 sum = simd_add_64(c1, partial);345 carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_andc(prop, sum)), carryQ_co_mask));346 return sum;347 }348 349 static inline BitBlock BitBlock_add_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {350 BitBlock sum;351 SIMD_type gen = simd_and(x, y);352 SIMD_type prop = simd_or(x, y);353 SIMD_type partial = simd_add_64(x, y);354 carryQ = simd_srli_64(carryQ, 1);355 SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64);356 sum = simd_add_64(c1, partial);357 carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_andc(prop, sum)), carryQ_co_mask));358 return sum;359 }360 361 static inline BitBlock BitBlock_add_ci(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {362 BitBlock sum;363 SIMD_type gen = simd_and(x, y);364 SIMD_type prop = simd_or(x, y);365 SIMD_type partial = simd_add_64(simd_add_64(x, y), simd_and(carryQ, carryQ_ci_mask));366 carryQ = simd_srli_64(carryQ, 1);367 SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64);368 sum = simd_add_64(c1, partial);369 return sum;370 }371 372 static inline BitBlock BitBlock_add(BitBlock x, BitBlock y) {373 return simd_add_128(x, y);374 }375 376 static inline BitBlock BitBlock_sub_ci_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {377 BitBlock diff;378 SIMD_type gen = simd_andc(y, x);379 SIMD_type prop = simd_not(simd_xor(x, y));380 SIMD_type partial = simd_sub_64(simd_sub_64(x, y), simd_and(carryQ, carryQ_ci_mask));381 carryQ = simd_srli_64(carryQ, 1);382 SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64);383 diff = simd_sub_64(partial, b1);384 carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_and(prop, diff)), carryQ_co_mask));385 return diff;386 }387 388 static inline BitBlock BitBlock_sub_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {389 BitBlock diff;390 SIMD_type gen = simd_andc(y, x);391 SIMD_type prop = simd_not(simd_xor(x, y));392 SIMD_type partial = simd_sub_64(x, y);393 carryQ = simd_srli_64(carryQ, 1);394 SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64);395 diff = simd_sub_64(partial, b1);396 carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_and(prop, diff)), carryQ_co_mask));397 return diff;398 }399 400 static inline BitBlock BitBlock_sub_ci(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) {401 BitBlock diff;402 SIMD_type gen = simd_andc(y, x);403 SIMD_type prop = simd_not(simd_xor(x, y));404 SIMD_type partial = simd_sub_64(simd_sub_64(x, y), simd_and(carryQ, carryQ_ci_mask));405 carryQ = simd_srli_64(carryQ, 1);406 SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64);407 diff = simd_sub_64(partial, b1);408 return diff;409 }410 411 static inline BitBlock BitBlock_sub(BitBlock x, BitBlock y) {412 BitBlock diff;413 SIMD_type gen = simd_andc(y, x);414 SIMD_type prop = simd_not(simd_xor(x, y));415 SIMD_type partial = simd_sub_64(x, y);416 SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64);417 diff = simd_sub_64(partial, b1);418 return diff;419 }420 421 typedef union {SIMD_type bitblock; uint64_t int64[2];} BitBlock_int64;422 423 static inline bool CarryTest(CarryQtype & cq, const int carryno, const int carry_count) {424 BitBlock_int64 t;425 t.bitblock = cq;426 uint64_t carryQ_top_N_mask = ((1 << carry_count) 1);427 return t.int64[0] & carryQ_top_N_mask;428 }429 430 static inline void CarryDequeueEnqueue(CarryQtype & cq, const int carryno, const int carry_count) {431 // Given carryin queue with carry_count carries starting from carryno are 0,432 // ensure that the carryout queue has carry_count carries starting from carryno set to 0.433 cq = sisd_srli(cq, carry_count);434 }435 436 static inline void CarryCombine(CarryQtype & cq, CarryQtype & local_cq, const int carryno, const int carry_count) {437 cq = simd_or(cq, local_cq);438 }439 440 static inline void CarryQ_Adjust(CarryQtype & cq, const int carry_count) {441 // Adjust the carryQ so that carries enqueued are readied for dequeiing.442 cq = sisd_srli(cq, (128carry_count));443 }444 445 446 #endif447 448 #ifdef ADC64_CARRY_Q449 450 //451 // CarryQueue implementation using 64bit integer queues.452 // A single 64bit integer holds both the input and output453 // carries, with bits moving righttoleft. Thus the454 // high bit in the queue is always the next carry to be455 // dequeued; a newly enqueued carry is always inserted as456 // the low bit.457 //458 // The two typical operations for dequeueing and enqueueing459 // carryies from/to a CarryQueue cq are the following.460 // 1. Dequeueing: add(cq, cq)461 // The high carry bit is dequeued and sets the processor462 // carry flag to be used as a carryin variable in the463 // following bitblock operation. This also shifts cq464 // right one position, making room for enqueuing a new carry.465 // 2. Enqueueing: adc($0, cq)466 // The carry out value of an operation as recorded in the467 // processor carry flag is enqueued by adding it in to the468 // low bit position of cq (this bit will have been cleared469 // by the dequeue operation.470 471 #define CarryDeclare(name, count)\472 CarryQtype name473 474 #define CarryInit(name, count)\475 name = 0476 477 typedef union {SIMD_type bitblock; uint64_t int64[2];} BitBlock_int64;478 479 480 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & carryQ, const int carryno) {481 BitBlock_int64 x, z;482 x.bitblock = strm;483 __asm__ __volatile__ ("add %[cq], %[cq]\n\t"484 "adc %[z1], %[z1]\n\t"485 "adc %[z2], %[z2]\n\t"486 "adc $0, %[cq]\n\t"487 : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]), [cq] "=r" (carryQ)488 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),489 "[cq]" (carryQ)490 : "cc");491 return z.bitblock;492 }493 494 static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype & carryQ, const int carryno) {495 BitBlock_int64 x, z;496 x.bitblock = strm;497 __asm__ __volatile__ ("add %[cq], %[cq]\n\t"498 "add %[z1], %[z1]\n\t"499 "adc %[z2], %[z2]\n\t"500 "adc $0, %[cq]\n\t"501 : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]), [cq] "=r" (carryQ)502 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),503 "[cq]" (carryQ)504 : "cc");505 return z.bitblock;506 }507 508 static inline BitBlock BitBlock_advance_ci(BitBlock strm, CarryQtype & carryQ, const int carryno) {509 BitBlock_int64 x, z;510 x.bitblock = strm;511 __asm__ __volatile__ ("add %[cq], %[cq]\n\t"512 "adc %[z1], %[z1]\n\t"513 "adc %[z2], %[z2]\n\t"514 : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]), [cq] "=r" (carryQ)515 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),516 "[cq]" (carryQ)517 : "cc");518 return z.bitblock;519 }520 521 static inline BitBlock BitBlock_advance(BitBlock strm) {522 BitBlock_int64 x, z;523 x.bitblock = strm;524 __asm__ __volatile__ ("add %[z1], %[z1]\n\t"525 "adc %[z2], %[z2]\n\t"526 : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1])527 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1])528 : "cc");529 return z.bitblock;530 }531 532 533 static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) {534 BitBlock_int64 rslt, x, y;535 x.bitblock = strm1;536 y.bitblock = strm2;537 __asm__ __volatile__ ("add %[cq], %[cq]\n\t"538 "adc %[e1], %[z1]\n\t"539 "adc %[e2], %[z2]\n\t"540 "adc $0, %[cq]\n\t"541 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ)542 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),543 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]),544 "[cq]" (carryQ)545 : "cc");546 return rslt.bitblock;547 }548 549 static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) {550 BitBlock_int64 rslt, x, y;551 x.bitblock = strm1;552 y.bitblock = strm2;553 __asm__ __volatile__ ("add %[cq], %[cq]\n\t"554 "add %[e1], %[z1]\n\t"555 "adc %[e2], %[z2]\n\t"556 "adc $0, %[cq]\n\t"557 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ)558 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),559 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]),560 "[cq]" (carryQ)561 : "cc");562 return rslt.bitblock;563 }564 565 static inline BitBlock BitBlock_add_ci(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) {566 BitBlock_int64 rslt, x, y;567 x.bitblock = strm1;568 y.bitblock = strm2;569 __asm__ __volatile__ ("add %[cq], %[cq]\n\t"570 "adc %[e1], %[z1]\n\t"571 "adc %[e2], %[z2]\n\t"572 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ)573 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),574 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]),575 "[cq]" (carryQ)576 : "cc");577 return rslt.bitblock;578 }579 580 static inline BitBlock BitBlock_add(BitBlock strm1, BitBlock strm2) {581 BitBlock_int64 rslt, x, y;582 x.bitblock = strm1;583 y.bitblock = strm2;584 __asm__ __volatile__ ("add %[e1], %[z1]\n\t"585 "adc %[e2], %[z2]\n\t"586 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1])587 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),588 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1])589 : "cc");590 return rslt.bitblock;591 }592 593 594 595 static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) {596 BitBlock_int64 rslt, x, y;597 x.bitblock = strm1;598 y.bitblock = strm2;599 __asm__ __volatile__ ("add %[cq], %[cq]\n\t"600 "sbb %[e1], %[z1]\n\t"601 "sbb %[e2], %[z2]\n\t"602 "adc $0, %[cq]\n\t"603 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ)604 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),605 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]),606 "[cq]" (carryQ)607 : "cc");608 return rslt.bitblock;609 }610 611 612 static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) {613 BitBlock_int64 rslt, x, y;614 x.bitblock = strm1;615 y.bitblock = strm2;616 __asm__ __volatile__ ("add %[cq], %[cq]\n\t"617 "sub %[e1], %[z1]\n\t"618 "sbb %[e2], %[z2]\n\t"619 "adc $0, %[cq]\n\t"620 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ)621 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),622 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]),623 "[cq]" (carryQ)624 : "cc");625 return rslt.bitblock;626 }627 628 629 static inline BitBlock BitBlock_sub_ci(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) {630 BitBlock_int64 rslt, x, y;631 x.bitblock = strm1;632 y.bitblock = strm2;633 __asm__ __volatile__ ("add %[cq], %[cq]\n\t"634 "sbb %[e1], %[z1]\n\t"635 "sbb %[e2], %[z2]\n\t"636 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ)637 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),638 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]),639 "[cq]" (carryQ)640 : "cc");641 return rslt.bitblock;642 }643 644 645 static inline BitBlock BitBlock_sub(BitBlock strm1, BitBlock strm2) {646 BitBlock_int64 rslt, x, y;647 x.bitblock = strm1;648 y.bitblock = strm2;649 __asm__ __volatile__ ("sub %[e1], %[z1]\n\t"650 "sbb %[e2], %[z2]\n\t"651 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1])652 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]),653 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1])654 : "cc");655 return rslt.bitblock;656 }657 658 659 660 static inline bool CarryTest(CarryQtype & cq, const int carryno, const int carry_count) {661 // print_general_register_64("cq", cq);662 uint64_t carryQ_top_N_mask = ~(0xFFFFFFFFFFFFFFFFULL >> carry_count);663 return (cq & carryQ_top_N_mask) != 0;664 }665 666 static inline void CarryDequeueEnqueue(CarryQtype & cq, const int carryno, const int carry_count) {667 // Given carryin queue with carry_count carries starting from carryno are 0,668 // ensure that the carryout queue has carry_count carries starting from carryno set to 0.669 cq <<= carry_count;670 }671 672 static inline void CarryCombine(CarryQtype & cq, CarryQtype & local_cq, const int carryno, const int carry_count) {673 cq = local_cq;674 }675 676 static inline void CarryQ_Adjust(CarryQtype & cq, int total_carries) {677 // Adjust the carryQ so that carries enqueued are readied for dequeiing.678 cq <<= (64total_carries);679 }680 681 682 #endif683 684 #ifdef CARRY_Q685 static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) {686 return simd_andc(BitBlock_add_ci_co(markers0, charclass, cq, carryno), charclass);687 }688 689 static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) {690 return simd_andc(BitBlock_add_co(markers0, charclass, cq, carryno), charclass);691 }692 693 static inline BitBlock BitBlock_scanthru_ci(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) {694 return simd_andc(BitBlock_add_ci(markers0, charclass, cq, carryno), charclass);695 }696 697 static inline BitBlock BitBlock_scanthru(BitBlock markers0, BitBlock charclass) {698 return simd_andc(BitBlock_add(markers0, charclass), charclass);699 }700 701 static inline BitBlock BitBlock_scanto_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) {702 return simd_and(BitBlock_add_ci_co(markers0, simd_not(charclass), cq, carryno), charclass);703 }704 705 static inline BitBlock BitBlock_scanto_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) {706 return simd_and(BitBlock_add_co(markers0, simd_not(charclass), cq, carryno), charclass);707 }708 709 static inline BitBlock BitBlock_scanto_ci(BitBlock markers0, BitBlock charclass, BitBlock EOF_mask, CarryQtype & cq, const int carryno) {710 BitBlock scanclass = simd_andc(EOF_mask, charclass);711 return simd_andc(BitBlock_add_ci(markers0, scanclass, cq, carryno), scanclass);712 }713 714 static inline BitBlock BitBlock_scanto(BitBlock markers0, BitBlock charclass, BitBlock EOF_mask) {715 BitBlock scanclass = simd_andc(EOF_mask, charclass);716 return simd_andc(BitBlock_add(markers0, scanclass), scanclass);717 }718 719 720 #endif721 722 #endif723
Note: See TracChangeset
for help on using the changeset viewer.