Changeset 808 for trunk


Ignore:
Timestamp:
Dec 11, 2010, 5:13:55 AM (9 years ago)
Author:
cameron
Message:

CarryQueue implementation ADC64_CARRY_Q

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/carryQ.h

    r784 r808  
    55// November 29, 2010 - first version without actual queueing.
    66//
    7 
    8 #ifndef SIMD_CARRY_Q
     7#ifdef SIMD_CARRY_Q
     8#define CARRY_Q
     9#endif
     10#ifdef ADC64_CARRY_Q
     11#define CARRY_Q
     12#endif
     13
     14
     15#ifndef CARRY_Q
    916#include "block_carry.h"
    1017
     
    249256
    250257#endif
     258
     259#ifdef ADC64_CARRY_Q
     260
     261//
     262// CarryQueue implementation using 64-bit integer queues.
     263// A single 64-bit integer holds both the input and output
     264// carries, with bits moving right-to-left.   Thus the
     265// high bit in the queue is always the next carry to be
     266// dequeued; a newly enqueued carry is always inserted as
     267// the low bit.
     268//
     269// The two typical operations for dequeueing and enqueueing
     270// carries from/to a CarryQueue cq are the following.
     271// 1.  Dequeueing:  add(cq, cq)
     272//     The high carry bit is dequeued and sets the processor
     273//     carry flag to be used as a carry-in variable in the
     274//     following bitblock operation.   This also shifts cq
     275//     left one position, making room for enqueuing a new carry.
     276// 2.  Enqueueing:  adc($0, cq)
     277//     The carry out value of an operation as recorded in the
     278//     processor carry flag is enqueued by adding it in to the
     279//     low bit position of cq (this bit will have been cleared
     280//     by the dequeue operation).
     281
     282#define CarryQtype uint64_t
     283
     284#define CarryDeclare(name, count)\
     285CarryQtype name
     286
     287#define CarryInit(name, count)\
     288name = 0
     289
     290typedef union {SIMD_type bitblock; uint64_t int64[2];} BitBlock_int64;
     291
     292#define double_int64_adc_ci_co(x1, x2, y1, y2, rslt1, rslt2, carryQ) \
     293   __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \
     294         "adc %[e1], %[z1]\n\t" \
     295         "adc %[e2], %[z2]\n\t" \
     296         "adc $0, %[cq]\n\t" \
     297     : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \
     298         : "[z1]" (x1), "[z2]" (x2), \
     299           [e1] "r" (y1), [e2] "r" (y2), \
     300           "[cq]" (carryQ) \
     301         : "cc")
     302
     303
     304#define carryQ_adc128_ci_co(first, second, carryQ, sum) \
     305do {\
     306  BitBlock_int64 rslt, x, y;\
     307  x.bitblock = first;\
     308  y.bitblock = second;\
     309  double_int64_adc_ci_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], rslt.int64[0], rslt.int64[1], carryQ);\
     310  sum = rslt.bitblock;\
     311} while(0)
     312
     313#define double_int64_adc_co(x1, x2, y1, y2, rslt1, rslt2, carryQ) \
     314   __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \
     315         "add %[e1], %[z1]\n\t" \
     316         "adc %[e2], %[z2]\n\t" \
     317         "adc $0, %[cq]\n\t" \
     318     : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \
     319         : "[z1]" (x1), "[z2]" (x2), \
     320           [e1] "r" (y1), [e2] "r" (y2), \
     321           "[cq]" (carryQ) \
     322         : "cc")
     323
     324
     325#define carryQ_adc128_co(first, second, carryQ, sum) \
     326do {\
     327  BitBlock_int64 rslt, x, y;\
     328  x.bitblock = first;\
     329  y.bitblock = second;\
     330  double_int64_adc_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], rslt.int64[0], rslt.int64[1], carryQ);\
     331  sum = rslt.bitblock;\
     332} while(0)
     333
     334
     335#define double_int64_sbb_ci_co(x1, x2, y1, y2, rslt1, rslt2, brwQ) \
     336  __asm__  __volatile__ ("add %[cq], %[cq]\n\t" \
     337        "sbb %[e1], %[z1]\n\t" \
     338        "sbb %[e2], %[z2]\n\t" \
     339        "adc $0, %[cq]\n\t" \
     340     : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (brwQ) \
     341         : "[z1]" (x1), "[z2]" (x2), \
     342           [e1] "r" (y1), [e2] "r" (y2), \
     343           "[cq]" (brwQ) \
     344         : "cc")
     345
     346#define carryQ_sbb128_ci_co(first, second, borrowQ, diff) \
     347do {\
     348  BitBlock_int64 rslt, x, y;\
     349  x.bitblock = first;\
     350  y.bitblock = second;\
     351  double_int64_sbb_ci_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], \
     352                   rslt.int64[0], rslt.int64[1], borrowQ);\
     353  diff = rslt.bitblock;\
     354} while(0)
     355
     356#define double_int64_sbb_co(x1, x2, y1, y2, rslt1, rslt2, brwQ) \
     357  __asm__  __volatile__ ("add %[cq], %[cq]\n\t" \
     358        "sub %[e1], %[z1]\n\t" \
     359        "sbb %[e2], %[z2]\n\t" \
     360        "adc $0, %[cq]\n\t" \
     361     : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (brwQ) \
     362         : "[z1]" (x1), "[z2]" (x2), \
     363           [e1] "r" (y1), [e2] "r" (y2), \
     364           "[cq]" (brwQ) \
     365         : "cc")
     366
     367#define carryQ_sbb128_co(first, second, borrowQ, diff) \
     368do {\
     369  BitBlock_int64 rslt, x, y;\
     370  x.bitblock = first;\
     371  y.bitblock = second;\
     372  double_int64_sbb_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], \
     373                   rslt.int64[0], rslt.int64[1], borrowQ);\
     374  diff = rslt.bitblock;\
     375} while(0)
     376
     377#define double_int64_advance_ci_co(x1, x2, rslt1, rslt2, carryQ) \
     378  __asm__  __volatile__ ("add %[cq], %[cq]\n\t" \
     379        "adc %[z1], %[z1]\n\t" \
     380        "adc %[z2], %[z2]\n\t" \
     381        "adc $0, %[cq]\n\t" \
     382         : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \
     383         : "[z1]" (x1), "[z2]" (x2), \
     384           "[cq]" (carryQ) \
     385         : "cc")
     386
     387#define carryQ_advance_with_carry_ci_co(cursor, carryQ, rslt)\
     388do {\
     389  BitBlock_int64 x, z;\
     390  x.bitblock = cursor;\
     391  double_int64_advance_ci_co(x.int64[0], x.int64[1], z.int64[0], z.int64[1], carryQ);\
     392  rslt = z.bitblock;\
     393} while(0)
     394
     395
     396#define double_int64_advance_co(x1, x2, rslt1, rslt2, carryQ) \
     397  __asm__  __volatile__ ("add %[cq], %[cq]\n\t" \
     398        "add %[z1], %[z1]\n\t" \
     399        "adc %[z2], %[z2]\n\t" \
     400        "adc $0, %[cq]\n\t" \
     401         : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \
     402         : "[z1]" (x1), "[z2]" (x2), \
     403           "[cq]" (carryQ) \
     404         : "cc")
     405
     406#define carryQ_advance_with_carry_co(cursor, carryQ, rslt)\
     407do {\
     408  BitBlock_int64 x, z;\
     409  x.bitblock = cursor;\
     410  double_int64_advance_co(x.int64[0], x.int64[1], z.int64[0], z.int64[1], carryQ);\
     411  rslt = z.bitblock;\
     412} while(0)
     413
     414
     415static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & cq, int carryno) {
     416        BitBlock rslt;
     417        carryQ_advance_with_carry_ci_co(strm, cq, rslt);
     418        return rslt;
     419}
     420
     421static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype & cq, int carryno) {
     422        BitBlock rslt;
     423        carryQ_advance_with_carry_co(strm, cq, rslt);
     424        return rslt;
     425}
     426
     427static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) {
     428        BitBlock sum;
     429        carryQ_adc128_ci_co(strm1, strm2, cq, sum);
     430        return sum;
     431}
     432
     433static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) {
     434        BitBlock sum;
     435        carryQ_adc128_co(strm1, strm2, cq, sum);
     436        return sum;
     437}
     438
     439static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) {
     440        BitBlock diff;
     441        carryQ_sbb128_ci_co(strm1, strm2, cq, diff);
     442        return diff;
     443}
     444
     445static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) {
     446        BitBlock diff;
     447        carryQ_sbb128_co(strm1, strm2, cq, diff);
     448        return diff;
     449}
     450
     451static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, int carryno) {
     452        BitBlock markers1;
     453        carryQ_adc128_ci_co(markers0, charclass, cq, markers1);
     454        return simd_andc(markers1, charclass);
     455}
     456
     457static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, int carryno) {
     458        BitBlock markers1;
     459        carryQ_adc128_co(markers0, charclass, cq, markers1);
     460        return simd_andc(markers1, charclass);
     461}
     462
     463static inline bool CarryTest(CarryQtype cq, int carryno, int carry_count) {
     464        print_general_register_64("cq", cq);
     465        uint64_t carryQ_top_N_mask = ~(0xFFFFFFFFFFFFFFFFULL >> carry_count);
     466        print_general_register_64("mask", carryQ_top_N_mask);
     467
     468        return (cq & carryQ_top_N_mask) != 0;
     469}
     470
     471static inline void CarryDequeueEnqueue(CarryQtype & cq, int carryno, int carry_count) {
     472        // Given carryin queue with carry_count carries starting from carryno are 0,
     473        // ensure that the carryout queue has carry_count carries starting from carryno set to 0.
     474        cq <<= carry_count;
     475}
     476
     477static inline void CarryCombine(CarryQtype & cq, CarryQtype local_cq, int carryno, int carry_count) {
     478        cq |= local_cq;
     479}
     480
     481static inline void CarryQ_Adjust(CarryQtype & cq, int total_carries) {
     482        // Adjust the carryQ so that carries enqueued are readied for dequeiing.
     483        cq <<= (64-total_carries);
     484}
     485
     486
     487#endif
Note: See TracChangeset for help on using the changeset viewer.