source: trunk/lib/carryQ.h @ 808

Last change on this file since 808 was 808, checked in by cameron, 9 years ago

CarryQueue implementation ADC64_CARRY_Q

File size: 15.7 KB
Line 
1//
2// carryQ.h
3// Robert D. Cameron
4// Dec. 5, 2010 - first queuing implementation
5// November 29, 2010 - first version without actual queueing.
6//
7#ifdef SIMD_CARRY_Q
8#define CARRY_Q
9#endif
10#ifdef ADC64_CARRY_Q
11#define CARRY_Q
12#endif
13
14
15#ifndef CARRY_Q
16#include "block_carry.h"
17
18
19#define CarryQtype CarryType *
20
21#define CarryDeclare(name, count)\
22CarryType name[count];\
23
24#define CarryInit(name, count)\
25for (int j=0; j < count; j++) name[j] = Carry0
26
27static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype cq, int carryno) {
28 BitBlock rslt;
29 advance_with_carry(strm, cq[carryno], rslt);
30 return rslt;
31}
32
33static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype cq, int carryno) {
34 BitBlock rslt;
35 cq[carryno] = Carry0;
36 advance_with_carry(strm, cq[carryno], rslt);
37 return rslt;
38}
39
40static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, int carryno) {
41 BitBlock sum;
42 adc128(strm1, strm2, cq[carryno], sum);
43 return sum;
44}
45
46static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, int carryno) {
47 BitBlock sum;
48 cq[carryno] = Carry0;
49 adc128(strm1, strm2, cq[carryno], sum);
50 return sum;
51}
52
53static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, int carryno) {
54 BitBlock diff;
55 sbb128(strm1, strm2, cq[carryno], diff);
56 return diff;
57}
58
59static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype cq, int carryno) {
60 BitBlock diff;
61 cq[carryno] = Carry0;
62 sbb128(strm1, strm2, cq[carryno], diff);
63 return diff;
64}
65
66static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, int carryno) {
67 BitBlock markers1;
68 adc128(markers0, charclass, cq[carryno], markers1);
69 return simd_andc(markers1, charclass);
70}
71
72static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype cq, int carryno) {
73 BitBlock markers1;
74 cq[carryno] = Carry0;
75 adc128(markers0, charclass, cq[carryno], markers1);
76 return simd_andc(markers1, charclass);
77}
78
79static inline bool CarryTest(CarryQtype cq, int carryno, int carry_count) {
80  CarryType c1 = cq[carryno];
81  int i;
82  for (i = carryno + 1; i < carryno + carry_count; i++) {
83    c1 = carry_or(c1, cq[i]);
84  }
85  return test_carry(c1);
86}
87
88static inline void CarryDequeueEnqueue(CarryQtype cq, int carryno, int carry_count) {
89  // Given carryin queue with carry_count carries starting from carryno are 0,
90  // ensure that the carryout queue has carry_count carries starting from carryno set to 0.
91  // Nothing to do when the queues are the same!
92  return;
93}
94
95static inline void CarryQ_Adjust(CarryQtype cq, int carry_count) {
96  // Adjust the carryQ so that carries enqueued are readied for dequeiing.
97  // Nothing to do with indexed queues.
98  return;
99}
100
101static inline void CarryCombine(CarryQtype cq, CarryQtype local_cq, int carryno, int carry_count) {
102  int i;
103  for (i = 0; i < carry_count; i++) {
104    cq[carryno+i] = carry_or(cq[carryno+i], local_cq[i]);
105  }
106}
107#endif
108#ifdef SIMD_CARRY_Q
109
110#define CarryQtype SIMD_type
111
112#define CarryDeclare(name, count)\
113  CarryQtype name
114
115#define CarryInit(name, count)\
116  name = simd_const_1(0)
117
118SIMD_type carryQ_ci_mask = sisd_from_int(1);
119SIMD_type carryQ_co_mask = sisd_slli(carryQ_ci_mask, 127);
120
121#define carryQ_adc128_ci_co(x, y, carryQ,  sum) \
122do{ \
123  SIMD_type gen = simd_and(x, y); \
124  SIMD_type prop = simd_or(x, y); \
125  SIMD_type partial = simd_add_64(simd_add_64(x, y), simd_and(carryQ, carryQ_ci_mask)); \
126  carryQ = simd_srli_64(carryQ, 1); \
127  SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64); \
128  sum = simd_add_64(c1, partial); \
129  carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_andc(prop, sum)), carryQ_co_mask)); \
130} while(0)
131
132#define carryQ_adc128_co(x, y, carryQ,  sum) \
133do{ \
134  SIMD_type gen = simd_and(x, y); \
135  SIMD_type prop = simd_or(x, y); \
136  SIMD_type partial = simd_add_64(x, y); \
137  carryQ = simd_srli_64(carryQ, 1); \
138  SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64); \
139  sum = simd_add_64(c1, partial); \
140  carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_andc(prop, sum)), carryQ_co_mask)); \
141} while(0)
142
143#define carryQ_sbb128_ci_co(x, y, carryQ, difference) \
144do {\
145  SIMD_type gen = simd_andc(y, x); \
146  SIMD_type prop = simd_not(simd_xor(x, y)); \
147  SIMD_type partial = simd_sub_64(simd_sub_64(x, y), simd_and(carryQ, carryQ_ci_mask)); \
148  carryQ = simd_srli_64(carryQ, 1); \
149  SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64); \
150  difference = simd_sub_64(partial, b1); \
151  carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_and(prop, difference)), carryQ_co_mask)); \
152}while(0)
153
154#define carryQ_sbb128_co(x, y, carryQ, difference) \
155do {\
156  SIMD_type gen = simd_andc(y, x); \
157  SIMD_type prop = simd_not(simd_xor(x, y)); \
158  SIMD_type partial = simd_sub_64(x, y); \
159  carryQ = simd_srli_64(carryQ, 1); \
160  SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64); \
161  difference = simd_sub_64(partial, b1); \
162  carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_and(prop, difference)), carryQ_co_mask)); \
163}while(0)
164
165#define carryQ_advance_with_carry_ci_co(cursor, carryQ, rslt)\
166do {\
167  SIMD_type carry_out = simd_and(cursor, carryQ_co_mask);\
168  SIMD_type carry_in = simd_and(carryQ, carryQ_ci_mask);\
169  carryQ = simd_or(simd_srli_64(carryQ, 1), carry_out); \
170  SIMD_type shift_out = simd_srli_64(cursor, 63);\
171  SIMD_type low_bits = simd_mergel_64(shift_out, carry_in);\
172  rslt = simd_or(simd_add_64(cursor, cursor), low_bits);\
173} while(0)
174
175#define carryQ_advance_with_carry_co(cursor, carryQ, rslt)\
176do {\
177  SIMD_type carry_out = simd_and(cursor, carryQ_co_mask);\
178  carryQ = simd_or(simd_srli_64(carryQ, 1), carry_out); \
179  SIMD_type shift_out = simd_srli_64(cursor, 63);\
180  SIMD_type low_bits = simd_mergel_64(shift_out, simd_const_1(0));\
181  rslt = simd_or(simd_add_64(cursor, cursor), low_bits);\
182} while(0)
183
184static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & cq, int carryno) {
185 BitBlock rslt;
186 carryQ_advance_with_carry_ci_co(strm, cq, rslt);
187 return rslt;
188}
189
190static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype & cq, int carryno) {
191 BitBlock rslt;
192 carryQ_advance_with_carry_co(strm, cq, rslt);
193 return rslt;
194}
195
196static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) {
197 BitBlock sum;
198 carryQ_adc128_ci_co(strm1, strm2, cq, sum);
199 return sum;
200}
201
202static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) {
203 BitBlock sum;
204 carryQ_adc128_co(strm1, strm2, cq, sum);
205 return sum;
206}
207
208static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) {
209 BitBlock diff;
210 carryQ_sbb128_ci_co(strm1, strm2, cq, diff);
211 return diff;
212}
213
214static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) {
215 BitBlock diff;
216 carryQ_sbb128_co(strm1, strm2, cq, diff);
217 return diff;
218}
219
220static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, int carryno) {
221 BitBlock markers1;
222 carryQ_adc128_ci_co(markers0, charclass, cq, markers1);
223 return simd_andc(markers1, charclass);
224}
225
226static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, int carryno) {
227 BitBlock markers1;
228 carryQ_adc128_co(markers0, charclass, cq, markers1);
229 return simd_andc(markers1, charclass);
230}
231
232typedef union {SIMD_type bitblock; uint64_t int64[2];} BitBlock_int64;
233
234static inline bool CarryTest(CarryQtype cq, int carryno, int carry_count) {
235  BitBlock_int64 t;
236  t.bitblock = cq;
237  uint64_t carryQ_top_N_mask = ((1 << carry_count) -1);
238  return t.int64[0] & carryQ_top_N_mask;
239}
240
241static inline void CarryDequeueEnqueue(CarryQtype & cq, int carryno, int carry_count) {
242  // Given carryin queue with carry_count carries starting from carryno are 0,
243  // ensure that the carryout queue has carry_count carries starting from carryno set to 0.
244  cq = sisd_srli(cq, carry_count);
245}
246
247static inline void CarryCombine(CarryQtype & cq, CarryQtype local_cq, int carryno, int carry_count) {
248  cq = simd_or(cq, local_cq);
249}
250
251static inline void CarryQ_Adjust(CarryQtype & cq, int carry_count) {
252  // Adjust the carryQ so that carries enqueued are readied for dequeiing.
253  cq = sisd_srli(cq, (128-carry_count));
254}
255
256
257#endif
258
259#ifdef ADC64_CARRY_Q
260
261//
262// CarryQueue implementation using 64-bit integer queues.
263// A single 64-bit integer holds both the input and output
264// carries, with bits moving right-to-left.   Thus the
265// high bit in the queue is always the next carry to be
266// dequeued; a newly enqueued carry is always inserted as
267// the low bit.
268//
269// The two typical operations for dequeueing and enqueueing
270// carryies from/to a CarryQueue cq are the following.
271// 1.  Dequeueing:  add(cq, cq)
272//     The high carry bit is dequeued and sets the processor
273//     carry flag to be used as a carry-in variable in the
274//     following bitblock operation.   This also shifts cq
275//     right one position, making room for enqueuing a new carry.
276// 2.  Enqueueing:  adc($0, cq)
277//     The carry out value of an operation as recorded in the
278//     processor carry flag is enqueued by adding it in to the
279//     low bit position of cq (this bit will have been cleared
280//     by the dequeue operation.
281
282#define CarryQtype uint64_t
283
284#define CarryDeclare(name, count)\
285CarryQtype name
286
287#define CarryInit(name, count)\
288name = 0
289
290typedef union {SIMD_type bitblock; uint64_t int64[2];} BitBlock_int64;
291
292#define double_int64_adc_ci_co(x1, x2, y1, y2, rslt1, rslt2, carryQ) \
293   __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \
294         "adc %[e1], %[z1]\n\t" \
295         "adc %[e2], %[z2]\n\t" \
296         "adc $0, %[cq]\n\t" \
297     : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \
298         : "[z1]" (x1), "[z2]" (x2), \
299           [e1] "r" (y1), [e2] "r" (y2), \
300           "[cq]" (carryQ) \
301         : "cc")
302
303
304#define carryQ_adc128_ci_co(first, second, carryQ, sum) \
305do {\
306  BitBlock_int64 rslt, x, y;\
307  x.bitblock = first;\
308  y.bitblock = second;\
309  double_int64_adc_ci_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], rslt.int64[0], rslt.int64[1], carryQ);\
310  sum = rslt.bitblock;\
311} while(0)
312
313#define double_int64_adc_co(x1, x2, y1, y2, rslt1, rslt2, carryQ) \
314   __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \
315         "add %[e1], %[z1]\n\t" \
316         "adc %[e2], %[z2]\n\t" \
317         "adc $0, %[cq]\n\t" \
318     : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \
319         : "[z1]" (x1), "[z2]" (x2), \
320           [e1] "r" (y1), [e2] "r" (y2), \
321           "[cq]" (carryQ) \
322         : "cc")
323
324
325#define carryQ_adc128_co(first, second, carryQ, sum) \
326do {\
327  BitBlock_int64 rslt, x, y;\
328  x.bitblock = first;\
329  y.bitblock = second;\
330  double_int64_adc_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], rslt.int64[0], rslt.int64[1], carryQ);\
331  sum = rslt.bitblock;\
332} while(0)
333
334
335#define double_int64_sbb_ci_co(x1, x2, y1, y2, rslt1, rslt2, brwQ) \
336  __asm__  __volatile__ ("add %[cq], %[cq]\n\t" \
337        "sbb %[e1], %[z1]\n\t" \
338        "sbb %[e2], %[z2]\n\t" \
339        "adc $0, %[cq]\n\t" \
340     : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (brwQ) \
341         : "[z1]" (x1), "[z2]" (x2), \
342           [e1] "r" (y1), [e2] "r" (y2), \
343           "[cq]" (brwQ) \
344         : "cc")
345
346#define carryQ_sbb128_ci_co(first, second, borrowQ, diff) \
347do {\
348  BitBlock_int64 rslt, x, y;\
349  x.bitblock = first;\
350  y.bitblock = second;\
351  double_int64_sbb_ci_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], \
352                   rslt.int64[0], rslt.int64[1], borrowQ);\
353  diff = rslt.bitblock;\
354} while(0)
355
356#define double_int64_sbb_co(x1, x2, y1, y2, rslt1, rslt2, brwQ) \
357  __asm__  __volatile__ ("add %[cq], %[cq]\n\t" \
358        "sub %[e1], %[z1]\n\t" \
359        "sbb %[e2], %[z2]\n\t" \
360        "adc $0, %[cq]\n\t" \
361     : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (brwQ) \
362         : "[z1]" (x1), "[z2]" (x2), \
363           [e1] "r" (y1), [e2] "r" (y2), \
364           "[cq]" (brwQ) \
365         : "cc")
366
367#define carryQ_sbb128_co(first, second, borrowQ, diff) \
368do {\
369  BitBlock_int64 rslt, x, y;\
370  x.bitblock = first;\
371  y.bitblock = second;\
372  double_int64_sbb_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], \
373                   rslt.int64[0], rslt.int64[1], borrowQ);\
374  diff = rslt.bitblock;\
375} while(0)
376
377#define double_int64_advance_ci_co(x1, x2, rslt1, rslt2, carryQ) \
378  __asm__  __volatile__ ("add %[cq], %[cq]\n\t" \
379        "adc %[z1], %[z1]\n\t" \
380        "adc %[z2], %[z2]\n\t" \
381        "adc $0, %[cq]\n\t" \
382         : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \
383         : "[z1]" (x1), "[z2]" (x2), \
384           "[cq]" (carryQ) \
385         : "cc")
386
387#define carryQ_advance_with_carry_ci_co(cursor, carryQ, rslt)\
388do {\
389  BitBlock_int64 x, z;\
390  x.bitblock = cursor;\
391  double_int64_advance_ci_co(x.int64[0], x.int64[1], z.int64[0], z.int64[1], carryQ);\
392  rslt = z.bitblock;\
393} while(0)
394
395
396#define double_int64_advance_co(x1, x2, rslt1, rslt2, carryQ) \
397  __asm__  __volatile__ ("add %[cq], %[cq]\n\t" \
398        "add %[z1], %[z1]\n\t" \
399        "adc %[z2], %[z2]\n\t" \
400        "adc $0, %[cq]\n\t" \
401         : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \
402         : "[z1]" (x1), "[z2]" (x2), \
403           "[cq]" (carryQ) \
404         : "cc")
405
406#define carryQ_advance_with_carry_co(cursor, carryQ, rslt)\
407do {\
408  BitBlock_int64 x, z;\
409  x.bitblock = cursor;\
410  double_int64_advance_co(x.int64[0], x.int64[1], z.int64[0], z.int64[1], carryQ);\
411  rslt = z.bitblock;\
412} while(0)
413
414
415static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & cq, int carryno) {
416        BitBlock rslt;
417        carryQ_advance_with_carry_ci_co(strm, cq, rslt);
418        return rslt;
419}
420
421static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype & cq, int carryno) {
422        BitBlock rslt;
423        carryQ_advance_with_carry_co(strm, cq, rslt);
424        return rslt;
425}
426
427static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) {
428        BitBlock sum;
429        carryQ_adc128_ci_co(strm1, strm2, cq, sum);
430        return sum;
431}
432
433static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) {
434        BitBlock sum;
435        carryQ_adc128_co(strm1, strm2, cq, sum);
436        return sum;
437}
438
439static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) {
440        BitBlock diff;
441        carryQ_sbb128_ci_co(strm1, strm2, cq, diff);
442        return diff;
443}
444
445static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) {
446        BitBlock diff;
447        carryQ_sbb128_co(strm1, strm2, cq, diff);
448        return diff;
449}
450
451static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, int carryno) {
452        BitBlock markers1;
453        carryQ_adc128_ci_co(markers0, charclass, cq, markers1);
454        return simd_andc(markers1, charclass);
455}
456
457static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, int carryno) {
458        BitBlock markers1;
459        carryQ_adc128_co(markers0, charclass, cq, markers1);
460        return simd_andc(markers1, charclass);
461}
462
463static inline bool CarryTest(CarryQtype cq, int carryno, int carry_count) {
464        print_general_register_64("cq", cq);
465        uint64_t carryQ_top_N_mask = ~(0xFFFFFFFFFFFFFFFFULL >> carry_count);
466        print_general_register_64("mask", carryQ_top_N_mask);
467
468        return (cq & carryQ_top_N_mask) != 0;
469}
470
471static inline void CarryDequeueEnqueue(CarryQtype & cq, int carryno, int carry_count) {
472        // Given carryin queue with carry_count carries starting from carryno are 0,
473        // ensure that the carryout queue has carry_count carries starting from carryno set to 0.
474        cq <<= carry_count;
475}
476
477static inline void CarryCombine(CarryQtype & cq, CarryQtype local_cq, int carryno, int carry_count) {
478        cq |= local_cq;
479}
480
481static inline void CarryQ_Adjust(CarryQtype & cq, int total_carries) {
482        // Adjust the carryQ so that carries enqueued are readied for dequeiing.
483        cq <<= (64-total_carries);
484}
485
486
487#endif
Note: See TracBrowser for help on using the repository browser.