Changeset 808
 Timestamp:
 Dec 11, 2010, 5:13:55 AM (8 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/carryQ.h
r784 r808 5 5 // November 29, 2010 - first version without actual queueing. 6 6 // 7 8 #ifndef SIMD_CARRY_Q 7 #ifdef SIMD_CARRY_Q 8 #define CARRY_Q 9 #endif 10 #ifdef ADC64_CARRY_Q 11 #define CARRY_Q 12 #endif 13 14 15 #ifndef CARRY_Q 9 16 #include "block_carry.h" 10 17 … … 249 256 250 257 #endif 258 259 #ifdef ADC64_CARRY_Q 260 261 // 262 // CarryQueue implementation using 64-bit integer queues. 263 // A single 64-bit integer holds both the input and output 264 // carries, with bits moving right-to-left. Thus the 265 // high bit in the queue is always the next carry to be 266 // dequeued; a newly enqueued carry is always inserted as 267 // the low bit. 268 // 269 // The two typical operations for dequeueing and enqueueing 270 // carries from/to a CarryQueue cq are the following. 271 // 1. Dequeueing: add(cq, cq) 272 // The high carry bit is dequeued and sets the processor 273 // carry flag to be used as a carry-in variable in the 274 // following bitblock operation. This also shifts cq 275 // left one position, making room for enqueuing a new carry. 276 // 2. Enqueueing: adc($0, cq) 277 // The carry out value of an operation as recorded in the 278 // processor carry flag is enqueued by adding it in to the 279 // low bit position of cq (this bit will have been cleared 280 // by the dequeue operation).
281 282 #define CarryQtype uint64_t 283 284 #define CarryDeclare(name, count)\ 285 CarryQtype name 286 287 #define CarryInit(name, count)\ 288 name = 0 289 290 typedef union {SIMD_type bitblock; uint64_t int64[2];} BitBlock_int64; 291 292 #define double_int64_adc_ci_co(x1, x2, y1, y2, rslt1, rslt2, carryQ) \ 293 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \ 294 "adc %[e1], %[z1]\n\t" \ 295 "adc %[e2], %[z2]\n\t" \ 296 "adc $0, %[cq]\n\t" \ 297 : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \ 298 : "[z1]" (x1), "[z2]" (x2), \ 299 [e1] "r" (y1), [e2] "r" (y2), \ 300 "[cq]" (carryQ) \ 301 : "cc") 302 303 304 #define carryQ_adc128_ci_co(first, second, carryQ, sum) \ 305 do {\ 306 BitBlock_int64 rslt, x, y;\ 307 x.bitblock = first;\ 308 y.bitblock = second;\ 309 double_int64_adc_ci_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], rslt.int64[0], rslt.int64[1], carryQ);\ 310 sum = rslt.bitblock;\ 311 } while(0) 312 313 #define double_int64_adc_co(x1, x2, y1, y2, rslt1, rslt2, carryQ) \ 314 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \ 315 "add %[e1], %[z1]\n\t" \ 316 "adc %[e2], %[z2]\n\t" \ 317 "adc $0, %[cq]\n\t" \ 318 : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \ 319 : "[z1]" (x1), "[z2]" (x2), \ 320 [e1] "r" (y1), [e2] "r" (y2), \ 321 "[cq]" (carryQ) \ 322 : "cc") 323 324 325 #define carryQ_adc128_co(first, second, carryQ, sum) \ 326 do {\ 327 BitBlock_int64 rslt, x, y;\ 328 x.bitblock = first;\ 329 y.bitblock = second;\ 330 double_int64_adc_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], rslt.int64[0], rslt.int64[1], carryQ);\ 331 sum = rslt.bitblock;\ 332 } while(0) 333 334 335 #define double_int64_sbb_ci_co(x1, x2, y1, y2, rslt1, rslt2, brwQ) \ 336 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \ 337 "sbb %[e1], %[z1]\n\t" \ 338 "sbb %[e2], %[z2]\n\t" \ 339 "adc $0, %[cq]\n\t" \ 340 : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (brwQ) \ 341 : "[z1]" (x1), "[z2]" (x2), \ 342 [e1] "r" (y1), [e2] "r" (y2), \ 343 "[cq]" (brwQ) \ 344 
: "cc") 345 346 #define carryQ_sbb128_ci_co(first, second, borrowQ, diff) \ 347 do {\ 348 BitBlock_int64 rslt, x, y;\ 349 x.bitblock = first;\ 350 y.bitblock = second;\ 351 double_int64_sbb_ci_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], \ 352 rslt.int64[0], rslt.int64[1], borrowQ);\ 353 diff = rslt.bitblock;\ 354 } while(0) 355 356 #define double_int64_sbb_co(x1, x2, y1, y2, rslt1, rslt2, brwQ) \ 357 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \ 358 "sub %[e1], %[z1]\n\t" \ 359 "sbb %[e2], %[z2]\n\t" \ 360 "adc $0, %[cq]\n\t" \ 361 : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (brwQ) \ 362 : "[z1]" (x1), "[z2]" (x2), \ 363 [e1] "r" (y1), [e2] "r" (y2), \ 364 "[cq]" (brwQ) \ 365 : "cc") 366 367 #define carryQ_sbb128_co(first, second, borrowQ, diff) \ 368 do {\ 369 BitBlock_int64 rslt, x, y;\ 370 x.bitblock = first;\ 371 y.bitblock = second;\ 372 double_int64_sbb_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], \ 373 rslt.int64[0], rslt.int64[1], borrowQ);\ 374 diff = rslt.bitblock;\ 375 } while(0) 376 377 #define double_int64_advance_ci_co(x1, x2, rslt1, rslt2, carryQ) \ 378 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \ 379 "adc %[z1], %[z1]\n\t" \ 380 "adc %[z2], %[z2]\n\t" \ 381 "adc $0, %[cq]\n\t" \ 382 : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \ 383 : "[z1]" (x1), "[z2]" (x2), \ 384 "[cq]" (carryQ) \ 385 : "cc") 386 387 #define carryQ_advance_with_carry_ci_co(cursor, carryQ, rslt)\ 388 do {\ 389 BitBlock_int64 x, z;\ 390 x.bitblock = cursor;\ 391 double_int64_advance_ci_co(x.int64[0], x.int64[1], z.int64[0], z.int64[1], carryQ);\ 392 rslt = z.bitblock;\ 393 } while(0) 394 395 396 #define double_int64_advance_co(x1, x2, rslt1, rslt2, carryQ) \ 397 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \ 398 "add %[z1], %[z1]\n\t" \ 399 "adc %[z2], %[z2]\n\t" \ 400 "adc $0, %[cq]\n\t" \ 401 : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \ 402 : "[z1]" (x1), "[z2]" (x2), \ 403 "[cq]" (carryQ) \ 404 : "cc") 405 406 #define 
carryQ_advance_with_carry_co(cursor, carryQ, rslt)\ 407 do {\ 408 BitBlock_int64 x, z;\ 409 x.bitblock = cursor;\ 410 double_int64_advance_co(x.int64[0], x.int64[1], z.int64[0], z.int64[1], carryQ);\ 411 rslt = z.bitblock;\ 412 } while(0) 413 414 415 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & cq, int carryno) { 416 BitBlock rslt; 417 carryQ_advance_with_carry_ci_co(strm, cq, rslt); 418 return rslt; 419 } 420 421 static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype & cq, int carryno) { 422 BitBlock rslt; 423 carryQ_advance_with_carry_co(strm, cq, rslt); 424 return rslt; 425 } 426 427 static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) { 428 BitBlock sum; 429 carryQ_adc128_ci_co(strm1, strm2, cq, sum); 430 return sum; 431 } 432 433 static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) { 434 BitBlock sum; 435 carryQ_adc128_co(strm1, strm2, cq, sum); 436 return sum; 437 } 438 439 static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) { 440 BitBlock diff; 441 carryQ_sbb128_ci_co(strm1, strm2, cq, diff); 442 return diff; 443 } 444 445 static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, int carryno) { 446 BitBlock diff; 447 carryQ_sbb128_co(strm1, strm2, cq, diff); 448 return diff; 449 } 450 451 static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, int carryno) { 452 BitBlock markers1; 453 carryQ_adc128_ci_co(markers0, charclass, cq, markers1); 454 return simd_andc(markers1, charclass); 455 } 456 457 static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, int carryno) { 458 BitBlock markers1; 459 carryQ_adc128_co(markers0, charclass, cq, markers1); 460 return simd_andc(markers1, charclass); 461 } 462 463 static inline bool CarryTest(CarryQtype cq, int 
carryno, int carry_count) { 464 print_general_register_64("cq", cq); 465 uint64_t carryQ_top_N_mask = ~(0xFFFFFFFFFFFFFFFFULL >> carry_count); 466 print_general_register_64("mask", carryQ_top_N_mask); 467 468 return (cq & carryQ_top_N_mask) != 0; 469 } 470 471 static inline void CarryDequeueEnqueue(CarryQtype & cq, int carryno, int carry_count) { 472 // Given a carry-in queue in which the carry_count carries starting from carryno are 0, 473 // ensure that the carry-out queue has carry_count carries starting from carryno set to 0. 474 cq <<= carry_count; 475 } 476 477 static inline void CarryCombine(CarryQtype & cq, CarryQtype local_cq, int carryno, int carry_count) { 478 cq = local_cq; 479 } 480 481 static inline void CarryQ_Adjust(CarryQtype & cq, int total_carries) { 482 // Adjust the carryQ so that carries enqueued are readied for dequeueing. 483 cq <<= (64-total_carries); 484 } 485 486 487 #endif
Note: See TracChangeset
for help on using the changeset viewer.