Changeset 4792
 Timestamp:
 Sep 25, 2015, 8:55:01 AM (3 years ago)
 Location:
 icGREP/icgrepdevel/icgrep
 Files:

 2 edited
Legend:
 Unmodified
 Added
 Removed

icGREP/icgrepdevel/icgrep/do_grep.cpp
r4790 r4792 267 267 //Final Partial Block (may be empty, but there could be carries pending). 268 268 269 269 270 const auto EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE - remaining)); 270 271 271 s2p_do_final_block(reinterpret_cast<BytePack *>(mFileBuffer + (blk * BLOCK_SIZE) + (segment * SEGMENT_SIZE)), basis_bits, EOF_mask); 272 if (remaining == 0) { // No data, we may be at a page boundary. Do not access memory. 273 basis_bits.bit_0 = simd<1>::constant<0>(); 274 basis_bits.bit_1 = simd<1>::constant<0>(); 275 basis_bits.bit_2 = simd<1>::constant<0>(); 276 basis_bits.bit_3 = simd<1>::constant<0>(); 277 basis_bits.bit_4 = simd<1>::constant<0>(); 278 basis_bits.bit_5 = simd<1>::constant<0>(); 279 basis_bits.bit_6 = simd<1>::constant<0>(); 280 basis_bits.bit_7 = simd<1>::constant<0>(); 281 } 282 else { // At least 1 byte, so we are not at a page boundary yet, safe to access a full block. 283 s2p_do_final_block(reinterpret_cast<BytePack *>(mFileBuffer + (blk * BLOCK_SIZE) + (segment * SEGMENT_SIZE)), basis_bits, EOF_mask); 284 } 272 285 273 286 if (finalLineIsUnterminated()) { 
icGREP/icgrepdevel/icgrep/pablo/carry_manager.cpp
r4750 r4792 319 319 320 320 321 // Use field size 32 for BLOCK_SIZE 256, so that signmasks are i8. 322 #if (BLOCK_SIZE==256) 323 //#define PARALLEL_LONG_ADD 324 #define PARALLEL_LONG_ADD_DIGIT_SIZE 32 325 #endif 326 321 327 /* Methods for getting and setting individual carry values. */ 322 328 … … 334 340 void CarryManager::setCarryOpCarryOut(unsigned localIndex, Value * carry_out_strm) { 335 341 unsigned posn = carryOpPosition(localIndex); 342 #ifndef PARALLEL_LONG_ADD 336 343 if (mITEMS_PER_PACK > 1) {// #ifdef PACKING 337 344 extractAndSaveCarryOutBits(carry_out_strm, posn, 1); … … 344 351 } 345 352 } 346 } 347 353 #else 354 if (mITEMS_PER_PACK > 1) {// #ifdef PACKING 355 // Carry is at low bit position 356 unsigned packIndex = posn / mPACK_SIZE; 357 unsigned packOffset = posn % mPACK_SIZE; 358 Value * field = mBuilder->CreateZExt(carry_out_strm, mBuilder->getIntNTy(mPACK_SIZE)); 359 if (packOffset != 0) { 360 field = mBuilder->CreateShl(field, mBuilder->getInt64(packOffset)); 361 } 362 if (mCarryOutPack[packIndex] == nullptr) { 363 mCarryOutPack[packIndex] = field; 364 } 365 else { 366 mCarryOutPack[packIndex] = mBuilder->CreateOr(mCarryOutPack[packIndex], field); 367 } 368 } 369 else { 370 Value * carry_bit = mBuilder->CreateZExt(carry_out_strm, mBuilder->getIntNTy(BLOCK_SIZE)); 371 mCarryOutPack[posn] = mBuilder->CreateBitCast(carry_bit, mBitBlockType); 372 if (mCarryInfo->getWhileDepth() == 0) { 373 storeCarryPack(posn); 374 } 375 } 376 377 378 #endif 379 } 380 381 382 348 383 Value* CarryManager::genShiftLeft64(Value* e) { 349 384 Value* i128_val = mBuilder->CreateBitCast(e, mBuilder->getIntNTy(BLOCK_SIZE)); … … 351 386 } 352 387 388 Value* MatchStar(IRBuilder<> * b, Value * m, Value * c) { 389 return b->CreateOr(b->CreateXor(b->CreateAdd(b->CreateAnd(m, c), c), c), m); 390 } 391 353 392 Value * CarryManager::addCarryInCarryOut(int localIndex, Value* e1, Value* e2) { 354 393 #if (BLOCK_SIZE == 128) … … 365 404 setCarryOpCarryOut(localIndex, carry_out_strm); 366 405 return 
sum; 406 #elif (defined(PARALLEL_LONG_ADD)) 407 //BLOCK_SIZE == 256, there is no other implementation 408 Type * longAddVectorType = VectorType::get(mBuilder->getIntNTy(PARALLEL_LONG_ADD_DIGIT_SIZE), BLOCK_SIZE/PARALLEL_LONG_ADD_DIGIT_SIZE); 409 Type * longAddBitMaskIntegerType = mBuilder->getIntNTy(BLOCK_SIZE/PARALLEL_LONG_ADD_DIGIT_SIZE); 410 Type * longAddBitMaskVectorType = VectorType::get(mBuilder->getIntNTy(1), BLOCK_SIZE/PARALLEL_LONG_ADD_DIGIT_SIZE); 411 // double the mask size to allow room for carryout. 412 Type * longAddBitMaskManipulationType = mBuilder->getIntNTy(2 * BLOCK_SIZE/PARALLEL_LONG_ADD_DIGIT_SIZE); 413 Value * all_ones = Constant::getAllOnesValue(longAddVectorType); 414 Value * carryin = iBuilder->mvmd_extract(2 * BLOCK_SIZE/PARALLEL_LONG_ADD_DIGIT_SIZE, getCarryOpCarryIn(localIndex), 0); 415 Value * carrygen = mBuilder->CreateAnd(e1, e2, "carrygen"); 416 Value * carryprop = mBuilder->CreateOr(e1, e2, "carryprop"); 417 // Sum individual digits. 418 Value * digitsum = iBuilder->simd_add(PARALLEL_LONG_ADD_DIGIT_SIZE, e1, e2); 419 Value * digitcarry = mBuilder->CreateOr(carrygen, mBuilder->CreateAnd(carryprop, mBuilder->CreateNot(digitsum))); 420 Value * carry_mask = mBuilder->CreateZExt(iBuilder->hsimd_signmask(PARALLEL_LONG_ADD_DIGIT_SIZE, digitcarry), longAddBitMaskManipulationType); 421 Value * bubble_fields = iBuilder->simd_eq(PARALLEL_LONG_ADD_DIGIT_SIZE, digitsum, all_ones); 422 Value * bubble_mask = mBuilder->CreateZExt(iBuilder->hsimd_signmask(PARALLEL_LONG_ADD_DIGIT_SIZE, bubble_fields), longAddBitMaskManipulationType); 423 Value * carry_markers = mBuilder->CreateAdd(mBuilder->CreateAdd(carry_mask, carry_mask), carryin); 424 Value * increments = MatchStar(mBuilder, carry_markers, bubble_mask); 425 Value * carry_out = mBuilder->CreateLShr(increments, BLOCK_SIZE/PARALLEL_LONG_ADD_DIGIT_SIZE); 426 Value * spread = mBuilder->CreateZExt(mBuilder->CreateBitCast(mBuilder->CreateTrunc(increments, longAddBitMaskIntegerType), longAddBitMaskVectorType), 
longAddVectorType); 427 Value* sum = iBuilder->simd_add(PARALLEL_LONG_ADD_DIGIT_SIZE, digitsum, spread); 428 setCarryOpCarryOut(localIndex, carry_out); 429 return sum; 367 430 #else 368 //BLOCK_SIZE == 256, there is no other implementation 431 //BLOCK_SIZE == 256, default implementation 369 432 Value * carryq_value = getCarryOpCarryIn(localIndex); 370 433 Value* carrygen = mBuilder->CreateAnd(e1, e2, "carrygen"); … … 459 522 } 460 523 */ 461 462 524 463 525 Value * CarryManager::longAdvanceCarryInCarryOut(int localIndex, unsigned shift_amount, Value * carry_out) {
Note: See TracChangeset
for help on using the changeset viewer.