Changeset 5288


Ignore:
Timestamp:
Jan 30, 2017, 5:21:46 PM (8 months ago)
Author:
xwa163
Message:

Fix bugs in the base64-related kernels; add a test script for base64.

Location:
icGREP/icgrep-devel
Files:
3 added
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5278 r5288  
    210210  COMMAND python run_all.py -e ${CMAKE_BINARY_DIR})
    211211
     212add_test(
     213  NAME base64_test
     214  WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/../QA/base64
     215  COMMAND python base64test.py ${CMAKE_BINARY_DIR}/base64)
     216
    212217add_custom_target (u8u16_test
    213218  WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/../QA/u8u16
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5286 r5288  
    262262//                                   ba    bits to move 12 positions left
    263263//    xwvuts|  nlkjzy|  barqpm|  hgfedc    Target
    264 void radix64Kernel::generateDoBlockMethod(Function * function, Value * self, Value * blockNo) const {
     264Value* radix64Kernel::processPackData(llvm::Value* bytepack) const {
    265265    Value * step_right_6 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00C00000));
    266266    Value * step_left_8 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x003F0000));
     
    269269    Value * step_right_2 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x000000FC));
    270270    Value * step_left_12 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00000003));
    271     for (unsigned i = 0; i < 8; i++) {
    272         Value * expandedStream = getStream(self, "expandedStream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
    273         Value * bytepack = iBuilder->CreateBlockAlignedLoad(expandedStream);
    274         Value * right_6_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_6), 6);
    275         Value * right_4_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_4), 4);
    276         Value * mid = iBuilder->simd_or(right_6_result, right_4_result);
    277         Value * right_2_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_2), 2);
    278         mid = iBuilder->simd_or(mid, right_2_result);
    279         Value * left_8_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_8), 8);
    280         mid = iBuilder->simd_or(mid, left_8_result);
    281         Value * left_10_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_10), 10);
    282         mid = iBuilder->simd_or(mid, left_10_result);
    283         Value * left_12_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_12), 12);
    284         mid = iBuilder->simd_or(mid, left_12_result);
    285         Value * radix64pack = iBuilder->bitCast(mid);
    286         Value * radix64stream = getStream(self, "radix64stream",blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
    287         iBuilder->CreateBlockAlignedStore(radix64pack, radix64stream);
    288     }
    289     Value * produced = getProducedItemCount(self, "radix64stream");
    290     produced = iBuilder->CreateAdd(produced, iBuilder->getSize(iBuilder->getStride()));
    291     setProducedItemCount(self, "radix64stream", produced);
    292 }
    293 
    294 void radix64Kernel::generateFinalBlockMethod(Function * function, Value *self, Value * remainingBytes, Value * blockNo) const {
    295 
    296     BasicBlock * entry = iBuilder->GetInsertBlock();
    297     BasicBlock * radix64_loop = BasicBlock::Create(iBuilder->getContext(), "radix64_loop", function, 0);
    298     BasicBlock * loopExit = BasicBlock::Create(iBuilder->getContext(), "loopExit", function, 0);
    299     BasicBlock * handleRemainFirstByte = BasicBlock::Create(iBuilder->getContext(), "handleRemainFirstByte", function, 0);
    300     BasicBlock * handleRemainSecondByte = BasicBlock::Create(iBuilder->getContext(), "handleRemainSecondByte", function, 0);
    301     BasicBlock * handleNoRemainSecondByte = BasicBlock::Create(iBuilder->getContext(), "handleNoRemainSecondByte", function, 0);
    302     BasicBlock * fbExit = BasicBlock::Create(iBuilder->getContext(), "fbExit", function, 0);
    303     // Final Block arguments: self, remaining.
    304     Value * remainMod4 = iBuilder->CreateAnd(remainingBytes, iBuilder->getSize(3));
    305 
    306     const unsigned PACK_SIZE = iBuilder->getStride()/8;
    307     Constant * packSize = iBuilder->getSize(PACK_SIZE);
    308 
    309     Value * step_right_6 = iBuilder->simd_fill(32, iBuilder->getInt32(0x00C00000));
    310     Value * step_left_8 = iBuilder->simd_fill(32, iBuilder->getInt32(0x003F0000));
    311     Value * step_right_4 = iBuilder->simd_fill(32, iBuilder->getInt32(0x0000F000));
    312     Value * step_left_10 = iBuilder->simd_fill(32, iBuilder->getInt32(0x00000F00));
    313     Value * step_right_2 = iBuilder->simd_fill(32, iBuilder->getInt32(0x000000FC));
    314     Value * step_left_12 = iBuilder->simd_fill(32, iBuilder->getInt32(0x00000003));
    315 
    316     // Enter the loop only if there is at least one byte remaining to process.
    317     iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainingBytes, iBuilder->getSize(0)), fbExit, radix64_loop);
    318 
    319     iBuilder->SetInsertPoint(radix64_loop);
    320     PHINode * idx = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
    321     PHINode * loopRemain = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    322     idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), entry);
    323     loopRemain->addIncoming(remainingBytes, entry);
    324 
    325     Value * expandedStreamLoopPtr = getStream(self, "expandedStream", blockNo, iBuilder->getInt32(0), idx);
    326     Value * bytepack = iBuilder->CreateBlockAlignedLoad(expandedStreamLoopPtr);
     271
    327272    Value * right_6_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_6), 6);
    328273    Value * right_4_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_4), 4);
     
    339284    mid = iBuilder->simd_or(mid, left_12_result);
    340285    Value * radix64pack = iBuilder->bitCast(mid);
     286    return radix64pack;
     287}
     288
     289void radix64Kernel::generateDoBlockMethod(Function * function, Value * self, Value * blockNo) const {
     290    for (unsigned i = 0; i < 8; i++) {
     291        Value * expandedStream = getStream(self, "expandedStream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
     292        Value * bytepack = iBuilder->CreateBlockAlignedLoad(expandedStream);
     293        Value * radix64pack = processPackData(bytepack);
     294        Value * radix64stream = getStream(self, "radix64stream",blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
     295        iBuilder->CreateBlockAlignedStore(radix64pack, radix64stream);
     296    }
     297    Value * produced = getProducedItemCount(self, "radix64stream");
     298    produced = iBuilder->CreateAdd(produced, iBuilder->getSize(iBuilder->getStride()));
     299    setProducedItemCount(self, "radix64stream", produced);
     300}
     301
     302void radix64Kernel::generateFinalBlockMethod(Function * function, Value *self, Value * remainingBytes, Value * blockNo) const {
     303
     304    BasicBlock * entry = iBuilder->GetInsertBlock();
     305    BasicBlock * radix64_loop = BasicBlock::Create(iBuilder->getContext(), "radix64_loop", function, 0);
     306    BasicBlock * loopExit = BasicBlock::Create(iBuilder->getContext(), "loopExit", function, 0);
     307    BasicBlock * fbExit = BasicBlock::Create(iBuilder->getContext(), "fbExit", function, 0);
     308    // Final Block arguments: self, remaining.
     309    Value * remainMod4 = iBuilder->CreateAnd(remainingBytes, iBuilder->getSize(3));
     310
     311    const unsigned PACK_SIZE = iBuilder->getStride()/8;
     312    Constant * packSize = iBuilder->getSize(PACK_SIZE);
     313
     314    // Enter the loop only if there is at least one byte remaining to process.
     315    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainingBytes, iBuilder->getSize(0)), fbExit, radix64_loop);
     316
     317    iBuilder->SetInsertPoint(radix64_loop);
     318    PHINode * idx = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
     319    PHINode * loopRemain = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
     320    idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), entry);
     321    loopRemain->addIncoming(remainingBytes, entry);
     322
     323    Value * expandedStreamLoopPtr = getStream(self, "expandedStream", blockNo, iBuilder->getInt32(0), idx);
     324    Value * bytepack = iBuilder->CreateBlockAlignedLoad(expandedStreamLoopPtr);
     325    Value * radix64pack = processPackData(bytepack);
    341326
    342327    Value * radix64streamPtr = getStream(self, "radix64stream", blockNo, iBuilder->getInt32(0), idx);
     
    348333    loopRemain->addIncoming(remainAfterLoop, radix64_loop);
    349334
    350     Value* continueLoop = iBuilder->CreateICmpULT(remainAfterLoop, packSize);
     335    Value* continueLoop = iBuilder->CreateICmpSGT(remainAfterLoop, iBuilder->getInt64(0));
     336
    351337    iBuilder->CreateCondBr(continueLoop, radix64_loop, loopExit);
    352338
    353339    iBuilder->SetInsertPoint(loopExit);
    354     // All base64 data has been computed, but we may need to set one or two '=' padding bytes.
    355     iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainMod4, iBuilder->getSize(0)), fbExit, handleRemainFirstByte);
    356     iBuilder->SetInsertPoint(handleRemainFirstByte);
    357     // At least one padding byte required.
    358     Value * i8input_ptr = getStreamView(iBuilder->getInt8PtrTy(), self, "expandedStream", blockNo, iBuilder->getInt32(0));
    359     Value * remainOutputStart = iBuilder->CreateSub(remainingBytes, remainMod4);
    360 
    361     Value * firstRemainByte = iBuilder->CreateLoad(i8input_ptr);
    362 
    363     Value * first_move_right_2_mask = ConstantInt::get(iBuilder->getInt8Ty(), 0xFC);
    364     Value * first_output_byte = iBuilder->CreateLShr(iBuilder->CreateAnd(firstRemainByte, first_move_right_2_mask), 2);
    365 
    366     Value * first_move_left_4_mask = ConstantInt::get(iBuilder->getInt8Ty(), 0x03);
    367     Value * first_move_left_4_byte = iBuilder->CreateShl(iBuilder->CreateAnd(firstRemainByte, first_move_left_4_mask), 4);
    368 
    369 
    370     Value * i8OutPtr0 = getStreamView(iBuilder->getInt8PtrTy(), self, "radix64stream", blockNo, remainOutputStart);
    371 
    372     iBuilder->CreateStore(first_output_byte, i8OutPtr0);
    373 
    374     iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainMod4, iBuilder->getSize(1)), handleNoRemainSecondByte, handleRemainSecondByte);
    375     iBuilder->SetInsertPoint(handleRemainSecondByte);
    376 
    377     Value * secondRemainByte = iBuilder->CreateLoad(iBuilder->CreateGEP(i8input_ptr, iBuilder->getInt32(1)));
    378     Value * second_move_right_4_mask = ConstantInt::get(iBuilder->getInt8Ty(), 0xF0);
    379     Value * second_move_right_4_byte = iBuilder->CreateLShr(iBuilder->CreateAnd(secondRemainByte, second_move_right_4_mask), 4);
    380     Value * second_output_byte = iBuilder->CreateOr(first_move_left_4_byte, second_move_right_4_byte);
    381 
    382     Value * i8OutPtr1 = getStreamView(iBuilder->getInt8PtrTy(), self, "radix64stream", blockNo, iBuilder->CreateAdd(remainOutputStart, iBuilder->getInt64(1)));
    383 
    384     iBuilder->CreateStore(second_output_byte, i8OutPtr1);
    385 
    386     Value * second_move_left_2_mask = ConstantInt::get(iBuilder->getInt8Ty(), 0x0F);
    387     Value * second_move_left_2_byte = iBuilder->CreateShl(iBuilder->CreateAnd(secondRemainByte, second_move_left_2_mask), 2);
    388 
    389     Value * i8OutPtr2 = getStreamView(iBuilder->getInt8PtrTy(), self, "radix64stream", blockNo, iBuilder->CreateAdd(remainOutputStart, iBuilder->getInt64(2)));
    390 
    391     iBuilder->CreateStore(second_move_left_2_byte, i8OutPtr2);
    392     iBuilder->CreateBr(fbExit);
    393 
    394     iBuilder->SetInsertPoint(handleNoRemainSecondByte);
    395 
    396     i8OutPtr1 = getStreamView(iBuilder->getInt8PtrTy(), self, "radix64stream", blockNo, iBuilder->CreateAdd(remainOutputStart, iBuilder->getInt64(1)));
    397 
    398     iBuilder->CreateStore(first_move_left_4_byte, i8OutPtr1);
     340
    399341    iBuilder->CreateBr(fbExit);
    400342
     
    405347}
    406348
     349llvm::Value* base64Kernel::processPackData(llvm::Value* bytepack) const {
     350    Value * mask_gt_25 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(25)));
     351    Value * mask_gt_51 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(51)));
     352    Value * mask_eq_62 = iBuilder->simd_eq(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(62)));
     353    Value * mask_eq_63 = iBuilder->simd_eq(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(63)));
     354    // Strategy:
     355    // 1. add ord('A') = 65 to all radix64 values, this sets the correct values for entries 0 to 25.
     356    // 2. add ord('a') - ord('A') - (26 - 0) = 6 to all values >25, this sets the correct values for entries 0 to 51
     357    // 3. subtract ord('a') - ord('0') + (52 - 26) = 75 to all values > 51, this sets the correct values for entries 0 to 61
     358    // 4. subtract ord('0') - ord('+') + (62 - 52) = 15 for all values = 62
     359    // 5. subtract ord('0') - ord('/') + (63 - 52) = 12 for all values = 63
     360    Value * t0_25 = iBuilder->simd_add(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8('A')));
     361    Value * t0_51 = iBuilder->simd_add(8, t0_25, iBuilder->simd_and(mask_gt_25, iBuilder->simd_fill(8, iBuilder->getInt8(6))));
     362    Value * t0_61 = iBuilder->simd_sub(8, t0_51, iBuilder->simd_and(mask_gt_51, iBuilder->simd_fill(8, iBuilder->getInt8(75))));
     363    Value * t0_62 = iBuilder->simd_sub(8, t0_61, iBuilder->simd_and(mask_eq_62, iBuilder->simd_fill(8, iBuilder->getInt8(15))));
     364    Value * base64pack = iBuilder->simd_sub(8, t0_62, iBuilder->simd_and(mask_eq_63, iBuilder->simd_fill(8, iBuilder->getInt8(12))));
     365    return base64pack;
     366}
     367
    407368void base64Kernel::generateDoBlockMethod(Function * function, Value * self, Value * blockNo) const {
    408369    for (unsigned i = 0; i < 8; i++) {
    409370        Value * radix64stream_ptr = getStream(self, "radix64stream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
    410371        Value * bytepack = iBuilder->CreateBlockAlignedLoad(radix64stream_ptr);
    411         Value * mask_gt_25 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(25)));
    412         Value * mask_gt_51 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(51)));
    413         Value * mask_eq_62 = iBuilder->simd_eq(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(62)));
    414         Value * mask_eq_63 = iBuilder->simd_eq(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(63)));
    415         // Strategy:
    416         // 1. add ord('A') = 65 to all radix64 values, this sets the correct values for entries 0 to 25.
    417         // 2. add ord('a') - ord('A') - (26 - 0) = 6 to all values >25, this sets the correct values for entries 0 to 51
    418         // 3. subtract ord('a') - ord('0') + (52 - 26) = 75 to all values > 51, this sets the correct values for entries 0 to 61
    419         // 4. subtract ord('0') - ord('+') + (62 - 52) = 15 for all values = 62
    420         // 4. subtract ord('0') - ord('/') + (63 - 62) = 2 for all values = 63
    421         Value * t0_25 = iBuilder->simd_add(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8('A')));
    422         Value * t0_51 = iBuilder->simd_add(8, t0_25, iBuilder->simd_and(mask_gt_25, iBuilder->simd_fill(8, iBuilder->getInt8(6))));
    423         Value * t0_61 = iBuilder->simd_sub(8, t0_51, iBuilder->simd_and(mask_gt_51, iBuilder->simd_fill(8, iBuilder->getInt8(75))));
    424         Value * t0_62 = iBuilder->simd_sub(8, t0_61, iBuilder->simd_and(mask_eq_62, iBuilder->simd_fill(8, iBuilder->getInt8(15))));
    425         Value * base64pack = iBuilder->simd_sub(8, t0_62, iBuilder->simd_and(mask_eq_63, iBuilder->simd_fill(8, iBuilder->getInt8(2))));
     372
     373        Value* base64pack = processPackData(bytepack);
     374
    426375        Value * base64stream_ptr = getStream(self, "base64stream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
    427376        iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(base64pack), base64stream_ptr);
     
    460409    Value * radix64streamPtr = getStream(self, "radix64stream", blockNo, iBuilder->getInt32(0), idx);
    461410    Value * bytepack = iBuilder->CreateBlockAlignedLoad(radix64streamPtr);
    462     Value * mask_gt_25 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(25)));
    463     Value * mask_gt_51 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(51)));
    464     Value * mask_eq_62 = iBuilder->simd_eq(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(62)));
    465     Value * mask_eq_63 = iBuilder->simd_eq(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(63)));
    466     Value * t0_25 = iBuilder->simd_add(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8('A')));
    467     Value * t0_51 = iBuilder->simd_add(8, t0_25, iBuilder->simd_and(mask_gt_25, iBuilder->simd_fill(8, iBuilder->getInt8(6))));
    468     Value * t0_61 = iBuilder->simd_sub(8, t0_51, iBuilder->simd_and(mask_gt_51, iBuilder->simd_fill(8, iBuilder->getInt8(75))));
    469     Value * t0_62 = iBuilder->simd_sub(8, t0_61, iBuilder->simd_and(mask_eq_62, iBuilder->simd_fill(8, iBuilder->getInt8(15))));
    470     Value * base64pack = iBuilder->simd_sub(8, t0_62, iBuilder->simd_and(mask_eq_63, iBuilder->simd_fill(8, iBuilder->getInt8(2))));
     411    Value * base64pack = processPackData(bytepack);
    471412    Value * base64streamPtr = getStream(self, "base64stream", blockNo, iBuilder->getInt32(0), idx);
    472413    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(base64pack), base64streamPtr);
     
    475416    loopRemain->addIncoming(remainAfterLoop, base64_loop);
    476417
    477     Value* continueLoop = iBuilder->CreateICmpULT(remainAfterLoop, packSize);
     418    Value* continueLoop = iBuilder->CreateICmpSGT(remainAfterLoop, iBuilder->getInt64(0));
    478419    iBuilder->CreateCondBr(continueLoop, base64_loop, loopExit);
    479420
  • icGREP/icgrep-devel/icgrep/kernels/radix64.h

    r5286 r5288  
    3232    virtual void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
    3333    virtual void generateFinalBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * remainingBytes, llvm::Value * blockNo) const override;
     34    llvm::Value* processPackData(llvm::Value* packData) const;
    3435};
    3536
     
    4041    virtual void generateFinalBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * remainingBytes, llvm::Value * blockNo) const override;
    4142    virtual void generateDoBlockMethod(llvm::Function * function, llvm::Value * self, llvm::Value * blockNo) const override;
    42    
     43    llvm::Value* processPackData(llvm::Value* packData) const;
    4344};
    4445
Note: See TracChangeset for help on using the changeset viewer.