Ignore:
Timestamp:
Feb 3, 2017, 1:25:53 PM (2 years ago)
Author:
nmedfort
Message:

Partial removal of BlockNo

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5292 r5297  
    100100    Value * loopItemsToDo = iBuilder->CreateSub(itemsAvail, excessItems);
    101101
    102     Value * blockNo = getBlockNo();
    103 
    104102    // A block is made up of 8 packs.  Get the pointer to the first pack (changes the type of the pointer only).
    105     Value * sourcePackPtr = getStream("sourceStream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(0));
    106 
    107     Value * outputGenerated = getProducedItemCount("expandedStream"); // bytes previously generated to output
    108     Value * outputBlockNo = iBuilder->CreateUDiv(outputGenerated, stride);
    109     Value * outputPackPtr = getStream("expandedStream", outputBlockNo, iBuilder->getInt32(0), iBuilder->getInt32(0));
     103    Value * sourcePackPtr = getInputStream("sourceStream", iBuilder->getInt32(0), iBuilder->getInt32(0));
     104    Value * outputPackPtr = getOutputStream("expandedStream", iBuilder->getInt32(0), iBuilder->getInt32(0));
    110105
    111106    Value * hasFullLoop = iBuilder->CreateICmpUGE(loopItemsToDo, triplePackSize);
     
    124119    Value * pack0 = iBuilder->fwCast(8, iBuilder->CreateAlignedLoad(loopInput_ptr, packAlign));
    125120    Value * expand0 = iBuilder->bitCast(iBuilder->CreateShuffleVector(undefPack, pack0, expand_3_4_shuffle[0]));
    126     iBuilder->CreateAlignedStore(expand0, loopOutput_ptr, packAlign);
     121    iBuilder->CreateBlockAlignedStore(expand0, loopOutput_ptr);
    127122    // Step 2 of the main loop.
    128123    Value * inPack1_ptr = iBuilder->CreateGEP(loopInput_ptr, iBuilder->getInt32(1));
     
    130125    Value * pack1 = iBuilder->fwCast(8, iBuilder->CreateAlignedLoad(inPack1_ptr, packAlign));
    131126    Value * expand1 = iBuilder->bitCast(iBuilder->CreateShuffleVector(pack0, pack1, expand_3_4_shuffle[1]));
    132     iBuilder->CreateAlignedStore(expand1, outPack1_ptr, packAlign);
     127    iBuilder->CreateBlockAlignedStore(expand1, outPack1_ptr);
    133128    // Step 3 of the main loop.
    134129    Value * inPack2_ptr = iBuilder->CreateGEP(loopInput_ptr, iBuilder->getInt32(2));
     
    136131    Value * pack2 = iBuilder->fwCast(8, iBuilder->CreateAlignedLoad(inPack2_ptr, packAlign));
    137132    Value * expand2 = iBuilder->bitCast(iBuilder->CreateShuffleVector(pack1, pack2, expand_3_4_shuffle[2]));
    138     iBuilder->CreateAlignedStore(expand2, outPack2_ptr, packAlign);
     133    iBuilder->CreateBlockAlignedStore(expand2, outPack2_ptr);
    139134    Value * outPack3_ptr = iBuilder->CreateGEP(loopOutput_ptr, iBuilder->getInt32(3));
    140135    Value * expand3 = iBuilder->bitCast(iBuilder->CreateShuffleVector(pack2, undefPack, expand_3_4_shuffle[3]));
    141     iBuilder->CreateAlignedStore(expand3, outPack3_ptr, packAlign);
     136    iBuilder->CreateBlockAlignedStore(expand3, outPack3_ptr);
    142137
    143138    Value * loopNextInputPack = iBuilder->CreateGEP(loopInput_ptr, iBuilder->getInt32(3));
     
    262257//                                   ba    bits to move 12 positions left
    263258//    xwvuts|  nlkjzy|  barqpm|  hgfedc    Target
    264 Value* radix64Kernel::processPackData(llvm::Value* bytepack) const {
     259inline Value * radix64Kernel::processPackData(llvm::Value * bytepack) const {
     260
    265261    Value * step_right_6 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00C00000));
     262    Value * right_6_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_6), 6);
     263
    266264    Value * step_left_8 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x003F0000));
     265    Value * left_8_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_8), 8);
     266    Value * mid = iBuilder->simd_or(right_6_result, left_8_result);
     267
    267268    Value * step_right_4 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x0000F000));
     269    Value * right_4_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_4), 4);
     270    mid = iBuilder->simd_or(mid, right_4_result);
     271
    268272    Value * step_left_10 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00000F00));
     273    Value * left_10_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_10), 10);
     274    mid = iBuilder->simd_or(mid, left_10_result);
     275
    269276    Value * step_right_2 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x000000FC));
     277    Value * right_2_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_2), 2);
     278    mid = iBuilder->simd_or(mid, right_2_result);
     279
    270280    Value * step_left_12 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00000003));
    271 
    272     Value * right_6_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_6), 6);
    273     Value * right_4_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_4), 4);
    274     Value * right_2_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_2), 2);
    275     Value * left_8_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_8), 8);
    276     Value * left_10_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_10), 10);
    277281    Value * left_12_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_12), 12);
    278 
    279     Value * mid = right_6_result;
    280     mid = iBuilder->simd_or(mid, right_4_result);
    281     mid = iBuilder->simd_or(mid, right_2_result);
    282     mid = iBuilder->simd_or(mid, left_8_result);
    283     mid = iBuilder->simd_or(mid, left_10_result);
    284282    mid = iBuilder->simd_or(mid, left_12_result);
    285     Value * radix64pack = iBuilder->bitCast(mid);
    286     return radix64pack;
    287 }
    288 
    289 void radix64Kernel::generateDoBlockMethod(Value * blockNo) {
     283
     284    return iBuilder->bitCast(mid);
     285}
     286
     287void radix64Kernel::generateDoBlockMethod() {
    290288    for (unsigned i = 0; i < 8; i++) {
    291         Value * expandedStream = getStream("expandedStream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
     289        Value * expandedStream = getInputStream("expandedStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    292290        Value * bytepack = iBuilder->CreateBlockAlignedLoad(expandedStream);
    293291        Value * radix64pack = processPackData(bytepack);
    294         Value * radix64stream = getStream("radix64stream",blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
     292        Value * radix64stream = getOutputStream("radix64stream",iBuilder->getInt32(0), iBuilder->getInt32(i));
    295293        iBuilder->CreateBlockAlignedStore(radix64pack, radix64stream);
    296294    }
     
    300298}
    301299
    302 void radix64Kernel::generateFinalBlockMethod(Value * remainingBytes, Value * blockNo) {
     300void radix64Kernel::generateFinalBlockMethod(Value * remainingBytes) {
    303301
    304302    BasicBlock * entry = iBuilder->GetInsertBlock();
     
    321319    loopRemain->addIncoming(remainingBytes, entry);
    322320
    323     Value * expandedStreamLoopPtr = getStream("expandedStream", blockNo, iBuilder->getInt32(0), idx);
     321    Value * expandedStreamLoopPtr = getInputStream("expandedStream", iBuilder->getInt32(0), idx);
    324322    Value * bytepack = iBuilder->CreateBlockAlignedLoad(expandedStreamLoopPtr);
    325323    Value * radix64pack = processPackData(bytepack);
    326324
    327     Value * radix64streamPtr = getStream("radix64stream", blockNo, iBuilder->getInt32(0), idx);
     325    Value * radix64streamPtr = getOutputStream("radix64stream", iBuilder->getInt32(0), idx);
    328326    iBuilder->CreateBlockAlignedStore(radix64pack, radix64streamPtr);
    329327
     
    347345}
    348346
    349 llvm::Value* base64Kernel::processPackData(llvm::Value* bytepack) const {
     347inline llvm::Value* base64Kernel::processPackData(llvm::Value* bytepack) const {
    350348    Value * mask_gt_25 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(25)));
    351349    Value * mask_gt_51 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, iBuilder->getInt8(51)));
     
    362360    Value * t0_61 = iBuilder->simd_sub(8, t0_51, iBuilder->simd_and(mask_gt_51, iBuilder->simd_fill(8, iBuilder->getInt8(75))));
    363361    Value * t0_62 = iBuilder->simd_sub(8, t0_61, iBuilder->simd_and(mask_eq_62, iBuilder->simd_fill(8, iBuilder->getInt8(15))));
    364     Value * base64pack = iBuilder->simd_sub(8, t0_62, iBuilder->simd_and(mask_eq_63, iBuilder->simd_fill(8, iBuilder->getInt8(12))));
    365     return base64pack;
    366 }
    367 
    368 void base64Kernel::generateDoBlockMethod(Value * blockNo) {
     362    return iBuilder->simd_sub(8, t0_62, iBuilder->simd_and(mask_eq_63, iBuilder->simd_fill(8, iBuilder->getInt8(12))));
     363}
     364
     365void base64Kernel::generateDoBlockMethod() {
    369366    for (unsigned i = 0; i < 8; i++) {
    370         Value * radix64stream_ptr = getStream("radix64stream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
     367        Value * radix64stream_ptr = getInputStream("radix64stream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    371368        Value * bytepack = iBuilder->CreateBlockAlignedLoad(radix64stream_ptr);
    372 
    373369        Value* base64pack = processPackData(bytepack);
    374 
    375         Value * base64stream_ptr = getStream("base64stream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
     370        Value * base64stream_ptr = getOutputStream("base64stream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    376371        iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(base64pack), base64stream_ptr);
    377372    }
     
    384379// of 4 bytes.   When the number of radix 64 values is not a multiple of 4
    385380// number of radix 64 values
    386 void base64Kernel::generateFinalBlockMethod(Value * remainingBytes, Value * blockNo) {
     381void base64Kernel::generateFinalBlockMethod(Value * remainingBytes) {
    387382
    388383    BasicBlock * entry = iBuilder->GetInsertBlock();
     
    407402    idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), entry);
    408403    loopRemain->addIncoming(remainingBytes, entry);
    409     Value * radix64streamPtr = getStream("radix64stream", blockNo, iBuilder->getInt32(0), idx);
     404    Value * radix64streamPtr = getInputStream("radix64stream", iBuilder->getInt32(0), idx);
    410405    Value * bytepack = iBuilder->CreateBlockAlignedLoad(radix64streamPtr);
    411406    Value * base64pack = processPackData(bytepack);
    412     Value * base64streamPtr = getStream("base64stream", blockNo, iBuilder->getInt32(0), idx);
     407    Value * base64streamPtr = getOutputStream("base64stream", iBuilder->getInt32(0), idx);
    413408    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(base64pack), base64streamPtr);
    414409    idx->addIncoming(iBuilder->CreateAdd(idx, ConstantInt::get(iBuilder->getInt32Ty(), 1)), base64_loop);
     
    423418
    424419    iBuilder->SetInsertPoint(doPadding);
    425     Value * i8output_ptr = getStreamView(iBuilder->getInt8PtrTy(), "base64stream", blockNo, iBuilder->getInt32(0));
     420    Value * i8output_ptr = getStreamView(iBuilder->getInt8PtrTy(), "base64stream", getBlockNo(), iBuilder->getInt32(0));
    426421    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt8Ty(), '='), iBuilder->CreateGEP(i8output_ptr, remainingBytes));
    427422    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainMod4, iBuilder->getSize(3)), fbExit, doPadding2);
     
    437432expand3_4Kernel::expand3_4Kernel(IDISA::IDISA_Builder * iBuilder)
    438433: SegmentOrientedKernel(iBuilder, "expand3_4",
    439               {Binding{iBuilder->getStreamSetTy(1, 8), "sourceStream"}},
    440               {Binding{iBuilder->getStreamSetTy(1, 8), "expandedStream"}},
    441               {}, {}, {}) {
     434            {Binding{iBuilder->getStreamSetTy(1, 8), "sourceStream"}},
     435            {Binding{iBuilder->getStreamSetTy(1, 8), "expandedStream"}},
     436            {}, {}, {}) {
    442437    setDoBlockUpdatesProducedItemCountsAttribute(true);
    443438}
    444439
    445440radix64Kernel::radix64Kernel(IDISA::IDISA_Builder * iBuilder)
    446 : BlockOrientedKernel(iBuilder, "radix64", {Binding{iBuilder->getStreamSetTy(1, 8), "expandedStream"}}, {Binding{iBuilder->getStreamSetTy(1, 8), "radix64stream"}}, {}, {}, {}) {
     441: BlockOrientedKernel(iBuilder, "radix64",
     442            {Binding{iBuilder->getStreamSetTy(1, 8), "expandedStream"}},
     443            {Binding{iBuilder->getStreamSetTy(1, 8), "radix64stream"}},
     444            {}, {}, {}) {
    447445    setDoBlockUpdatesProducedItemCountsAttribute(true);
    448446}
    449447
    450448base64Kernel::base64Kernel(IDISA::IDISA_Builder * iBuilder)
    451 : BlockOrientedKernel(iBuilder, "base64", {Binding{iBuilder->getStreamSetTy(1, 8), "radix64stream"}}, {Binding{iBuilder->getStreamSetTy(1, 8), "base64stream"}}, {}, {}, {}) {
     449: BlockOrientedKernel(iBuilder, "base64",
     450            {Binding{iBuilder->getStreamSetTy(1, 8), "radix64stream"}},
     451            {Binding{iBuilder->getStreamSetTy(1, 8), "base64stream"}},
     452            {}, {}, {}) {
    452453    setDoBlockUpdatesProducedItemCountsAttribute(true);
    453454}
Note: See TracChangeset for help on using the changeset viewer.