Changeset 5232


Ignore:
Timestamp:
Dec 19, 2016, 4:09:38 PM (2 years ago)
Author:
xwa163
Message:

Add base64-related kernels

Location:
icGREP/icgrep-devel/icgrep
Files:
1 added
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5227 r5232  
    110110add_executable(icgrep icgrep.cpp toolchain.cpp grep_engine.cpp kernels/pipeline.cpp kernels/scanmatchgen.cpp kernels/cc_kernel.cpp ${PRECOMPILED_FILES})
    111111add_executable(u8u16 u8u16.cpp toolchain.cpp kernels/p2s_kernel.cpp kernels/pipeline.cpp kernels/deletion.cpp kernels/stdout_kernel.cpp)
     112add_executable(base64 base64.cpp kernels/radix64.cpp toolchain.cpp kernels/p2s_kernel.cpp kernels/pipeline.cpp kernels/deletion.cpp kernels/stdout_kernel.cpp)
    112113add_executable(wc wc.cpp toolchain.cpp kernels/pipeline.cpp)
    113114add_executable(editd editd/editd.cpp editd/pattern_compiler.cpp toolchain.cpp kernels/pipeline.cpp editd/editdscan_kernel.cpp editd/editd_gpu_kernel.cpp editd/editd_cpu_kernel.cpp)
     
    126127target_link_libraries (icgrep UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    127128target_link_libraries (u8u16 UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
     129target_link_libraries (base64 UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    128130target_link_libraries (wc UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    129131target_link_libraries (editd UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5219 r5232  
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 */
    5 #include "expand3_4.h"
     5#include "radix64.h"
     6//#include "expand3_4.h"
    67#include <kernels/kernel.h>
    78#include <IDISA/idisa_builder.h>
     
    7980        }
    8081        expand_3_4_shuffle[j] = ConstantVector::get(Idxs);
    81     }   
     82    }
    8283    Constant * Const3 = ConstantInt::get(iBuilder->getSizeTy(), 3);
    8384    Constant * Const4 = ConstantInt::get(iBuilder->getSizeTy(), 4);
    84     Constant * tripleBlockSize = ConstantInt::get(iBuilder->getSizeTy(), iBuilder->getBitBlockWidth() * 3);
    8585    Constant * stride = ConstantInt::get(iBuilder->getSizeTy(), iBuilder->getStride());
    8686    Constant * packSize = ConstantInt::get(iBuilder->getSizeTy(), PACK_SIZE);
     
    9292    Value * self = &*(args++);
    9393    Value * blocksToDo = &*(args);
    94     //iBuilder->CallPrintInt("blocksToDo", blocksToDo);
    95     Value * segmentNo = getLogicalSegmentNo(self);
    9694    Value * streamStructPtr = getStreamSetStructPtr(self, "sourceStream");
    97     //iBuilder->CallPrintInt("streamStructPtr", iBuilder->CreatePtrToInt(streamStructPtr, iBuilder->getInt64Ty()));
    98    
     95
    9996    LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(mStreamSetInputBuffers[0]->getProducerPosPtr(streamStructPtr));
    100     //iBuilder->CallPrintInt("producerPos", producerPos);
    10197    Value * processed = getProcessedItemCount(self);
    10298    Value * itemsAvail = iBuilder->CreateSub(producerPos, processed);
    10399   
    104100    // Except for the final segment, we always process an integral number of triple blocks.
    105     Value * tripleBlocksToDo = iBuilder->CreateUDiv(blocksToDo, ConstantInt::get(iBuilder->getSizeTy(), 3));
    106     Value * tripleBlocksAvail = iBuilder->CreateUDiv(itemsAvail, tripleBlockSize);
    107     Value * lessThanFullSegment = iBuilder->CreateICmpULT(tripleBlocksAvail, tripleBlocksToDo);
    108     Value * tripleBlockItems = iBuilder->CreateMul(iBuilder->CreateSelect(lessThanFullSegment, tripleBlocksAvail, tripleBlocksToDo), tripleBlockSize);
    109     Value * endSignal = iBuilder->CreateLoad(mStreamSetInputBuffers[0]->hasEndOfInputPtr(streamStructPtr));
     101    Value * tripleBlocksToDo = iBuilder->CreateMul(blocksToDo, Const3);
     102    Constant * blockItems = ConstantInt::get(iBuilder->getSizeTy(), iBuilder->getBitBlockWidth());
     103    Value * tripleItemMax = iBuilder->CreateMul(tripleBlocksToDo, blockItems);
     104
     105    Value * lessThanFullSegment = iBuilder->CreateICmpULT(itemsAvail, tripleItemMax);
     106    Value * tripleBlockItems = iBuilder->CreateSelect(lessThanFullSegment, itemsAvail, tripleItemMax);
     107
     108    Value * endSignal = iBuilder->CreateLoad(mStreamSetInputBuffers[0]->getEndOfInputPtr(streamStructPtr));
    110109    Value * inFinalSegment = iBuilder->CreateAnd(endSignal, lessThanFullSegment);
    111110    Value * itemsToDo = iBuilder->CreateSelect(inFinalSegment, itemsAvail, tripleBlockItems);
    112     //iBuilder->CallPrintInt("itemsToDo", itemsToDo);
     111
     112//    iBuilder->CallPrintInt("itemsToDo", itemsToDo);
    113113
    114114    Value * blockNo = getScalarField(self, blockNoScalar);
     115
    115116    Value * sourceBlockPtr = getStreamSetBlockPtr(self, "sourceStream", blockNo);
    116    
     117
    117118    Value * outputGenerated = getProducedItemCount(self); // bytes previously generated to output
    118119    Value * outputBlockNo = iBuilder->CreateUDiv(outputGenerated, stride);
     120
    119121    Value * outputBlockPtr = getStreamSetBlockPtr(self, "expandedStream", outputBlockNo);
    120    
     122
    121123    // A block is made up of 8 packs.  Get the pointer to the first pack (changes the type of the pointer only).
    122124    Value * sourcePackPtr = iBuilder->CreateGEP(sourceBlockPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(0)});
    123125    Value * outputPackPtr = iBuilder->CreateGEP(outputBlockPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(0)});
    124126    Value * hasFullLoop = iBuilder->CreateICmpUGE(itemsToDo, loopItemCount);
     127
     128
    125129    iBuilder->CreateCondBr(hasFullLoop, expand_3_4_loop, expand3_4_loop_exit);
    126130    iBuilder->SetInsertPoint(expand_3_4_loop);
     
    128132    PHINode * loopOutput_ptr = iBuilder->CreatePHI(outputPackPtr->getType(), 2);
    129133    PHINode * loopItemsRemain = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
     134
    130135    loopInput_ptr->addIncoming(sourcePackPtr, expand2_3entry);
    131136    loopOutput_ptr->addIncoming(outputPackPtr, expand2_3entry);
    132137    loopItemsRemain->addIncoming(itemsToDo, expand2_3entry);
     138
    133139    // Step 1 of the main loop.
    134140    Value * pack0 = iBuilder->fwCast(8, iBuilder->CreateAlignedLoad(loopInput_ptr, packAlign));
     
    150156    Value * expand3 = iBuilder->bitCast(iBuilder->CreateShuffleVector(pack2, undefPack, expand_3_4_shuffle[3]));
    151157    iBuilder->CreateAlignedStore(expand3, outPack3_ptr, packAlign);
    152    
     158
    153159    Value * loopNextInputPack = iBuilder->CreateGEP(loopInput_ptr, {iBuilder->getInt32(3)});
    154     Value * loopNextOutputPack = iBuilder->CreateGEP(loopOutput_ptr, {iBuilder->getInt32(4)});
     160
     161
     162
    155163    Value * remainingItems = iBuilder->CreateSub(loopItemsRemain, loopItemCount);
     164
     165    Value * loopProcessed = iBuilder->CreateSub(itemsToDo, remainingItems);
     166    loopProcessed = iBuilder->CreateMul(iBuilder->CreateUDiv(loopProcessed, iBuilder->getInt64(3)), iBuilder->getInt64(4));
     167
     168    Value * loopNextOutputPack;
     169    loopNextOutputPack = iBuilder->CreateGEP(loopOutput_ptr, {iBuilder->getInt32(4)});
     170
    156171    loopInput_ptr->addIncoming(loopNextInputPack, expand_3_4_loop);
    157172    loopOutput_ptr->addIncoming(loopNextOutputPack, expand_3_4_loop);
    158173    loopItemsRemain->addIncoming(remainingItems, expand_3_4_loop);
    159     //iBuilder->CallPrintInt("loopItemsRemain", remainingItems);
     174
    160175    Value * continueLoop = iBuilder->CreateICmpUGE(remainingItems, loopItemCount);
    161176    iBuilder->CreateCondBr(continueLoop, expand_3_4_loop, expand3_4_loop_exit);
     
    173188    loopExitOutput_ptr->addIncoming(loopNextOutputPack, expand_3_4_loop);
    174189    loopExitItemsRemain->addIncoming(remainingItems, expand_3_4_loop);
    175 
    176     // There may be one or two remaining full packs and/or a partial pack. 
     190    // There may be one or two remaining full packs and/or a partial pack.
    177191    //
    178192    // We have several cases depending on the number of remaining items.  Let N = packSize
     
    234248    //
    235249    iBuilder->SetInsertPoint(itemsDone);
    236    
     250
    237251    processed = iBuilder->CreateAdd(processed, itemsToDo);
    238252    setProcessedItemCount(self, processed);
     253
    239254    setScalarField(self, blockNoScalar, iBuilder->CreateUDiv(processed, stride));
    240255    // We have produced 4 output bytes for every 3 input bytes.  If the number of input
     
    244259    setProducedItemCount(self, totalProduced);
    245260    Value * ssStructPtr = getStreamSetStructPtr(self, "expandedStream");
    246    
     261
    247262    Value * producerPosPtr = mStreamSetOutputBuffers[0]->getProducerPosPtr(ssStructPtr);
     263
    248264    iBuilder->CreateAtomicStoreRelease(totalProduced, producerPosPtr);
    249265   
     
    251267    iBuilder->SetInsertPoint(setTermination);
    252268#ifndef NDEBUG
    253     iBuilder->CallPrintInt(mKernelName + " termination in segment ", segmentNo);
     269//    iBuilder->CallPrintInt(mKernelName + " termination in segment ", segmentNo);
    254270#endif
    255271    setTerminationSignal(self);
     
    258274    iBuilder->SetInsertPoint(expand3_4_exit);
    259275    // Must be the last action, for synchronization.
    260     setLogicalSegmentNo(self, iBuilder->CreateAdd(segmentNo, ConstantInt::get(iBuilder->getSizeTy(), 1)));
    261276    iBuilder->CreateBr(finalExit);
    262277   
     
    281296}
    282297
    283    
    284298// Radix 64 determination, converting 3 bytes to 4 6-bit values.
    285299//
    286 //  00000000|zyxwvuts|rqpmnlkj|hgfedcba    Original 3 bytes of binary data in a 32-bit field
    287 //                        nlkj|hgfedcba    bits to move 0 positions initially
    288 //           zyxwvuts|rqpm    |            bits to move 4 positions
    289 //      zyxw|vutsrqpm|        |            shift forward 4
    290 //      zyxw|vutsrqpm|    nlkj|hgfedcba    combine with bits moving 0
    291 //          |  tsrqpm|        |  fedcba    bits to move 0 positions in second step
    292 //      zyxw|vu      |    nlkj|hg          bits to move 2 positions in second step
    293 //    zyxwvu|        |  nlkjhg|            shift forward 2
    294 //    zyxwvu|  tsrqpm|  nlkjhg|  fedcba    The 4 radix64 values have been computed.
    295 
     300//  00000000|zyxwvuts|rqpmnlkj|hgfedcba    Original
     301//           zy                            bits to move 6 positions right
     302//             xwvuts                      bits to move 8 positions left
     303//                    rqpm                 bits to move 4 positions right
     304//                        nlkj             bits to move 10 positions left
     305//                             hgfedc      bits to move 2 positions right
     306//                                   ba    bits to move 12 positions left
     307//    xwvuts|  nlkjzy|  barqpm|  hgfedc    Target
    296308void radix64Kernel::generateDoBlockLogic(Value * self, Value * blockNo) {
    297309    Value * expandedStream = getStreamSetBlockPtr(self, "expandedStream", blockNo);
    298     Value * radix64stream = getStreamSetBlockPtr(self, "radix64stream", blockNo);
    299     Value * step1_bits_to_move4 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x0003FFC0));
    300     Value * step1_bits_to_stay = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00000FFF));
    301     Value * step2_bits_to_move2 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x006F006F));
    302     Value * step2_bits_to_stay = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x0FC00FC0));
     310    Value * radix64stream = getStreamSetBlockPtr(self, "radix64stream",blockNo);
     311
     312    Value * step_right_6 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00C00000));
     313    Value * step_left_8 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x003F0000));
     314    Value * step_right_4 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x0000F000));
     315    Value * step_left_10 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00000F00));
     316    Value * step_right_2 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x000000FC));
     317    Value * step_left_12 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00000003));
    303318   
    304319    for (unsigned i = 0; i < 8; i++) {
    305320        Value * bytepack = iBuilder->CreateBlockAlignedLoad(expandedStream, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i)});
    306         Value * move4 = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step1_bits_to_move4), 4);
    307         Value * step1 = iBuilder->simd_or(move4, iBuilder->simd_and(bytepack, step1_bits_to_stay));
    308         Value * move2 = iBuilder->simd_slli(32, iBuilder->simd_and(step1, step2_bits_to_move2), 2);
    309         Value * radix64pack = iBuilder->bitCast(iBuilder->simd_or(move2, iBuilder->simd_and(step1, step2_bits_to_stay)));
     321
     322        Value * right_6_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_6), 6);
     323        Value * right_4_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_4), 4);
     324        Value * right_2_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_2), 2);
     325        Value * left_8_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_8), 8);
     326        Value * left_10_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_10), 10);
     327        Value * left_12_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_12), 12);
     328
     329        Value * mid = right_6_result;
     330        mid = iBuilder->simd_or(mid, right_4_result);
     331        mid = iBuilder->simd_or(mid, right_2_result);
     332        mid = iBuilder->simd_or(mid, left_8_result);
     333        mid = iBuilder->simd_or(mid, left_10_result);
     334        mid = iBuilder->simd_or(mid, left_12_result);
     335        Value * radix64pack = iBuilder->bitCast(mid);
     336
    310337        iBuilder->CreateBlockAlignedStore(radix64pack, radix64stream, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i)});
    311338    }
     
    313340    produced = iBuilder->CreateAdd(produced, ConstantInt::get(iBuilder->getSizeTy(), iBuilder->getStride()));
    314341    setProducedItemCount(self, produced);   
     342}
     343
     344void radix64Kernel::generateFinalBlockMethod() {
     345    auto savePoint = iBuilder->saveIP();
     346    Module * m = iBuilder->getModule();
     347    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
     348    BasicBlock * radix64_fb_entry = BasicBlock::Create(iBuilder->getContext(), "radix64_fb_entry", finalBlockFunction, 0);
     349    iBuilder->SetInsertPoint(radix64_fb_entry);
     350    BasicBlock * radix64_loop = BasicBlock::Create(iBuilder->getContext(), "radix64_loop", finalBlockFunction, 0);
     351    BasicBlock * loopExit = BasicBlock::Create(iBuilder->getContext(), "loopExit", finalBlockFunction, 0);
     352    BasicBlock * handleRemainFirstByte = BasicBlock::Create(iBuilder->getContext(), "handleRemainFirstByte", finalBlockFunction, 0);
     353    BasicBlock * handleRemainSecondByte = BasicBlock::Create(iBuilder->getContext(), "handleRemainSecondByte", finalBlockFunction, 0);
     354    BasicBlock * handleNoRemainSecondByte = BasicBlock::Create(iBuilder->getContext(), "handleNoRemainSecondByte", finalBlockFunction, 0);
     355    BasicBlock * fbExit = BasicBlock::Create(iBuilder->getContext(), "fbExit", finalBlockFunction, 0);
     356    // Final Block arguments: self, remaining.
     357    Function::arg_iterator args = finalBlockFunction->arg_begin();
     358    Value * self = &*(args++);
     359    Value * remainingBytes = &*(args++);
     360    Value * remainMod4 = iBuilder->CreateAnd(remainingBytes, ConstantInt::get(iBuilder->getSizeTy(), 3));
     361
     362    const unsigned PACK_SIZE = iBuilder->getStride()/8;
     363    Constant * packSize = ConstantInt::get(iBuilder->getSizeTy(), PACK_SIZE);
     364    Value * blockNo = getScalarField(self, blockNoScalar);
     365    Value * expandedstream_ptr = getStreamSetBlockPtr(self, "expandedStream", blockNo);
     366    Value * radix64stream_ptr = getStreamSetBlockPtr(self, "radix64stream", blockNo);
     367    Type * i8_t = iBuilder->getInt8Ty();
     368
     369    Value * step_right_6 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00C00000));
     370    Value * step_left_8 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x003F0000));
     371    Value * step_right_4 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x0000F000));
     372    Value * step_left_10 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00000F00));
     373    Value * step_right_2 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x000000FC));
     374    Value * step_left_12 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00000003));
     375
     376
     377    // Enter the loop only if there is at least one byte remaining to process.
     378    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainingBytes, ConstantInt::get(iBuilder->getSizeTy(), 0)), fbExit, radix64_loop);
     379
     380    iBuilder->SetInsertPoint(radix64_loop);
     381    PHINode * idx = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
     382    PHINode * loopRemain = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
     383    idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), radix64_fb_entry);
     384    loopRemain->addIncoming(remainingBytes, radix64_fb_entry);
     385
     386    Value * bytepack = iBuilder->CreateBlockAlignedLoad(expandedstream_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), idx});
     387    Value * right_6_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_6), 6);
     388    Value * right_4_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_4), 4);
     389    Value * right_2_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_2), 2);
     390    Value * left_8_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_8), 8);
     391    Value * left_10_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_10), 10);
     392    Value * left_12_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_12), 12);
     393
     394    Value * mid = right_6_result;
     395    mid = iBuilder->simd_or(mid, right_4_result);
     396    mid = iBuilder->simd_or(mid, right_2_result);
     397    mid = iBuilder->simd_or(mid, left_8_result);
     398    mid = iBuilder->simd_or(mid, left_10_result);
     399    mid = iBuilder->simd_or(mid, left_12_result);
     400    Value * radix64pack = iBuilder->bitCast(mid);
     401
     402    iBuilder->CreateBlockAlignedStore(radix64pack, radix64stream_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), idx});
     403
     404    Value* nextIdx = iBuilder->CreateAdd(idx, ConstantInt::get(iBuilder->getInt32Ty(), 1));
     405    idx->addIncoming(nextIdx, radix64_loop);
     406    Value* remainAfterLoop = iBuilder->CreateSub(loopRemain, packSize);
     407    loopRemain->addIncoming(remainAfterLoop, radix64_loop);
     408
     409    Value* continueLoop = iBuilder->CreateICmpULT(remainAfterLoop, packSize);
     410    iBuilder->CreateCondBr(continueLoop, radix64_loop, loopExit);
     411
     412    iBuilder->SetInsertPoint(loopExit);
     413    // All base64 data has been computed, but we may need to set one or two '=' padding bytes.
     414    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainMod4, ConstantInt::get(iBuilder->getSizeTy(), 0)), fbExit, handleRemainFirstByte);
     415    iBuilder->SetInsertPoint(handleRemainFirstByte);
     416    // At least one padding byte required.
     417
     418    Value * i8output_ptr = iBuilder->CreatePointerCast(radix64stream_ptr, iBuilder->getInt8PtrTy());
     419    Value * i8input_ptr = iBuilder->CreatePointerCast(expandedstream_ptr, iBuilder->getInt8PtrTy());
     420    Value * remainOutputStart = iBuilder->CreateSub(remainingBytes, remainMod4);
     421
     422    Value * firstRemainByte = iBuilder->CreateLoad(iBuilder->CreateGEP(i8input_ptr, {iBuilder->getInt32(0)}));
     423
     424    Value * first_move_right_2_mask = ConstantInt::get(iBuilder->getInt8Ty(), 0xFC);
     425    Value * first_output_byte = iBuilder->CreateLShr(iBuilder->CreateAnd(firstRemainByte, first_move_right_2_mask), 2);
     426
     427    Value * first_move_left_4_mask = ConstantInt::get(iBuilder->getInt8Ty(), 0x03);
     428    Value * first_move_left_4_byte = iBuilder->CreateShl(iBuilder->CreateAnd(firstRemainByte, first_move_left_4_mask), 4);
     429
     430    iBuilder->CreateStore(first_output_byte, iBuilder->CreateGEP(i8output_ptr, {iBuilder->CreateAdd(remainOutputStart, iBuilder->getInt64(0))}));
     431
     432
     433    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainMod4, ConstantInt::get(iBuilder->getSizeTy(), 1)), handleNoRemainSecondByte, handleRemainSecondByte);
     434    iBuilder->SetInsertPoint(handleRemainSecondByte);
     435
     436    Value * secondRemainByte = iBuilder->CreateLoad(iBuilder->CreateGEP(i8input_ptr, {iBuilder->getInt32(1)}));
     437    Value * second_move_right_4_mask = ConstantInt::get(iBuilder->getInt8Ty(), 0xF0);
     438    Value * second_move_right_4_byte = iBuilder->CreateLShr(iBuilder->CreateAnd(secondRemainByte, second_move_right_4_mask), 4);
     439    Value * second_output_byte = iBuilder->CreateOr(first_move_left_4_byte, second_move_right_4_byte);
     440    iBuilder->CreateStore(second_output_byte, iBuilder->CreateGEP(i8output_ptr, {iBuilder->CreateAdd(remainOutputStart, iBuilder->getInt64(1))}));
     441
     442    Value * second_move_left_2_mask = ConstantInt::get(iBuilder->getInt8Ty(), 0x0F);
     443    Value * second_move_left_2_byte = iBuilder->CreateShl(iBuilder->CreateAnd(secondRemainByte, second_move_left_2_mask), 2);
     444    iBuilder->CreateStore(second_move_left_2_byte, iBuilder->CreateGEP(i8output_ptr, {iBuilder->CreateAdd(remainOutputStart, iBuilder->getInt64(2))}));
     445    iBuilder->CreateBr(fbExit);
     446
     447    iBuilder->SetInsertPoint(handleNoRemainSecondByte);
     448    iBuilder->CreateStore(first_move_left_4_byte, iBuilder->CreateGEP(i8output_ptr, {iBuilder->CreateAdd(remainOutputStart, iBuilder->getInt64(1))}));
     449    iBuilder->CreateBr(fbExit);
     450
     451    iBuilder->SetInsertPoint(fbExit);
     452    Value * outputNumberAdd = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(remainMod4, ConstantInt::get(iBuilder->getSizeTy(), 0)), ConstantInt::get(iBuilder->getSizeTy(), 0), ConstantInt::get(iBuilder->getSizeTy(), 1));
     453    Value * produced = iBuilder->CreateAdd(getProducedItemCount(self), iBuilder->CreateAdd(remainingBytes, outputNumberAdd));
     454    setProducedItemCount(self, produced);
     455
     456    iBuilder->CreateRetVoid();
     457    iBuilder->restoreIP(savePoint);
     458}
     459
     460void radix64Kernel::generateDoBlockMethod() {
     461    auto savePoint = iBuilder->saveIP();
     462
     463    Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
     464
     465    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     466
     467    Value * self = getParameter(doBlockFunction, "self");
     468    Value * blockNo = getScalarField(self, blockNoScalar);
     469
     470    generateDoBlockLogic(self, blockNo);
     471
     472    iBuilder->CreateRetVoid();
     473    iBuilder->restoreIP(savePoint);
    315474}
    316475
     
    329488        // Strategy:
    330489        // 1. add ord('A') = 65 to all radix64 values, this sets the correct values for entries 0 to 25.
    331         // 2. add ord('a') - ord('A') = 32 to all values >25, this sets the correct values for entries 0 to 51
    332         // 3. subtract ord('a') - ord('0') = 49 to all values > 51, this sets the correct values for entries 0 to 61
    333         // 4. subtract ord('0') - ord('+') = 5 for all values = 62
    334         // 4. subtract ord('0') - ord('/') = 1 for all values = 63
     490        // 2. add ord('a') - ord('A') - (26 - 0) = 6 to all values >25, this sets the correct values for entries 0 to 51
     491        // 3. subtract ord('a') - ord('0') + (52 - 26) = 75 to all values > 51, this sets the correct values for entries 0 to 61
     492        // 4. subtract ord('0') - ord('+') + (62 - 52) = 15 for all values = 62
     493        // 5. subtract ord('0') - ord('/') + (63 - 62) = 2 for all values = 63
    335494        Value * t0_25 = iBuilder->simd_add(8, bytepack, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 'A')));
    336         Value * t0_51 = iBuilder->simd_add(8, t0_25, iBuilder->simd_and(mask_gt_25, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 32))));
    337         Value * t0_61 = iBuilder->simd_sub(8, t0_51, iBuilder->simd_and(mask_gt_51, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 49))));
    338         Value * t0_62 = iBuilder->simd_sub(8, t0_61, iBuilder->simd_and(mask_eq_62, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 5))));
    339         Value * base64pack = iBuilder->simd_sub(8, t0_62, iBuilder->simd_and(mask_eq_63, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 1))));
    340         iBuilder->CreateBlockAlignedStore(base64pack, base64stream_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i)});
     495        Value * t0_51 = iBuilder->simd_add(8, t0_25, iBuilder->simd_and(mask_gt_25, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 6))));
     496        Value * t0_61 = iBuilder->simd_sub(8, t0_51, iBuilder->simd_and(mask_gt_51, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 75))));
     497        Value * t0_62 = iBuilder->simd_sub(8, t0_61, iBuilder->simd_and(mask_eq_62, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 15))));
     498        Value * base64pack = iBuilder->simd_sub(8, t0_62, iBuilder->simd_and(mask_eq_63, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 2))));
     499        iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(base64pack), base64stream_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i)});
    341500    }
    342501    Value * produced = getProducedItemCount(self);
     
    389548    Value * mask_eq_63 = iBuilder->simd_eq(8, bytepack, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 63)));
    390549    Value * t0_25 = iBuilder->simd_add(8, bytepack, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 'A')));
    391     Value * t0_51 = iBuilder->simd_add(8, t0_25, iBuilder->simd_and(mask_gt_25, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 32))));
    392     Value * t0_61 = iBuilder->simd_sub(8, t0_51, iBuilder->simd_and(mask_gt_51, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 49))));
    393     Value * t0_62 = iBuilder->simd_sub(8, t0_61, iBuilder->simd_and(mask_eq_62, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 5))));
    394     Value * base64pack = iBuilder->simd_sub(8, t0_62, iBuilder->simd_and(mask_eq_63, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 1))));
    395     iBuilder->CreateBlockAlignedStore(base64pack, base64stream_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), idx});
     550    Value * t0_51 = iBuilder->simd_add(8, t0_25, iBuilder->simd_and(mask_gt_25, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 6))));
     551    Value * t0_61 = iBuilder->simd_sub(8, t0_51, iBuilder->simd_and(mask_gt_51, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 75))));
     552    Value * t0_62 = iBuilder->simd_sub(8, t0_61, iBuilder->simd_and(mask_eq_62, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 15))));
     553    Value * base64pack = iBuilder->simd_sub(8, t0_62, iBuilder->simd_and(mask_eq_63, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 2))));
     554    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(base64pack), base64stream_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), idx});
    396555    idx->addIncoming(iBuilder->CreateAdd(idx, ConstantInt::get(iBuilder->getInt32Ty(), 1)), base64_loop);
    397     loopRemain->addIncoming(iBuilder->CreateSub(loopRemain, packSize), base64_loop);
     556    Value* remainAfterLoop = iBuilder->CreateSub(loopRemain, packSize);
     557    loopRemain->addIncoming(remainAfterLoop, base64_loop);
     558
     559    Value* continueLoop = iBuilder->CreateICmpULT(remainAfterLoop, packSize);
     560    iBuilder->CreateCondBr(continueLoop, base64_loop, loopExit);
     561
    398562    iBuilder->SetInsertPoint(loopExit);
    399     // All base64 data has been computed, but we may need to set one or two '=' padding bytes.
    400563    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(padBytes, ConstantInt::get(iBuilder->getSizeTy(), 0)), fbExit, doPadding);
    401564    iBuilder->SetInsertPoint(doPadding);
    402     // At least one padding byte required.
    403565    Value * i8output_ptr = iBuilder->CreatePointerCast(base64stream_ptr, iBuilder->getInt8PtrTy());
    404     iBuilder->CreateStore(iBuilder->CreateGEP(i8output_ptr, {remainingBytes}), ConstantInt::get(iBuilder->getInt8Ty(), '='));
     566    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt8Ty(), '='), iBuilder->CreateGEP(i8output_ptr, {remainingBytes}));
    405567    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainMod4, ConstantInt::get(iBuilder->getSizeTy(), 3)), fbExit, doPadding2);
    406568    iBuilder->SetInsertPoint(doPadding2);
    407     // One more padding byte required.
    408569    Value * finalPadPos = iBuilder->CreateAdd(remainingBytes, ConstantInt::get(iBuilder->getSizeTy(), 1));
    409     iBuilder->CreateStore(iBuilder->CreateGEP(i8output_ptr, {finalPadPos}), ConstantInt::get(iBuilder->getInt8Ty(), '='));
     570    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt8Ty(), '='), iBuilder->CreateGEP(i8output_ptr, {finalPadPos}));
    410571    iBuilder->CreateBr(fbExit);
    411572    iBuilder->SetInsertPoint(fbExit);
    412573    Value * produced = iBuilder->CreateAdd(getProducedItemCount(self), iBuilder->CreateAdd(remainingBytes, padBytes));
    413574    setProducedItemCount(self, produced);
     575
     576
    414577    iBuilder->CreateRetVoid();
    415578    iBuilder->restoreIP(savePoint);
    416579}
    417 }
     580
     581void base64Kernel::generateDoBlockMethod() {
     582    auto savePoint = iBuilder->saveIP();
     583
     584    Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
     585
     586    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     587
     588    Value * self = getParameter(doBlockFunction, "self");
     589    Value * blockNo = getScalarField(self, blockNoScalar);
     590
     591    generateDoBlockLogic(self, blockNo);
     592
     593    iBuilder->CreateRetVoid();
     594    iBuilder->restoreIP(savePoint);
     595}
     596
     597}
  • icGREP/icgrep-devel/icgrep/kernels/radix64.h

    r5219 r5232  
    3939   
    4040    radix64Kernel(IDISA::IDISA_Builder * iBuilder) :
    41     KernelBuilder(iBuilder, "expand3_4",
     41    KernelBuilder(iBuilder, "radix64",
    4242                  {Binding{iBuilder->getStreamSetTy(1, 8), "expandedStream"}},
    4343                  {Binding{iBuilder->getStreamSetTy(1, 8), "radix64stream"}},
    4444                  {}, {}, {}) {}
    45    
     45
    4646private:
    47     void generateDoBlockLogic() override;
    48    
     47    virtual void generateDoBlockLogic(Value * self, Value * blockNo) override;
     48    virtual void generateDoBlockMethod() override;
     49    virtual void generateFinalBlockMethod() override;
    4950};
    5051
     
    5354   
    5455    base64Kernel(IDISA::IDISA_Builder * iBuilder) :
    55     KernelBuilder(iBuilder, "expand3_4",
     56    KernelBuilder(iBuilder, "base64",
    5657                  {Binding{iBuilder->getStreamSetTy(1, 8), "radix64stream"}},
    5758                  {Binding{iBuilder->getStreamSetTy(1, 8), "base64stream"}},
     
    5960   
    6061private:
    61     void generateDoBlockLogic() override;
    62     void generateFinalBlockMethod() override;
     62    virtual void generateDoBlockLogic(Value * self, Value * blockNo) override;
     63    virtual void generateFinalBlockMethod() override;
     64    virtual void generateDoBlockMethod() override;
    6365   
    6466};
Note: See TracChangeset for help on using the changeset viewer.