Changeset 5219 for icGREP


Ignore:
Timestamp:
Nov 25, 2016, 10:46:14 AM (3 years ago)
Author:
cameron
Message:

radix64 kernels in progress

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
2 moved

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5217 r5219  
    281281}
    282282
    283 }
     283   
     284// Radix 64 determination, converting 3 bytes to 4 6-bit values.
     285//
     286//  00000000|zyxwvuts|rqpmnlkj|hgfedcba    Original 3 bytes of binary data in a 32-bit field
     287//                        nlkj|hgfedcba    bits to move 0 positions initially
     288//           zyxwvuts|rqpm    |            bits to move 4 positions
     289//      zyxw|vutsrqpm|        |            shift forward 4
     290//      zyxw|vutsrqpm|    nlkj|hgfedcba    combine with bits moving 0
     291//          |  tsrqpm|        |  fedcba    bits to move 0 positions in second step
     292//      zyxw|vu      |    nlkj|hg          bits to move 2 positions in second stap
     293//    zyxwvu|        |  nlkjhg|            shift forward 2
     294//    zyxwvu|  tsrqpm|  nlkjhg|  fedcba    The 4 radix64 values have been computed.
     295
     296void radix64Kernel::generateDoBlockLogic(Value * self, Value * blockNo) {
     297    Value * expandedStream = getStreamSetBlockPtr(self, "expandedStream", blockNo);
     298    Value * radix64stream = getStreamSetBlockPtr(self, "radix64stream", blockNo);
     299    Value * step1_bits_to_move4 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x0003FFC0));
     300    Value * step1_bits_to_stay = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00000FFF));
     301    Value * step2_bits_to_move2 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x006F006F));
     302    Value * step2_bits_to_stay = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x0FC00FC0));
     303   
     304    for (unsigned i = 0; i < 8; i++) {
     305        Value * bytepack = iBuilder->CreateBlockAlignedLoad(expandedStream, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i)});
     306        Value * move4 = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step1_bits_to_move4), 4);
     307        Value * step1 = iBuilder->simd_or(move4, iBuilder->simd_and(bytepack, step1_bits_to_stay));
     308        Value * move2 = iBuilder->simd_slli(32, iBuilder->simd_and(step1, step2_bits_to_move2), 2);
     309        Value * radix64pack = iBuilder->bitCast(iBuilder->simd_or(move2, iBuilder->simd_and(step1, step2_bits_to_stay)));
     310        iBuilder->CreateBlockAlignedStore(radix64pack, radix64stream, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i)});
     311    }
     312    Value * produced = getProducedItemCount(self);
     313    produced = iBuilder->CreateAdd(produced, ConstantInt::get(iBuilder->getSizeTy(), iBuilder->getStride()));
     314    setProducedItemCount(self, produced);   
     315}
     316
     317
     318void base64Kernel::generateDoBlockLogic(Value * self, Value * blockNo) {
     319    Value * radix64stream_ptr = getStreamSetBlockPtr(self, "radix64stream", blockNo);
     320    Value * base64stream_ptr = getStreamSetBlockPtr(self, "base64stream", blockNo);
     321    Type * i8_t = iBuilder->getInt8Ty();
     322   
     323    for (unsigned i = 0; i < 8; i++) {
     324        Value * bytepack = iBuilder->CreateBlockAlignedLoad(radix64stream_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i)});
     325        Value * mask_gt_25 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 25)));
     326        Value * mask_gt_51 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 51)));
     327        Value * mask_eq_62 = iBuilder->simd_eq(8, bytepack, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 62)));
     328        Value * mask_eq_63 = iBuilder->simd_eq(8, bytepack, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 63)));
     329        // Strategy:
     330        // 1. add ord('A') = 65 to all radix64 values, this sets the correct values for entries 0 to 25.
     331        // 2. add ord('a') - ord('A') = 32 to all values >25, this sets the correct values for entries 0 to 51
     332        // 3. subtract ord('a') - ord('0') = 49 to all values > 51, this sets the correct values for entries 0 to 61
     333        // 4. subtract ord('0') - ord('+') = 5 for all values = 62
     334        // 4. subtract ord('0') - ord('/') = 1 for all values = 63
     335        Value * t0_25 = iBuilder->simd_add(8, bytepack, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 'A')));
     336        Value * t0_51 = iBuilder->simd_add(8, t0_25, iBuilder->simd_and(mask_gt_25, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 32))));
     337        Value * t0_61 = iBuilder->simd_sub(8, t0_51, iBuilder->simd_and(mask_gt_51, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 49))));
     338        Value * t0_62 = iBuilder->simd_sub(8, t0_61, iBuilder->simd_and(mask_eq_62, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 5))));
     339        Value * base64pack = iBuilder->simd_sub(8, t0_62, iBuilder->simd_and(mask_eq_63, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 1))));
     340        iBuilder->CreateBlockAlignedStore(base64pack, base64stream_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i)});
     341    }
     342    Value * produced = getProducedItemCount(self);
     343    produced = iBuilder->CreateAdd(produced, ConstantInt::get(iBuilder->getSizeTy(), iBuilder->getStride()));
     344    setProducedItemCount(self, produced);   
     345}
     346
     347
     348// Special processing for the base 64 format.   The output must always contain a multiple
     349// of 4 bytes.   When the number of radix 64 values is not a multiple of 4
     350// number of radix 64 values
     351void base64Kernel::generateFinalBlockMethod() {
     352    auto savePoint = iBuilder->saveIP();
     353    Module * m = iBuilder->getModule();
     354    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
     355    BasicBlock * base64_fb_entry = BasicBlock::Create(iBuilder->getContext(), "base64_fb_entry", finalBlockFunction, 0);
     356    iBuilder->SetInsertPoint(base64_fb_entry);
     357    BasicBlock * base64_loop = BasicBlock::Create(iBuilder->getContext(), "base64_loop", finalBlockFunction, 0);
     358    BasicBlock * loopExit = BasicBlock::Create(iBuilder->getContext(), "loopExit", finalBlockFunction, 0);
     359    BasicBlock * doPadding = BasicBlock::Create(iBuilder->getContext(), "doPadding", finalBlockFunction, 0);
     360    BasicBlock * doPadding2 = BasicBlock::Create(iBuilder->getContext(), "doPadding2", finalBlockFunction, 0);
     361    BasicBlock * fbExit = BasicBlock::Create(iBuilder->getContext(), "fbExit", finalBlockFunction, 0);
     362    // Final Block arguments: self, remaining.
     363    Function::arg_iterator args = finalBlockFunction->arg_begin();
     364    Value * self = &*(args++);
     365    Value * remainingBytes = &*(args++);
     366    Value * remainMod4 = iBuilder->CreateAnd(remainingBytes, ConstantInt::get(iBuilder->getSizeTy(), 3));
     367    Value * padBytes = iBuilder->CreateSub(ConstantInt::get(iBuilder->getSizeTy(), 4), remainMod4);
     368    padBytes = iBuilder->CreateAnd(padBytes, ConstantInt::get(iBuilder->getSizeTy(), 3));
     369
     370    const unsigned PACK_SIZE = iBuilder->getStride()/8;
     371    Constant * packSize = ConstantInt::get(iBuilder->getSizeTy(), PACK_SIZE);
     372    Value * blockNo = getScalarField(self, blockNoScalar);
     373    Value * radix64stream_ptr = getStreamSetBlockPtr(self, "radix64stream", blockNo);
     374    Value * base64stream_ptr = getStreamSetBlockPtr(self, "base64stream", blockNo);
     375    Type * i8_t = iBuilder->getInt8Ty();
     376   
     377    // Enter the loop only if there is at least one byte remaining to process.
     378    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainingBytes, ConstantInt::get(iBuilder->getSizeTy(), 0)), fbExit, base64_loop);
     379   
     380    iBuilder->SetInsertPoint(base64_loop);
     381    PHINode * idx = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
     382    PHINode * loopRemain = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
     383    idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), base64_fb_entry);
     384    loopRemain->addIncoming(remainingBytes, base64_fb_entry);
     385    Value * bytepack = iBuilder->CreateBlockAlignedLoad(radix64stream_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), idx});
     386    Value * mask_gt_25 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 25)));
     387    Value * mask_gt_51 = iBuilder->simd_ugt(8, bytepack, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 51)));
     388    Value * mask_eq_62 = iBuilder->simd_eq(8, bytepack, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 62)));
     389    Value * mask_eq_63 = iBuilder->simd_eq(8, bytepack, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 63)));
     390    Value * t0_25 = iBuilder->simd_add(8, bytepack, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 'A')));
     391    Value * t0_51 = iBuilder->simd_add(8, t0_25, iBuilder->simd_and(mask_gt_25, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 32))));
     392    Value * t0_61 = iBuilder->simd_sub(8, t0_51, iBuilder->simd_and(mask_gt_51, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 49))));
     393    Value * t0_62 = iBuilder->simd_sub(8, t0_61, iBuilder->simd_and(mask_eq_62, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 5))));
     394    Value * base64pack = iBuilder->simd_sub(8, t0_62, iBuilder->simd_and(mask_eq_63, iBuilder->simd_fill(8, ConstantInt::get(i8_t, 1))));
     395    iBuilder->CreateBlockAlignedStore(base64pack, base64stream_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), idx});
     396    idx->addIncoming(iBuilder->CreateAdd(idx, ConstantInt::get(iBuilder->getInt32Ty(), 1)), base64_loop);
     397    loopRemain->addIncoming(iBuilder->CreateSub(loopRemain, packSize), base64_loop);
     398    iBuilder->SetInsertPoint(loopExit);
     399    // All base64 data has been computed, but we may need to set one or two '=' padding bytes.
     400    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(padBytes, ConstantInt::get(iBuilder->getSizeTy(), 0)), fbExit, doPadding);
     401    iBuilder->SetInsertPoint(doPadding);
     402    // At least one padding byte required.
     403    Value * i8output_ptr = iBuilder->CreatePointerCast(base64stream_ptr, iBuilder->getInt8PtrTy());
     404    iBuilder->CreateStore(iBuilder->CreateGEP(i8output_ptr, {remainingBytes}), ConstantInt::get(iBuilder->getInt8Ty(), '='));
     405    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainMod4, ConstantInt::get(iBuilder->getSizeTy(), 3)), fbExit, doPadding2);
     406    iBuilder->SetInsertPoint(doPadding2);
     407    // One more padding byte required.
     408    Value * finalPadPos = iBuilder->CreateAdd(remainingBytes, ConstantInt::get(iBuilder->getSizeTy(), 1));
     409    iBuilder->CreateStore(iBuilder->CreateGEP(i8output_ptr, {finalPadPos}), ConstantInt::get(iBuilder->getInt8Ty(), '='));
     410    iBuilder->CreateBr(fbExit);
     411    iBuilder->SetInsertPoint(fbExit);
     412    Value * produced = iBuilder->CreateAdd(getProducedItemCount(self), iBuilder->CreateAdd(remainingBytes, padBytes));
     413    setProducedItemCount(self, produced);
     414    iBuilder->CreateRetVoid();
     415    iBuilder->restoreIP(savePoint);
     416}
     417}
  • icGREP/icgrep-devel/icgrep/kernels/radix64.h

    r5217 r5219  
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 */
    5 #ifndef EXPAND3_4_H
    6 #define EXPAND3_4_H
     5#ifndef RADIX64_H
     6#define RADIX64_H
    77
    88#include "streamset.h"
     
    2525    expand3_4Kernel(IDISA::IDISA_Builder * iBuilder) :
    2626    KernelBuilder(iBuilder, "expand3_4",
    27                   {Binding{parabix::StreamSetType(iBuilder, 1, parabix::i8), "sourceStream"}},
    28                   {Binding{parabix::StreamSetType(iBuilder, 1, parabix::i8), "expandedStream"}},
    29                   {}, {},
    30                   {Binding{iBuilder->fwVectorType(parabix::i8), "pendingPack"}}) {}
    31    
     27                  {Binding{iBuilder->getStreamSetTy(1, 8), "sourceStream"}},
     28                  {Binding{iBuilder->getStreamSetTy(1, 8), "expandedStream"}},
     29                  {}, {}, {}) {}
    3230   
    3331private:
     
    3735};
    3836
     37class radix64Kernel : public KernelBuilder {
     38public:
    3939   
     40    radix64Kernel(IDISA::IDISA_Builder * iBuilder) :
     41    KernelBuilder(iBuilder, "expand3_4",
     42                  {Binding{iBuilder->getStreamSetTy(1, 8), "expandedStream"}},
     43                  {Binding{iBuilder->getStreamSetTy(1, 8), "radix64stream"}},
     44                  {}, {}, {}) {}
     45   
     46private:
     47    void generateDoBlockLogic() override;
     48   
     49};
     50
     51class base64Kernel : public KernelBuilder {
     52public:
     53   
     54    base64Kernel(IDISA::IDISA_Builder * iBuilder) :
     55    KernelBuilder(iBuilder, "expand3_4",
     56                  {Binding{iBuilder->getStreamSetTy(1, 8), "radix64stream"}},
     57                  {Binding{iBuilder->getStreamSetTy(1, 8), "base64stream"}},
     58                  {}, {}, {}) {}
     59   
     60private:
     61    void generateDoBlockLogic() override;
     62    void generateFinalBlockMethod() override;
     63   
     64};
    4065
    4166}
Note: See TracChangeset for help on using the changeset viewer.