Changeset 6089


Ignore:
Timestamp:
Jun 14, 2018, 10:41:45 AM (4 months ago)
Author:
cameron
Message:

Little-endian/big-endian bit numbering options; default is little-endian

Location:
icGREP/icgrep-devel/icgrep
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/cc/alphabet.h

    r5823 r6089  
    1313
    1414namespace cc {
     15   
     16// A set of parallel bit streams may be numbered using either the
     17// LittleEndian (right-to-left) or BigEndian (left-to-right) convention.
     18enum class BitNumbering {LittleEndian, BigEndian};
     19
     20inline std::string numberingSuffix(BitNumbering numbering) {
     21    return (numbering == BitNumbering::LittleEndian) ? "-LE" : "-BE";
     22}
     23
    1524//
    1625// An Alphabet is the universe of characters used to form strings in
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.cpp

    r6069 r6089  
    2626    : mBuilder(scope) {
    2727    }
    28    
    29 
    30 Parabix_CC_Compiler::Parabix_CC_Compiler(pablo::PabloBlock * scope, std::vector<pablo::PabloAST *> basisBitSet)
     28
     29Parabix_CC_Compiler::Parabix_CC_Compiler(pablo::PabloBlock * scope, std::vector<pablo::PabloAST *> basisBitSet, cc::BitNumbering basisSetNumbering)
    3130: CC_Compiler(scope)
    3231, mEncodingBits(basisBitSet.size())
     32, mBasisSetNumbering(basisSetNumbering)
    3333, mBasisBit(basisBitSet) {
    3434    mEncodingMask = (static_cast<unsigned>(1) << mEncodingBits) - static_cast<unsigned>(1);
     
    224224    llvm::report_fatal_error(std::string("Invalid Character Set Range: [") + std::to_string(lo) + "," + std::to_string(hi) + "]");
    225225}
    226 //#define LITTLE_ENDIAN_BIT_NUMBERING
    227226
    228227inline PabloAST * Parabix_CC_Compiler::getBasisVar(const unsigned i) const {
    229228    assert (i < mEncodingBits);
    230 #ifndef LITTLE_ENDIAN_BIT_NUMBERING
    231     const unsigned index = mEncodingBits - i - 1; assert (index < mEncodingBits);
    232 #else
    233     const unsigned index = i;
    234 #endif
    235     assert (mBasisBit[index]);
    236     return mBasisBit[index];
     229    if (mBasisSetNumbering == cc::BitNumbering::BigEndian)
     230        return mBasisBit[mEncodingBits - i - 1];
     231    else return mBasisBit[i];
    237232}
    238233
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.h

    r5872 r6089  
    1313#include <kernels/interface.h>
    1414#include <string>
     15#include <cc/alphabet.h>
    1516
    1617namespace cc {
     
    4041public:
    4142   
    42     Parabix_CC_Compiler(pablo::PabloBlock * scope, std::vector<pablo::PabloAST *> basisBitSet);
     43    Parabix_CC_Compiler(pablo::PabloBlock * scope, std::vector<pablo::PabloAST *> basisBitSet, cc::BitNumbering b = BitNumbering::LittleEndian);
    4344   
    4445    pablo::PabloAST * compileCC(const re::CC *cc) override;
     
    7273private:   
    7374    const unsigned                  mEncodingBits;
     75    BitNumbering               mBasisSetNumbering;
    7476    std::vector<pablo::PabloAST *>  mBasisBit;
    7577    unsigned                        mEncodingMask;
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r6004 r6089  
    1818    s0 = iBuilder->esimd_mergel(8, t1, t0);
    1919}
    20 
    21 inline void p2s(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p[], Value * s[]) {
     20#define LITTLE_ENDIAN_BIT_NUMBERING
     21
     22inline void p2s(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p[], Value * s[], cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian) {
    2223    Value * bit00004444[2];
    2324    Value * bit22226666[2];
    2425    Value * bit11115555[2];
    2526    Value * bit33337777[2];
    26     p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
    27     p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
    28     p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
    29     p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
     27    if (basisNumbering == cc::BitNumbering::BigEndian) {
     28        p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
     29        p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
     30        p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
     31        p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
     32    }  else {
     33        p2s_step(iBuilder, p[7], p[3], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
     34        p2s_step(iBuilder, p[6], p[2], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
     35        p2s_step(iBuilder, p[5], p[1], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
     36        p2s_step(iBuilder, p[4], p[0], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
     37    }
    3038    Value * bit00224466[4];
    3139    Value * bit11335577[4];
     
    4553    }
    4654    Value * s_bytepack[8];
    47     p2s(b, p_bitblock, s_bytepack);
     55    p2s(b, p_bitblock, s_bytepack, mBasisSetNumbering);
    4856    for (unsigned j = 0; j < 8; ++j) {
    4957        b->storeOutputStreamPack("byteStream", b->getInt32(0), b->getInt32(j), s_bytepack[j]);
     
    7078    }
    7179    Value * bytePack[8];
    72     p2s(b, basisBits, bytePack);
     80    p2s(b, basisBits, bytePack, mBasisSetNumbering);
    7381
    7482    Value * const fieldCounts = b->loadInputStreamBlock("fieldCounts", b->getInt32(0));
     
    9199    Value * hi_input[8];
    92100    for (unsigned j = 0; j < 8; ++j) {
    93         hi_input[j] = b->loadInputStreamBlock("basisBits", b->getInt32(j));
     101        const unsigned idx = mBasisSetNumbering == cc::BitNumbering::LittleEndian ? j + 8 : j;
     102        hi_input[j] = b->loadInputStreamBlock("basisBits", b->getInt32(idx));
    94103    }
    95104    Value * hi_bytes[8];
    96     p2s(b, hi_input, hi_bytes);
     105    p2s(b, hi_input, hi_bytes, mBasisSetNumbering);
    97106    Value * lo_input[8];
    98107    for (unsigned j = 0; j < 8; ++j) {
    99         lo_input[j] = b->loadInputStreamBlock("basisBits", b->getInt32(j + 8));
     108        const unsigned idx = mBasisSetNumbering == cc::BitNumbering::LittleEndian ? j : j + 8;
     109        lo_input[j] = b->loadInputStreamBlock("basisBits", b->getInt32(idx));
    100110    }
    101111    Value * lo_bytes[8];
    102     p2s(b, lo_input, lo_bytes);
     112    p2s(b, lo_input, lo_bytes, mBasisSetNumbering);
    103113    for (unsigned j = 0; j < 8; ++j) {
    104114        Value * merge0 = b->bitCast(b->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
     
    118128    Value * hi_input[8];
    119129    for (unsigned j = 0; j < 8; ++j) {
    120         hi_input[j] = b->loadInputStreamBlock("basisBits", b->getInt32(j));
     130        const unsigned idx = mBasisSetNumbering == cc::BitNumbering::LittleEndian ? j + 8 : j;
     131        hi_input[j] = b->loadInputStreamBlock("basisBits", b->getInt32(idx));
    121132    }
    122133    Value * hi_bytes[8];
    123     p2s(b, hi_input, hi_bytes);
     134    p2s(b, hi_input, hi_bytes, mBasisSetNumbering);
    124135
    125136    Value * lo_input[8];
    126137    for (unsigned j = 0; j < 8; ++j) {
    127         lo_input[j] = b->loadInputStreamBlock("basisBits", b->getInt32(j + 8));
     138        const unsigned idx = mBasisSetNumbering == cc::BitNumbering::LittleEndian ? j : j + 8;
     139        lo_input[j] = b->loadInputStreamBlock("basisBits", b->getInt32(idx));
    128140    }
    129141    Value * lo_bytes[8];
    130     p2s(b, lo_input, lo_bytes);
     142    p2s(b, lo_input, lo_bytes, mBasisSetNumbering);
    131143
    132144    Value * const fieldCounts = b->loadInputStreamBlock("fieldCounts", b->getInt32(0));
     
    160172}
    161173
    162 P2SKernel::P2SKernel(const std::unique_ptr<kernel::KernelBuilder> & b)
    163 : BlockOrientedKernel("p2s",
     174P2SKernel::P2SKernel(const std::unique_ptr<kernel::KernelBuilder> & b, cc::BitNumbering numbering)
     175    : BlockOrientedKernel("p2s" + cc::numberingSuffix(numbering),
    164176              {Binding{b->getStreamSetTy(8, 1), "basisBits"}},
    165177              {Binding{b->getStreamSetTy(1, 8), "byteStream"}},
    166               {}, {}, {}) {
    167 }
    168 
    169 P2SKernelWithCompressedOutput::P2SKernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b)
    170 : BlockOrientedKernel("p2s_compress",
     178              {}, {}, {}),
     179    mBasisSetNumbering(numbering) {
     180}
     181
     182P2SKernelWithCompressedOutput::P2SKernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b, cc::BitNumbering numbering)
     183: BlockOrientedKernel("p2s_compress" + cc::numberingSuffix(numbering),
    171184              {Binding{b->getStreamSetTy(8, 1), "basisBits"}, Binding{b->getStreamSetTy(1, 1), "fieldCounts"}},
    172185              {Binding{b->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)}},
    173               {}, {}, {}) {
    174 }
    175 
    176 P2S16Kernel::P2S16Kernel(const std::unique_ptr<kernel::KernelBuilder> & b)
    177 : BlockOrientedKernel("p2s_16",
     186                      {}, {}, {}),
     187    mBasisSetNumbering(numbering) {
     188}
     189
     190P2S16Kernel::P2S16Kernel(const std::unique_ptr<kernel::KernelBuilder> & b, cc::BitNumbering numbering)
     191: BlockOrientedKernel("p2s_16" + cc::numberingSuffix(numbering),
    178192              {Binding{b->getStreamSetTy(16, 1), "basisBits"}},
    179193              {Binding{b->getStreamSetTy(1, 16), "i16Stream"}},
    180               {}, {}, {}) {
    181 }
    182 
    183 
    184 P2S16KernelWithCompressedOutput::P2S16KernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b)
    185 : BlockOrientedKernel("p2s_16_compress",
     194                      {}, {}, {}),
     195    mBasisSetNumbering(numbering) {
     196}
     197
     198
     199P2S16KernelWithCompressedOutput::P2S16KernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b, cc::BitNumbering numbering)
     200: BlockOrientedKernel("p2s_16_compress" + cc::numberingSuffix(numbering),
    186201              {Binding{b->getStreamSetTy(16, 1), "basisBits"}, Binding{b->getStreamSetTy(1, 1), "fieldCounts"}},
    187202              {Binding{b->getStreamSetTy(1, 16), "i16Stream", BoundedRate(0, 1)}},
    188203              {},
    189204              {},
    190               {}) {
    191 
    192 }
    193    
    194    
    195 }
     205              {}),
     206    mBasisSetNumbering(numbering) {
     207}
     208   
     209   
     210}
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.h

    r5755 r6089  
    66#define P2S_KERNEL_H
    77
     8#include <cc/alphabet.h>
    89#include "kernel.h"  // for KernelBuilder
     10
    911namespace IDISA { class IDISA_Builder; }
    1012
    1113namespace kernel {
    1214
    13    
    1415class P2SKernel final : public BlockOrientedKernel {
    1516public:
    16     P2SKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
     17    P2SKernel(const std::unique_ptr<kernel::KernelBuilder> & b, cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian);
    1718    bool isCachable() const override { return true; }
    1819    bool hasSignature() const override { return false; }
    1920private:
     21    cc::BitNumbering mBasisSetNumbering;
    2022    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    2123};
     
    2325class P2SKernelWithCompressedOutput final : public BlockOrientedKernel {
    2426public:
    25     P2SKernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b);
     27    P2SKernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b, cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian);
    2628    bool isCachable() const override { return true; }
    2729    bool hasSignature() const override { return false; }
    2830private:
     31    cc::BitNumbering mBasisSetNumbering;
    2932    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    3033};
     
    3235class P2S16Kernel final : public BlockOrientedKernel {
    3336public:
    34     P2S16Kernel(const std::unique_ptr<kernel::KernelBuilder> & b);
     37    P2S16Kernel(const std::unique_ptr<kernel::KernelBuilder> & b, cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian);
    3538    bool isCachable() const override { return true; }
    3639    bool hasSignature() const override { return false; }
    3740private:
     41    cc::BitNumbering mBasisSetNumbering;
    3842    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    3943};
     
    4145class P2S16KernelWithCompressedOutput final : public BlockOrientedKernel {
    4246public:
    43     P2S16KernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b);
     47    P2S16KernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b, cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian);
    4448    bool isCachable() const override { return true; }
    4549    bool hasSignature() const override { return false; }
    4650private:
     51    cc::BitNumbering mBasisSetNumbering;
    4752    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    4853};
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r6069 r6089  
    3333    p1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, t0, shift), t1);
    3434}
    35 //#define LITTLE_ENDIAN_BIT_NUMBERING
    36 
    37 void s2p(const std::unique_ptr<KernelBuilder> & iBuilder, Value * input[], Value * output[]) {
    38     Value * bit00224466[4];
    39     Value * bit11335577[4];
     35
     36void s2p(const std::unique_ptr<KernelBuilder> & iBuilder, Value * input[], Value * output[], cc::BitNumbering basisNumbering) {
     37    // Little-endian bit numbering is used for variable names.
     38    Value * bit66442200[4];
     39    Value * bit77553311[4];
    4040
    4141    for (unsigned i = 0; i < 4; i++) {
    4242        Value * s0 = input[2 * i];
    4343        Value * s1 = input[2 * i + 1];
    44         s2p_step(iBuilder, s0, s1, iBuilder->simd_himask(2), 1, bit00224466[i], bit11335577[i]);
    45     }
    46     Value * bit00004444[2];
    47     Value * bit22226666[2];
    48     Value * bit11115555[2];
    49     Value * bit33337777[2];
     44        s2p_step(iBuilder, s0, s1, iBuilder->simd_himask(2), 1, bit77553311[i], bit66442200[i]);
     45    }
     46    Value * bit44440000[2];
     47    Value * bit66662222[2];
     48    Value * bit55551111[2];
     49    Value * bit77773333[2];
    5050    for (unsigned j = 0; j<2; j++) {
    51         s2p_step(iBuilder, bit00224466[2*j], bit00224466[2*j+1],
    52                  iBuilder->simd_himask(4), 2, bit00004444[j], bit22226666[j]);
    53         s2p_step(iBuilder, bit11335577[2*j], bit11335577[2*j+1],
    54                  iBuilder->simd_himask(4), 2, bit11115555[j], bit33337777[j]);
    55     }
    56 #ifndef LITTLE_ENDIAN_BIT_NUMBERING
    57     s2p_step(iBuilder, bit00004444[0], bit00004444[1], iBuilder->simd_himask(8), 4, output[0], output[4]);
    58     s2p_step(iBuilder, bit11115555[0], bit11115555[1], iBuilder->simd_himask(8), 4, output[1], output[5]);
    59     s2p_step(iBuilder, bit22226666[0], bit22226666[1], iBuilder->simd_himask(8), 4, output[2], output[6]);
    60     s2p_step(iBuilder, bit33337777[0], bit33337777[1], iBuilder->simd_himask(8), 4, output[3], output[7]);
    61 #else
    62     s2p_step(iBuilder, bit00004444[0], bit00004444[1], iBuilder->simd_himask(8), 4, output[7], output[3]);
    63     s2p_step(iBuilder, bit11115555[0], bit11115555[1], iBuilder->simd_himask(8), 4, output[6], output[2]);
    64     s2p_step(iBuilder, bit22226666[0], bit22226666[1], iBuilder->simd_himask(8), 4, output[5], output[1]);
    65     s2p_step(iBuilder, bit33337777[0], bit33337777[1], iBuilder->simd_himask(8), 4, output[4], output[0]);
    66 #endif   
     51        s2p_step(iBuilder, bit66442200[2*j], bit66442200[2*j+1],
     52                 iBuilder->simd_himask(4), 2, bit66662222[j], bit44440000[j]);
     53        s2p_step(iBuilder, bit77553311[2*j], bit77553311[2*j+1],
     54                 iBuilder->simd_himask(4), 2, bit77773333[j], bit55551111[j]);
     55    }
     56    if (basisNumbering == cc::BitNumbering::LittleEndian) {
     57        s2p_step(iBuilder, bit44440000[0], bit44440000[1], iBuilder->simd_himask(8), 4, output[4], output[0]);
     58        s2p_step(iBuilder, bit55551111[0], bit55551111[1], iBuilder->simd_himask(8), 4, output[5], output[1]);
     59        s2p_step(iBuilder, bit66662222[0], bit66662222[1], iBuilder->simd_himask(8), 4, output[6], output[2]);
     60        s2p_step(iBuilder, bit77773333[0], bit77773333[1], iBuilder->simd_himask(8), 4, output[7], output[3]);
     61    }
     62    else {
     63        s2p_step(iBuilder, bit44440000[0], bit44440000[1], iBuilder->simd_himask(8), 4, output[3], output[7]);
     64        s2p_step(iBuilder, bit55551111[0], bit55551111[1], iBuilder->simd_himask(8), 4, output[2], output[6]);
     65        s2p_step(iBuilder, bit66662222[0], bit66662222[1], iBuilder->simd_himask(8), 4, output[1], output[5]);
     66        s2p_step(iBuilder, bit77773333[0], bit77773333[1], iBuilder->simd_himask(8), 4, output[0], output[4]);
     67    }
    6768}
    6869
    6970/* Alternative transposition model, but small field width packs are problematic. */
    7071#if 0
    71 void s2p_ideal(const std::unique_ptr<KernelBuilder> & iBuilder, Value * input[], Value * output[]) {
     72void s2p_ideal(const std::unique_ptr<KernelBuilder> & iBuilder, Value * input[], Value * output[], cc::BitNumbering basisNumbering) {
    7273    Value * hi_nybble[4];
    7374    Value * lo_nybble[4];
     
    7879        lo_nybble[i] = iBuilder->hsimd_packl(8, s0, s1);
    7980    }
    80     Value * pair01[2];
    81     Value * pair23[2];
    82     Value * pair45[2];
    83     Value * pair67[2];
     81    Value * pair76[2];
     82    Value * pair54[2];
     83    Value * pair32[2];
     84    Value * pair10[2];
    8485    for (unsigned i = 0; i<2; i++) {
    85         pair01[i] = iBuilder->hsimd_packh(4, hi_nybble[2*i], hi_nybble[2*i+1]);
    86         pair23[i] = iBuilder->hsimd_packl(4, hi_nybble[2*i], hi_nybble[2*i+1]);
    87         pair45[i] = iBuilder->hsimd_packh(4, lo_nybble[2*i], lo_nybble[2*i+1]);
    88         pair67[i] = iBuilder->hsimd_packl(4, lo_nybble[2*i], lo_nybble[2*i+1]);
    89     }
    90     output[0] = iBuilder->hsimd_packh(2, pair01[0], pair01[1]);
    91     output[1] = iBuilder->hsimd_packl(2, pair01[0], pair01[1]);
    92     output[2] = iBuilder->hsimd_packh(2, pair23[0], pair23[1]);
    93     output[3] = iBuilder->hsimd_packl(2, pair23[0], pair23[1]);
    94     output[4] = iBuilder->hsimd_packh(2, pair45[0], pair45[1]);
    95     output[5] = iBuilder->hsimd_packl(2, pair45[0], pair45[1]);
    96     output[6] = iBuilder->hsimd_packh(2, pair67[0], pair67[1]);
    97     output[7] = iBuilder->hsimd_packl(2, pair67[0], pair67[1]);
     86        pair76[i] = iBuilder->hsimd_packh(4, hi_nybble[2*i], hi_nybble[2*i+1]);
     87        pair54[i] = iBuilder->hsimd_packl(4, hi_nybble[2*i], hi_nybble[2*i+1]);
     88        pair32[i] = iBuilder->hsimd_packh(4, lo_nybble[2*i], lo_nybble[2*i+1]);
     89        pair10[i] = iBuilder->hsimd_packl(4, lo_nybble[2*i], lo_nybble[2*i+1]);
     90    }
     91    if (basisNumbering == cc::BitNumbering::LittleEndian) {
     92        output[7] = iBuilder->hsimd_packh(2, pair76[0], pair76[1]);
     93        output[6] = iBuilder->hsimd_packl(2, pair76[0], pair76[1]);
     94        output[5] = iBuilder->hsimd_packh(2, pair54[0], pair54[1]);
     95        output[4] = iBuilder->hsimd_packl(2, pair54[0], pair54[1]);
     96        output[3] = iBuilder->hsimd_packh(2, pair32[0], pair32[1]);
     97        output[2] = iBuilder->hsimd_packl(2, pair32[0], pair32[1]);
     98        output[1] = iBuilder->hsimd_packh(2, pair10[0], pair10[1]);
     99        output[0] = iBuilder->hsimd_packl(2, pair10[0], pair10[1]);
     100    } else {
     101        output[0] = iBuilder->hsimd_packh(2, pair76[0], pair76[1]);
     102        output[1] = iBuilder->hsimd_packl(2, pair76[0], pair76[1]);
     103        output[2] = iBuilder->hsimd_packh(2, pair54[0], pair54[1]);
     104        output[3] = iBuilder->hsimd_packl(2, pair54[0], pair54[1]);
     105        output[4] = iBuilder->hsimd_packh(2, pair32[0], pair32[1]);
     106        output[5] = iBuilder->hsimd_packl(2, pair32[0], pair32[1]);
     107        output[6] = iBuilder->hsimd_packh(2, pair10[0], pair10[1]);
     108        output[7] = iBuilder->hsimd_packl(2, pair10[0], pair10[1]);
     109    }
    98110}
    99111#endif
    100112   
    101 #if 0
    102 void generateS2P_16Kernel(const std::unique_ptr<KernelBuilder> & iBuilder, Kernel * kBuilder) {
    103     kBuilder->addInputStream(16, "unit_pack");
    104     for(unsigned i = 0; i < 16; i++) {
    105             kBuilder->addOutputStream(1);
    106     }
    107     Value * ptr = kBuilder->getInputStream(0);
    108 
    109     Value * lo[8];
    110     Value * hi[8];
    111     for (unsigned i = 0; i < 8; i++) {
    112         Value * s0 = iBuilder->CreateBlockAlignedLoad(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i)});
    113         Value * s1 = iBuilder->CreateBlockAlignedLoad(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i + 1)});
    114         lo[i] = iBuilder->hsimd_packl(16, s0, s1);
    115         hi[i] = iBuilder->hsimd_packh(16, s0, s1);
    116     }
    117 
    118     Value * output[16];
    119     s2p(iBuilder, lo, output);
    120     s2p(iBuilder, hi, output + 8);
    121     for (unsigned j = 0; j < 16; j++) {
    122         iBuilder->CreateBlockAlignedStore(output[j], kBuilder->getOutputStream(j));
    123     }
    124 }   
    125 #endif
    126 
    127113void S2PKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & kb, Value * const numOfBlocks) {
    128114    BasicBlock * entry = kb->GetInsertBlock();
     
    148134    }
    149135    Value * basisbits[8];
    150     s2p(kb, bytepack, basisbits);
     136    s2p(kb, bytepack, basisbits, mBasisSetNumbering);
    151137    for (unsigned i = 0; i < 8; ++i) {
    152138        kb->storeOutputStreamBlock("basisBits", kb->getInt32(i), blockOffsetPhi, basisbits[i]);
     
    158144    kb->SetInsertPoint(s2pDone);
    159145}
    160 S2PKernel::S2PKernel(const std::unique_ptr<KernelBuilder> & b, bool aligned, std::string prefix)
    161 : MultiBlockKernel(aligned ? prefix + "s2p" : prefix + "s2p_unaligned",
     146
     147S2PKernel::S2PKernel(const std::unique_ptr<KernelBuilder> & b, cc::BitNumbering numbering, bool aligned, std::string prefix)
     148    : MultiBlockKernel(aligned ? prefix + "s2p" + cc::numberingSuffix(numbering): prefix + "s2p_unaligned" + cc::numberingSuffix(numbering),
    162149    {Binding{b->getStreamSetTy(1, 8), "byteStream", FixedRate(), Principal()}},
    163150    {Binding{b->getStreamSetTy(8, 1), "basisBits"}}, {}, {}, {}),
     151  mBasisSetNumbering(numbering),
    164152  mAligned(aligned) {
    165153    if (!aligned) {
     
    168156}
    169157   
    170 S2P_21Kernel::S2P_21Kernel(const std::unique_ptr<KernelBuilder> & b)
    171 : MultiBlockKernel("s2p_21",
     158S2P_21Kernel::S2P_21Kernel(const std::unique_ptr<KernelBuilder> & b, cc::BitNumbering numbering)
     159: MultiBlockKernel("s2p_21" + cc::numberingSuffix(numbering),
    172160                   {Binding{b->getStreamSetTy(1, 32), "codeUnitStream", FixedRate(), Principal()}},
    173                    {Binding{b->getStreamSetTy(21, 1), "basisBits"}}, {}, {}, {}) {
    174 }
    175 
     161                   {Binding{b->getStreamSetTy(21, 1), "basisBits"}}, {}, {}, {}),
     162    mBasisSetNumbering(numbering) {
     163}
    176164
    177165void S2P_21Kernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & kb, Value * const numOfBlocks) {
     
    209197    }
    210198    Value * basisbits[24];
    211     s2p(kb, u32byte0, basisbits);
    212     s2p(kb, u32byte1, &basisbits[8]);
    213     s2p(kb, u32byte2, &basisbits[16]);
     199    s2p(kb, u32byte0, basisbits, cc::BitNumbering::LittleEndian);
     200    s2p(kb, u32byte1, &basisbits[8], cc::BitNumbering::LittleEndian);
     201    s2p(kb, u32byte2, &basisbits[16], cc::BitNumbering::LittleEndian);
    214202    for (unsigned i = 0; i < 21; ++i) {
    215 #ifndef LITTLE_ENDIAN_BIT_NUMBERING
    216         const unsigned idx = (i/3) * 3 + 7 - (i & 7);
    217         kb->storeOutputStreamBlock("basisBits", kb->getInt32(idx), blockOffsetPhi, basisbits[i]);
    218 #else
    219         kb->storeOutputStreamBlock("basisBits", kb->getInt32(i), blockOffsetPhi, basisbits[i]);
    220 #endif
     203        const unsigned bitIdx = mBasisSetNumbering == cc::BitNumbering::LittleEndian ? i : 21 - i;
     204        kb->storeOutputStreamBlock("basisBits", kb->getInt32(i), blockOffsetPhi, basisbits[bitIdx]);
    221205    }
    222206    Value * nextBlk = kb->CreateAdd(blockOffsetPhi, kb->getSize(1));
     
    245229    }
    246230    for (unsigned bit = 0; bit < mCodeUnitWidth; bit++) {
    247 #ifndef LITTLE_ENDIAN_BIT_NUMBERING
    248         pb->createAssign(pb->createExtract(getOutputStreamVar("basisBits"), pb->getInteger(bit)), streamSet[steps][mCodeUnitWidth-1-bit]);
    249 #else
    250         pb->createAssign(pb->createExtract(getOutputStreamVar("basisBits"), pb->getInteger(bit)), streamSet[steps][bit]);
    251 #endif
    252     }
    253 }
    254 
    255 S2P_PabloKernel::S2P_PabloKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned codeUnitWidth)
    256 : PabloKernel(b, "s2p_pablo" + std::to_string(codeUnitWidth),
     231        const unsigned bitIndex = mBasisSetNumbering == cc::BitNumbering::LittleEndian ? bit : mCodeUnitWidth-1-bit;
     232        pb->createAssign(pb->createExtract(getOutputStreamVar("basisBits"), pb->getInteger(bitIndex)), streamSet[steps][bit]);
     233    }
     234}
     235
     236S2P_PabloKernel::S2P_PabloKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned codeUnitWidth, cc::BitNumbering numbering)
     237: PabloKernel(b, "s2p_pablo" + std::to_string(codeUnitWidth) + cc::numberingSuffix(numbering),
    257238    {Binding{b->getStreamSetTy(1, codeUnitWidth), "codeUnitStream"}},
    258239    {Binding{b->getStreamSetTy(codeUnitWidth, 1), "basisBits"}}),
    259   mCodeUnitWidth(codeUnitWidth) {
    260 }
    261 
    262 
    263 S2PByPextKernel::S2PByPextKernel(const std::unique_ptr<kernel::KernelBuilder> &b, std::string prefix)
    264         : BlockOrientedKernel(prefix + "s2pByPext",
     240  mCodeUnitWidth(codeUnitWidth),
     241  mBasisSetNumbering(numbering) {
     242}
     243
     244
     245S2PByPextKernel::S2PByPextKernel(const std::unique_ptr<kernel::KernelBuilder> &b, cc::BitNumbering numbering, std::string prefix)
     246        : BlockOrientedKernel(prefix + "s2pByPext" + cc::numberingSuffix(numbering),
    265247                           {Binding{b->getStreamSetTy(1, 8), "byteStream", FixedRate(), Principal()}},
    266                            {Binding{b->getStreamSetTy(8, 1), "basisBits"}}, {}, {}, {})
    267 {
    268 
     248                           {Binding{b->getStreamSetTy(8, 1), "basisBits"}}, {}, {}, {}),
     249    mBasisSetNumbering(numbering) {
    269250}
    270251
     
    283264        for (size_t iDataIndex = 0; iDataIndex < 8; iDataIndex++) {
    284265            Value* inputData = b->CreateLoad(b->CreateGEP(inputBasePtr, b->getSize(iDataIndex + iBlockIndex * 8)));
    285             for (int iStreamIndex = 0; iStreamIndex < 8; iStreamIndex++) {
    286                 Value* targetMask = b->getInt64(base_mask << iStreamIndex);
     266            for (unsigned iStreamIndex = 0; iStreamIndex < 8; iStreamIndex++) {
     267                const unsigned bitIndex = mBasisSetNumbering == cc::BitNumbering::LittleEndian ? iStreamIndex : 7 - iStreamIndex;
     268                Value* targetMask = b->getInt64(base_mask << bitIndex);
    287269                Value * const outputValue = b->CreateCall(pext, {inputData, targetMask});
    288270                tempValues[iStreamIndex] = b->CreateOr(tempValues[iStreamIndex], b->CreateShl(outputValue, b->getInt64(iDataIndex * 8)));
     
    290272        }
    291273        for (int iStreamIndex = 0; iStreamIndex < 8; iStreamIndex++) {
    292             b->CreateStore(tempValues[iStreamIndex], b->CreateGEP(outputPtrs[7 - iStreamIndex], b->getSize(iBlockIndex)));
    293         }
    294     }
    295 }
    296 }
     274            b->CreateStore(tempValues[iStreamIndex], b->CreateGEP(outputPtrs[iStreamIndex], b->getSize(iBlockIndex)));
     275        }
     276    }
     277}
     278}
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.h

    r6069 r6089  
    11/*
    2  *  Copyright (c) 2016 International Characters.
     2 *  Copyright (c) 2018 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 */
     
    77
    88#include "kernel.h"  // for KernelBuilder
    9 
     9#include <cc/alphabet.h>
    1010#include <pablo/pablo_kernel.h>
    1111#include <string>
     
    1818class S2PKernel final : public MultiBlockKernel {
    1919public:
    20     S2PKernel(const std::unique_ptr<kernel::KernelBuilder> & b, bool aligned = true, std::string prefix = "");
     20    S2PKernel(const std::unique_ptr<kernel::KernelBuilder> & b, cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian, bool aligned = true, std::string prefix = "");
    2121    bool isCachable() const override { return true; }
    2222    bool hasSignature() const override { return false; }
     
    2424    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & kb, llvm::Value * const numOfStrides) override;
    2525private:
     26    cc::BitNumbering mBasisSetNumbering;
    2627    bool mAligned;
    2728};
     
    2930class S2P_21Kernel final : public MultiBlockKernel {
    3031public:
    31     S2P_21Kernel(const std::unique_ptr<kernel::KernelBuilder> & b);
     32    S2P_21Kernel(const std::unique_ptr<kernel::KernelBuilder> & b, cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian);
    3233    bool isCachable() const override { return true; }
    3334    bool hasSignature() const override { return false; }
    3435protected:
    3536    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & kb, llvm::Value * const numOfStrides) override;
     37    cc::BitNumbering mBasisSetNumbering;
    3638};
    3739
    3840class S2P_PabloKernel final : public pablo::PabloKernel {
    3941public:
    40     S2P_PabloKernel(const std::unique_ptr<KernelBuilder> & b, unsigned codeUnitWidth = 8);
     42    S2P_PabloKernel(const std::unique_ptr<KernelBuilder> & b, unsigned codeUnitWidth = 8, cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian);
    4143    bool isCachable() const override { return true; }
    4244    bool hasSignature() const override { return false; }
     
    4446    void generatePabloMethod() override;
    4547    unsigned mCodeUnitWidth;
     48    cc::BitNumbering mBasisSetNumbering;
    4649};
    4750
     
    4952class S2PByPextKernel final : public BlockOrientedKernel {
    5053public:
    51     S2PByPextKernel(const std::unique_ptr<kernel::KernelBuilder> & b, std::string prefix = "");
     54    S2PByPextKernel(const std::unique_ptr<kernel::KernelBuilder> & b, cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian, std::string prefix = "");
    5255    bool isCachable() const override { return true; }
    5356    bool hasSignature() const override { return false; }
    5457protected:
    5558    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     59    cc::BitNumbering mBasisSetNumbering;
    5660};
    5761
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r6081 r6089  
    402402    sourceK->setInitialArguments({mInputStream, mFileSize});
    403403    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
    404     Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
     404    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian);
    405405    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
    406406}
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp

    r6081 r6089  
    66#include <llvm/Support/PrettyStackTrace.h>
    77
     8#include <cc/alphabet.h>
    89#include <cc/cc_compiler.h>
    910
     
    615616
    616617    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
    617     Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true, "a");
    618 //    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, "a");
     618    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
     619//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, cc::BitNumbering::BigEndian, "a");
    619620    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
    620621
     
    656657
    657658    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
    658     Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true, "a");
     659    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
    659660//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, "a");
    660661    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
  • icGREP/icgrep-devel/icgrep/lz4d.cpp

    r6047 r6089  
    1818
    1919#include <lz4FrameDecoder.h>
     20#include <cc/alphabet.h>
    2021#include <cc/cc_compiler.h>
    2122#include <kernels/cc_kernel.h>
     
    8485
    8586    // Input stream is not aligned due to the offset.
    86     Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ false);
     87    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::LittleEndian, /*aligned = */ false);
    8788    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    8889   
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r6047 r6089  
    66
    77#include <IR_Gen/idisa_target.h>                   // for GetIDISA_Builder
     8#include <cc/alphabet.h>
    89#include <cc/cc_compiler.h>                        // for CC_Compiler
    910#include <kernels/deletion.h>                      // for DeletionKernel
     
    8485    Var * error_mask = main.createVar("error_mask", zeroes);
    8586
    86     cc::Parabix_CC_Compiler ccc(getEntryScope(), u8_bits);
     87    cc::Parabix_CC_Compiler ccc(getEntryScope(), u8_bits, cc::BitNumbering::BigEndian);
    8788
    8889    // The logic for processing non-ASCII bytes will be embedded within an if-hierarchy.
     
    291292    StreamSetBuffer * BasisBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), bufferSize);
    292293
    293     Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder);
     294    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian);
    294295    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    295296
     
    329330        Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 16, 1, 4);
    330331        pxDriver.makeKernelCall(unSwizzleK, {u16Swizzle0, u16Swizzle1, u16Swizzle2, u16Swizzle3}, {u16bits});
    331         Kernel * p2sk = pxDriver.addKernelInstance<P2S16Kernel>(iBuilder);
     332        Kernel * p2sk = pxDriver.addKernelInstance<P2S16Kernel>(iBuilder, cc::BitNumbering::BigEndian);
    332333        pxDriver.makeKernelCall(p2sk, {u16bits}, {u16bytes});
    333334    } else {
     
    335336        Kernel * delK = pxDriver.addKernelInstance<FieldCompressKernel>(iBuilder, iBuilder->getBitBlockWidth()/16, 16);
    336337        pxDriver.makeKernelCall(delK, {u8bits, DelMask}, {u16bits, DeletionCounts});
    337         Kernel * p2sk = pxDriver.addKernelInstance<P2S16KernelWithCompressedOutput>(iBuilder);
     338        Kernel * p2sk = pxDriver.addKernelInstance<P2S16KernelWithCompressedOutput>(iBuilder, cc::BitNumbering::BigEndian);
    338339        pxDriver.makeKernelCall(p2sk, {u16bits, DeletionCounts}, {u16bytes});
    339340    }
Note: See TracChangeset for help on using the changeset viewer.