Changeset 6199


Ignore:
Timestamp:
Nov 12, 2018, 5:19:32 PM (5 months ago)
Author:
cameron
Message:

Fix FieldCompress kernels to have a user-settable field width; update to use extraction method

Location:
icGREP/icgrep-devel/icgrep
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r6189 r6199  
    106106        kb->storeOutputStreamBlock("outputStreamSet", kb->getInt32(j), blockOffsetPhi, output);
    107107    }
    108 #ifndef STREAM_COMPRESS_USING_EXTRACTION_MASK
    109     Value * unitCount = kb->simd_popcount(mCompressFieldWidth, extractionMask);
    110     kb->storeOutputStreamBlock("unitCounts", kb->getInt32(0), blockOffsetPhi, kb->bitCast(unitCount));
    111 #endif
    112108    Value * nextBlk = kb->CreateAdd(blockOffsetPhi, kb->getSize(1));
    113109    blockOffsetPhi->addIncoming(nextBlk, processBlock);
     
    117113}
    118114
    119 #ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
    120 FieldCompressKernel::FieldCompressKernel(const std::unique_ptr<kernel::KernelBuilder> & b
     115FieldCompressKernel::FieldCompressKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw
    121116                                         , StreamSet * inputStreamSet, StreamSet * extractionMask
    122117                                         , StreamSet * outputStreamSet)
    123 : MultiBlockKernel("fieldCompress" + std::to_string(b->getBitBlockWidth() / inputStreamSet->getNumElements()) + "_" + std::to_string(inputStreamSet->getNumElements()),
     118: MultiBlockKernel("fieldCompress" + std::to_string(fw) + "_" + std::to_string(inputStreamSet->getNumElements()),
    124119// inputs
    125120{Binding{"inputStreamSet", inputStreamSet},
     
    128123{Binding{"outputStreamSet", outputStreamSet}},
    129124{}, {}, {})
    130 , mCompressFieldWidth(b->getBitBlockWidth() / inputStreamSet->getNumElements())
     125, mCompressFieldWidth(fw)
    131126, mStreamCount(inputStreamSet->getNumElements()) {
    132127
    133128}
    134 #else
    135 FieldCompressKernel::FieldCompressKernel(const std::unique_ptr<kernel::KernelBuilder> & b
    136                                          , StreamSet * inputStreamSet, StreamSet * extractionMask
    137                                          , StreamSet * outputStreamSet, StreamSet * unitCounts)
    138 : MultiBlockKernel("fieldCompress" + std::to_string(b->getBitBlockWidth() / inputStreamSet->getNumElements()) + "_" + std::to_string(inputStreamSet->getNumElements()),
    139 // inputs
    140 {Binding{"inputStreamSet", inputStreamSet},
    141 Binding{"extractionMask", extractionMask}},
    142 // outputs
    143 {Binding{"outputStreamSet", outputStreamSet},
    144 Binding{"unitCounts", unitCounts, FixedRate(), RoundUpTo(b->getBitBlockWidth())}},
    145 {}, {}, {})
    146 , mCompressFieldWidth(b->getBitBlockWidth() / inputStreamSet->getNumElements())
    147 , mStreamCount(inputStreamSet->getNumElements()) {
    148 
    149 }
    150 #endif
    151129
    152130void PEXTFieldCompressKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & kb, llvm::Value * const numOfBlocks) {
     
    154132    Type * fieldPtrTy = PointerType::get(fieldTy, 0);
    155133    Constant * PEXT_func = nullptr;
    156     Constant * popc_func = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, fieldTy);
    157134    if (mPEXTWidth == 64) {
    158135        PEXT_func = Intrinsic::getDeclaration(kb->getModule(), Intrinsic::x86_bmi_pext_64);
     
    172149    Value * extractionMaskPtr = kb->getInputStreamBlockPtr("extractionMask", ZERO, blockOffsetPhi);
    173150    extractionMaskPtr = kb->CreatePointerCast(extractionMaskPtr, fieldPtrTy);
    174 #ifndef STREAM_COMPRESS_USING_EXTRACTION_MASK
    175     Value * unitCountPtr = kb->getOutputStreamBlockPtr("unitCounts", ZERO, blockOffsetPhi);
    176     unitCountPtr = kb->CreatePointerCast(unitCountPtr, fieldPtrTy);
    177151    for (unsigned i = 0; i < fieldsPerBlock; i++) {
    178152        mask[i] = kb->CreateLoad(kb->CreateGEP(extractionMaskPtr, kb->getInt32(i)));
    179         Value * popc = kb->CreateCall(popc_func, mask[i]);
    180         kb->CreateStore(popc, kb->CreateGEP(unitCountPtr, kb->getInt32(i)));
    181     }
    182 #else
    183     for (unsigned i = 0; i < fieldsPerBlock; i++) {
    184         mask[i] = kb->CreateLoad(kb->CreateGEP(extractionMaskPtr, kb->getInt32(i)));
    185     }
    186 #endif
     153    }
    187154    for (unsigned j = 0; j < mStreamCount; ++j) {
    188155        Value * inputPtr = kb->getInputStreamBlockPtr("inputStreamSet", kb->getInt32(j), blockOffsetPhi);
     
    207174                   {Binding{kb->getStreamSetTy(streamCount), "inputStreamSet"},
    208175                       Binding{kb->getStreamSetTy(), "extractionMask"}},
    209 #ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
    210176                   {Binding{kb->getStreamSetTy(streamCount), "outputStreamSet"}},
    211 #else
    212                    {Binding{kb->getStreamSetTy(streamCount), "outputStreamSet"},
    213                        Binding{kb->getStreamSetTy(), "unitCounts", FixedRate(), RoundUpTo(kb->getBitBlockWidth())}},
    214 #endif
    215177                   {}, {}, {})
    216178, mPEXTWidth(fieldWidth)
     
    221183StreamCompressKernel::StreamCompressKernel(const std::unique_ptr<kernel::KernelBuilder> & b
    222184                                           , StreamSet * source
    223                                            #ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
    224185                                           , StreamSet * extractionMask
    225                                            #else
    226                                            , StreamSet * unitCounts
    227                                            #endif
    228                                            , StreamSet * compresedOutput
     186                                           , StreamSet * compressedOutput
    229187                                           , const unsigned FieldWidth)
    230188: MultiBlockKernel("streamCompress" + std::to_string(FieldWidth) + "_" + std::to_string(source->getNumElements()),
    231189{Binding{"sourceStreamSet", source},
    232 #ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
    233190Binding{"extractionMask", extractionMask}},
    234 #else
    235 Binding{"unitCounts", unitCounts}},
    236 #endif
    237 {Binding{"compressedOutput", compresedOutput, BoundedRate(0, 1)}},
     191{Binding{"compressedOutput", compressedOutput, BoundedRate(0, 1)}},
    238192{}, {}, {})
    239193, mCompressedFieldWidth(FieldWidth)
     
    286240        pendingDataPhi[i]->addIncoming(pendingData[i], entry);
    287241    }
    288     #ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
    289     Value * fieldPopCounts = b->simd_popcount(fw, b->loadInputStreamBlock("extractionMask", ZERO, blockOffsetPhi));
    290     #else
    291     Value * fieldPopCounts = b->loadInputStreamBlock("unitCounts", ZERO, blockOffsetPhi);
    292     #endif
     242    Value * fieldPopCounts = b->simd_popcount(mCompressedFieldWidth, b->loadInputStreamBlock("extractionMask", ZERO, blockOffsetPhi));
    293243    // For each field determine the (partial) sum popcount of all fields up to and
    294244    // including the current field.
     
    317267    Value * pendingSum = b->simd_add(mCompressedFieldWidth, partialSum, splatPending);
    318268    Value * fieldNo = b->simd_srli(mCompressedFieldWidth, pendingSum, std::log2(mCompressedFieldWidth));
    319   //
    320269    // Now process the input data block of each stream in the input stream set.
    321270    //
  • icGREP/icgrep-devel/icgrep/kernels/deletion.h

    r6189 r6199  
    3636    const unsigned mStreamCount;
    3737};
    38 
     38   
     39// Compress within fields of size fw.
    3940class FieldCompressKernel final : public MultiBlockKernel {
    4041public:
    41     #ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
    42     FieldCompressKernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * inputStreamSet, StreamSet * extractionMask, StreamSet * outputStreamSet);
    43     #else
    44     FieldCompressKernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * inputStreamSet, StreamSet * extractionMask, StreamSet * outputStreamSet, StreamSet * unitCounts);
    45     #endif
     42    FieldCompressKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw, StreamSet * inputStreamSet, StreamSet * extractionMask, StreamSet * outputStreamSet);
    4643    bool isCachable() const override { return true; }
    4744    bool hasSignature() const override { return false; }
     
    7269    StreamCompressKernel(const std::unique_ptr<kernel::KernelBuilder> & b
    7370                         , StreamSet * source
    74                          #ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
    7571                         , StreamSet * extractionMask
    76                          #else
    77                          , StreamSet * unitCounts
    78                          #endif
    79                          , StreamSet * compresedOutput
     72                         , StreamSet * compressedOutput
    8073                         , const unsigned FieldWidth = sizeof(size_t) * 8);
    8174
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r6189 r6199  
    169169    Value * lo_bytes[8];
    170170    p2s(b, lo_input, lo_bytes, mBasisSetNumbering);
    171 
    172     Value * const fieldCounts = b->loadInputStreamBlock("fieldCounts", b->getInt32(0));
     171    Value * const extractionMask = b->loadInputStreamBlock("extractionMask", b->getInt32(0));
     172    Value * const fieldCounts = b->simd_popcount(unitsPerRegister, extractionMask);
    173173    Value * unitCounts = partial_sum_popcounts(b, unitsPerRegister, fieldCounts);
    174    
    175174    Value * outputPtr = b->getOutputStreamBlockPtr("i16Stream", b->getInt32(0));
    176175    outputPtr = b->CreatePointerCast(outputPtr, int16PtrTy);
     
    228227P2SKernelWithCompressedOutput::P2SKernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b, cc::BitNumbering numbering)
    229228: BlockOrientedKernel("p2s_compress" + cc::numberingSuffix(numbering),
    230 {Binding{b->getStreamSetTy(8, 1), "basisBits"}, Binding{b->getStreamSetTy(1, 1), "fieldCounts"}},
     229{Binding{b->getStreamSetTy(8, 1), "basisBits"}, Binding{b->getStreamSetTy(1, 1), "extractionMask"}},
    231230{Binding{b->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)}},
    232231{}, {}, {}),
     
    245244
    246245P2S16KernelWithCompressedOutput::P2S16KernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> &,
    247                                                                  StreamSet * basisBits, StreamSet * fieldCounts, StreamSet * i16Stream,
     246                                                                 StreamSet * basisBits, StreamSet * extractionMask, StreamSet * i16Stream,
    248247                                                                 cc::BitNumbering numbering)
    249248: BlockOrientedKernel("p2s_16_compress" + cc::numberingSuffix(numbering),
    250249{Binding{"basisBits", basisBits},
    251 Binding{"fieldCounts", fieldCounts}},
     250Binding{"extractionMask", extractionMask}},
    252251{Binding{"i16Stream", i16Stream, BoundedRate(0, 1)}},
    253252{}, {}, {}),
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.h

    r6184 r6199  
    88#include <cc/alphabet.h>
    99#include "kernel.h"  // for KernelBuilder
    10 
    1110namespace IDISA { class IDISA_Builder; }
    1211
  • icGREP/icgrep-devel/icgrep/u32u8.cpp

    r6189 r6199  
    9797{Binding{"basis", u32basis}},
    9898{Binding{"fieldDepositMask", u8fieldMask, FixedRate(4)},
    99 #ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
    10099Binding{"extractionMask", u8unitCounts, FixedRate(4)}},
    101 #else
    102 Binding{"codeUnitCounts", u8unitCounts, FixedRate(4), RoundUpTo(b->getBitBlockWidth())}},
    103 #endif
    104100{}, {}, {Binding{b->getBitBlockType(), "EOFmask"}})
    105101, mDepositFieldWidth(depositFieldWidth) {
     
    152148        Value * deposit_mask = b->simd_pext(mDepositFieldWidth, mask1000, extraction_mask[j]);
    153149        b->storeOutputStreamBlock("fieldDepositMask", b->getSize(0), b->getSize(j), deposit_mask);
    154 #ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
    155150        b->storeOutputStreamBlock("extractionMask", b->getSize(0), b->getSize(j), extraction_mask[j]);
    156 #else
    157         Value * unit_counts = b->simd_popcount(mDepositFieldWidth, extraction_mask[j]);
    158         b->storeOutputStreamBlock("codeUnitCounts", b->getSize(0), b->getSize(j), unit_counts);
    159 #endif
    160151    }
    161152}
     
    325316
    326317    // Calculate the u8final deposit mask.
    327     #ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
    328318    StreamSet * const extractionMask = P->CreateStreamSet();
    329319    P->CreateKernelCall<UTF8fieldDepositMask>(u32basis, u8fieldMask, extractionMask);
    330320    P->CreateKernelCall<StreamCompressKernel>(u8fieldMask, extractionMask, u8final);
    331     #else
    332     StreamSet * const u8unitCounts = P->CreateStreamSet();
    333     P->CreateKernelCall<UTF8fieldDepositMask>(u32basis, u8fieldMask, u8unitCounts);
    334     P->CreateKernelCall<StreamCompressKernel>(u8fieldMask, u8unitCounts, u8final);
    335     #endif
    336321
    337322    P->CreateKernelCall<UTF8_DepositMasks>(u8final, u8initial, u8mask12_17, u8mask6_11);
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r6184 r6199  
    303303        P->CreateKernelCall<P2S16Kernel>(u16bits, u16bytes, cc::BitNumbering::BigEndian);
    304304    } else {
    305         StreamSet * DeletionCounts = P->CreateStreamSet();
    306         P->CreateKernelCall<FieldCompressKernel>(u8bits, selectors, u16bits, DeletionCounts);
    307         P->CreateKernelCall<P2S16KernelWithCompressedOutput>(u16bits, DeletionCounts, u16bytes, cc::BitNumbering::BigEndian);
     305        P->CreateKernelCall<FieldCompressKernel>(b->getBitBlockWidth()/16, u8bits, selectors, u16bits);
     306        P->CreateKernelCall<P2S16KernelWithCompressedOutput>(u16bits, selectors, u16bytes, cc::BitNumbering::BigEndian);
    308307    }
    309308
Note: See TracChangeset for help on using the changeset viewer.