Changeset 6135
- Timestamp:
- Jul 30, 2018, 1:36:48 AM (7 months ago)
- Location:
- icGREP/icgrep-devel/icgrep
- Files:
-
- 6 added
- 2 deleted
- 10 edited
Legend:
- Unmodified
- Added
- Removed
-
icGREP/icgrep-devel/icgrep/CMakeLists.txt
r6133 r6135 103 103 add_library(UCDlib UCD/CaseFolding.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp) 104 104 add_library(GrepEngine ${GREP_CORE_SRC} grep/grep_engine.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/streams_merge.cpp kernels/until_n.cpp kernels/UCD_property_kernel.cpp kernels/grapheme_kernel.cpp) 105 add_library(LZ4_Lib lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_deposit_uncompressed.cpp kernels/lz4/lz4_generate_deposit_stream.cpp kernels/pdep_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_block_decoder.cpp kernels/lz4/lz4_index_builder.cpp kernels/lz4/lz4_swizzled_match_copy_kernel.cpp kernels/bitstream_pdep_kernel.cpp kernels/bitstream_gather_pdep_kernel.cpp kernels/swizzled_multiple_pdep_kernel.cpp kernels/lz4/lz4_bitstream_not_kernel.cpp kernels/lz4/lz4_bitstream_match_copy_kernel.cpp kernels/fake_stream_generating_kernel.cpp kernels/lz4/aio/lz4_bytestream_aio.cpp kernels/lz4/aio/lz4_swizzled_aio.cpp kernels/lz4/aio/lz4_parallel_bytestream_aio.cpp kernels/lz4/aio/lz4_sequential_aio_base.cpp kernels/lz4/aio/lz4_ sequential_aio_base.h kernels/lz4/aio/lz4_bitstream_aio.cpp kernels/lz4/aio/lz4_bitstream_aio.h kernels/lz4/aio/lz4_i4_bytestream_aio.cpp kernels/lz4/aio/lz4_i4_bytestream_aio.h)105 add_library(LZ4_Lib lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_deposit_uncompressed.cpp kernels/lz4/lz4_generate_deposit_stream.cpp kernels/pdep_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_block_decoder.cpp kernels/lz4/lz4_index_builder.cpp kernels/lz4/lz4_swizzled_match_copy_kernel.cpp kernels/bitstream_pdep_kernel.cpp kernels/bitstream_gather_pdep_kernel.cpp kernels/swizzled_multiple_pdep_kernel.cpp kernels/lz4/lz4_bitstream_not_kernel.cpp kernels/lz4/lz4_bitstream_match_copy_kernel.cpp kernels/fake_stream_generating_kernel.cpp kernels/lz4/aio/lz4_bytestream_aio.cpp kernels/lz4/aio/lz4_swizzled_aio.cpp kernels/lz4/aio/lz4_parallel_bytestream_aio.cpp kernels/lz4/aio/lz4_sequential_aio_base.cpp kernels/lz4/aio/lz4_bitstream_aio.cpp kernels/lz4/aio/lz4_twist_aio.cpp kernels/lz4/aio/twist_kernel.cpp kernels/lz4/aio/untwist_kernel.cpp) 106 106 add_library(LZParabix_Lib lzparabix/LZParabixGenerator.cpp kernels/lzparabix/decoder/LZParabixBlockDecoder.cpp kernels/lzparabix/decoder/LZParabixAioBaseKernel.cpp lzparabix/LZParabixGrepGenerator.cpp kernels/fake_stream_generating_kernel.cpp kernels/lzparabix/encoder/LZParabixCompressionKernel.cpp kernels/lzparabix/decoder/LZParabixLiteralDecoderKernel.cpp kernels/lzparabix/decoder/LZParabixBitStreamAioKernel.cpp kernels/lzparabix/decoder/LZParabixSwizzledAioKernel.cpp) 107 107 -
icGREP/icgrep-devel/icgrep/kernels/lz4/aio/lz4_sequential_aio_base.cpp
r6132 r6135 43 43 44 44 Binding{b->getInt1Ty(), "hasCallInitialization"} 45 46 47 45 }), 48 46 mBlockSize(blockSize) { -
icGREP/icgrep-devel/icgrep/kernels/lz4/aio/lz4_sequential_aio_base.h
r6132 r6135 29 29 const static unsigned int ACCELERATION_WIDTH = 64; 30 30 const unsigned mBlockSize; 31 const unsigned COPY_FW = 64; 32 const unsigned BYTE_WIDTH = 8; 31 33 32 34 // ---- Kernel Methods -
icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp
r6132 r6135 47 47 48 48 49 P2S4StreamByPDEP::P2S4StreamByPDEP(const std::unique_ptr<kernel::KernelBuilder> & b) 50 : BlockOrientedKernel("P2S4StreamByPDEP", 51 {Binding{b->getStreamSetTy(4, 1), "basisBits"}}, 52 {Binding{b->getStreamSetTy(1, 4), "byteStream"}}, 53 {}, {}, {}) 54 { 55 } 56 57 58 void P2S4StreamByPDEP::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) { 59 Function * PDEPFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pdep_64); 60 uint64_t pdepBaseMask = 0x1111111111111111; 61 62 Value* inputBlocks[4]; 63 64 for (unsigned i = 0; i < 4; i++) { 65 inputBlocks[i] = b->loadInputStreamBlock("basisBits", b->getInt32(i)); 66 } 67 Value* outputBasePtr = b->CreatePointerCast(b->getOutputStreamBlockPtr("byteStream", b->getSize(0)), b->getInt64Ty()->getPointerTo()); 68 69 for (unsigned i = 0; i < b->getBitBlockWidth() / 64; i++) { 70 Value* currentInput[4]; 71 for (unsigned iIndex = 0; iIndex < 4; iIndex++) { 72 currentInput[iIndex] = b->CreateExtractElement(inputBlocks[iIndex], i); 73 } 74 75 for (unsigned j = 0; j < 4; j++) { 76 unsigned outputIndex = i * 4 + j; 77 Value* retI64 = b->getInt64(0); 78 for (unsigned k = 0; k < 4; k++) { 79 Value* newBits = b->CreateCall( 80 PDEPFunc,{ 81 b->CreateLShr(currentInput[k], b->getInt64(j * 16)), 82 b->getInt64(pdepBaseMask << k) 83 } 84 ); 85 retI64 = b->CreateOr(retI64, newBits); 86 } 87 b->CreateStore(retI64, b->CreateGEP(outputBasePtr, b->getInt32(outputIndex))); 88 } 89 } 90 91 // for (unsigned i = 0; i < 4; i++) { 92 // b->CallPrintRegister("input" + std::to_string(i), inputBlocks[i]); 93 // } 94 // 95 // Value* outputBaseBlockPtr = b->CreatePointerCast(b->getOutputStreamBlockPtr("byteStream", b->getSize(0)), b->getBitBlockType()->getPointerTo()); 96 // for (unsigned i = 0; i < 4; i++) { 97 // b->CallPrintRegister("output" + std::to_string(i), b->CreateLoad(b->CreateGEP(outputBaseBlockPtr, b->getInt32(i)))); 98 // } 99 100 } 49 101 50 102 51 … … 269 218 mBasisSetNumbering(numbering) { 270 219 } 271 272 273 } 220 221 } -
icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.h
r6132 r6135 12 12 13 13 namespace kernel { 14 15 class P2S4StreamByPDEP final : public BlockOrientedKernel{16 public:17 P2S4StreamByPDEP(const std::unique_ptr<kernel::KernelBuilder> & b);18 private:19 void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) override;20 };21 14 22 15 class P2SKernel final : public BlockOrientedKernel { -
icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp
r6132 r6135 17 17 18 18 const int PACK_LANES = 2; 19 20 19 void s2p_step(const std::unique_ptr<KernelBuilder> & iBuilder, Value * s0, Value * s1, Value * hi_mask, unsigned shift, Value * &p0, Value * &p1) { 21 20 Value * t0 = nullptr; … … 31 30 t0 = iBuilder->hsimd_packh(16, s0, s1); 32 31 t1 = iBuilder->hsimd_packl(16, s0, s1); 33 }34 if (shift == 1) {35 // iBuilder->CallPrintRegister("t0", t0);36 // iBuilder->CallPrintRegister("t1", t1);37 32 } 38 33 … … 56 51 s2p_step(iBuilder, bit2200[0], bit2200[1], 57 52 iBuilder->simd_himask(4), 2, out[2], out[0]); 58 for (unsigned i = 0; i < 4; i++) {59 // iBuilder->CallPrintRegister("input" + std::to_string(i), input[i]);60 }61 for (unsigned i = 0; i < 4; i++) {62 // iBuilder->CallPrintRegister("out" + std::to_string(i), out[i]);63 }64 53 } 65 54 … … 68 57 Value * bit66442200[4]; 69 58 Value * bit77553311[4]; 70 // iBuilder->CallPrintRegister("himask2", iBuilder->simd_himask(2));71 // iBuilder->CallPrintRegister("himask4", iBuilder->simd_himask(4));72 // iBuilder->CallPrintRegister("himask8", iBuilder->simd_himask(8));73 59 74 60 for (unsigned i = 0; i < 4; i++) { 75 61 Value * s0 = input[2 * i]; 76 62 Value * s1 = input[2 * i + 1]; 77 // iBuilder->CallPrintRegister("s0_" + std::to_string(2 * i), s0);78 // iBuilder->CallPrintRegister("s1_" + std::to_string(2 * i + 1), s1);79 63 s2p_step(iBuilder, s0, s1, iBuilder->simd_himask(2), 1, bit77553311[i], bit66442200[i]); 80 // iBuilder->CallPrintRegister("bit77553311", bit77553311[i]);81 // iBuilder->CallPrintRegister("bit66442200", bit66442200[i]);82 64 } 83 65 Value * bit44440000[2]; … … 102 84 s2p_step(iBuilder, bit66662222[0], bit66662222[1], iBuilder->simd_himask(8), 4, output[1], output[5]); 103 85 s2p_step(iBuilder, bit77773333[0], bit77773333[1], iBuilder->simd_himask(8), 4, output[0], output[4]); 104 }105 106 for (unsigned i = 0; i < 8; i++) {107 // iBuilder->CallPrintRegister("input" + std::to_string(i), input[i]);108 }109 for (unsigned i = 0; i < 8; i++) {110 // iBuilder->CallPrintRegister("output" + std::to_string(i), output[i]);111 86 } 112 87 } … … 155 130 #endif 156 131 157 158 S2P4StreamByPEXTKernel::S2P4StreamByPEXTKernel(const std::unique_ptr<kernel::KernelBuilder> & b)159 :BlockOrientedKernel("s2p4StreamByPEXT",160 {161 Binding{b->getStreamSetTy(1, 4), "byteStream", FixedRate(), Principal()}162 },163 {164 Binding{b->getStreamSetTy(4, 1), "basisBits"}165 }, {}, {}, {}) {166 167 }168 169 void S2P4StreamByPEXTKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {170 Function* PEXT_func = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64);171 uint64_t pextBaseMask = 0x1111111111111111;172 173 Value* inputBasePtr = b->CreatePointerCast(b->getInputStreamBlockPtr("byteStream", b->getSize(0)), b->getInt64Ty()->getPointerTo());174 175 Value* outputBlocks[4];176 for (unsigned i = 0; i < 4; i++) {177 outputBlocks[i] = ConstantVector::getNullValue(b->getBitBlockType());178 }179 180 for (unsigned i = 0; i < b->getBitBlockWidth() / 64; i++) {181 Value* currentOutput[4];182 for (unsigned iIndex = 0; iIndex < 4; iIndex++) {183 currentOutput[iIndex] = b->getInt64(0);184 }185 186 for (unsigned j = 0; j < 4; j++) {187 unsigned inputIndex = i * 4 + j;188 189 Value* currentInput = b->CreateLoad(b->CreateGEP(inputBasePtr, b->getInt32(inputIndex)));190 for (unsigned k = 0; k < 4; k++) {191 192 Value* newBits = b->CreateCall(193 PEXT_func,{194 currentInput,195 b->getInt64(pextBaseMask << k)196 }197 );198 199 currentOutput[k] = b->CreateOr(currentOutput[k], b->CreateShl(newBits, 16 * j));200 }201 }202 203 for (unsigned iIndex = 0; iIndex < 4; iIndex++) {204 outputBlocks[iIndex] = b->CreateInsertElement(outputBlocks[iIndex], currentOutput[iIndex], i);205 }206 }207 208 for (unsigned i = 0; i < 4; i++) {209 b->storeOutputStreamBlock("basisBits", b->getInt32(i), outputBlocks[i]);210 // b->CallPrintRegister("outputBlocks" + std::to_string(i), outputBlocks[i]);211 }212 }213 214 132 void S2PKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & kb, Value * const numOfBlocks) { 215 133 BasicBlock * entry = kb->GetInsertBlock(); … … 345 263 } 346 264 347 } 265 266 } -
icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.h
r6132 r6135 15 15 16 16 namespace kernel { 17 18 class S2P4StreamByPEXTKernel final : public BlockOrientedKernel{19 public:20 S2P4StreamByPEXTKernel(const std::unique_ptr<kernel::KernelBuilder> & b);21 protected:22 void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) override;23 };24 17 25 18 class S2PKernel final : public MultiBlockKernel { -
icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp
r6132 r6135 27 27 #include <kernels/lz4/aio/lz4_swizzled_aio.h> 28 28 #include <kernels/lz4/aio/lz4_bitstream_aio.h> 29 #include <kernels/lz4/aio/lz4_ i4_bytestream_aio.h>29 #include <kernels/lz4/aio/lz4_twist_aio.h> 30 30 #include <kernels/bitstream_pdep_kernel.h> 31 31 #include <kernels/lz4/lz4_bitstream_not_kernel.h> 32 #include <kernels/lz4/aio/twist_kernel.h> 33 #include <kernels/lz4/aio/untwist_kernel.h> 32 34 33 35 namespace re { class CC; } … … 428 430 429 431 430 if (compressedBitStreams[0]->getNumOfStreams() == 4) { 431 StreamSetBuffer* twistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder)); 432 kernel::Kernel* twistK = mGrepDriver->addKernelInstance<kernel::P2S4StreamByPDEP>(iBuilder); 432 size_t numOfStreams = compressedBitStreams[0]->getNumOfStreams(); 433 434 // 1, 2, 4, 8 435 436 if (numOfStreams <= 2) { 437 StreamSetBuffer* twistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 2), this->getInputBufferBlocks(iBuilder)); 438 kernel::Kernel* twistK = mGrepDriver->addKernelInstance<kernel::TwistByPDEPKernel>(iBuilder, numOfStreams, 2); 433 439 mGrepDriver->makeKernelCall(twistK, {compressedBitStreams[0]}, {twistedCharClasses}); 434 440 435 436 StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder)); 437 Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4I4ByteStreamAioKernel>(iBuilder); 441 StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 2), this->getInputBufferBlocks(iBuilder)); 442 Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistAioKernel>(iBuilder, 2); 438 443 lz4I4AioK->setInitialArguments({mFileSize}); 439 444 mGrepDriver->makeKernelCall(lz4I4AioK, { … … 450 455 }); 451 456 452 StreamSetBuffer* untwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder)); 453 kernel::Kernel* untwistK = mGrepDriver->addKernelInstance<kernel::S2P4StreamByPEXTKernel>(iBuilder); 457 StreamSetBuffer* untwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams), this->getInputBufferBlocks(iBuilder)); 458 kernel::Kernel* untwistK = mGrepDriver->addKernelInstance<kernel::UntwistByPEXTKernel>(iBuilder, numOfStreams, 2); 459 mGrepDriver->makeKernelCall(untwistK, {uncompressedTwistedCharClasses}, {untwistedCharClasses}); 460 return {untwistedCharClasses}; 461 } 462 463 if (numOfStreams <= 4) { 464 StreamSetBuffer* twistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder)); 465 kernel::Kernel* twistK = mGrepDriver->addKernelInstance<kernel::TwistByPDEPKernel>(iBuilder, numOfStreams, 4); 466 mGrepDriver->makeKernelCall(twistK, {compressedBitStreams[0]}, {twistedCharClasses}); 467 468 469 StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder)); 470 471 Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistAioKernel>(iBuilder, 4); 472 lz4I4AioK->setInitialArguments({mFileSize}); 473 mGrepDriver->makeKernelCall(lz4I4AioK, { 474 mCompressedByteStream, 475 476 // Block Data 477 BlockData_IsCompressed, 478 BlockData_BlockStart, 479 BlockData_BlockEnd, 480 481 twistedCharClasses 482 }, { 483 uncompressedTwistedCharClasses 484 }); 485 486 StreamSetBuffer* untwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams), this->getInputBufferBlocks(iBuilder)); 487 kernel::Kernel* untwistK = mGrepDriver->addKernelInstance<kernel::UntwistByPEXTKernel>(iBuilder, numOfStreams, 4); 454 488 mGrepDriver->makeKernelCall(untwistK, {uncompressedTwistedCharClasses}, {untwistedCharClasses}); 455 489 return {untwistedCharClasses}; -
icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp
r6133 r6135 22 22 #include <kernels/lz4/lz4_bitstream_match_copy_kernel.h> 23 23 #include <kernels/lz4/lz4_bitstream_not_kernel.h> 24 #include <kernels/lz4/aio/lz4_i4_bytestream_aio.h>25 24 #include <kernels/fake_stream_generating_kernel.h> 26 25 #include <kernels/bitstream_pdep_kernel.h> … … 401 400 402 401 StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb)); 403 Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8); 404 mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits}); 402 StreamSetBuffer * u8NoFinalStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), this->getInputBufferBlocks(idb), 1); 403 404 Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, std::vector<unsigned>({8, 1})); 405 mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits, u8NoFinalStream}); 405 406 406 407 StreamSetBuffer * LineBreakStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), this->getInputBufferBlocks(idb)); … … 409 410 410 411 412 externalStreamNames.push_back("UTF8_nonfinal"); 413 411 414 kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian); 412 mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});415 mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, u8NoFinalStream, decompressedCharClasses}, {MatchResults}); 413 416 MatchResultsBufs[0] = MatchResults; 414 417 … … 774 777 775 778 // GeneratePipeline 776 this->generateLoadByteStream(iBuilder);777 //this->generateLoadByteStreamAndBitStream(iBuilder);779 // this->generateLoadByteStream(iBuilder); 780 this->generateLoadByteStreamAndBitStream(iBuilder); 778 781 779 782 parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder); -
icGREP/icgrep-devel/icgrep/lzparabix/LZParabixGrepGenerator.cpp
r6133 r6135 188 188 StreamSetBuffer * const LiteralBitStream = this->extractLiteralBitStream(idb); 189 189 190 bool allCcByteLength = re::isAllCcByteLength(mREs[0]);191 192 190 mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets); 193 191 mREs[0] = transformCCs(mpx.get(), mREs[0]); … … 208 206 StreamSetBuffer * fakeMatchCopiedBits = nullptr; 209 207 208 209 bool allCcByteLength = re::isAllCcByteLength(mREs[0]); 210 210 if (allCcByteLength) { 211 211 auto ret = this->generateBitStreamDecompression(idb, {CharClasses});
Note: See TracChangeset
for help on using the changeset viewer.