source: icGREP/icgrep-devel/icgrep/kernels/lz4/aio/lz4_bytestream_aio.cpp @ 6135

Last change on this file since 6135 was 6132, checked in by xwa163, 11 months ago
  1. More experiment on lz4 grep
  2. Improve performance of lzparabix grep
File size: 7.2 KB
Line 
1
2#include "lz4_bytestream_aio.h"
3
4
5#include <kernels/kernel_builder.h>
6#include <iostream>
7#include <string>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/streamset.h>
10
11using namespace llvm;
12using namespace kernel;
13using namespace std;
14
15
16namespace kernel{
17    std::string LZ4ByteStreamAioKernel::getCopyByteStreamName() {
18        return mCopyOtherByteStream ? "targetByteStream" : "byteStream";
19    }
20
21    LZ4ByteStreamAioKernel::LZ4ByteStreamAioKernel(const std::unique_ptr<kernel::KernelBuilder> &b, bool copyOtherByteStream, unsigned blockSize)
22            : LZ4SequentialAioBaseKernel(b, "LZ4ByteStreamAioKernel", blockSize),
23              mCopyOtherByteStream(copyOtherByteStream) {
24        mStreamSetOutputs.push_back(Binding{b->getStreamSetTy(1, 8), "outputStream", BoundedRate(0, 1)});
25        this->addScalar(b->getInt8PtrTy(), "temporaryInputPtr");
26        if (copyOtherByteStream) {
27            mStreamSetInputs.push_back(Binding{b->getStreamSetTy(1, 8), "targetByteStream", RateEqualTo("byteStream")});
28        }
29    }
30
31    void LZ4ByteStreamAioKernel::doLiteralCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *literalStart,
32                                               llvm::Value *literalLength, llvm::Value* blockStart) {
33        unsigned fw = 64;
34        Type* INT_FW_PTR = b->getIntNTy(fw)->getPointerTo();
35
36        Value* inputBytePtr = b->getScalarField("temporaryInputPtr");
37        inputBytePtr = b->CreateGEP(inputBytePtr, b->CreateSub(literalStart, blockStart));
38
39        Value* inputPtr = b->CreatePointerCast(inputBytePtr, INT_FW_PTR);
40
41        Value* outputPos = b->getScalarField("outputPos");
42        Value* outputBufferSize = b->getCapacity("outputStream");
43        Value* outputPtr = b->getRawOutputPointer("outputStream", b->CreateURem(outputPos, outputBufferSize));
44        outputPtr = b->CreatePointerCast(outputPtr, INT_FW_PTR);
45
46        BasicBlock* entryBlock = b->GetInsertBlock();
47        BasicBlock* literalCopyCon = b->CreateBasicBlock("literalCopyCon");
48        BasicBlock* literalCopyBody = b->CreateBasicBlock("literalCopyBody");
49        BasicBlock* literalCopyExit = b->CreateBasicBlock("literalCopyExit");
50
51        b->CreateBr(literalCopyCon);
52
53        // ---- literalCopyCon
54        b->SetInsertPoint(literalCopyCon);
55        PHINode* phiOutputPtr = b->CreatePHI(outputPtr->getType(), 2);
56        phiOutputPtr->addIncoming(outputPtr, entryBlock);
57        PHINode* phiInputPtr = b->CreatePHI(inputPtr->getType(), 2);
58        phiInputPtr->addIncoming(inputPtr, entryBlock);
59        PHINode* phiCopiedLength = b->CreatePHI(literalLength->getType(), 2);
60        phiCopiedLength->addIncoming(b->getSize(0), entryBlock);
61        b->CreateCondBr(b->CreateICmpULT(phiCopiedLength, literalLength), literalCopyBody, literalCopyExit);
62
63        // ---- literalCopyBody
64        b->SetInsertPoint(literalCopyBody);
65        // Always copy fw bits to improve performance
66        b->CreateStore(b->CreateLoad(phiInputPtr), phiOutputPtr);
67
68        phiInputPtr->addIncoming(b->CreateGEP(phiInputPtr, b->getSize(1)), b->GetInsertBlock());
69        phiOutputPtr->addIncoming(b->CreateGEP(phiOutputPtr, b->getSize(1)), b->GetInsertBlock());
70        phiCopiedLength->addIncoming(b->CreateAdd(phiCopiedLength, b->getSize(fw / 8)), b->GetInsertBlock());
71        b->CreateBr(literalCopyCon);
72
73        // ---- literalCopyExit
74        b->SetInsertPoint(literalCopyExit);
75        b->setScalarField("outputPos", b->CreateAdd(outputPos, literalLength));
76    }
77
78    void LZ4ByteStreamAioKernel::doMatchCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *matchOffset,
79                                             llvm::Value *matchLength) {
80        unsigned fw = 64;
81        Type* INT_FW_PTR = b->getIntNTy(fw)->getPointerTo();
82
83        BasicBlock* entryBlock = b->GetInsertBlock();
84
85        Value* outputPos = b->getScalarField("outputPos");
86        Value* outputBufferSize = b->getCapacity("outputStream");
87
88        Value* copyToPtr = b->getRawOutputPointer("outputStream", b->CreateURem(outputPos, outputBufferSize));
89        Value* copyFromPtr = b->getRawOutputPointer("outputStream", b->CreateURem(b->CreateSub(outputPos, matchOffset), outputBufferSize));
90
91        BasicBlock* matchCopyCon = b->CreateBasicBlock("matchCopyCon");
92        BasicBlock* matchCopyBody = b->CreateBasicBlock("matchCopyBody");
93        BasicBlock* matchCopyExit = b->CreateBasicBlock("matchCopyExit");
94
95        b->CreateBr(matchCopyCon);
96
97        // ---- matchCopyCon
98        b->SetInsertPoint(matchCopyCon);
99        PHINode* phiFromPtr = b->CreatePHI(b->getInt8PtrTy(), 2);
100        phiFromPtr->addIncoming(copyFromPtr, entryBlock);
101        PHINode* phiToPtr = b->CreatePHI(b->getInt8PtrTy(), 2);
102        phiToPtr->addIncoming(copyToPtr, entryBlock);
103        PHINode* phiCopiedSize = b->CreatePHI(b->getSizeTy(), 2);
104        phiCopiedSize->addIncoming(b->getSize(0), entryBlock);
105
106        b->CreateCondBr(b->CreateICmpULT(phiCopiedSize, matchLength), matchCopyBody, matchCopyExit);
107
108        // ---- matchCopyBody
109        b->SetInsertPoint(matchCopyBody);
110        b->CreateStore(
111                b->CreateLoad(b->CreatePointerCast(phiFromPtr, INT_FW_PTR)),
112        b->CreatePointerCast(phiToPtr, INT_FW_PTR)
113        );
114
115        Value* copySize = b->CreateUMin(matchOffset, b->getSize(fw / 8));
116        phiFromPtr->addIncoming(b->CreateGEP(phiFromPtr, copySize), b->GetInsertBlock());
117        phiToPtr->addIncoming(b->CreateGEP(phiToPtr, copySize), b->GetInsertBlock());
118        phiCopiedSize->addIncoming(b->CreateAdd(phiCopiedSize, copySize), b->GetInsertBlock());
119        b->CreateBr(matchCopyCon);
120
121        // ---- matchCopyExit
122        b->SetInsertPoint(matchCopyExit);
123        b->setScalarField("outputPos", b->CreateAdd(outputPos, matchLength));
124    }
125
126    void LZ4ByteStreamAioKernel::setProducedOutputItemCount(const std::unique_ptr<KernelBuilder> &b, llvm::Value* produced) {
127        b->setProducedItemCount("outputStream", produced);
128    }
129
130    void LZ4ByteStreamAioKernel::initializationMethod(const std::unique_ptr<KernelBuilder> &b) {
131        b->setScalarField("temporaryInputPtr", b->CreateMalloc(b->getSize(mBlockSize)));
132    }
133
134    void LZ4ByteStreamAioKernel::prepareProcessBlock(const std::unique_ptr<KernelBuilder> &b, llvm::Value* blockStart, llvm::Value* blockEnd) {
135        Value* rawInputPtr = b->CreatePointerCast(b->getRawInputPointer(this->getCopyByteStreamName(), b->getSize(0)), b->getInt8PtrTy());
136        Value* inputCapacity = b->getCapacity(this->getCopyByteStreamName());
137
138        Value* blockStartRem = b->CreateURem(blockStart, inputCapacity);
139        Value* remSize = b->CreateSub(inputCapacity, blockStartRem);
140
141        Value* blockSize = b->CreateSub(blockEnd, blockStart);
142
143        Value* copySize1 = b->CreateUMin(remSize, blockSize);
144        Value* copySize2 = b->CreateSub(blockSize, copySize1);
145
146        Value* temporayInputPtr = b->getScalarField("temporaryInputPtr");
147
148        b->CreateMemCpy(temporayInputPtr, b->CreateGEP(rawInputPtr, blockStartRem), copySize1, 1);
149        b->CreateMemCpy(b->CreateGEP(temporayInputPtr, copySize1), rawInputPtr, copySize2, 1);
150    }
151
152    void LZ4ByteStreamAioKernel::beforeTermination(const std::unique_ptr<KernelBuilder> &b) {
153        b->CreateFree(b->getScalarField("temporaryInputPtr"));
154//        b->CallPrintInt("beforeTermination", b->getSize(0));
155    }
156
157}
Note: See TracBrowser for help on using the repository browser.