source: icGREP/icgrep-devel/icgrep/kernels/lz4/aio/lz4_bytestream_aio.cpp @ 6132

Last change on this file since 6132 was 6132, checked in by xwa163, 9 months ago
  1. More experiment on lz4 grep
  2. Improve performance of lzparabix grep
File size: 7.2 KB
Line 
1
2#include "lz4_bytestream_aio.h"
3
4
5#include <kernels/kernel_builder.h>
6#include <iostream>
7#include <string>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/streamset.h>
10
11using namespace llvm;
12using namespace kernel;
13using namespace std;
14
15
16namespace kernel{
17    std::string LZ4ByteStreamAioKernel::getCopyByteStreamName() {
18        return mCopyOtherByteStream ? "targetByteStream" : "byteStream";
19    }
20
21    LZ4ByteStreamAioKernel::LZ4ByteStreamAioKernel(const std::unique_ptr<kernel::KernelBuilder> &b, bool copyOtherByteStream, unsigned blockSize)
22            : LZ4SequentialAioBaseKernel(b, "LZ4ByteStreamAioKernel", blockSize),
23              mCopyOtherByteStream(copyOtherByteStream) {
24        mStreamSetOutputs.push_back(Binding{b->getStreamSetTy(1, 8), "outputStream", BoundedRate(0, 1)});
25        this->addScalar(b->getInt8PtrTy(), "temporaryInputPtr");
26        if (copyOtherByteStream) {
27            mStreamSetInputs.push_back(Binding{b->getStreamSetTy(1, 8), "targetByteStream", RateEqualTo("byteStream")});
28        }
29    }
30
31    void LZ4ByteStreamAioKernel::doLiteralCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *literalStart,
32                                               llvm::Value *literalLength, llvm::Value* blockStart) {
33        unsigned fw = 64;
34        Type* INT_FW_PTR = b->getIntNTy(fw)->getPointerTo();
35
36        Value* inputBytePtr = b->getScalarField("temporaryInputPtr");
37        inputBytePtr = b->CreateGEP(inputBytePtr, b->CreateSub(literalStart, blockStart));
38
39        Value* inputPtr = b->CreatePointerCast(inputBytePtr, INT_FW_PTR);
40
41        Value* outputPos = b->getScalarField("outputPos");
42        Value* outputBufferSize = b->getCapacity("outputStream");
43        Value* outputPtr = b->getRawOutputPointer("outputStream", b->CreateURem(outputPos, outputBufferSize));
44        outputPtr = b->CreatePointerCast(outputPtr, INT_FW_PTR);
45
46        BasicBlock* entryBlock = b->GetInsertBlock();
47        BasicBlock* literalCopyCon = b->CreateBasicBlock("literalCopyCon");
48        BasicBlock* literalCopyBody = b->CreateBasicBlock("literalCopyBody");
49        BasicBlock* literalCopyExit = b->CreateBasicBlock("literalCopyExit");
50
51        b->CreateBr(literalCopyCon);
52
53        // ---- literalCopyCon
54        b->SetInsertPoint(literalCopyCon);
55        PHINode* phiOutputPtr = b->CreatePHI(outputPtr->getType(), 2);
56        phiOutputPtr->addIncoming(outputPtr, entryBlock);
57        PHINode* phiInputPtr = b->CreatePHI(inputPtr->getType(), 2);
58        phiInputPtr->addIncoming(inputPtr, entryBlock);
59        PHINode* phiCopiedLength = b->CreatePHI(literalLength->getType(), 2);
60        phiCopiedLength->addIncoming(b->getSize(0), entryBlock);
61        b->CreateCondBr(b->CreateICmpULT(phiCopiedLength, literalLength), literalCopyBody, literalCopyExit);
62
63        // ---- literalCopyBody
64        b->SetInsertPoint(literalCopyBody);
65        // Always copy fw bits to improve performance
66        b->CreateStore(b->CreateLoad(phiInputPtr), phiOutputPtr);
67
68        phiInputPtr->addIncoming(b->CreateGEP(phiInputPtr, b->getSize(1)), b->GetInsertBlock());
69        phiOutputPtr->addIncoming(b->CreateGEP(phiOutputPtr, b->getSize(1)), b->GetInsertBlock());
70        phiCopiedLength->addIncoming(b->CreateAdd(phiCopiedLength, b->getSize(fw / 8)), b->GetInsertBlock());
71        b->CreateBr(literalCopyCon);
72
73        // ---- literalCopyExit
74        b->SetInsertPoint(literalCopyExit);
75        b->setScalarField("outputPos", b->CreateAdd(outputPos, literalLength));
76    }
77
78    void LZ4ByteStreamAioKernel::doMatchCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *matchOffset,
79                                             llvm::Value *matchLength) {
80        unsigned fw = 64;
81        Type* INT_FW_PTR = b->getIntNTy(fw)->getPointerTo();
82
83        BasicBlock* entryBlock = b->GetInsertBlock();
84
85        Value* outputPos = b->getScalarField("outputPos");
86        Value* outputBufferSize = b->getCapacity("outputStream");
87
88        Value* copyToPtr = b->getRawOutputPointer("outputStream", b->CreateURem(outputPos, outputBufferSize));
89        Value* copyFromPtr = b->getRawOutputPointer("outputStream", b->CreateURem(b->CreateSub(outputPos, matchOffset), outputBufferSize));
90
91        BasicBlock* matchCopyCon = b->CreateBasicBlock("matchCopyCon");
92        BasicBlock* matchCopyBody = b->CreateBasicBlock("matchCopyBody");
93        BasicBlock* matchCopyExit = b->CreateBasicBlock("matchCopyExit");
94
95        b->CreateBr(matchCopyCon);
96
97        // ---- matchCopyCon
98        b->SetInsertPoint(matchCopyCon);
99        PHINode* phiFromPtr = b->CreatePHI(b->getInt8PtrTy(), 2);
100        phiFromPtr->addIncoming(copyFromPtr, entryBlock);
101        PHINode* phiToPtr = b->CreatePHI(b->getInt8PtrTy(), 2);
102        phiToPtr->addIncoming(copyToPtr, entryBlock);
103        PHINode* phiCopiedSize = b->CreatePHI(b->getSizeTy(), 2);
104        phiCopiedSize->addIncoming(b->getSize(0), entryBlock);
105
106        b->CreateCondBr(b->CreateICmpULT(phiCopiedSize, matchLength), matchCopyBody, matchCopyExit);
107
108        // ---- matchCopyBody
109        b->SetInsertPoint(matchCopyBody);
110        b->CreateStore(
111                b->CreateLoad(b->CreatePointerCast(phiFromPtr, INT_FW_PTR)),
112        b->CreatePointerCast(phiToPtr, INT_FW_PTR)
113        );
114
115        Value* copySize = b->CreateUMin(matchOffset, b->getSize(fw / 8));
116        phiFromPtr->addIncoming(b->CreateGEP(phiFromPtr, copySize), b->GetInsertBlock());
117        phiToPtr->addIncoming(b->CreateGEP(phiToPtr, copySize), b->GetInsertBlock());
118        phiCopiedSize->addIncoming(b->CreateAdd(phiCopiedSize, copySize), b->GetInsertBlock());
119        b->CreateBr(matchCopyCon);
120
121        // ---- matchCopyExit
122        b->SetInsertPoint(matchCopyExit);
123        b->setScalarField("outputPos", b->CreateAdd(outputPos, matchLength));
124    }
125
126    void LZ4ByteStreamAioKernel::setProducedOutputItemCount(const std::unique_ptr<KernelBuilder> &b, llvm::Value* produced) {
127        b->setProducedItemCount("outputStream", produced);
128    }
129
130    void LZ4ByteStreamAioKernel::initializationMethod(const std::unique_ptr<KernelBuilder> &b) {
131        b->setScalarField("temporaryInputPtr", b->CreateMalloc(b->getSize(mBlockSize)));
132    }
133
134    void LZ4ByteStreamAioKernel::prepareProcessBlock(const std::unique_ptr<KernelBuilder> &b, llvm::Value* blockStart, llvm::Value* blockEnd) {
135        Value* rawInputPtr = b->CreatePointerCast(b->getRawInputPointer(this->getCopyByteStreamName(), b->getSize(0)), b->getInt8PtrTy());
136        Value* inputCapacity = b->getCapacity(this->getCopyByteStreamName());
137
138        Value* blockStartRem = b->CreateURem(blockStart, inputCapacity);
139        Value* remSize = b->CreateSub(inputCapacity, blockStartRem);
140
141        Value* blockSize = b->CreateSub(blockEnd, blockStart);
142
143        Value* copySize1 = b->CreateUMin(remSize, blockSize);
144        Value* copySize2 = b->CreateSub(blockSize, copySize1);
145
146        Value* temporayInputPtr = b->getScalarField("temporaryInputPtr");
147
148        b->CreateMemCpy(temporayInputPtr, b->CreateGEP(rawInputPtr, blockStartRem), copySize1, 1);
149        b->CreateMemCpy(b->CreateGEP(temporayInputPtr, copySize1), rawInputPtr, copySize2, 1);
150    }
151
152    void LZ4ByteStreamAioKernel::beforeTermination(const std::unique_ptr<KernelBuilder> &b) {
153        b->CreateFree(b->getScalarField("temporaryInputPtr"));
154//        b->CallPrintInt("beforeTermination", b->getSize(0));
155    }
156
157}
Note: See TracBrowser for help on using the repository browser.