source: icGREP/icgrep-devel/icgrep/kernels/lz4/aio/lz4_bytestream_aio.cpp @ 6111

Last change on this file since 6111 was 6111, checked in by xwa163, 10 months ago
  1. Cleanup LZ4 AIO related kernels
  2. Improve LZ4ParallelByteStreamAIOKernel
  3. Implement simd_cttz
File size: 5.4 KB
Line 
1
2#include "lz4_bytestream_aio.h"
3
4
5#include <kernels/kernel_builder.h>
6#include <iostream>
7#include <string>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/streamset.h>
10
11using namespace llvm;
12using namespace kernel;
13using namespace std;
14
15
16namespace kernel{
17
18    LZ4ByteStreamAioKernel::LZ4ByteStreamAioKernel(const std::unique_ptr<kernel::KernelBuilder> &b, unsigned blockSize)
19            : LZ4SequentialAioBaseKernel(b, "LZ4ByteStreamAioKernel", blockSize) {
20        mStreamSetOutputs.push_back(Binding{b->getStreamSetTy(1, 8), "outputStream", BoundedRate(0, 1)});
21    }
22
23    void LZ4ByteStreamAioKernel::doLiteralCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *literalStart,
24                                               llvm::Value *literalLength) {
25        unsigned fw = 64;
26        Type* INT_FW_PTR = b->getIntNTy(fw)->getPointerTo();
27
28        Value* inputBytePtr = b->getRawInputPointer("byteStream", literalStart);
29        Value* inputPtr = b->CreatePointerCast(inputBytePtr, INT_FW_PTR);
30
31        Value* outputPos = b->getScalarField("outputPos");
32        Value* outputBufferSize = b->getCapacity("outputStream");
33        Value* outputPtr = b->getRawOutputPointer("outputStream", b->CreateURem(outputPos, outputBufferSize));
34        outputPtr = b->CreatePointerCast(outputPtr, INT_FW_PTR);
35
36        // We can always assume that we have enough output buffer based on our output buffer allocation strategy (except in extract only case)
37
38        BasicBlock* entryBlock = b->GetInsertBlock();
39        BasicBlock* literalCopyCon = b->CreateBasicBlock("literalCopyCon");
40        BasicBlock* literalCopyBody = b->CreateBasicBlock("literalCopyBody");
41        BasicBlock* literalCopyExit = b->CreateBasicBlock("literalCopyExit");
42
43        b->CreateBr(literalCopyCon);
44
45        // ---- literalCopyCon
46        b->SetInsertPoint(literalCopyCon);
47        PHINode* phiOutputPtr = b->CreatePHI(outputPtr->getType(), 2);
48        phiOutputPtr->addIncoming(outputPtr, entryBlock);
49        PHINode* phiInputPtr = b->CreatePHI(inputPtr->getType(), 2);
50        phiInputPtr->addIncoming(inputPtr, entryBlock);
51        PHINode* phiCopiedLength = b->CreatePHI(literalLength->getType(), 2);
52        phiCopiedLength->addIncoming(b->getSize(0), entryBlock);
53        b->CreateCondBr(b->CreateICmpULT(phiCopiedLength, literalLength), literalCopyBody, literalCopyExit);
54
55        // ---- literalCopyBody
56        b->SetInsertPoint(literalCopyBody);
57        // Always copy fw bits to improve performance
58        b->CreateStore(b->CreateLoad(phiInputPtr), phiOutputPtr);
59
60        phiInputPtr->addIncoming(b->CreateGEP(phiInputPtr, b->getSize(1)), b->GetInsertBlock());
61        phiOutputPtr->addIncoming(b->CreateGEP(phiOutputPtr, b->getSize(1)), b->GetInsertBlock());
62        phiCopiedLength->addIncoming(b->CreateAdd(phiCopiedLength, b->getSize(fw / 8)), b->GetInsertBlock());
63        b->CreateBr(literalCopyCon);
64
65        // ---- literalCopyExit
66        b->SetInsertPoint(literalCopyExit);
67        b->setScalarField("outputPos", b->CreateAdd(outputPos, literalLength));
68    }
69
70    void LZ4ByteStreamAioKernel::doMatchCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *matchOffset,
71                                             llvm::Value *matchLength) {
72        unsigned fw = 64;
73        Type* INT_FW_PTR = b->getIntNTy(fw)->getPointerTo();
74
75        BasicBlock* entryBlock = b->GetInsertBlock();
76
77        Value* outputPos = b->getScalarField("outputPos");
78        Value* outputBufferSize = b->getCapacity("outputStream");
79
80        Value* copyToPtr = b->getRawOutputPointer("outputStream", b->CreateURem(outputPos, outputBufferSize));
81        Value* copyFromPtr = b->getRawOutputPointer("outputStream", b->CreateURem(b->CreateSub(outputPos, matchOffset), outputBufferSize));
82
83        BasicBlock* matchCopyCon = b->CreateBasicBlock("matchCopyCon");
84        BasicBlock* matchCopyBody = b->CreateBasicBlock("matchCopyBody");
85        BasicBlock* matchCopyExit = b->CreateBasicBlock("matchCopyExit");
86
87        b->CreateBr(matchCopyCon);
88
89        // ---- matchCopyCon
90        b->SetInsertPoint(matchCopyCon);
91        PHINode* phiFromPtr = b->CreatePHI(b->getInt8PtrTy(), 2);
92        phiFromPtr->addIncoming(copyFromPtr, entryBlock);
93        PHINode* phiToPtr = b->CreatePHI(b->getInt8PtrTy(), 2);
94        phiToPtr->addIncoming(copyToPtr, entryBlock);
95        PHINode* phiCopiedSize = b->CreatePHI(b->getSizeTy(), 2);
96        phiCopiedSize->addIncoming(b->getSize(0), entryBlock);
97
98        b->CreateCondBr(b->CreateICmpULT(phiCopiedSize, matchLength), matchCopyBody, matchCopyExit);
99
100        // ---- matchCopyBody
101        b->SetInsertPoint(matchCopyBody);
102        b->CreateStore(
103                b->CreateLoad(b->CreatePointerCast(phiFromPtr, INT_FW_PTR)),
104        b->CreatePointerCast(phiToPtr, INT_FW_PTR)
105        );
106
107        Value* copySize = b->CreateUMin(matchOffset, b->getSize(fw / 8));
108        phiFromPtr->addIncoming(b->CreateGEP(phiFromPtr, copySize), b->GetInsertBlock());
109        phiToPtr->addIncoming(b->CreateGEP(phiToPtr, copySize), b->GetInsertBlock());
110        phiCopiedSize->addIncoming(b->CreateAdd(phiCopiedSize, copySize), b->GetInsertBlock());
111        b->CreateBr(matchCopyCon);
112
113        // ---- matchCopyExit
114        b->SetInsertPoint(matchCopyExit);
115        b->setScalarField("outputPos", b->CreateAdd(outputPos, matchLength));
116    }
117
118    void LZ4ByteStreamAioKernel::setProducedOutputItemCount(const std::unique_ptr<KernelBuilder> &b, llvm::Value* produced) {
119        b->setProducedItemCount("outputStream", produced);
120    }
121
122}
Note: See TracBrowser for help on using the repository browser.