source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp @ 5926

Last change on this file since 5926 was 5926, checked in by xwa163, 17 months ago

Fix lz4 related GEP instructions and TODO

File size: 30.2 KB
Line 
1//
2// Created by wxy325 on 2018/3/16.
3//
4
5#include "lz4_index_builder.h"
6
7
8#include <kernels/kernel_builder.h>
9#include <iostream>
10#include <string>
11#include <llvm/Support/raw_ostream.h>
12#include <kernels/streamset.h>
13
14using namespace llvm;
15using namespace kernel;
16using namespace std;
17namespace kernel{
18    LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
19            : MultiBlockKernel("LZ4IndexBuilderKernel",
20            // Inputs
21                               {
22                                       Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
23                                       Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
24                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xFX", RateEqualTo("byteStream")},
25                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xXF", RateEqualTo("byteStream")},
26
27                                       // block data
28                                       Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1),
29                                               ConstantStrideLengthOne()},
30                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1),
31                                               ConstantStrideLengthOne()},
32                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1),
33                                               ConstantStrideLengthOne()}
34
35                               },
36            //Outputs
37                               {
38                                       // Uncompressed_data
39                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos",
40                                               BoundedRate(0, 1)},
41                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength",
42                                               BoundedRate(0, 1)},
43                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos",
44                                               BoundedRate(0, 1)},
45
46                                       Binding{iBuilder->getStreamSetTy(1, 1), "e1Marker", BoundedRate(0, 1)},
47                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
48                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
49                                       Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)}
50                               },
51            //Arguments
52                               {},
53                               {},
54            //Internal states:
55                               {
56                                       Binding{iBuilder->getSizeTy(), "blockDataIndex"},
57                                       Binding{iBuilder->getInt64Ty(), "m0OutputPos"}
58                               }) {
59//        addAttribute(MustExplicitlyTerminate());
60    }
61
62    void LZ4IndexBuilderKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *const numOfStrides) {
63//        iBuilder->CallPrintInt("entry", iBuilder->getSize(0));
64//        iBuilder->CallPrintInt("aaa", iBuilder->getProducedItemCount("e1Marker"));
65
66
67
68        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
69        BasicBlock* blockEndConBlock = iBuilder->CreateBasicBlock("blockEndConBlock");
70
71        this->resetPreviousProducedMap(iBuilder, {"e1Marker", "m0Start", "m0End", "matchOffset"});
72
73        Value* blockDataIndex = iBuilder->getScalarField("blockDataIndex");
74
75        Value* totalNumber = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("blockEnd"), iBuilder->getProcessedItemCount("blockEnd"));
76
77//        iBuilder->CallPrintInt("blockDataIndex", blockDataIndex);
78//        iBuilder->CallPrintInt("totalNumber", totalNumber);
79//        iBuilder->setTerminationSignal(iBuilder->CreateICmpEQ(availableBlockEnd, iBuilder->getSize(0)));
80
81        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock);
82
83        iBuilder->SetInsertPoint(blockEndConBlock);
84        Value* blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex);
85        iBuilder->CallPrintInt("blockEnd", blockEnd);
86
87        Value* totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));
88//        iBuilder->CallPrintInt("totalExtender", totalExtender);
89
90//        iBuilder->CallPrintInt("processByteStream", iBuilder->getProcessedItemCount("byteStream"));
91//        iBuilder->CallPrintInt("availableByteStream", iBuilder->getAvailableItemCount("byteStream"));
92
93
94//        iBuilder->CallPrintInt("consumedExtender", iBuilder->getConsumedItemCount("extender"));
95//        iBuilder->CallPrintInt("processExtender", iBuilder->getProcessedItemCount("extender"));
96//        iBuilder->CallPrintInt("availableExtender", iBuilder->getAvailableItemCount("extender"));
97//        iBuilder->CallPrintInt("blockDataIndex", blockDataIndex);
98
99
100        Value* blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex);
101
102        BasicBlock* processBlock = iBuilder->CreateBasicBlock("processBlock");
103//        iBuilder->CallPrintInt("----totalExtender", totalExtender);
104//        iBuilder->CallPrintInt("----blockStart", blockStart);
105//        iBuilder->CallPrintInt("----blockEnd", blockEnd);
106
107        iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
108
109        iBuilder->SetInsertPoint(processBlock);
110
111
112        //TODO handle uncompressed block
113        this->generateProcessCompressedBlock(iBuilder, blockStart, blockEnd);
114
115
116
117        Value* newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));
118        iBuilder->setScalarField("blockDataIndex", newBlockDataIndex);
119        iBuilder->setProcessedItemCount("blockEnd", newBlockDataIndex);
120        iBuilder->setProcessedItemCount("blockStart", newBlockDataIndex);
121        iBuilder->setProcessedItemCount("isCompressed", newBlockDataIndex);
122
123
124        iBuilder->setProcessedItemCount("byteStream", blockEnd);
125//        iBuilder->setProcessedItemCount("extender", blockEnd);
126//        iBuilder->setProcessedItemCount("CC_0xFX", blockEnd);
127//        iBuilder->setProcessedItemCount("CC_0xXF", blockEnd);
128
129        iBuilder->CreateBr(exitBlock);
130
131        iBuilder->SetInsertPoint(exitBlock);
132    }
133
134    Value* LZ4IndexBuilderKernel::processLiteral(const std::unique_ptr<KernelBuilder> &iBuilder, Value* token, Value* tokenPos, Value* blockEnd) {
135        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
136
137        Value* extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
138//        iBuilder->CallPrintInt("token", token);
139
140        BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
141        BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
142
143        iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthBody, extendLiteralLengthExit);
144
145        iBuilder->SetInsertPoint(extendLiteralLengthBody);
146        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), blockEnd);
147        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
148
149        iBuilder->CreateBr(extendLiteralLengthExit);
150
151        iBuilder->SetInsertPoint(extendLiteralLengthExit);
152
153        PHINode* phiCursorPosAfterLiteral = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
154        phiCursorPosAfterLiteral->addIncoming(newCursorPos, advanceFinishBlock);
155        phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock);
156
157        Value* literalExtensionSize = iBuilder->CreateSub(phiCursorPosAfterLiteral, tokenPos);
158//        iBuilder->CallPrintInt("literalExtensionSize", literalExtensionSize);
159        Value* finalLengthByte = this->generateLoadSourceInputByte(iBuilder, phiCursorPosAfterLiteral);
160        finalLengthByte = iBuilder->CreateZExt(finalLengthByte, iBuilder->getInt64Ty());
161        Value* literalLengthExtendValue = iBuilder->CreateSelect(
162                iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)),
163                iBuilder->CreateAdd(
164                        iBuilder->CreateMul(
165                                iBuilder->CreateSub(literalExtensionSize, iBuilder->getSize(1)),
166                                iBuilder->getSize(255)
167                        ),
168                        finalLengthByte
169                ),
170                iBuilder->getSize(0)
171        );
172        literalLengthExtendValue = iBuilder->CreateZExt(literalLengthExtendValue, iBuilder->getInt64Ty());
173        Value* literalLengthBase = iBuilder->CreateLShr(iBuilder->CreateZExt(token, iBuilder->getInt64Ty()), iBuilder->getInt64(4));
174        Value* literalLength = iBuilder->CreateAdd(literalLengthBase, literalLengthExtendValue);
175
176        Value* offsetPos = iBuilder->CreateAdd(
177                iBuilder->CreateAdd(
178                        phiCursorPosAfterLiteral,
179                        literalLength),
180                iBuilder->getSize(1));
181
182        // TODO Clear Output Buffer at the beginning instead of marking 0
183        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->getProducedItemCount("e1Marker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), false);
184        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), offsetPos, true);
185        this->increaseScalarField(iBuilder, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields
186        return offsetPos;
187    }
188
189    Value* LZ4IndexBuilderKernel::processMatch(const std::unique_ptr<KernelBuilder> &iBuilder, Value* offsetPos, Value* token, Value* blockEnd) {
190        Constant* INT64_ONE = iBuilder->getInt64(1);
191
192        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
193
194        Value* matchLengthStartPos = iBuilder->CreateAdd(offsetPos, INT64_ONE);
195        Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
196
197        BasicBlock* extendMatchBodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_body");
198        BasicBlock* extendMatchExitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_exit");
199
200        iBuilder->CreateCondBr(extendedMatchValue, extendMatchBodyBlock, extendMatchExitBlock);
201
202        iBuilder->SetInsertPoint(extendMatchBodyBlock);
203
204        //ExtendMatchBodyBlock
205        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(matchLengthStartPos, INT64_ONE), blockEnd);
206        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
207
208        // ----May be in a different segment now
209        iBuilder->CreateBr(extendMatchExitBlock);
210
211        //ExtendMatchExitBlock
212        iBuilder->SetInsertPoint(extendMatchExitBlock);
213        PHINode* phiCursorPosAfterMatch = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
214        phiCursorPosAfterMatch->addIncoming(newCursorPos, advanceFinishBlock);
215        phiCursorPosAfterMatch->addIncoming(matchLengthStartPos, entryBlock);
216
217        Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, matchLengthStartPos);
218//        iBuilder->CallPrintInt("totalExtender", iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender")));
219//        iBuilder->CallPrintInt("aaa", oldMatchExtensionSize);
220
221        extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
222        Value* matchExtensionSize = iBuilder->CreateSelect(
223                iBuilder->CreateICmpEQ(extendedMatchValue, iBuilder->getInt1(true)),
224                oldMatchExtensionSize,
225                iBuilder->getSize(0)
226        );
227        Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
228        Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
229
230
231        Value* extensionLastBitPos = iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1));
232        extensionLastBitPos = iBuilder->CreateAdd(extensionLastBitPos, matchExtensionSize);
233
234        Value* extensionLastBitValue = this->generateLoadSourceInputByte(iBuilder, extensionLastBitPos);
235        extensionLastBitValue = iBuilder->CreateZExt(extensionLastBitValue, iBuilder->getSizeTy());
236
237
238        Value* matchLengthAddValue = iBuilder->CreateSelect(
239                iBuilder->CreateICmpUGT(matchExtensionSize, iBuilder->getSize(0)),
240                iBuilder->CreateAdd(
241                        iBuilder->CreateMul(
242                                iBuilder->CreateSub(matchExtensionSize, iBuilder->getSize(1)),
243                                iBuilder->getSize(255)
244                        ),
245                        extensionLastBitValue
246                )
247                ,
248                iBuilder->getSize(0)
249        );
250        matchLengthAddValue = iBuilder->CreateZExt(matchLengthAddValue, iBuilder->getInt64Ty());
251
252        matchLength = iBuilder->CreateAdd(matchLength, matchLengthAddValue);
253
254        Value* outputPos = iBuilder->getScalarField("m0OutputPos");
255
256        Value* outputEndPos = iBuilder->CreateSub(
257                iBuilder->CreateAdd(outputPos, matchLength),
258                iBuilder->getInt64(1)
259        );
260
261        Value* matchOffset = iBuilder->CreateAdd(
262                iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, offsetPos), iBuilder->getSizeTy()),
263                iBuilder->CreateShl(iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1))), iBuilder->getSizeTy()), iBuilder->getSize(8))
264        );
265//        iBuilder->CallPrintInt("matchOffset", matchOffset);
266        this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
267//    iBuilder->CallPrintInt("m0Start", outputPos);
268        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
269//    iBuilder->CallPrintInt("m0End", outputEndPos);
270        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffset);
271//    iBuilder->CallPrintInt("matchOffset", matchOffset);
272        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
273        return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE);
274    }
275
276
277    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
278        // Constant
279        Constant* INT64_ONE = iBuilder->getInt64(1);
280
281        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
282        //TODO use memset to clear output buffer
283        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("processCompressedExitBlock");
284
285        BasicBlock* processCon = iBuilder->CreateBasicBlock("processCompressedConBlock");
286        BasicBlock* processBody = iBuilder->CreateBasicBlock("processCompressedBodyBlock");
287
288        iBuilder->CreateBr(processCon);
289        iBuilder->SetInsertPoint(processCon);
290
291        PHINode* phiCursorValue = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3); // phiCursorValue should always be the position of next token except for the final sequence
292        phiCursorValue->addIncoming(blockStart, entryBlock);
293
294        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(phiCursorValue, blockEnd), processBody, exitBlock);
295
296        // Process Body
297        iBuilder->SetInsertPoint(processBody);
298
299        //TODO add acceleration here
300        Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue);
301//        iBuilder->CallPrintInt("tokenPos", phiCursorValue);
302//        iBuilder->CallPrintInt("token", token);
303
304        // Process Literal
305        BasicBlock* processLiteralBlock = iBuilder->CreateBasicBlock("processLiteralBlock");
306        iBuilder->CreateBr(processLiteralBlock);
307        iBuilder->SetInsertPoint(processLiteralBlock);
308
309        Value* offsetPos = this->processLiteral(iBuilder, token, phiCursorValue, blockEnd);
310//        iBuilder->CallPrintInt("offsetPos", offsetPos);
311        // Process Match
312        BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
313        BasicBlock* handleM0ElseBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_else");
314
315        iBuilder->CreateCondBr(
316                iBuilder->CreateICmpULT(offsetPos, blockEnd),
317                handleM0BodyBlock,
318                handleM0ElseBlock
319        );
320
321        // HandleM0Body
322        iBuilder->SetInsertPoint(handleM0BodyBlock);
323        Value* nextTokenPos = this->processMatch(iBuilder, offsetPos, token, blockEnd);
324//        iBuilder->CallPrintInt("nextTokenPos", nextTokenPos);
325        phiCursorValue->addIncoming(nextTokenPos, iBuilder->GetInsertBlock());
326
327        iBuilder->CreateBr(processCon);
328
329
330        // HandleM0Else
331        iBuilder->SetInsertPoint(handleM0ElseBlock);
332
333        phiCursorValue->addIncoming(offsetPos, handleM0ElseBlock);
334        // Store final M0 pos to make sure the bit stream will be long enough
335        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
336//        iBuilder->CallPrintInt("finalM0OutputPos", finalM0OutputPos);
337        this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
338        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
339        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), iBuilder->getInt64(0));
340
341        iBuilder->CreateBr(processCon);
342
343
344        iBuilder->SetInsertPoint(exitBlock);
345    }
346
347    Value *LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) {
348        return advanceUntilNextValue(iBuilder, inputName, startPos, true, maxPos);
349    }
350
351    Value *LZ4IndexBuilderKernel::advanceUntilNextOne(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) {
352        return advanceUntilNextValue(iBuilder, inputName, startPos, false, maxPos);
353    }
354
355    Value *LZ4IndexBuilderKernel::advanceUntilNextValue(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, bool isNextZero, Value* maxPos) {
356        unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
357        Constant* INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(bitBlockWidth);
358        Constant* SIZE_ZERO = iBuilder->getSize(0);
359        Type* bitBlockType = iBuilder->getBitBlockType();
360        Type* bitBlockWidthIntTy = iBuilder->getIntNTy(bitBlockWidth);
361
362        Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputName), INT64_BIT_BLOCK_WIDTH);
363
364        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
365
366        BasicBlock* advanceConBlock = iBuilder->CreateBasicBlock("advanceConBlock");
367        BasicBlock* advanceBodyBlock = iBuilder->CreateBasicBlock("advanceBodyBlock");
368        BasicBlock* advanceExitBlock = iBuilder->CreateBasicBlock("advanceExitBlock");
369
370        iBuilder->CreateBr(advanceConBlock);
371        // TODO special handling for the first advance may have better performance
372        iBuilder->SetInsertPoint(advanceConBlock);
373
374        PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
375        phiCurrentPos->addIncoming(startPos, entryBlock);
376        PHINode* phiIsFinish = iBuilder->CreatePHI(iBuilder->getInt1Ty(), 2);
377        phiIsFinish->addIncoming(iBuilder->getInt1(false), entryBlock);
378        iBuilder->CreateCondBr(iBuilder->CreateNot(phiIsFinish), advanceBodyBlock, advanceExitBlock);
379
380        iBuilder->SetInsertPoint(advanceBodyBlock);
381
382        Value* currentPosBitBlockIndex = iBuilder->CreateSub(iBuilder->CreateUDiv(phiCurrentPos, INT64_BIT_BLOCK_WIDTH), baseInputBlockIndex);
383
384        Value* currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
385
386        Value* ptr = iBuilder->getInputStreamBlockPtr(inputName, SIZE_ZERO, currentPosBitBlockIndex);
387
388        Value* currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy);
389
390        currentBitValue = iBuilder->CreateLShr(currentBitValue, iBuilder->CreateZExt(currentPosBitBlockOffset, bitBlockWidthIntTy));
391        if (isNextZero) {
392            currentBitValue = iBuilder->CreateNot(currentBitValue);
393        }
394        Value* forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
395        Value* newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
396        newOffset = iBuilder->CreateUMin(newOffset, INT64_BIT_BLOCK_WIDTH);
397
398        Value* actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
399        Value* newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue);
400        if (maxPos) {
401            newPos = iBuilder->CreateUMin(maxPos, newPos);
402            actualAdvanceValue = iBuilder->CreateSub(newPos, phiCurrentPos);
403            newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, actualAdvanceValue);
404        }
405
406        phiIsFinish->addIncoming(iBuilder->CreateNot(iBuilder->CreateICmpEQ(newOffset, INT64_BIT_BLOCK_WIDTH)), iBuilder->GetInsertBlock());
407        phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock());
408        iBuilder->CreateBr(advanceConBlock);
409
410        iBuilder->SetInsertPoint(advanceExitBlock);
411        return phiCurrentPos;
412    }
413
414    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value *globalOffset) {
415        // Stride Size here is Constant 1 instead of BitBlockWidth
416        Constant* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
417        Constant* SIZE_ZERO = iBuilder->getSize(0);
418
419//        Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_BIT_BLOCK_WIDTH);
420
421        Value* offset = iBuilder->CreateSub(globalOffset, iBuilder->getProcessedItemCount(inputBufferName));
422
423        Value* targetBlockIndex = iBuilder->CreateUDiv(offset, SIZE_BIT_BLOCK_WIDTH);
424        Value* localOffset = iBuilder->CreateURem(offset, SIZE_BIT_BLOCK_WIDTH);
425
426        //[64 x <4 x i64>]*
427        Value* ptr = iBuilder->getInputStreamBlockPtr(inputBufferName, SIZE_ZERO, targetBlockIndex);
428        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt64Ty()->getPointerTo());
429        //GEP here is safe
430        Value* valuePtr = iBuilder->CreateGEP(ptr, localOffset);
431        return iBuilder->CreateLoad(valuePtr);
432    }
433
434    Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value *offset) {
435        // The external buffer is always linear accessible, so the GEP here is safe
436        Value *blockStartPtr = iBuilder->CreatePointerCast(
437                iBuilder->getRawInputPointer("byteStream", iBuilder->getInt32(0)),
438                iBuilder->getInt8PtrTy()
439        );
440        Value *ptr = iBuilder->CreateGEP(blockStartPtr, offset);
441        return iBuilder->CreateLoad(ptr);
442    }
443
444    void LZ4IndexBuilderKernel::increaseScalarField(const unique_ptr<KernelBuilder> &iBuilder, const string &fieldName, Value *value) {
445        Value *fieldValue = iBuilder->getScalarField(fieldName);
446        fieldValue = iBuilder->CreateAdd(fieldValue, value);
447        iBuilder->setScalarField(fieldName, fieldValue);
448    }
449
450    size_t LZ4IndexBuilderKernel::getOutputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, string bufferName) {
451        return this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
452    }
453
454    // Assume we have enough output buffer
455    llvm::BasicBlock *LZ4IndexBuilderKernel::markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
456                                                                    const std::string &bitstreamName,
457                                                                    llvm::Value *start, llvm::Value *end, bool isOne,
458                                                                    bool setProduced) {
459        const unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
460        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(bitBlockWidth);
461        Value* SIZE_ONE = iBuilder->getSize(1);
462        Value* SIZE_ZERO = iBuilder->getSize(0);
463        Type * const INT_BIT_BLOCK_TY = iBuilder->getIntNTy(bitBlockWidth);
464        Type * const BIT_BLOCK_TY = iBuilder->getBitBlockType();
465        Constant* INT_BIT_BLOCK_ONE = ConstantInt::get(INT_BIT_BLOCK_TY, 1);
466        Constant* INT_BIT_BLOCK_ZERO = ConstantInt::get(INT_BIT_BLOCK_TY, 0);
467
468        Value* previousProduced = this->previousProducedMap.find(bitstreamName)->second;
469        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
470
471        BasicBlock *entryBlock = iBuilder->GetInsertBlock();
472        BasicBlock *conBlock = iBuilder->CreateBasicBlock("mark_bit_one_con");
473        BasicBlock *bodyBlock = iBuilder->CreateBasicBlock("mark_bit_one_body");
474        BasicBlock *exitBlock = iBuilder->CreateBasicBlock("mark_bit_one_exit");
475
476        Value* startBlockLocalIndex = iBuilder->CreateSub(iBuilder->CreateUDiv(start, SIZE_BIT_BLOCK_WIDTH), blockIndexBase);
477
478        iBuilder->CreateBr(conBlock);
479
480        // Con
481        iBuilder->SetInsertPoint(conBlock);
482
483        PHINode *curBlockLocalIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
484        curBlockLocalIndex->addIncoming(startBlockLocalIndex, entryBlock);
485        iBuilder->CreateCondBr(
486                iBuilder->CreateICmpULT(iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH), end),
487                bodyBlock,
488                exitBlock
489        );
490
491        // Body
492        iBuilder->SetInsertPoint(bodyBlock);
493
494        Value *outputLowestBitValue = iBuilder->CreateSelect(
495                iBuilder->CreateICmpULE(
496                        iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH),
497                        start
498                ),
499                iBuilder->CreateShl(INT_BIT_BLOCK_ONE, iBuilder->CreateZExt(iBuilder->CreateURem(start, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY)),
500                INT_BIT_BLOCK_ONE
501        );
502
503        Value *hasNotReachEnd = iBuilder->CreateICmpULE(
504                iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_ONE), SIZE_BIT_BLOCK_WIDTH),
505                end
506        );
507        Value *producedItemsCount = iBuilder->CreateSelect(
508                hasNotReachEnd,
509                iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_ONE), SIZE_BIT_BLOCK_WIDTH),
510                end
511        );
512
513
514        Value *outputHighestBitValue = iBuilder->CreateSelect(
515                hasNotReachEnd,
516                INT_BIT_BLOCK_ZERO,
517                iBuilder->CreateShl(
518                        INT_BIT_BLOCK_ONE,
519                        iBuilder->CreateZExt(iBuilder->CreateURem(end, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY)
520                )
521        );
522
523
524        Value *bitMask = iBuilder->CreateSub(
525                outputHighestBitValue,
526                outputLowestBitValue
527        );
528
529        if (!isOne) {
530            bitMask = iBuilder->CreateNot(bitMask);
531        }
532
533        Value *targetPtr = iBuilder->getOutputStreamBlockPtr(bitstreamName, SIZE_ZERO, curBlockLocalIndex);
534        Value *oldValue = iBuilder->CreateLoad(targetPtr);
535        oldValue = iBuilder->CreateBitCast(oldValue, INT_BIT_BLOCK_TY);
536        Value *newValue = NULL;
537        if (isOne) {
538            newValue = iBuilder->CreateOr(oldValue, bitMask);
539        } else {
540            newValue = iBuilder->CreateAnd(oldValue, bitMask);
541        }
542
543        iBuilder->CreateStore(
544                iBuilder->CreateBitCast(newValue, BIT_BLOCK_TY),
545                targetPtr
546        );
547        if (setProduced) {
548            iBuilder->setProducedItemCount(bitstreamName, producedItemsCount);
549        }
550
551        curBlockLocalIndex->addIncoming(iBuilder->CreateAdd(curBlockLocalIndex, SIZE_ONE), bodyBlock);
552        iBuilder->CreateBr(conBlock);
553
554        // Exit
555        iBuilder->SetInsertPoint(exitBlock);
556        return exitBlock;
557    }
558
559
560
561    void LZ4IndexBuilderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
562                                                             const string &outputBufferName, Type *pointerType,
563                                                             Value *value) {
564        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
565        Value* SIZE_ZERO = iBuilder->getSize(0);
566        Value* SIZE_ONE = iBuilder->getSize(1);
567
568        Value* previousProduced = previousProducedMap.find(outputBufferName)->second;
569
570        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
571        Value* outputOffset = iBuilder->getProducedItemCount(outputBufferName);
572        Value* blockIndex = iBuilder->CreateUDiv(outputOffset, SIZE_BIT_BLOCK_WIDTH);
573
574        Value* blockOffset = iBuilder->CreateURem(outputOffset, SIZE_BIT_BLOCK_WIDTH);
575
576        // i8, [8 x <4 x i64>]*
577        // i64, [64 x <4 x i64>]*
578        Value* ptr = iBuilder->getOutputStreamBlockPtr(outputBufferName, SIZE_ZERO, iBuilder->CreateSub(blockIndex, blockIndexBase));
579        ptr = iBuilder->CreatePointerCast(ptr, pointerType);
580        // GEP here is safe
581        iBuilder->CreateStore(value, iBuilder->CreateGEP(ptr, blockOffset));
582
583        iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, SIZE_ONE));
584    }
585
586
587    void LZ4IndexBuilderKernel::resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder,
588                                                            std::vector<std::string> outputList) {
589        previousProducedMap.clear();
590        for (auto iter = outputList.begin(); iter != outputList.end(); ++iter) {
591            previousProducedMap.insert(std::make_pair(*iter, iBuilder->getProducedItemCount(*iter)));
592        }
593    }
594}
Note: See TracBrowser for help on using the repository browser.