source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp @ 5921

Last change on this file since 5921 was 5921, checked in by xwa163, 13 months ago
  1. Initial checkin for new approach for lz4 index decoder that always use 4MB buffer
  2. Add test case for new approach (for now test cases will fail when test file is larger than 4MB)
File size: 29.9 KB
Line 
1//
2// Created by wxy325 on 2018/3/16.
3//
4
5#include "lz4_index_builder.h"
6
7
8#include <kernels/kernel_builder.h>
9#include <iostream>
10#include <string>
11#include <llvm/Support/raw_ostream.h>
12#include <kernels/streamset.h>
13
14using namespace llvm;
15using namespace kernel;
16using namespace std;
17namespace kernel{
18    LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
19            : MultiBlockKernel("LZ4IndexBuilderKernel",
20            // Inputs
21                               {
22                                       Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
23                                       Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
24                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xFX", RateEqualTo("byteStream")},
25                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xXF", RateEqualTo("byteStream")},
26
27                                       // block data
28                                       Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1),
29                                               ConstantStrideLengthOne()},
30                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1),
31                                               ConstantStrideLengthOne()},
32                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1),
33                                               ConstantStrideLengthOne()}
34
35                               },
36            //Outputs
37                               {
38                                       // Uncompressed_data
39                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos",
40                                               BoundedRate(0, 1)},
41                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength",
42                                               BoundedRate(0, 1)},
43                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos",
44                                               BoundedRate(0, 1)},
45
46                                       Binding{iBuilder->getStreamSetTy(1, 1), "e1Marker", BoundedRate(0, 1)},
47                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
48                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
49                                       Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)}
50                               },
51            //Arguments
52                               {},
53                               {},
54            //Internal states:
55                               {
56                                       Binding{iBuilder->getSizeTy(), "blockDataIndex"},
57                                       Binding{iBuilder->getInt64Ty(), "m0OutputPos"}
58                               }) {
59//        addAttribute(MustExplicitlyTerminate());
60    }
61
62    void LZ4IndexBuilderKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *const numOfStrides) {
63//        iBuilder->CallPrintInt("entry", iBuilder->getSize(0));
64//        iBuilder->CallPrintInt("aaa", iBuilder->getProducedItemCount("e1Marker"));
65
66        // Clear Output Buffer
67        previousE1Produced = iBuilder->getProducedItemCount("e1Marker");
68
69        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
70        BasicBlock* blockEndConBlock = iBuilder->CreateBasicBlock("blockEndConBlock");
71        Value* blockDataIndex = iBuilder->getScalarField("blockDataIndex");
72
73        Value* totalNumber = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("blockEnd"), iBuilder->getProcessedItemCount("blockEnd"));
74
75//        iBuilder->CallPrintInt("blockDataIndex", blockDataIndex);
76//        iBuilder->CallPrintInt("totalNumber", totalNumber);
77//        iBuilder->setTerminationSignal(iBuilder->CreateICmpEQ(availableBlockEnd, iBuilder->getSize(0)));
78
79        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock);
80
81        iBuilder->SetInsertPoint(blockEndConBlock);
82        Value* blockEnd = this->generateLoadCircularInput(iBuilder, "blockEnd", blockDataIndex, iBuilder->getInt64Ty()->getPointerTo());
83
84
85        Value* totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));
86//        iBuilder->CallPrintInt("totalExtender", totalExtender);
87
88//        iBuilder->CallPrintInt("processByteStream", iBuilder->getProcessedItemCount("byteStream"));
89//        iBuilder->CallPrintInt("availableByteStream", iBuilder->getAvailableItemCount("byteStream"));
90
91
92//        iBuilder->CallPrintInt("consumedExtender", iBuilder->getConsumedItemCount("extender"));
93//        iBuilder->CallPrintInt("processExtender", iBuilder->getProcessedItemCount("extender"));
94//        iBuilder->CallPrintInt("availableExtender", iBuilder->getAvailableItemCount("extender"));
95//        iBuilder->CallPrintInt("blockDataIndex", blockDataIndex);
96//        iBuilder->CallPrintInt("blockEnd", blockEnd);
97
98        Value* blockStart = this->generateLoadCircularInput(iBuilder, "blockStart", blockDataIndex, iBuilder->getInt64Ty()->getPointerTo());
99
100        BasicBlock* processBlock = iBuilder->CreateBasicBlock("processBlock");
101//        iBuilder->CallPrintInt("----totalExtender", totalExtender);
102//        iBuilder->CallPrintInt("----blockStart", blockStart);
103//        iBuilder->CallPrintInt("----blockEnd", blockEnd);
104
105        iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
106
107        iBuilder->SetInsertPoint(processBlock);
108
109
110        //TODO handle uncompressed block
111        this->generateProcessCompressedBlock(iBuilder, blockStart, blockEnd);
112
113
114
115        Value* newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));
116        iBuilder->setScalarField("blockDataIndex", newBlockDataIndex);
117        iBuilder->setProcessedItemCount("blockEnd", newBlockDataIndex);
118        iBuilder->setProcessedItemCount("blockStart", newBlockDataIndex);
119        iBuilder->setProcessedItemCount("isCompressed", newBlockDataIndex);
120
121
122        iBuilder->setProcessedItemCount("byteStream", blockEnd);
123//        iBuilder->setProcessedItemCount("extender", blockEnd);
124//        iBuilder->setProcessedItemCount("CC_0xFX", blockEnd);
125//        iBuilder->setProcessedItemCount("CC_0xXF", blockEnd);
126
127        iBuilder->CreateBr(exitBlock);
128
129        iBuilder->SetInsertPoint(exitBlock);
130    }
131
132    Value* LZ4IndexBuilderKernel::processLiteral(const std::unique_ptr<KernelBuilder> &iBuilder, Value* token, Value* tokenPos, Value* blockEnd) {
133        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
134
135        Value* extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
136//        iBuilder->CallPrintInt("token", token);
137
138        BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
139        BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
140
141        iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthBody, extendLiteralLengthExit);
142
143        iBuilder->SetInsertPoint(extendLiteralLengthBody);
144        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), blockEnd);
145        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
146
147        iBuilder->CreateBr(extendLiteralLengthExit);
148
149        iBuilder->SetInsertPoint(extendLiteralLengthExit);
150
151        PHINode* phiCursorPosAfterLiteral = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
152        phiCursorPosAfterLiteral->addIncoming(newCursorPos, advanceFinishBlock);
153        phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock);
154
155        Value* literalExtensionSize = iBuilder->CreateSub(phiCursorPosAfterLiteral, tokenPos);
156//        iBuilder->CallPrintInt("literalExtensionSize", literalExtensionSize);
157        Value* finalLengthByte = this->generateLoadSourceInputByte(iBuilder, phiCursorPosAfterLiteral);
158        finalLengthByte = iBuilder->CreateZExt(finalLengthByte, iBuilder->getInt64Ty());
159        Value* literalLengthExtendValue = iBuilder->CreateSelect(
160                iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)),
161                iBuilder->CreateAdd(
162                        iBuilder->CreateMul(
163                                iBuilder->CreateSub(literalExtensionSize, iBuilder->getSize(1)),
164                                iBuilder->getSize(255)
165                        ),
166                        finalLengthByte
167                ),
168                iBuilder->getSize(0)
169        );
170        literalLengthExtendValue = iBuilder->CreateZExt(literalLengthExtendValue, iBuilder->getInt64Ty());
171        Value* literalLengthBase = iBuilder->CreateLShr(iBuilder->CreateZExt(token, iBuilder->getInt64Ty()), iBuilder->getInt64(4));
172        Value* literalLength = iBuilder->CreateAdd(literalLengthBase, literalLengthExtendValue);
173
174        Value* offsetPos = iBuilder->CreateAdd(
175                iBuilder->CreateAdd(
176                        phiCursorPosAfterLiteral,
177                        literalLength),
178                iBuilder->getSize(1));
179//        iBuilder->CallPrintInt("offsetPos", offsetPos);
180        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->getProducedItemCount("e1Marker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), false);
181        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), offsetPos, true);
182        this->increaseScalarField(iBuilder, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields
183        return offsetPos;
184    }
185
186    Value* LZ4IndexBuilderKernel::processMatch(const std::unique_ptr<KernelBuilder> &iBuilder, Value* offsetPos, Value* token, Value* blockEnd) {
187        Constant* INT64_ONE = iBuilder->getInt64(1);
188
189        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
190
191        Value* matchLengthStartPos = iBuilder->CreateAdd(offsetPos, INT64_ONE);
192        Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
193
194        BasicBlock* extendMatchBodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_body");
195        BasicBlock* extendMatchExitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_exit");
196
197        iBuilder->CreateCondBr(extendedMatchValue, extendMatchBodyBlock, extendMatchExitBlock);
198
199        iBuilder->SetInsertPoint(extendMatchBodyBlock);
200
201        //ExtendMatchBodyBlock
202        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(matchLengthStartPos, INT64_ONE), blockEnd);
203        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
204
205        // ----May be in a different segment now
206        iBuilder->CreateBr(extendMatchExitBlock);
207
208        //ExtendMatchExitBlock
209        iBuilder->SetInsertPoint(extendMatchExitBlock);
210        PHINode* phiCursorPosAfterMatch = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
211        phiCursorPosAfterMatch->addIncoming(newCursorPos, advanceFinishBlock);
212        phiCursorPosAfterMatch->addIncoming(matchLengthStartPos, entryBlock);
213
214        Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, matchLengthStartPos);
215//        iBuilder->CallPrintInt("totalExtender", iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender")));
216//        iBuilder->CallPrintInt("aaa", oldMatchExtensionSize);
217
218        extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
219        Value* matchExtensionSize = iBuilder->CreateSelect(
220                iBuilder->CreateICmpEQ(extendedMatchValue, iBuilder->getInt1(true)),
221                oldMatchExtensionSize,
222                iBuilder->getSize(0)
223        );
224        Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
225        Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
226
227
228        Value* extensionLastBitPos = iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1));
229        extensionLastBitPos = iBuilder->CreateAdd(extensionLastBitPos, matchExtensionSize);
230
231        Value* extensionLastBitValue = this->generateLoadSourceInputByte(iBuilder, extensionLastBitPos);
232        extensionLastBitValue = iBuilder->CreateZExt(extensionLastBitValue, iBuilder->getSizeTy());
233
234
235        Value* matchLengthAddValue = iBuilder->CreateSelect(
236                iBuilder->CreateICmpUGT(matchExtensionSize, iBuilder->getSize(0)),
237                iBuilder->CreateAdd(
238                        iBuilder->CreateMul(
239                                iBuilder->CreateSub(matchExtensionSize, iBuilder->getSize(1)),
240                                iBuilder->getSize(255)
241                        ),
242                        extensionLastBitValue
243                )
244                ,
245                iBuilder->getSize(0)
246        );
247        matchLengthAddValue = iBuilder->CreateZExt(matchLengthAddValue, iBuilder->getInt64Ty());
248
249        matchLength = iBuilder->CreateAdd(matchLength, matchLengthAddValue);
250
251        Value* outputPos = iBuilder->getScalarField("m0OutputPos");
252
253        Value* outputEndPos = iBuilder->CreateSub(
254                iBuilder->CreateAdd(outputPos, matchLength),
255                iBuilder->getInt64(1)
256        );
257
258        Value* matchOffset = iBuilder->CreateAdd(
259                iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, offsetPos), iBuilder->getSizeTy()),
260                iBuilder->CreateShl(iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1))), iBuilder->getSizeTy()), iBuilder->getSize(8))
261        );
262//        iBuilder->CallPrintInt("matchOffset", matchOffset);
263        this->generateStoreCircularOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
264//    iBuilder->CallPrintInt("m0Start", outputPos);
265        this->generateStoreCircularOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
266//    iBuilder->CallPrintInt("m0End", outputEndPos);
267        this->generateStoreCircularOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffset);
268//    iBuilder->CallPrintInt("matchOffset", matchOffset);
269        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
270        return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE);
271    }
272
273
274    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
275        // Constant
276        Constant* INT64_ONE = iBuilder->getInt64(1);
277
278        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
279        //TODO use memset to clear output buffer
280        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("processCompressedExitBlock");
281
282        BasicBlock* processCon = iBuilder->CreateBasicBlock("processCompressedConBlock");
283        BasicBlock* processBody = iBuilder->CreateBasicBlock("processCompressedBodyBlock");
284
285        iBuilder->CreateBr(processCon);
286        iBuilder->SetInsertPoint(processCon);
287
288        PHINode* phiCursorValue = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3); // phiCursorValue should always be the position of next token except for the final sequence
289        phiCursorValue->addIncoming(blockStart, entryBlock);
290
291        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(phiCursorValue, blockEnd), processBody, exitBlock);
292
293        // Process Body
294        iBuilder->SetInsertPoint(processBody);
295
296        //TODO add acceleration here
297        Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue);
298//        iBuilder->CallPrintInt("tokenPos", phiCursorValue);
299//        iBuilder->CallPrintInt("token", token);
300
301        // Process Literal
302        BasicBlock* processLiteralBlock = iBuilder->CreateBasicBlock("processLiteralBlock");
303        iBuilder->CreateBr(processLiteralBlock);
304        iBuilder->SetInsertPoint(processLiteralBlock);
305
306        Value* offsetPos = this->processLiteral(iBuilder, token, phiCursorValue, blockEnd);
307//        iBuilder->CallPrintInt("offsetPos", offsetPos);
308        // Process Match
309        BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
310        BasicBlock* handleM0ElseBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_else");
311
312        iBuilder->CreateCondBr(
313                iBuilder->CreateICmpULT(offsetPos, blockEnd),
314                handleM0BodyBlock,
315                handleM0ElseBlock
316        );
317
318        // HandleM0Body
319        iBuilder->SetInsertPoint(handleM0BodyBlock);
320        Value* nextTokenPos = this->processMatch(iBuilder, offsetPos, token, blockEnd);
321//        iBuilder->CallPrintInt("nextTokenPos", nextTokenPos);
322        phiCursorValue->addIncoming(nextTokenPos, iBuilder->GetInsertBlock());
323
324        iBuilder->CreateBr(processCon);
325
326
327        // HandleM0Else
328        iBuilder->SetInsertPoint(handleM0ElseBlock);
329
330        phiCursorValue->addIncoming(offsetPos, handleM0ElseBlock);
331        // Store final M0 pos to make sure the bit stream will be long enough
332        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
333//        iBuilder->CallPrintInt("finalM0OutputPos", finalM0OutputPos);
334        this->generateStoreCircularOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
335        this->generateStoreCircularOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
336        this->generateStoreCircularOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), iBuilder->getInt64(0));
337
338        iBuilder->CreateBr(processCon);
339
340
341        iBuilder->SetInsertPoint(exitBlock);
342    }
343
344    Value *LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) {
345        return advanceUntilNextValue(iBuilder, inputName, startPos, true, maxPos);
346    }
347
348    Value *LZ4IndexBuilderKernel::advanceUntilNextOne(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) {
349        return advanceUntilNextValue(iBuilder, inputName, startPos, false, maxPos);
350    }
351
352    Value *LZ4IndexBuilderKernel::advanceUntilNextValue(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, bool isNextZero, Value* maxPos) {
353        unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
354        Constant* INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(bitBlockWidth);
355        Constant* SIZE_ZERO = iBuilder->getSize(0);
356        Type* bitBlockType = iBuilder->getBitBlockType();
357        Type* bitBlockWidthIntTy = iBuilder->getIntNTy(bitBlockWidth);
358
359
360        Value* baseOffset = iBuilder->getProcessedItemCount(inputName);
361        baseOffset = iBuilder->CreateSub(baseOffset, iBuilder->CreateURem(baseOffset, INT64_BIT_BLOCK_WIDTH));
362
363        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
364
365
366        Value* inputBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr(inputName, SIZE_ZERO), bitBlockType->getPointerTo());
367
368        BasicBlock* advanceConBlock = iBuilder->CreateBasicBlock("advanceConBlock");
369        BasicBlock* advanceBodyBlock = iBuilder->CreateBasicBlock("advanceBodyBlock");
370        BasicBlock* advanceExitBlock = iBuilder->CreateBasicBlock("advanceExitBlock");
371
372        iBuilder->CreateBr(advanceConBlock);
373        // TODO special handling for the first advance may have better performance
374        iBuilder->SetInsertPoint(advanceConBlock);
375
376        PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
377        phiCurrentPos->addIncoming(startPos, entryBlock);
378        PHINode* phiIsFinish = iBuilder->CreatePHI(iBuilder->getInt1Ty(), 2);
379        phiIsFinish->addIncoming(iBuilder->getInt1(false), entryBlock);
380        iBuilder->CreateCondBr(iBuilder->CreateNot(phiIsFinish), advanceBodyBlock, advanceExitBlock);
381
382        iBuilder->SetInsertPoint(advanceBodyBlock);
383
384
385//        iBuilder->CallPrintInt("phiCurrentPos", phiCurrentPos);
386//        iBuilder->CallPrintInt("baseOffset", baseOffset);
387        Value* currentPosBitBlockIndex = iBuilder->CreateUDiv(iBuilder->CreateSub(phiCurrentPos, baseOffset), INT64_BIT_BLOCK_WIDTH);
388//        iBuilder->CallPrintInt("currentPosBitBlockIndex", currentPosBitBlockIndex);
389        Value* currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
390
391        Value* ptr = iBuilder->CreateGEP(inputBasePtr, iBuilder->CreateTruncOrBitCast(currentPosBitBlockIndex, iBuilder->getSizeTy()));
392//        iBuilder->CallPrintInt("ptr", ptr);
393//        iBuilder->CallPrintInt("blockBasePtr", iBuilder->getInputStreamBlockPtr(inputName, SIZE_ZERO));
394        Value* currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy);
395//        iBuilder->CallPrintRegister("ptrValue", currentBitValue);
396
397        currentBitValue = iBuilder->CreateLShr(currentBitValue, iBuilder->CreateZExt(currentPosBitBlockOffset, bitBlockWidthIntTy));
398        if (isNextZero) {
399            currentBitValue = iBuilder->CreateNot(currentBitValue);
400        }
401        Value* forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
402        Value* newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
403        newOffset = iBuilder->CreateUMin(newOffset, INT64_BIT_BLOCK_WIDTH);
404
405        Value* actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
406        Value* newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue);
407        if (maxPos) {
408            newPos = iBuilder->CreateUMin(maxPos, newPos);
409            actualAdvanceValue = iBuilder->CreateSub(newPos, phiCurrentPos);
410            newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, actualAdvanceValue);
411        }
412
413        phiIsFinish->addIncoming(iBuilder->CreateNot(iBuilder->CreateICmpEQ(newOffset, INT64_BIT_BLOCK_WIDTH)), iBuilder->GetInsertBlock());
414        phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock());
415        iBuilder->CreateBr(advanceConBlock);
416
417        iBuilder->SetInsertPoint(advanceExitBlock);
418        return phiCurrentPos;
419    }
420
421    Value *
422    LZ4IndexBuilderKernel::generateLoadCircularInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName,
423                                                Value *offset, Type *pointerType) {
424        size_t inputSize = this->getInputStreamSetBuffer(inputBufferName)->getBufferBlocks() * iBuilder->getStride();
425        Value *offsetMask = iBuilder->getSize(inputSize - 1);
426        Value *maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
427
428        Value *inputBufferPtr = iBuilder->getRawInputPointer(inputBufferName, iBuilder->getSize(0));
429
430        inputBufferPtr = iBuilder->CreatePointerCast(inputBufferPtr, pointerType);
431        return iBuilder->CreateLoad(iBuilder->CreateGEP(inputBufferPtr, maskedOffset));
432    }
433
434    Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value *offset) {
435        Value *blockStartPtr = iBuilder->CreatePointerCast(
436                iBuilder->getRawInputPointer("byteStream", iBuilder->getInt32(0)),
437                iBuilder->getInt8PtrTy()
438        );
439        Value *ptr = iBuilder->CreateGEP(blockStartPtr, offset);
440
441        return iBuilder->CreateLoad(ptr);
442    }
443
444    void LZ4IndexBuilderKernel::increaseScalarField(const unique_ptr<KernelBuilder> &iBuilder, const string &fieldName, Value *value) {
445        Value *fieldValue = iBuilder->getScalarField(fieldName);
446        fieldValue = iBuilder->CreateAdd(fieldValue, value);
447        iBuilder->setScalarField(fieldName, fieldValue);
448    }
449
450    size_t LZ4IndexBuilderKernel::getOutputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, string bufferName) {
451        return this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
452    }
453
454    // Assume we have enough output buffer
455    llvm::BasicBlock *LZ4IndexBuilderKernel::markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
456                                                                    const std::string &bitstreamName,
457                                                                    llvm::Value *start, llvm::Value *end, bool isOne,
458                                                                    bool setProduced) {
459        Value* originalEnd = end;
460        Value* baseOffset = iBuilder->CreateSub(previousE1Produced, iBuilder->CreateURem(previousE1Produced, iBuilder->getInt64(iBuilder->getBitBlockWidth())));;
461//        iBuilder->CallPrintInt("baseOffset", baseOffset);
462//        iBuilder->CallPrintInt("start", start);
463//        iBuilder->CallPrintInt("end", end);
464        start = iBuilder->CreateSub(start, baseOffset);
465        end = iBuilder->CreateSub(end, baseOffset);
466        //TODO possible bug here
467        BasicBlock *entryBlock = iBuilder->GetInsertBlock();
468
469
470
471        Value *outputBasePtr = iBuilder->getOutputStreamBlockPtr(bitstreamName, iBuilder->getSize(0));
472//        iBuilder->CallPrintInt("outputBasePtr", outputBasePtr);
473//        iBuilder->CallPrintInt("a", iBuilder->getRawOutputPointer(bitstreamName, iBuilder->getSize(0)));
474
475        outputBasePtr = iBuilder->CreatePointerCast(outputBasePtr, iBuilder->getInt64Ty()->getPointerTo());
476
477//        size_t outputBufferSize = this->getOutputBufferSize(iBuilder, bitstreamName);
478//        Value *outputMask = iBuilder->getSize(outputBufferSize / 64 - 1);
479
480        BasicBlock *conBlock = iBuilder->CreateBasicBlock("mark_bit_one_con");
481        BasicBlock *bodyBlock = iBuilder->CreateBasicBlock("mark_bit_one_body");
482        BasicBlock *exitBlock = iBuilder->CreateBasicBlock("mark_bit_one_exit");
483
484        Value *startOffset = iBuilder->CreateLShr(start, iBuilder->getSize(std::log2(64)), "startOffset");
485
486        iBuilder->CreateBr(conBlock);
487
488        // Con
489        iBuilder->SetInsertPoint(conBlock);
490
491
492        PHINode *curOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
493        curOffset->addIncoming(startOffset, entryBlock);
494//        iBuilder->CallPrintInt("curOffset", curOffset);
495//        iBuilder->CallPrintInt("end", end);
496
497        iBuilder->CreateCondBr(
498                iBuilder->CreateICmpULT(iBuilder->CreateShl(curOffset, std::log2(64)), end),
499                bodyBlock,
500                exitBlock
501        );
502
503        // Body
504        iBuilder->SetInsertPoint(bodyBlock);
505        Value *maskedOffset = curOffset;
506
507        Value *outputLowestBitValue = iBuilder->CreateSelect(
508                iBuilder->CreateICmpULE(
509                        iBuilder->CreateShl(curOffset, std::log2(64)),
510                        start
511                ),
512                iBuilder->CreateShl(iBuilder->getSize(1), iBuilder->CreateAnd(start, iBuilder->getSize(64 - 1))),
513                iBuilder->getSize(1)
514        );
515
516        Value *hasNotReachEnd = iBuilder->CreateICmpULE(
517                iBuilder->CreateShl(iBuilder->CreateAdd(curOffset, iBuilder->getSize(1)), std::log2(64)),
518                end
519        );
520        Value *producedItemsCount = iBuilder->CreateSelect(
521                hasNotReachEnd,
522                iBuilder->CreateShl(iBuilder->CreateAdd(curOffset, iBuilder->getSize(1)), std::log2(64)),
523                end
524        );
525        producedItemsCount = iBuilder->CreateAdd(producedItemsCount, baseOffset);
526
527        Value *outputHighestBitValue = iBuilder->CreateSelect(
528                hasNotReachEnd,
529                iBuilder->getSize(0),
530                iBuilder->CreateShl(
531                        iBuilder->getSize(1),
532                        iBuilder->CreateAnd(end, iBuilder->getSize(64 - 1))
533                )
534        );
535
536
537        Value *bitMask = iBuilder->CreateSub(
538                outputHighestBitValue,
539                outputLowestBitValue
540        );
541
542        if (!isOne) {
543            bitMask = iBuilder->CreateNot(bitMask);
544        }
545
546        Value *targetPtr = iBuilder->CreateGEP(outputBasePtr, maskedOffset);
547//        iBuilder->CallPrintInt("maskedOffset", maskedOffset);
548        Value *oldValue = iBuilder->CreateLoad(targetPtr);
549        Value *newValue = NULL;
550        if (isOne) {
551            newValue = iBuilder->CreateOr(oldValue, bitMask);
552        } else {
553            newValue = iBuilder->CreateAnd(oldValue, bitMask);
554        }
555        iBuilder->CreateStore(
556                newValue,
557                targetPtr
558        );
559//        iBuilder->CallPrintInt("targetPtr", targetPtr);
560        if (setProduced) {
561            iBuilder->setProducedItemCount(bitstreamName, producedItemsCount);
562        }
563
564        curOffset->addIncoming(iBuilder->CreateAdd(curOffset, iBuilder->getSize(1)), bodyBlock);
565        iBuilder->CreateBr(conBlock);
566
567        // Exit
568        iBuilder->SetInsertPoint(exitBlock);
569        return exitBlock;
570    }
571
572
573    void
574    LZ4IndexBuilderKernel::generateStoreCircularOutput(const unique_ptr<KernelBuilder> &iBuilder, string outputBufferName,
575                                                  Type *pointerType, Value *value) {
576        //TODO possible bug here
577        Value *offset = iBuilder->getProducedItemCount(outputBufferName);
578
579        size_t inputSize = this->getOutputBufferSize(iBuilder, outputBufferName);
580        Value *offsetMask = iBuilder->getSize(inputSize - 1);
581        Value *maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
582
583        Value *outputBufferPtr = iBuilder->getRawOutputPointer(outputBufferName, iBuilder->getSize(0));
584
585        outputBufferPtr = iBuilder->CreatePointerCast(outputBufferPtr, pointerType);
586        iBuilder->CreateStore(value, iBuilder->CreateGEP(outputBufferPtr, maskedOffset));
587
588        offset = iBuilder->CreateAdd(offset, iBuilder->getSize(1));
589        iBuilder->setProducedItemCount(outputBufferName, offset);
590    }
591}
Note: See TracBrowser for help on using the repository browser.