source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp @ 5948

Last change on this file since 5948 was 5948, checked in by xwa163, 12 months ago
  1. Remove legacy kernels and codes for lz4
  2. Remove old approach for lz4 decoder
  3. Fixed some bugs of lz4 decoder new approach in large file by adding workaround attribute
  4. Add related test cases
File size: 32.2 KB
Line 
1//
2// Created by wxy325 on 2018/3/16.
3//
4
5#include "lz4_index_builder.h"
6
7
8#include <kernels/kernel_builder.h>
9#include <iostream>
10#include <string>
11#include <llvm/Support/raw_ostream.h>
12#include <kernels/streamset.h>
13
14using namespace llvm;
15using namespace kernel;
16using namespace std;
17
18namespace kernel{
19    LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
20            : MultiBlockKernel("LZ4IndexBuilderKernel",
21            // Inputs
22                               {
23                                       Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
24                                       Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream"), {DisableTemporaryBuffer(), DisableAvailableItemCountAdjustment(), DisableSufficientChecking()}},
25//                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xFX", RateEqualTo("byteStream")},
26//                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xXF", RateEqualTo("byteStream")},
27
28                                       // block data
29                                       Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1),
30                                               AlwaysConsume()},
31                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1),
32                                               AlwaysConsume()},
33                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1),
34                                               AlwaysConsume()}
35
36                               },
37            //Outputs
38                               {
39                                       // Uncompressed_data
40                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos",
41                                               BoundedRate(0, 1)},
42                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength",
43                                               BoundedRate(0, 1)},
44                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos",
45                                               BoundedRate(0, 1)},
46
47                                       Binding{iBuilder->getStreamSetTy(1, 1), "deletionMarker", BoundedRate(0, 1), {DisableTemporaryBuffer(), DisableSufficientChecking()}},
48                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1), DisableSufficientChecking()}, //TODO disable temporary buffer for all output streams
49                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1), DisableSufficientChecking()},
50                                       Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1), DisableSufficientChecking()},
51                                       Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1), {DisableTemporaryBuffer()}}
52                               },
53            //Arguments
54                               {
55                                       Binding{iBuilder->getSizeTy(), "fileSize"}
56                               },
57                               {},
58            //Internal states:
59                               {
60                                       Binding{iBuilder->getSizeTy(), "blockDataIndex"},
61                                       Binding{iBuilder->getInt64Ty(), "m0OutputPos"}
62                               }) {
63        this->setStride(4 * 1024 * 1024);
64        addAttribute(MustExplicitlyTerminate());
65    }
66
67    void LZ4IndexBuilderKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *const numOfStrides) {
68        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
69        BasicBlock* blockEndConBlock = iBuilder->CreateBasicBlock("blockEndConBlock");
70
71        this->resetPreviousProducedMap(iBuilder, {"deletionMarker", "m0Start", "m0End", "matchOffset", "M0Marker"});
72
73        Value* blockDataIndex = iBuilder->getScalarField("blockDataIndex");
74
75        Value* totalNumber = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("blockEnd"), iBuilder->getProcessedItemCount("blockEnd"));
76        Value* totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));
77
78        Value* blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex);
79
80        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock);
81
82        iBuilder->SetInsertPoint(blockEndConBlock);
83
84
85        Value* blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex);
86
87        BasicBlock* processBlock = iBuilder->CreateBasicBlock("processBlock");
88//        iBuilder->CallPrintInt("----totalExtender", totalExtender);
89//        iBuilder->CallPrintInt("----blockStart", blockStart);
90//        iBuilder->CallPrintInt("----blockEnd", blockEnd);
91
92        iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
93//        iBuilder->CreateBr(processBlock);
94
95        iBuilder->SetInsertPoint(processBlock);
96
97        //TODO handle uncompressed block
98
99        this->generateProcessCompressedBlock(iBuilder, blockStart, blockEnd);
100
101        Value* newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));
102        iBuilder->setScalarField("blockDataIndex", newBlockDataIndex);
103        iBuilder->setProcessedItemCount("blockEnd", newBlockDataIndex);
104        iBuilder->setProcessedItemCount("blockStart", newBlockDataIndex);
105        iBuilder->setProcessedItemCount("isCompressed", newBlockDataIndex);
106
107
108        iBuilder->setProcessedItemCount("byteStream", blockEnd);
109
110
111//        iBuilder->setProcessedItemCount("extender", blockEnd);
112//        iBuilder->setProcessedItemCount("CC_0xFX", blockEnd);
113//        iBuilder->setProcessedItemCount("CC_0xXF", blockEnd);
114
115        iBuilder->CreateBr(exitBlock);
116
117        iBuilder->SetInsertPoint(exitBlock);
118    }
119
120    Value* LZ4IndexBuilderKernel::processLiteral(const std::unique_ptr<KernelBuilder> &iBuilder, Value* token, Value* tokenPos, Value* blockEnd) {
121        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
122
123        Value* extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
124//        iBuilder->CallPrintInt("token", token);
125
126        BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
127        BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
128
129        iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthBody, extendLiteralLengthExit);
130
131        iBuilder->SetInsertPoint(extendLiteralLengthBody);
132        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), blockEnd);
133        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
134
135        iBuilder->CreateBr(extendLiteralLengthExit);
136
137        iBuilder->SetInsertPoint(extendLiteralLengthExit);
138
139        PHINode* phiCursorPosAfterLiteral = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
140        phiCursorPosAfterLiteral->addIncoming(newCursorPos, advanceFinishBlock);
141        phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock);
142
143        Value* literalExtensionSize = iBuilder->CreateSub(phiCursorPosAfterLiteral, tokenPos);
144//        iBuilder->CallPrintInt("literalExtensionSize", literalExtensionSize);
145        Value* finalLengthByte = this->generateLoadSourceInputByte(iBuilder, phiCursorPosAfterLiteral);
146        finalLengthByte = iBuilder->CreateZExt(finalLengthByte, iBuilder->getInt64Ty());
147        Value* literalLengthExtendValue = iBuilder->CreateSelect(
148                iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)),
149                iBuilder->CreateAdd(
150                        iBuilder->CreateMul(
151                                iBuilder->CreateSub(literalExtensionSize, iBuilder->getSize(1)),
152                                iBuilder->getSize(255)
153                        ),
154                        finalLengthByte
155                ),
156                iBuilder->getSize(0)
157        );
158        literalLengthExtendValue = iBuilder->CreateZExt(literalLengthExtendValue, iBuilder->getInt64Ty());
159        Value* literalLengthBase = iBuilder->CreateLShr(iBuilder->CreateZExt(token, iBuilder->getInt64Ty()), iBuilder->getInt64(4));
160        Value* literalLength = iBuilder->CreateAdd(literalLengthBase, literalLengthExtendValue);
161
162        Value* offsetPos = iBuilder->CreateAdd(
163                iBuilder->CreateAdd(
164                        phiCursorPosAfterLiteral,
165                        literalLength),
166                iBuilder->getSize(1));
167
168        // TODO Clear Output Buffer at the beginning instead of marking 0
169        this->markCircularOutputBitstream(iBuilder, "deletionMarker", iBuilder->getProducedItemCount("deletionMarker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), true);
170//        iBuilder->CallPrintInt("markStart", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)));
171//        iBuilder->CallPrintInt("phiCursorPosAfterLiteral", phiCursorPosAfterLiteral);
172        this->markCircularOutputBitstream(iBuilder, "deletionMarker", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), offsetPos, false);
173        this->increaseScalarField(iBuilder, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields
174        return offsetPos;
175    }
176
177    Value* LZ4IndexBuilderKernel::processMatch(const std::unique_ptr<KernelBuilder> &iBuilder, Value* offsetPos, Value* token, Value* blockEnd) {
178        Constant* INT64_ONE = iBuilder->getInt64(1);
179
180        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
181
182        Value* matchLengthStartPos = iBuilder->CreateAdd(offsetPos, INT64_ONE);
183        Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
184
185        BasicBlock* extendMatchBodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_body");
186        BasicBlock* extendMatchExitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_exit");
187
188        iBuilder->CreateCondBr(extendedMatchValue, extendMatchBodyBlock, extendMatchExitBlock);
189
190        iBuilder->SetInsertPoint(extendMatchBodyBlock);
191
192        //ExtendMatchBodyBlock
193        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(matchLengthStartPos, INT64_ONE), blockEnd);
194        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
195
196        // ----May be in a different segment now
197        iBuilder->CreateBr(extendMatchExitBlock);
198
199        //ExtendMatchExitBlock
200        iBuilder->SetInsertPoint(extendMatchExitBlock);
201        PHINode* phiCursorPosAfterMatch = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
202        phiCursorPosAfterMatch->addIncoming(newCursorPos, advanceFinishBlock);
203        phiCursorPosAfterMatch->addIncoming(matchLengthStartPos, entryBlock);
204
205        Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, matchLengthStartPos);
206//        iBuilder->CallPrintInt("totalExtender", iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender")));
207//        iBuilder->CallPrintInt("aaa", oldMatchExtensionSize);
208
209        extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
210        Value* matchExtensionSize = iBuilder->CreateSelect(
211                iBuilder->CreateICmpEQ(extendedMatchValue, iBuilder->getInt1(true)),
212                oldMatchExtensionSize,
213                iBuilder->getSize(0)
214        );
215        Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
216        Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
217
218
219        Value* extensionLastBitPos = iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1));
220        extensionLastBitPos = iBuilder->CreateAdd(extensionLastBitPos, matchExtensionSize);
221
222        Value* extensionLastBitValue = this->generateLoadSourceInputByte(iBuilder, extensionLastBitPos);
223        extensionLastBitValue = iBuilder->CreateZExt(extensionLastBitValue, iBuilder->getSizeTy());
224
225
226        Value* matchLengthAddValue = iBuilder->CreateSelect(
227                iBuilder->CreateICmpUGT(matchExtensionSize, iBuilder->getSize(0)),
228                iBuilder->CreateAdd(
229                        iBuilder->CreateMul(
230                                iBuilder->CreateSub(matchExtensionSize, iBuilder->getSize(1)),
231                                iBuilder->getSize(255)
232                        ),
233                        extensionLastBitValue
234                )
235                ,
236                iBuilder->getSize(0)
237        );
238        matchLengthAddValue = iBuilder->CreateZExt(matchLengthAddValue, iBuilder->getInt64Ty());
239
240        matchLength = iBuilder->CreateAdd(matchLength, matchLengthAddValue);
241
242        Value* outputPos = iBuilder->getScalarField("m0OutputPos");
243
244        Value* outputEndPos = iBuilder->CreateSub(
245                iBuilder->CreateAdd(outputPos, matchLength),
246                iBuilder->getInt64(1)
247        );
248
249        Value* matchOffset = iBuilder->CreateAdd(
250                iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, offsetPos), iBuilder->getSizeTy()),
251                iBuilder->CreateShl(iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1))), iBuilder->getSizeTy()), iBuilder->getSize(8))
252        );
253        this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
254        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
255        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffset);
256        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
257        this->markCircularOutputBitstream(iBuilder, "M0Marker", outputPos, outputEndPos, true, false);
258
259        return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE);
260    }
261
262
263    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
264        // Constant
265        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
266
267        Value* m0OutputBlockPtr = iBuilder->getOutputStreamBlockPtr("M0Marker", iBuilder->getSize(0));
268        iBuilder->CreateMemSet(m0OutputBlockPtr, iBuilder->getInt8(0), 4 * 1024 * 1024 / 8, true);
269
270
271        Value* isTerminal = iBuilder->CreateICmpEQ(blockEnd, iBuilder->getScalarField("fileSize"));
272        iBuilder->setTerminationSignal(isTerminal);
273
274        //TODO use memset to clear output buffer for extract marker
275
276        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("processCompressedExitBlock");
277
278        BasicBlock* processCon = iBuilder->CreateBasicBlock("processCompressedConBlock");
279        BasicBlock* processBody = iBuilder->CreateBasicBlock("processCompressedBodyBlock");
280
281        iBuilder->CreateBr(processCon);
282        iBuilder->SetInsertPoint(processCon);
283
284        PHINode* phiCursorValue = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3); // phiCursorValue should always be the position of next token except for the final sequence
285        phiCursorValue->addIncoming(blockStart, entryBlock);
286
287        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(phiCursorValue, blockEnd), processBody, exitBlock);
288
289        // Process Body
290        iBuilder->SetInsertPoint(processBody);
291
292        //TODO add acceleration here
293        Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue);
294
295//        iBuilder->CallPrintInt("tokenPos", phiCursorValue);
296//        iBuilder->CallPrintInt("token", token);
297
298        // Process Literal
299        BasicBlock* processLiteralBlock = iBuilder->CreateBasicBlock("processLiteralBlock");
300        iBuilder->CreateBr(processLiteralBlock);
301        iBuilder->SetInsertPoint(processLiteralBlock);
302
303        Value* offsetPos = this->processLiteral(iBuilder, token, phiCursorValue, blockEnd);
304//        iBuilder->CallPrintInt("offsetPos", offsetPos);
305        // Process Match
306        BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
307        BasicBlock* handleM0ElseBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_else");
308
309        iBuilder->CreateCondBr(
310                iBuilder->CreateICmpULT(offsetPos, blockEnd),
311                handleM0BodyBlock,
312                handleM0ElseBlock
313        );
314
315        // HandleM0Body
316        iBuilder->SetInsertPoint(handleM0BodyBlock);
317        Value* nextTokenPos = this->processMatch(iBuilder, offsetPos, token, blockEnd);
318//        iBuilder->CallPrintInt("nextTokenPos", nextTokenPos);
319        phiCursorValue->addIncoming(nextTokenPos, iBuilder->GetInsertBlock());
320
321        iBuilder->CreateBr(processCon);
322
323
324        // HandleM0Else
325        iBuilder->SetInsertPoint(handleM0ElseBlock);
326
327        phiCursorValue->addIncoming(offsetPos, handleM0ElseBlock);
328        // Store final M0 pos to make sure the bit stream will be long enough
329        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
330//        iBuilder->CallPrintInt("finalM0OutputPos", finalM0OutputPos);
331        this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
332        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
333        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), iBuilder->getInt64(0));
334        iBuilder->setProducedItemCount("M0Marker", finalM0OutputPos);
335        // finalM0OutputPos should always be 4MB * n except for the final block
336
337        iBuilder->CreateBr(processCon);
338
339
340        iBuilder->SetInsertPoint(exitBlock);
341    }
342
343    Value *LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) {
344        return advanceUntilNextValue(iBuilder, inputName, startPos, true, maxPos);
345    }
346
347    Value *LZ4IndexBuilderKernel::advanceUntilNextOne(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) {
348        return advanceUntilNextValue(iBuilder, inputName, startPos, false, maxPos);
349    }
350
351    Value *LZ4IndexBuilderKernel::advanceUntilNextValue(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, bool isNextZero, Value* maxPos) {
352        unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
353        Constant* INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(bitBlockWidth);
354        Constant* SIZE_ZERO = iBuilder->getSize(0);
355        Type* bitBlockType = iBuilder->getBitBlockType();
356        Type* bitBlockWidthIntTy = iBuilder->getIntNTy(bitBlockWidth);
357
358        Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputName), INT64_BIT_BLOCK_WIDTH);
359
360        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
361
362        BasicBlock* advanceConBlock = iBuilder->CreateBasicBlock("advanceConBlock");
363        BasicBlock* advanceBodyBlock = iBuilder->CreateBasicBlock("advanceBodyBlock");
364        BasicBlock* advanceExitBlock = iBuilder->CreateBasicBlock("advanceExitBlock");
365
366        iBuilder->CreateBr(advanceConBlock);
367        // TODO special handling for the first advance may have better performance
368        iBuilder->SetInsertPoint(advanceConBlock);
369
370        PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
371        phiCurrentPos->addIncoming(startPos, entryBlock);
372        PHINode* phiIsFinish = iBuilder->CreatePHI(iBuilder->getInt1Ty(), 2);
373        phiIsFinish->addIncoming(iBuilder->getInt1(false), entryBlock);
374        iBuilder->CreateCondBr(iBuilder->CreateNot(phiIsFinish), advanceBodyBlock, advanceExitBlock);
375
376        iBuilder->SetInsertPoint(advanceBodyBlock);
377
378
379        Value* currentBlockGlobalPos = iBuilder->CreateUDiv(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
380        Value* currentPosBitBlockIndex = iBuilder->CreateSub(currentBlockGlobalPos, baseInputBlockIndex);
381
382        Value* currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
383
384        Value* ptr = iBuilder->getInputStreamBlockPtr(inputName, SIZE_ZERO, currentPosBitBlockIndex);
385        Value* rawPtr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, SIZE_ZERO), bitBlockType->getPointerTo());
386        Value* ptr2 = iBuilder->CreateGEP(rawPtr, iBuilder->CreateURem(currentBlockGlobalPos, iBuilder->getSize(this->getAnyStreamSetBuffer(inputName)->getBufferBlocks())));
387        ptr = ptr2; //TODO workaround here
388
389
390        Value* currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy);
391
392        currentBitValue = iBuilder->CreateLShr(currentBitValue, iBuilder->CreateZExt(currentPosBitBlockOffset, bitBlockWidthIntTy));
393        if (isNextZero) {
394            currentBitValue = iBuilder->CreateNot(currentBitValue);
395        }
396        Value* forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
397        Value* newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
398        newOffset = iBuilder->CreateUMin(newOffset, INT64_BIT_BLOCK_WIDTH);
399
400        Value* actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
401        Value* newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue);
402        if (maxPos) {
403            newPos = iBuilder->CreateUMin(maxPos, newPos);
404            actualAdvanceValue = iBuilder->CreateSub(newPos, phiCurrentPos);
405            newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, actualAdvanceValue);
406        }
407
408        phiIsFinish->addIncoming(iBuilder->CreateNot(iBuilder->CreateICmpEQ(newOffset, INT64_BIT_BLOCK_WIDTH)), iBuilder->GetInsertBlock());
409        phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock());
410        iBuilder->CreateBr(advanceConBlock);
411
412        iBuilder->SetInsertPoint(advanceExitBlock);
413        return phiCurrentPos;
414    }
415
416    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value *globalOffset) {
417        Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(this->getStride());
418        Constant* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
419        Constant* SIZE_ZERO = iBuilder->getSize(0);
420
421//        Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_BIT_BLOCK_WIDTH);
422
423        //TODO possible bug here, maybe we need to use iBuilder->getStride()
424        Value* offset = iBuilder->CreateSub(globalOffset, iBuilder->CreateMul(iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_STRIDE_SIZE), SIZE_STRIDE_SIZE));
425
426        Value* targetBlockIndex = iBuilder->CreateUDiv(offset, SIZE_BIT_BLOCK_WIDTH);
427        Value* localOffset = iBuilder->CreateURem(offset, SIZE_BIT_BLOCK_WIDTH);
428
429        //[64 x <4 x i64>]*
430        Value* ptr = iBuilder->getInputStreamBlockPtr(inputBufferName, SIZE_ZERO, targetBlockIndex);
431        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt64Ty()->getPointerTo());
432        //GEP here is safe
433        Value* valuePtr = iBuilder->CreateGEP(ptr, localOffset);
434        return iBuilder->CreateLoad(valuePtr);
435    }
436
437    Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value *offset) {
438        // The external buffer is always linear accessible, so the GEP here is safe
439        Value *blockStartPtr = iBuilder->CreatePointerCast(
440                iBuilder->getRawInputPointer("byteStream", iBuilder->getInt32(0)),
441                iBuilder->getInt8PtrTy()
442        );
443        Value *ptr = iBuilder->CreateGEP(blockStartPtr, offset);
444        return iBuilder->CreateLoad(ptr);
445    }
446
447    void LZ4IndexBuilderKernel::increaseScalarField(const unique_ptr<KernelBuilder> &iBuilder, const string &fieldName, Value *value) {
448        Value *fieldValue = iBuilder->getScalarField(fieldName);
449        fieldValue = iBuilder->CreateAdd(fieldValue, value);
450        iBuilder->setScalarField(fieldName, fieldValue);
451    }
452
453    size_t LZ4IndexBuilderKernel::getOutputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, string bufferName) {
454        return this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
455    }
456
457    // Assume we have enough output buffer
458    llvm::BasicBlock *LZ4IndexBuilderKernel::markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
459                                                                         const std::string &bitstreamName,
460                                                                         llvm::Value *start, llvm::Value *end, bool isOne,
461                                                                         bool setProduced) {
462        const unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
463        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(bitBlockWidth);
464        Value* SIZE_ONE = iBuilder->getSize(1);
465        Value* SIZE_ZERO = iBuilder->getSize(0);
466        Type * const INT_BIT_BLOCK_TY = iBuilder->getIntNTy(bitBlockWidth);
467        Type * const BIT_BLOCK_TY = iBuilder->getBitBlockType();
468        Constant* INT_BIT_BLOCK_ONE = ConstantInt::get(INT_BIT_BLOCK_TY, 1);
469        Constant* INT_BIT_BLOCK_ZERO = ConstantInt::get(INT_BIT_BLOCK_TY, 0);
470
471        Value* previousProduced = this->previousProducedMap.find(bitstreamName)->second;
472        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
473
474        BasicBlock *entryBlock = iBuilder->GetInsertBlock();
475        BasicBlock *conBlock = iBuilder->CreateBasicBlock("mark_bit_one_con");
476        BasicBlock *bodyBlock = iBuilder->CreateBasicBlock("mark_bit_one_body");
477        BasicBlock *exitBlock = iBuilder->CreateBasicBlock("mark_bit_one_exit");
478
479        Value* startBlockLocalIndex = iBuilder->CreateSub(iBuilder->CreateUDiv(start, SIZE_BIT_BLOCK_WIDTH), blockIndexBase);
480
481        iBuilder->CreateBr(conBlock);
482
483        // Con
484        iBuilder->SetInsertPoint(conBlock);
485
486        PHINode *curBlockLocalIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
487        curBlockLocalIndex->addIncoming(startBlockLocalIndex, entryBlock);
488        iBuilder->CreateCondBr(
489                iBuilder->CreateICmpULT(iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH), end),
490                bodyBlock,
491                exitBlock
492        );
493
494        // Body
495        iBuilder->SetInsertPoint(bodyBlock);
496
497        Value *outputLowestBitValue = iBuilder->CreateSelect(
498                iBuilder->CreateICmpULE(
499                        iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH),
500                        start
501                ),
502                iBuilder->CreateShl(INT_BIT_BLOCK_ONE, iBuilder->CreateZExt(iBuilder->CreateURem(start, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY)),
503                INT_BIT_BLOCK_ONE
504        );
505
506        Value *hasNotReachEnd = iBuilder->CreateICmpULE(
507                iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_ONE), SIZE_BIT_BLOCK_WIDTH),
508                end
509        );
510        Value *producedItemsCount = iBuilder->CreateSelect(
511                hasNotReachEnd,
512                iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_ONE), SIZE_BIT_BLOCK_WIDTH),
513                end
514        );
515
516
517        Value *outputHighestBitValue = iBuilder->CreateSelect(
518                hasNotReachEnd,
519                INT_BIT_BLOCK_ZERO,
520                iBuilder->CreateShl(
521                        INT_BIT_BLOCK_ONE,
522                        iBuilder->CreateZExt(iBuilder->CreateURem(end, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY)
523                )
524        );
525
526
527        Value *bitMask = iBuilder->CreateSub(
528                outputHighestBitValue,
529                outputLowestBitValue
530        );
531
532        if (!isOne) {
533            bitMask = iBuilder->CreateNot(bitMask);
534        }
535
536        Value *targetPtr = iBuilder->getOutputStreamBlockPtr(bitstreamName, SIZE_ZERO, curBlockLocalIndex);
537        Value *rawInputPointer = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_ZERO), iBuilder->getBitBlockType()->getPointerTo());
538        Value * ptr = iBuilder->CreateGEP(rawInputPointer, iBuilder->CreateURem(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks())));
539//        iBuilder->CallPrintInt("targetPtr", targetPtr);
540//        iBuilder->CallPrintInt("targetPtr2", ptr);
541        targetPtr = ptr; //TODO workaround here
542
543
544        //TODO fixed circular here
545
546        Value *oldValue = iBuilder->CreateLoad(targetPtr);
547        oldValue = iBuilder->CreateBitCast(oldValue, INT_BIT_BLOCK_TY);
548        Value *newValue = NULL;
549        if (isOne) {
550            newValue = iBuilder->CreateOr(oldValue, bitMask);
551        } else {
552            newValue = iBuilder->CreateAnd(oldValue, bitMask);
553        }
554
555        iBuilder->CreateStore(
556                iBuilder->CreateBitCast(newValue, BIT_BLOCK_TY),
557                targetPtr
558        );
559        if (setProduced) {
560            iBuilder->setProducedItemCount(bitstreamName, producedItemsCount);
561        }
562
563        curBlockLocalIndex->addIncoming(iBuilder->CreateAdd(curBlockLocalIndex, SIZE_ONE), bodyBlock);
564        iBuilder->CreateBr(conBlock);
565
566        // Exit
567        iBuilder->SetInsertPoint(exitBlock);
568        return exitBlock;
569    }
570
571
572
573    void LZ4IndexBuilderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
574                                                          const string &outputBufferName, Type *pointerType,
575                                                          Value *value) {
576
577        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
578        Value* SIZE_ZERO = iBuilder->getSize(0);
579        Value* SIZE_ONE = iBuilder->getSize(1);
580
581        Value* previousProduced = previousProducedMap.find(outputBufferName)->second;
582//        iBuilder->CallPrintInt("previousProduced", previousProduced);
583
584        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
585        Value* outputOffset = iBuilder->getProducedItemCount(outputBufferName);
586        Value* blockIndex = iBuilder->CreateUDiv(outputOffset, SIZE_BIT_BLOCK_WIDTH);
587
588        Value* blockOffset = iBuilder->CreateURem(outputOffset, SIZE_BIT_BLOCK_WIDTH);
589
590        // i8, [8 x <4 x i64>]*
591        // i64, [64 x <4 x i64>]*
592        Value* ptr = iBuilder->getOutputStreamBlockPtr(outputBufferName, SIZE_ZERO, iBuilder->CreateSub(blockIndex, blockIndexBase));
593        ptr = iBuilder->CreatePointerCast(ptr, pointerType);
594        ptr = iBuilder->CreateGEP(ptr, blockOffset);
595
596        Value* tmpOffset = iBuilder->CreateURem(outputOffset, iBuilder->getSize(this->getAnyStreamSetBuffer(outputBufferName)->getBufferBlocks() * iBuilder->getBitBlockWidth()));
597        Value* outputRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(outputBufferName, SIZE_ZERO), pointerType);
598        Value* ptr2 = iBuilder->CreateGEP(outputRawPtr, tmpOffset);
599        ptr = ptr2;
600//        iBuilder->CallPrintInt("ptr", ptr);
601//        iBuilder->CallPrintInt("ptr2", ptr2);
602
603        // GEP here is safe
604        iBuilder->CreateStore(value, ptr);
605
606        if (outputBufferName == "m0End") {
607//            iBuilder->CallPrintInt("output:m0End", value);
608        }
609
610        iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, SIZE_ONE));
611    }
612
613
614    void LZ4IndexBuilderKernel::resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder,
615                                                         std::vector<std::string> outputList) {
616        previousProducedMap.clear();
617        for (auto iter = outputList.begin(); iter != outputList.end(); ++iter) {
618            previousProducedMap.insert(std::make_pair(*iter, iBuilder->getProducedItemCount(*iter)));
619        }
620    }
621}
Note: See TracBrowser for help on using the repository browser.