source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp @ 5941

Last change on this file since 5941 was 5941, checked in by xwa163, 16 months ago
  1. Add attributes to disable some features of multiblock kernel
  2. Fix bug for lz4d new approach in large data, pass all test cases
  3. Disable lz4d related test cases for old approach
File size: 32.2 KB
Line 
1//
2// Created by wxy325 on 2018/3/16.
3//
4
5#include "lz4_index_builder.h"
6
7
8#include <kernels/kernel_builder.h>
9#include <iostream>
10#include <string>
11#include <llvm/Support/raw_ostream.h>
12#include <kernels/streamset.h>
13
14using namespace llvm;
15using namespace kernel;
16using namespace std;
17
18namespace kernel{
19    LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
20            : MultiBlockKernel("LZ4IndexBuilderKernel",
21            // Inputs
22                               {
23                                       Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
24                                       Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream"), {DisableTemporaryBuffer(), DisableAvailableItemCountAdjustment(), DisableSufficientChecking()}},
25//                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xFX", RateEqualTo("byteStream")},
26//                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xXF", RateEqualTo("byteStream")},
27
28                                       // block data
29                                       Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1),
30                                               AlwaysConsume()},
31                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1),
32                                               AlwaysConsume()},
33                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1),
34                                               AlwaysConsume()}
35
36                               },
37            //Outputs
38                               {
39                                       // Uncompressed_data
40                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos",
41                                               BoundedRate(0, 1)},
42                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength",
43                                               BoundedRate(0, 1)},
44                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos",
45                                               BoundedRate(0, 1)},
46
47                                       Binding{iBuilder->getStreamSetTy(1, 1), "e1Marker", BoundedRate(0, 1), {DisableTemporaryBuffer(), DisableSufficientChecking()}},
48                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1), DisableSufficientChecking()}, //TODO disable temporary buffer for all output streams
49                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1), DisableSufficientChecking()},
50                                       Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1), DisableSufficientChecking()},
51                                       Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1), {DisableTemporaryBuffer()}}
52                               },
53            //Arguments
54                               {
55                                       Binding{iBuilder->getSizeTy(), "fileSize"}
56                               },
57                               {},
58            //Internal states:
59                               {
60                                       Binding{iBuilder->getSizeTy(), "blockDataIndex"},
61                                       Binding{iBuilder->getInt64Ty(), "m0OutputPos"}
62                               }) {
63        this->setStride(4 * 1024 * 1024);
64        addAttribute(MustExplicitlyTerminate());
65    }
66
67    void LZ4IndexBuilderKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *const numOfStrides) {
68
69
70        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
71        BasicBlock* blockEndConBlock = iBuilder->CreateBasicBlock("blockEndConBlock");
72
73        this->resetPreviousProducedMap(iBuilder, {"e1Marker", "m0Start", "m0End", "matchOffset"});
74
75        Value* blockDataIndex = iBuilder->getScalarField("blockDataIndex");
76
77        Value* totalNumber = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("blockEnd"), iBuilder->getProcessedItemCount("blockEnd"));
78        Value* totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));
79//        iBuilder->CallPrintInt("blockDataIndex", blockDataIndex);
80
81
82        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock);
83
84        iBuilder->SetInsertPoint(blockEndConBlock);
85        Value* blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex);
86
87        Value* blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex);
88
89        BasicBlock* processBlock = iBuilder->CreateBasicBlock("processBlock");
90//        iBuilder->CallPrintInt("----totalExtender", totalExtender);
91//        iBuilder->CallPrintInt("----blockStart", blockStart);
92//        iBuilder->CallPrintInt("----blockEnd", blockEnd);
93
94//        iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
95        iBuilder->CreateBr(processBlock);
96
97        iBuilder->SetInsertPoint(processBlock);
98
99        //TODO handle uncompressed block
100
101        this->generateProcessCompressedBlock(iBuilder, blockStart, blockEnd);
102
103        Value* newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));
104        iBuilder->setScalarField("blockDataIndex", newBlockDataIndex);
105        iBuilder->setProcessedItemCount("blockEnd", newBlockDataIndex);
106        iBuilder->setProcessedItemCount("blockStart", newBlockDataIndex);
107        iBuilder->setProcessedItemCount("isCompressed", newBlockDataIndex);
108
109
110        iBuilder->setProcessedItemCount("byteStream", blockEnd);
111
112
113//        iBuilder->setProcessedItemCount("extender", blockEnd);
114//        iBuilder->setProcessedItemCount("CC_0xFX", blockEnd);
115//        iBuilder->setProcessedItemCount("CC_0xXF", blockEnd);
116
117        iBuilder->CreateBr(exitBlock);
118
119        iBuilder->SetInsertPoint(exitBlock);
120    }
121
122    Value* LZ4IndexBuilderKernel::processLiteral(const std::unique_ptr<KernelBuilder> &iBuilder, Value* token, Value* tokenPos, Value* blockEnd) {
123        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
124
125        Value* extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
126//        iBuilder->CallPrintInt("token", token);
127
128        BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
129        BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
130
131        iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthBody, extendLiteralLengthExit);
132
133        iBuilder->SetInsertPoint(extendLiteralLengthBody);
134        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), blockEnd);
135        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
136
137        iBuilder->CreateBr(extendLiteralLengthExit);
138
139        iBuilder->SetInsertPoint(extendLiteralLengthExit);
140
141        PHINode* phiCursorPosAfterLiteral = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
142        phiCursorPosAfterLiteral->addIncoming(newCursorPos, advanceFinishBlock);
143        phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock);
144
145        Value* literalExtensionSize = iBuilder->CreateSub(phiCursorPosAfterLiteral, tokenPos);
146//        iBuilder->CallPrintInt("literalExtensionSize", literalExtensionSize);
147        Value* finalLengthByte = this->generateLoadSourceInputByte(iBuilder, phiCursorPosAfterLiteral);
148        finalLengthByte = iBuilder->CreateZExt(finalLengthByte, iBuilder->getInt64Ty());
149        Value* literalLengthExtendValue = iBuilder->CreateSelect(
150                iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)),
151                iBuilder->CreateAdd(
152                        iBuilder->CreateMul(
153                                iBuilder->CreateSub(literalExtensionSize, iBuilder->getSize(1)),
154                                iBuilder->getSize(255)
155                        ),
156                        finalLengthByte
157                ),
158                iBuilder->getSize(0)
159        );
160        literalLengthExtendValue = iBuilder->CreateZExt(literalLengthExtendValue, iBuilder->getInt64Ty());
161        Value* literalLengthBase = iBuilder->CreateLShr(iBuilder->CreateZExt(token, iBuilder->getInt64Ty()), iBuilder->getInt64(4));
162        Value* literalLength = iBuilder->CreateAdd(literalLengthBase, literalLengthExtendValue);
163
164        Value* offsetPos = iBuilder->CreateAdd(
165                iBuilder->CreateAdd(
166                        phiCursorPosAfterLiteral,
167                        literalLength),
168                iBuilder->getSize(1));
169
170        // TODO Clear Output Buffer at the beginning instead of marking 0
171        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->getProducedItemCount("e1Marker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), false);
172//        iBuilder->CallPrintInt("markStart", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)));
173//        iBuilder->CallPrintInt("phiCursorPosAfterLiteral", phiCursorPosAfterLiteral);
174        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), offsetPos, true);
175        this->increaseScalarField(iBuilder, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields
176        return offsetPos;
177    }
178
179    Value* LZ4IndexBuilderKernel::processMatch(const std::unique_ptr<KernelBuilder> &iBuilder, Value* offsetPos, Value* token, Value* blockEnd) {
180        Constant* INT64_ONE = iBuilder->getInt64(1);
181
182        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
183
184        Value* matchLengthStartPos = iBuilder->CreateAdd(offsetPos, INT64_ONE);
185        Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
186
187        BasicBlock* extendMatchBodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_body");
188        BasicBlock* extendMatchExitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_exit");
189
190        iBuilder->CreateCondBr(extendedMatchValue, extendMatchBodyBlock, extendMatchExitBlock);
191
192        iBuilder->SetInsertPoint(extendMatchBodyBlock);
193
194        //ExtendMatchBodyBlock
195        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(matchLengthStartPos, INT64_ONE), blockEnd);
196        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
197
198        // ----May be in a different segment now
199        iBuilder->CreateBr(extendMatchExitBlock);
200
201        //ExtendMatchExitBlock
202        iBuilder->SetInsertPoint(extendMatchExitBlock);
203        PHINode* phiCursorPosAfterMatch = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
204        phiCursorPosAfterMatch->addIncoming(newCursorPos, advanceFinishBlock);
205        phiCursorPosAfterMatch->addIncoming(matchLengthStartPos, entryBlock);
206
207        Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, matchLengthStartPos);
208//        iBuilder->CallPrintInt("totalExtender", iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender")));
209//        iBuilder->CallPrintInt("aaa", oldMatchExtensionSize);
210
211        extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
212        Value* matchExtensionSize = iBuilder->CreateSelect(
213                iBuilder->CreateICmpEQ(extendedMatchValue, iBuilder->getInt1(true)),
214                oldMatchExtensionSize,
215                iBuilder->getSize(0)
216        );
217        Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
218        Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
219
220
221        Value* extensionLastBitPos = iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1));
222        extensionLastBitPos = iBuilder->CreateAdd(extensionLastBitPos, matchExtensionSize);
223
224        Value* extensionLastBitValue = this->generateLoadSourceInputByte(iBuilder, extensionLastBitPos);
225        extensionLastBitValue = iBuilder->CreateZExt(extensionLastBitValue, iBuilder->getSizeTy());
226
227
228        Value* matchLengthAddValue = iBuilder->CreateSelect(
229                iBuilder->CreateICmpUGT(matchExtensionSize, iBuilder->getSize(0)),
230                iBuilder->CreateAdd(
231                        iBuilder->CreateMul(
232                                iBuilder->CreateSub(matchExtensionSize, iBuilder->getSize(1)),
233                                iBuilder->getSize(255)
234                        ),
235                        extensionLastBitValue
236                )
237                ,
238                iBuilder->getSize(0)
239        );
240        matchLengthAddValue = iBuilder->CreateZExt(matchLengthAddValue, iBuilder->getInt64Ty());
241
242        matchLength = iBuilder->CreateAdd(matchLength, matchLengthAddValue);
243
244        Value* outputPos = iBuilder->getScalarField("m0OutputPos");
245
246        Value* outputEndPos = iBuilder->CreateSub(
247                iBuilder->CreateAdd(outputPos, matchLength),
248                iBuilder->getInt64(1)
249        );
250
251        Value* matchOffset = iBuilder->CreateAdd(
252                iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, offsetPos), iBuilder->getSizeTy()),
253                iBuilder->CreateShl(iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1))), iBuilder->getSizeTy()), iBuilder->getSize(8))
254        );
255        this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
256        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
257        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffset);
258        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
259        this->markCircularOutputBitstream(iBuilder, "M0Marker", outputPos, outputEndPos, true, false);
260
261        return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE);
262    }
263
264
265    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
266        // Constant
267        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
268
269        Value* m0OutputBlockPtr = iBuilder->getOutputStreamBlockPtr("M0Marker", iBuilder->getSize(0));
270        iBuilder->CreateMemSet(m0OutputBlockPtr, iBuilder->getInt8(0), 4 * 1024 * 1024 / 8, true);
271
272
273        Value* isTerminal = iBuilder->CreateICmpEQ(blockEnd, iBuilder->getScalarField("fileSize"));
274
275        iBuilder->setTerminationSignal(isTerminal);
276
277        //TODO use memset to clear output buffer for extract marker
278
279        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("processCompressedExitBlock");
280
281        BasicBlock* processCon = iBuilder->CreateBasicBlock("processCompressedConBlock");
282        BasicBlock* processBody = iBuilder->CreateBasicBlock("processCompressedBodyBlock");
283
284        iBuilder->CreateBr(processCon);
285        iBuilder->SetInsertPoint(processCon);
286
287        PHINode* phiCursorValue = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3); // phiCursorValue should always be the position of next token except for the final sequence
288        phiCursorValue->addIncoming(blockStart, entryBlock);
289
290        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(phiCursorValue, blockEnd), processBody, exitBlock);
291
292        // Process Body
293        iBuilder->SetInsertPoint(processBody);
294
295        //TODO add acceleration here
296        Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue);
297
298//        iBuilder->CallPrintInt("tokenPos", phiCursorValue);
299//        iBuilder->CallPrintInt("token", token);
300
301        // Process Literal
302        BasicBlock* processLiteralBlock = iBuilder->CreateBasicBlock("processLiteralBlock");
303        iBuilder->CreateBr(processLiteralBlock);
304        iBuilder->SetInsertPoint(processLiteralBlock);
305
306        Value* offsetPos = this->processLiteral(iBuilder, token, phiCursorValue, blockEnd);
307//        iBuilder->CallPrintInt("offsetPos", offsetPos);
308        // Process Match
309        BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
310        BasicBlock* handleM0ElseBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_else");
311
312        iBuilder->CreateCondBr(
313                iBuilder->CreateICmpULT(offsetPos, blockEnd),
314                handleM0BodyBlock,
315                handleM0ElseBlock
316        );
317
318        // HandleM0Body
319        iBuilder->SetInsertPoint(handleM0BodyBlock);
320        Value* nextTokenPos = this->processMatch(iBuilder, offsetPos, token, blockEnd);
321//        iBuilder->CallPrintInt("nextTokenPos", nextTokenPos);
322        phiCursorValue->addIncoming(nextTokenPos, iBuilder->GetInsertBlock());
323
324        iBuilder->CreateBr(processCon);
325
326
327        // HandleM0Else
328        iBuilder->SetInsertPoint(handleM0ElseBlock);
329
330        phiCursorValue->addIncoming(offsetPos, handleM0ElseBlock);
331        // Store final M0 pos to make sure the bit stream will be long enough
332        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
333//        iBuilder->CallPrintInt("finalM0OutputPos", finalM0OutputPos);
334        this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
335        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
336        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), iBuilder->getInt64(0));
337        iBuilder->setProducedItemCount("M0Marker", finalM0OutputPos);
338        // finalM0OutputPos should always be 4MB * n except for the final block
339
340        iBuilder->CreateBr(processCon);
341
342
343        iBuilder->SetInsertPoint(exitBlock);
344    }
345
346    Value *LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) {
347        return advanceUntilNextValue(iBuilder, inputName, startPos, true, maxPos);
348    }
349
350    Value *LZ4IndexBuilderKernel::advanceUntilNextOne(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) {
351        return advanceUntilNextValue(iBuilder, inputName, startPos, false, maxPos);
352    }
353
354    Value *LZ4IndexBuilderKernel::advanceUntilNextValue(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, bool isNextZero, Value* maxPos) {
355        unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
356        Constant* INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(bitBlockWidth);
357        Constant* SIZE_ZERO = iBuilder->getSize(0);
358        Type* bitBlockType = iBuilder->getBitBlockType();
359        Type* bitBlockWidthIntTy = iBuilder->getIntNTy(bitBlockWidth);
360
361        Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputName), INT64_BIT_BLOCK_WIDTH);
362
363        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
364
365        BasicBlock* advanceConBlock = iBuilder->CreateBasicBlock("advanceConBlock");
366        BasicBlock* advanceBodyBlock = iBuilder->CreateBasicBlock("advanceBodyBlock");
367        BasicBlock* advanceExitBlock = iBuilder->CreateBasicBlock("advanceExitBlock");
368
369        iBuilder->CreateBr(advanceConBlock);
370        // TODO special handling for the first advance may have better performance
371        iBuilder->SetInsertPoint(advanceConBlock);
372
373        PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
374        phiCurrentPos->addIncoming(startPos, entryBlock);
375        PHINode* phiIsFinish = iBuilder->CreatePHI(iBuilder->getInt1Ty(), 2);
376        phiIsFinish->addIncoming(iBuilder->getInt1(false), entryBlock);
377        iBuilder->CreateCondBr(iBuilder->CreateNot(phiIsFinish), advanceBodyBlock, advanceExitBlock);
378
379        iBuilder->SetInsertPoint(advanceBodyBlock);
380
381
382        Value* currentBlockGlobalPos = iBuilder->CreateUDiv(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
383        Value* currentPosBitBlockIndex = iBuilder->CreateSub(currentBlockGlobalPos, baseInputBlockIndex);
384
385        Value* currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
386
387        Value* ptr = iBuilder->getInputStreamBlockPtr(inputName, SIZE_ZERO, currentPosBitBlockIndex);
388        Value* rawPtr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, SIZE_ZERO), bitBlockType->getPointerTo());
389        Value* ptr2 = iBuilder->CreateGEP(rawPtr, iBuilder->CreateURem(currentBlockGlobalPos, iBuilder->getSize(this->getAnyStreamSetBuffer(inputName)->getBufferBlocks())));
390        ptr = ptr2; //TODO workaround here
391
392
393        Value* currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy);
394
395        currentBitValue = iBuilder->CreateLShr(currentBitValue, iBuilder->CreateZExt(currentPosBitBlockOffset, bitBlockWidthIntTy));
396        if (isNextZero) {
397            currentBitValue = iBuilder->CreateNot(currentBitValue);
398        }
399        Value* forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
400        Value* newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
401        newOffset = iBuilder->CreateUMin(newOffset, INT64_BIT_BLOCK_WIDTH);
402
403        Value* actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
404        Value* newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue);
405        if (maxPos) {
406            newPos = iBuilder->CreateUMin(maxPos, newPos);
407            actualAdvanceValue = iBuilder->CreateSub(newPos, phiCurrentPos);
408            newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, actualAdvanceValue);
409        }
410
411        phiIsFinish->addIncoming(iBuilder->CreateNot(iBuilder->CreateICmpEQ(newOffset, INT64_BIT_BLOCK_WIDTH)), iBuilder->GetInsertBlock());
412        phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock());
413        iBuilder->CreateBr(advanceConBlock);
414
415        iBuilder->SetInsertPoint(advanceExitBlock);
416        return phiCurrentPos;
417    }
418
419    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value *globalOffset) {
420        Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(this->getStride());
421        Constant* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
422        Constant* SIZE_ZERO = iBuilder->getSize(0);
423
424//        Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_BIT_BLOCK_WIDTH);
425
426        //TODO possible bug here, maybe we need to use iBuilder->getStride()
427        Value* offset = iBuilder->CreateSub(globalOffset, iBuilder->CreateMul(iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_STRIDE_SIZE), SIZE_STRIDE_SIZE));
428
429        Value* targetBlockIndex = iBuilder->CreateUDiv(offset, SIZE_BIT_BLOCK_WIDTH);
430        Value* localOffset = iBuilder->CreateURem(offset, SIZE_BIT_BLOCK_WIDTH);
431
432        //[64 x <4 x i64>]*
433        Value* ptr = iBuilder->getInputStreamBlockPtr(inputBufferName, SIZE_ZERO, targetBlockIndex);
434        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt64Ty()->getPointerTo());
435        //GEP here is safe
436        Value* valuePtr = iBuilder->CreateGEP(ptr, localOffset);
437        return iBuilder->CreateLoad(valuePtr);
438    }
439
440    Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value *offset) {
441        // The external buffer is always linear accessible, so the GEP here is safe
442        Value *blockStartPtr = iBuilder->CreatePointerCast(
443                iBuilder->getRawInputPointer("byteStream", iBuilder->getInt32(0)),
444                iBuilder->getInt8PtrTy()
445        );
446        Value *ptr = iBuilder->CreateGEP(blockStartPtr, offset);
447        return iBuilder->CreateLoad(ptr);
448    }
449
450    void LZ4IndexBuilderKernel::increaseScalarField(const unique_ptr<KernelBuilder> &iBuilder, const string &fieldName, Value *value) {
451        Value *fieldValue = iBuilder->getScalarField(fieldName);
452        fieldValue = iBuilder->CreateAdd(fieldValue, value);
453        iBuilder->setScalarField(fieldName, fieldValue);
454    }
455
456    size_t LZ4IndexBuilderKernel::getOutputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, string bufferName) {
457        return this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
458    }
459
460    // Assume we have enough output buffer
461    llvm::BasicBlock *LZ4IndexBuilderKernel::markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
462                                                                         const std::string &bitstreamName,
463                                                                         llvm::Value *start, llvm::Value *end, bool isOne,
464                                                                         bool setProduced) {
465        const unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
466        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(bitBlockWidth);
467        Value* SIZE_ONE = iBuilder->getSize(1);
468        Value* SIZE_ZERO = iBuilder->getSize(0);
469        Type * const INT_BIT_BLOCK_TY = iBuilder->getIntNTy(bitBlockWidth);
470        Type * const BIT_BLOCK_TY = iBuilder->getBitBlockType();
471        Constant* INT_BIT_BLOCK_ONE = ConstantInt::get(INT_BIT_BLOCK_TY, 1);
472        Constant* INT_BIT_BLOCK_ZERO = ConstantInt::get(INT_BIT_BLOCK_TY, 0);
473
474        Value* previousProduced = this->previousProducedMap.find(bitstreamName)->second;
475        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
476
477        BasicBlock *entryBlock = iBuilder->GetInsertBlock();
478        BasicBlock *conBlock = iBuilder->CreateBasicBlock("mark_bit_one_con");
479        BasicBlock *bodyBlock = iBuilder->CreateBasicBlock("mark_bit_one_body");
480        BasicBlock *exitBlock = iBuilder->CreateBasicBlock("mark_bit_one_exit");
481
482        Value* startBlockLocalIndex = iBuilder->CreateSub(iBuilder->CreateUDiv(start, SIZE_BIT_BLOCK_WIDTH), blockIndexBase);
483
484        iBuilder->CreateBr(conBlock);
485
486        // Con
487        iBuilder->SetInsertPoint(conBlock);
488
489        PHINode *curBlockLocalIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
490        curBlockLocalIndex->addIncoming(startBlockLocalIndex, entryBlock);
491        iBuilder->CreateCondBr(
492                iBuilder->CreateICmpULT(iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH), end),
493                bodyBlock,
494                exitBlock
495        );
496
497        // Body
498        iBuilder->SetInsertPoint(bodyBlock);
499
500        Value *outputLowestBitValue = iBuilder->CreateSelect(
501                iBuilder->CreateICmpULE(
502                        iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH),
503                        start
504                ),
505                iBuilder->CreateShl(INT_BIT_BLOCK_ONE, iBuilder->CreateZExt(iBuilder->CreateURem(start, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY)),
506                INT_BIT_BLOCK_ONE
507        );
508
509        Value *hasNotReachEnd = iBuilder->CreateICmpULE(
510                iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_ONE), SIZE_BIT_BLOCK_WIDTH),
511                end
512        );
513        Value *producedItemsCount = iBuilder->CreateSelect(
514                hasNotReachEnd,
515                iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_ONE), SIZE_BIT_BLOCK_WIDTH),
516                end
517        );
518
519
520        Value *outputHighestBitValue = iBuilder->CreateSelect(
521                hasNotReachEnd,
522                INT_BIT_BLOCK_ZERO,
523                iBuilder->CreateShl(
524                        INT_BIT_BLOCK_ONE,
525                        iBuilder->CreateZExt(iBuilder->CreateURem(end, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY)
526                )
527        );
528
529
530        Value *bitMask = iBuilder->CreateSub(
531                outputHighestBitValue,
532                outputLowestBitValue
533        );
534
535        if (!isOne) {
536            bitMask = iBuilder->CreateNot(bitMask);
537        }
538
539        Value *targetPtr = iBuilder->getOutputStreamBlockPtr(bitstreamName, SIZE_ZERO, curBlockLocalIndex);
540        Value *rawInputPointer = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_ZERO), iBuilder->getBitBlockType()->getPointerTo());
541        Value * ptr = iBuilder->CreateGEP(rawInputPointer, iBuilder->CreateURem(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks())));
542//        iBuilder->CallPrintInt("targetPtr", targetPtr);
543//        iBuilder->CallPrintInt("targetPtr2", ptr);
544        targetPtr = ptr; //TODO workaround here
545
546
547        //TODO fixed circular here
548
549        Value *oldValue = iBuilder->CreateLoad(targetPtr);
550        oldValue = iBuilder->CreateBitCast(oldValue, INT_BIT_BLOCK_TY);
551        Value *newValue = NULL;
552        if (isOne) {
553            newValue = iBuilder->CreateOr(oldValue, bitMask);
554        } else {
555            newValue = iBuilder->CreateAnd(oldValue, bitMask);
556        }
557
558        iBuilder->CreateStore(
559                iBuilder->CreateBitCast(newValue, BIT_BLOCK_TY),
560                targetPtr
561        );
562        if (setProduced) {
563            iBuilder->setProducedItemCount(bitstreamName, producedItemsCount);
564        }
565
566        curBlockLocalIndex->addIncoming(iBuilder->CreateAdd(curBlockLocalIndex, SIZE_ONE), bodyBlock);
567        iBuilder->CreateBr(conBlock);
568
569        // Exit
570        iBuilder->SetInsertPoint(exitBlock);
571        return exitBlock;
572    }
573
574
575
576    void LZ4IndexBuilderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
577                                                          const string &outputBufferName, Type *pointerType,
578                                                          Value *value) {
579
580        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
581        Value* SIZE_ZERO = iBuilder->getSize(0);
582        Value* SIZE_ONE = iBuilder->getSize(1);
583
584        Value* previousProduced = previousProducedMap.find(outputBufferName)->second;
585//        iBuilder->CallPrintInt("previousProduced", previousProduced);
586
587        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
588        Value* outputOffset = iBuilder->getProducedItemCount(outputBufferName);
589        Value* blockIndex = iBuilder->CreateUDiv(outputOffset, SIZE_BIT_BLOCK_WIDTH);
590
591        Value* blockOffset = iBuilder->CreateURem(outputOffset, SIZE_BIT_BLOCK_WIDTH);
592
593        // i8, [8 x <4 x i64>]*
594        // i64, [64 x <4 x i64>]*
595        Value* ptr = iBuilder->getOutputStreamBlockPtr(outputBufferName, SIZE_ZERO, iBuilder->CreateSub(blockIndex, blockIndexBase));
596        ptr = iBuilder->CreatePointerCast(ptr, pointerType);
597        ptr = iBuilder->CreateGEP(ptr, blockOffset);
598
599        Value* tmpOffset = iBuilder->CreateURem(outputOffset, iBuilder->getSize(this->getAnyStreamSetBuffer(outputBufferName)->getBufferBlocks() * iBuilder->getBitBlockWidth()));
600        Value* outputRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(outputBufferName, SIZE_ZERO), pointerType);
601        Value* ptr2 = iBuilder->CreateGEP(outputRawPtr, tmpOffset);
602        ptr = ptr2;
603//        iBuilder->CallPrintInt("ptr", ptr);
604//        iBuilder->CallPrintInt("ptr2", ptr2);
605
606        // GEP here is safe
607        iBuilder->CreateStore(value, ptr);
608
609        if (outputBufferName == "m0End") {
610//            iBuilder->CallPrintInt("output:m0End", value);
611        }
612
613        iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, SIZE_ONE));
614    }
615
616
617    void LZ4IndexBuilderKernel::resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder,
618                                                         std::vector<std::string> outputList) {
619        previousProducedMap.clear();
620        for (auto iter = outputList.begin(); iter != outputList.end(); ++iter) {
621            previousProducedMap.insert(std::make_pair(*iter, iBuilder->getProducedItemCount(*iter)));
622        }
623    }
624}
Note: See TracBrowser for help on using the repository browser.