source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp @ 5923

Last change on this file since 5923 was 5923, checked in by xwa163, 18 months ago

Fix some GEP instructions in lz4_index_builder and lz4_block_decoder_new

File size: 30.2 KB
Line 
1//
2// Created by wxy325 on 2018/3/16.
3//
4
5#include "lz4_index_builder.h"
6
7
8#include <kernels/kernel_builder.h>
9#include <iostream>
10#include <string>
11#include <llvm/Support/raw_ostream.h>
12#include <kernels/streamset.h>
13
14using namespace llvm;
15using namespace kernel;
16using namespace std;
17namespace kernel{
18    LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
19            : MultiBlockKernel("LZ4IndexBuilderKernel",
20            // Inputs
21                               {
22                                       Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
23                                       Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
24                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xFX", RateEqualTo("byteStream")},
25                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xXF", RateEqualTo("byteStream")},
26
27                                       // block data
28                                       Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1),
29                                               ConstantStrideLengthOne()},
30                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1),
31                                               ConstantStrideLengthOne()},
32                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1),
33                                               ConstantStrideLengthOne()}
34
35                               },
36            //Outputs
37                               {
38                                       // Uncompressed_data
39                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos",
40                                               BoundedRate(0, 1)},
41                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength",
42                                               BoundedRate(0, 1)},
43                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos",
44                                               BoundedRate(0, 1)},
45
46                                       Binding{iBuilder->getStreamSetTy(1, 1), "e1Marker", BoundedRate(0, 1)},
47                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
48                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
49                                       Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)}
50                               },
51            //Arguments
52                               {},
53                               {},
54            //Internal states:
55                               {
56                                       Binding{iBuilder->getSizeTy(), "blockDataIndex"},
57                                       Binding{iBuilder->getInt64Ty(), "m0OutputPos"}
58                               }) {
59//        addAttribute(MustExplicitlyTerminate());
60    }
61
62    void LZ4IndexBuilderKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *const numOfStrides) {
63//        iBuilder->CallPrintInt("entry", iBuilder->getSize(0));
64//        iBuilder->CallPrintInt("aaa", iBuilder->getProducedItemCount("e1Marker"));
65
66
67
68        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
69        BasicBlock* blockEndConBlock = iBuilder->CreateBasicBlock("blockEndConBlock");
70
71        this->resetPreviousProducedMap(iBuilder, {"e1Marker", "m0Start", "m0End", "matchOffset"});
72
73        Value* blockDataIndex = iBuilder->getScalarField("blockDataIndex");
74
75        Value* totalNumber = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("blockEnd"), iBuilder->getProcessedItemCount("blockEnd"));
76
77//        iBuilder->CallPrintInt("blockDataIndex", blockDataIndex);
78//        iBuilder->CallPrintInt("totalNumber", totalNumber);
79//        iBuilder->setTerminationSignal(iBuilder->CreateICmpEQ(availableBlockEnd, iBuilder->getSize(0)));
80
81        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock);
82
83        iBuilder->SetInsertPoint(blockEndConBlock);
84        Value* blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex);
85        iBuilder->CallPrintInt("blockEnd", blockEnd);
86
87        Value* totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));
88//        iBuilder->CallPrintInt("totalExtender", totalExtender);
89
90//        iBuilder->CallPrintInt("processByteStream", iBuilder->getProcessedItemCount("byteStream"));
91//        iBuilder->CallPrintInt("availableByteStream", iBuilder->getAvailableItemCount("byteStream"));
92
93
94//        iBuilder->CallPrintInt("consumedExtender", iBuilder->getConsumedItemCount("extender"));
95//        iBuilder->CallPrintInt("processExtender", iBuilder->getProcessedItemCount("extender"));
96//        iBuilder->CallPrintInt("availableExtender", iBuilder->getAvailableItemCount("extender"));
97//        iBuilder->CallPrintInt("blockDataIndex", blockDataIndex);
98
99
100        Value* blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex);
101
102        BasicBlock* processBlock = iBuilder->CreateBasicBlock("processBlock");
103//        iBuilder->CallPrintInt("----totalExtender", totalExtender);
104//        iBuilder->CallPrintInt("----blockStart", blockStart);
105//        iBuilder->CallPrintInt("----blockEnd", blockEnd);
106
107        iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
108
109        iBuilder->SetInsertPoint(processBlock);
110
111
112        //TODO handle uncompressed block
113        this->generateProcessCompressedBlock(iBuilder, blockStart, blockEnd);
114
115
116
117        Value* newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));
118        iBuilder->setScalarField("blockDataIndex", newBlockDataIndex);
119        iBuilder->setProcessedItemCount("blockEnd", newBlockDataIndex);
120        iBuilder->setProcessedItemCount("blockStart", newBlockDataIndex);
121        iBuilder->setProcessedItemCount("isCompressed", newBlockDataIndex);
122
123
124        iBuilder->setProcessedItemCount("byteStream", blockEnd);
125//        iBuilder->setProcessedItemCount("extender", blockEnd);
126//        iBuilder->setProcessedItemCount("CC_0xFX", blockEnd);
127//        iBuilder->setProcessedItemCount("CC_0xXF", blockEnd);
128
129        iBuilder->CreateBr(exitBlock);
130
131        iBuilder->SetInsertPoint(exitBlock);
132    }
133
134    Value* LZ4IndexBuilderKernel::processLiteral(const std::unique_ptr<KernelBuilder> &iBuilder, Value* token, Value* tokenPos, Value* blockEnd) {
135        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
136
137        Value* extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
138//        iBuilder->CallPrintInt("token", token);
139
140        BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
141        BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
142
143        iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthBody, extendLiteralLengthExit);
144
145        iBuilder->SetInsertPoint(extendLiteralLengthBody);
146        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), blockEnd);
147        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
148
149        iBuilder->CreateBr(extendLiteralLengthExit);
150
151        iBuilder->SetInsertPoint(extendLiteralLengthExit);
152
153        PHINode* phiCursorPosAfterLiteral = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
154        phiCursorPosAfterLiteral->addIncoming(newCursorPos, advanceFinishBlock);
155        phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock);
156
157        Value* literalExtensionSize = iBuilder->CreateSub(phiCursorPosAfterLiteral, tokenPos);
158//        iBuilder->CallPrintInt("literalExtensionSize", literalExtensionSize);
159        Value* finalLengthByte = this->generateLoadSourceInputByte(iBuilder, phiCursorPosAfterLiteral);
160        finalLengthByte = iBuilder->CreateZExt(finalLengthByte, iBuilder->getInt64Ty());
161        Value* literalLengthExtendValue = iBuilder->CreateSelect(
162                iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)),
163                iBuilder->CreateAdd(
164                        iBuilder->CreateMul(
165                                iBuilder->CreateSub(literalExtensionSize, iBuilder->getSize(1)),
166                                iBuilder->getSize(255)
167                        ),
168                        finalLengthByte
169                ),
170                iBuilder->getSize(0)
171        );
172        literalLengthExtendValue = iBuilder->CreateZExt(literalLengthExtendValue, iBuilder->getInt64Ty());
173        Value* literalLengthBase = iBuilder->CreateLShr(iBuilder->CreateZExt(token, iBuilder->getInt64Ty()), iBuilder->getInt64(4));
174        Value* literalLength = iBuilder->CreateAdd(literalLengthBase, literalLengthExtendValue);
175
176        Value* offsetPos = iBuilder->CreateAdd(
177                iBuilder->CreateAdd(
178                        phiCursorPosAfterLiteral,
179                        literalLength),
180                iBuilder->getSize(1));
181
182        // TODO Clear Output Buffer at the beginning instead of marking 0
183        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->getProducedItemCount("e1Marker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), false);
184        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), offsetPos, true);
185        this->increaseScalarField(iBuilder, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields
186        return offsetPos;
187    }
188
189    Value* LZ4IndexBuilderKernel::processMatch(const std::unique_ptr<KernelBuilder> &iBuilder, Value* offsetPos, Value* token, Value* blockEnd) {
190        Constant* INT64_ONE = iBuilder->getInt64(1);
191
192        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
193
194        Value* matchLengthStartPos = iBuilder->CreateAdd(offsetPos, INT64_ONE);
195        Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
196
197        BasicBlock* extendMatchBodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_body");
198        BasicBlock* extendMatchExitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_exit");
199
200        iBuilder->CreateCondBr(extendedMatchValue, extendMatchBodyBlock, extendMatchExitBlock);
201
202        iBuilder->SetInsertPoint(extendMatchBodyBlock);
203
204        //ExtendMatchBodyBlock
205        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(matchLengthStartPos, INT64_ONE), blockEnd);
206        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
207
208        // ----May be in a different segment now
209        iBuilder->CreateBr(extendMatchExitBlock);
210
211        //ExtendMatchExitBlock
212        iBuilder->SetInsertPoint(extendMatchExitBlock);
213        PHINode* phiCursorPosAfterMatch = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
214        phiCursorPosAfterMatch->addIncoming(newCursorPos, advanceFinishBlock);
215        phiCursorPosAfterMatch->addIncoming(matchLengthStartPos, entryBlock);
216
217        Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, matchLengthStartPos);
218//        iBuilder->CallPrintInt("totalExtender", iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender")));
219//        iBuilder->CallPrintInt("aaa", oldMatchExtensionSize);
220
221        extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
222        Value* matchExtensionSize = iBuilder->CreateSelect(
223                iBuilder->CreateICmpEQ(extendedMatchValue, iBuilder->getInt1(true)),
224                oldMatchExtensionSize,
225                iBuilder->getSize(0)
226        );
227        Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
228        Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
229
230
231        Value* extensionLastBitPos = iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1));
232        extensionLastBitPos = iBuilder->CreateAdd(extensionLastBitPos, matchExtensionSize);
233
234        Value* extensionLastBitValue = this->generateLoadSourceInputByte(iBuilder, extensionLastBitPos);
235        extensionLastBitValue = iBuilder->CreateZExt(extensionLastBitValue, iBuilder->getSizeTy());
236
237
238        Value* matchLengthAddValue = iBuilder->CreateSelect(
239                iBuilder->CreateICmpUGT(matchExtensionSize, iBuilder->getSize(0)),
240                iBuilder->CreateAdd(
241                        iBuilder->CreateMul(
242                                iBuilder->CreateSub(matchExtensionSize, iBuilder->getSize(1)),
243                                iBuilder->getSize(255)
244                        ),
245                        extensionLastBitValue
246                )
247                ,
248                iBuilder->getSize(0)
249        );
250        matchLengthAddValue = iBuilder->CreateZExt(matchLengthAddValue, iBuilder->getInt64Ty());
251
252        matchLength = iBuilder->CreateAdd(matchLength, matchLengthAddValue);
253
254        Value* outputPos = iBuilder->getScalarField("m0OutputPos");
255
256        Value* outputEndPos = iBuilder->CreateSub(
257                iBuilder->CreateAdd(outputPos, matchLength),
258                iBuilder->getInt64(1)
259        );
260
261        Value* matchOffset = iBuilder->CreateAdd(
262                iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, offsetPos), iBuilder->getSizeTy()),
263                iBuilder->CreateShl(iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1))), iBuilder->getSizeTy()), iBuilder->getSize(8))
264        );
265//        iBuilder->CallPrintInt("matchOffset", matchOffset);
266        this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
267//    iBuilder->CallPrintInt("m0Start", outputPos);
268        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
269//    iBuilder->CallPrintInt("m0End", outputEndPos);
270        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffset);
271//    iBuilder->CallPrintInt("matchOffset", matchOffset);
272        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
273        return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE);
274    }
275
276
277    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
278        // Constant
279        Constant* INT64_ONE = iBuilder->getInt64(1);
280
281        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
282        //TODO use memset to clear output buffer
283        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("processCompressedExitBlock");
284
285        BasicBlock* processCon = iBuilder->CreateBasicBlock("processCompressedConBlock");
286        BasicBlock* processBody = iBuilder->CreateBasicBlock("processCompressedBodyBlock");
287
288        iBuilder->CreateBr(processCon);
289        iBuilder->SetInsertPoint(processCon);
290
291        PHINode* phiCursorValue = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3); // phiCursorValue should always be the position of next token except for the final sequence
292        phiCursorValue->addIncoming(blockStart, entryBlock);
293
294        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(phiCursorValue, blockEnd), processBody, exitBlock);
295
296        // Process Body
297        iBuilder->SetInsertPoint(processBody);
298
299        //TODO add acceleration here
300        Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue);
301//        iBuilder->CallPrintInt("tokenPos", phiCursorValue);
302//        iBuilder->CallPrintInt("token", token);
303
304        // Process Literal
305        BasicBlock* processLiteralBlock = iBuilder->CreateBasicBlock("processLiteralBlock");
306        iBuilder->CreateBr(processLiteralBlock);
307        iBuilder->SetInsertPoint(processLiteralBlock);
308
309        Value* offsetPos = this->processLiteral(iBuilder, token, phiCursorValue, blockEnd);
310//        iBuilder->CallPrintInt("offsetPos", offsetPos);
311        // Process Match
312        BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
313        BasicBlock* handleM0ElseBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_else");
314
315        iBuilder->CreateCondBr(
316                iBuilder->CreateICmpULT(offsetPos, blockEnd),
317                handleM0BodyBlock,
318                handleM0ElseBlock
319        );
320
321        // HandleM0Body
322        iBuilder->SetInsertPoint(handleM0BodyBlock);
323        Value* nextTokenPos = this->processMatch(iBuilder, offsetPos, token, blockEnd);
324//        iBuilder->CallPrintInt("nextTokenPos", nextTokenPos);
325        phiCursorValue->addIncoming(nextTokenPos, iBuilder->GetInsertBlock());
326
327        iBuilder->CreateBr(processCon);
328
329
330        // HandleM0Else
331        iBuilder->SetInsertPoint(handleM0ElseBlock);
332
333        phiCursorValue->addIncoming(offsetPos, handleM0ElseBlock);
334        // Store final M0 pos to make sure the bit stream will be long enough
335        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
336//        iBuilder->CallPrintInt("finalM0OutputPos", finalM0OutputPos);
337        this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
338        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
339        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), iBuilder->getInt64(0));
340
341        iBuilder->CreateBr(processCon);
342
343
344        iBuilder->SetInsertPoint(exitBlock);
345    }
346
347    Value *LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) {
348        return advanceUntilNextValue(iBuilder, inputName, startPos, true, maxPos);
349    }
350
351    Value *LZ4IndexBuilderKernel::advanceUntilNextOne(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) {
352        return advanceUntilNextValue(iBuilder, inputName, startPos, false, maxPos);
353    }
354
355    Value *LZ4IndexBuilderKernel::advanceUntilNextValue(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, bool isNextZero, Value* maxPos) {
356        unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
357        Constant* INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(bitBlockWidth);
358        Constant* SIZE_ZERO = iBuilder->getSize(0);
359        Type* bitBlockType = iBuilder->getBitBlockType();
360        Type* bitBlockWidthIntTy = iBuilder->getIntNTy(bitBlockWidth);
361
362        Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputName), INT64_BIT_BLOCK_WIDTH);
363
364        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
365
366        BasicBlock* advanceConBlock = iBuilder->CreateBasicBlock("advanceConBlock");
367        BasicBlock* advanceBodyBlock = iBuilder->CreateBasicBlock("advanceBodyBlock");
368        BasicBlock* advanceExitBlock = iBuilder->CreateBasicBlock("advanceExitBlock");
369
370        iBuilder->CreateBr(advanceConBlock);
371        // TODO special handling for the first advance may have better performance
372        iBuilder->SetInsertPoint(advanceConBlock);
373
374        PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
375        phiCurrentPos->addIncoming(startPos, entryBlock);
376        PHINode* phiIsFinish = iBuilder->CreatePHI(iBuilder->getInt1Ty(), 2);
377        phiIsFinish->addIncoming(iBuilder->getInt1(false), entryBlock);
378        iBuilder->CreateCondBr(iBuilder->CreateNot(phiIsFinish), advanceBodyBlock, advanceExitBlock);
379
380        iBuilder->SetInsertPoint(advanceBodyBlock);
381
382        Value* currentPosBitBlockIndex = iBuilder->CreateSub(iBuilder->CreateUDiv(phiCurrentPos, INT64_BIT_BLOCK_WIDTH), baseInputBlockIndex);
383
384        Value* currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
385
386        Value* ptr = iBuilder->getInputStreamBlockPtr(inputName, SIZE_ZERO, currentPosBitBlockIndex);
387
388        Value* currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy);
389
390        currentBitValue = iBuilder->CreateLShr(currentBitValue, iBuilder->CreateZExt(currentPosBitBlockOffset, bitBlockWidthIntTy));
391        if (isNextZero) {
392            currentBitValue = iBuilder->CreateNot(currentBitValue);
393        }
394        Value* forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
395        Value* newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
396        newOffset = iBuilder->CreateUMin(newOffset, INT64_BIT_BLOCK_WIDTH);
397
398        Value* actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
399        Value* newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue);
400        if (maxPos) {
401            newPos = iBuilder->CreateUMin(maxPos, newPos);
402            actualAdvanceValue = iBuilder->CreateSub(newPos, phiCurrentPos);
403            newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, actualAdvanceValue);
404        }
405
406        phiIsFinish->addIncoming(iBuilder->CreateNot(iBuilder->CreateICmpEQ(newOffset, INT64_BIT_BLOCK_WIDTH)), iBuilder->GetInsertBlock());
407        phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock());
408        iBuilder->CreateBr(advanceConBlock);
409
410        iBuilder->SetInsertPoint(advanceExitBlock);
411        return phiCurrentPos;
412    }
413
414    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value *globalOffset) {
415        Constant* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
416        Constant* SIZE_ZERO = iBuilder->getSize(0);
417
418//        Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_BIT_BLOCK_WIDTH);
419
420        Value* offset = iBuilder->CreateSub(globalOffset, iBuilder->getProcessedItemCount(inputBufferName));
421
422        Value* targetBlockIndex = iBuilder->CreateUDiv(offset, SIZE_BIT_BLOCK_WIDTH);
423        Value* localOffset = iBuilder->CreateURem(offset, SIZE_BIT_BLOCK_WIDTH);
424
425        //[64 x <4 x i64>]*
426        Value* ptr = iBuilder->getInputStreamBlockPtr(inputBufferName, SIZE_ZERO, targetBlockIndex);
427        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt64Ty()->getPointerTo());
428        //GEP here is safe
429        Value* valuePtr = iBuilder->CreateGEP(ptr, localOffset);
430        return iBuilder->CreateLoad(valuePtr);
431    }
432
433    Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value *offset) {
434        // The external buffer is always linear accessible, so the GEP here is safe
435        Value *blockStartPtr = iBuilder->CreatePointerCast(
436                iBuilder->getRawInputPointer("byteStream", iBuilder->getInt32(0)),
437                iBuilder->getInt8PtrTy()
438        );
439        Value *ptr = iBuilder->CreateGEP(blockStartPtr, offset);
440        return iBuilder->CreateLoad(ptr);
441    }
442
443    void LZ4IndexBuilderKernel::increaseScalarField(const unique_ptr<KernelBuilder> &iBuilder, const string &fieldName, Value *value) {
444        Value *fieldValue = iBuilder->getScalarField(fieldName);
445        fieldValue = iBuilder->CreateAdd(fieldValue, value);
446        iBuilder->setScalarField(fieldName, fieldValue);
447    }
448
449    size_t LZ4IndexBuilderKernel::getOutputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, string bufferName) {
450        return this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
451    }
452
453    // Assume we have enough output buffer
454    llvm::BasicBlock *LZ4IndexBuilderKernel::markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
455                                                                    const std::string &bitstreamName,
456                                                                    llvm::Value *start, llvm::Value *end, bool isOne,
457                                                                    bool setProduced) {
458        const unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
459        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(bitBlockWidth);
460        Value* SIZE_ONE = iBuilder->getSize(1);
461        Value* SIZE_ZERO = iBuilder->getSize(0);
462        Type * const INT_BIT_BLOCK_TY = iBuilder->getIntNTy(bitBlockWidth);
463        Type * const BIT_BLOCK_TY = iBuilder->getBitBlockType();
464        Constant* INT_BIT_BLOCK_ONE = ConstantInt::get(INT_BIT_BLOCK_TY, 1);
465        Constant* INT_BIT_BLOCK_ZERO = ConstantInt::get(INT_BIT_BLOCK_TY, 0);
466
467        Value* previousProduced = this->previousProducedMap.find(bitstreamName)->second;
468        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
469
470        BasicBlock *entryBlock = iBuilder->GetInsertBlock();
471        BasicBlock *conBlock = iBuilder->CreateBasicBlock("mark_bit_one_con");
472        BasicBlock *bodyBlock = iBuilder->CreateBasicBlock("mark_bit_one_body");
473        BasicBlock *exitBlock = iBuilder->CreateBasicBlock("mark_bit_one_exit");
474
475        Value* startBlockLocalIndex = iBuilder->CreateSub(iBuilder->CreateUDiv(start, SIZE_BIT_BLOCK_WIDTH), blockIndexBase);
476
477        iBuilder->CreateBr(conBlock);
478
479        // Con
480        iBuilder->SetInsertPoint(conBlock);
481
482        PHINode *curBlockLocalIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
483        curBlockLocalIndex->addIncoming(startBlockLocalIndex, entryBlock);
484        iBuilder->CreateCondBr(
485                iBuilder->CreateICmpULT(iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH), end),
486                bodyBlock,
487                exitBlock
488        );
489
490        // Body
491        iBuilder->SetInsertPoint(bodyBlock);
492
493        Value *outputLowestBitValue = iBuilder->CreateSelect(
494                iBuilder->CreateICmpULE(
495                        iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH),
496                        start
497                ),
498                iBuilder->CreateShl(INT_BIT_BLOCK_ONE, iBuilder->CreateZExt(iBuilder->CreateURem(start, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY)),
499                INT_BIT_BLOCK_ONE
500        );
501
502        Value *hasNotReachEnd = iBuilder->CreateICmpULE(
503                iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_ONE), SIZE_BIT_BLOCK_WIDTH),
504                end
505        );
506        Value *producedItemsCount = iBuilder->CreateSelect(
507                hasNotReachEnd,
508                iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_ONE), SIZE_BIT_BLOCK_WIDTH),
509                end
510        );
511
512
513        Value *outputHighestBitValue = iBuilder->CreateSelect(
514                hasNotReachEnd,
515                INT_BIT_BLOCK_ZERO,
516                iBuilder->CreateShl(
517                        INT_BIT_BLOCK_ONE,
518                        iBuilder->CreateZExt(iBuilder->CreateURem(end, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY)
519                )
520        );
521
522
523        Value *bitMask = iBuilder->CreateSub(
524                outputHighestBitValue,
525                outputLowestBitValue
526        );
527
528        if (!isOne) {
529            bitMask = iBuilder->CreateNot(bitMask);
530        }
531
532        Value *targetPtr = iBuilder->getOutputStreamBlockPtr(bitstreamName, SIZE_ZERO, curBlockLocalIndex);
533        Value *oldValue = iBuilder->CreateLoad(targetPtr);
534        oldValue = iBuilder->CreateBitCast(oldValue, INT_BIT_BLOCK_TY);
535        Value *newValue = NULL;
536        if (isOne) {
537            newValue = iBuilder->CreateOr(oldValue, bitMask);
538        } else {
539            newValue = iBuilder->CreateAnd(oldValue, bitMask);
540        }
541
542        iBuilder->CreateStore(
543                iBuilder->CreateBitCast(newValue, BIT_BLOCK_TY),
544                targetPtr
545        );
546        if (setProduced) {
547            iBuilder->setProducedItemCount(bitstreamName, producedItemsCount);
548        }
549
550        curBlockLocalIndex->addIncoming(iBuilder->CreateAdd(curBlockLocalIndex, SIZE_ONE), bodyBlock);
551        iBuilder->CreateBr(conBlock);
552
553        // Exit
554        iBuilder->SetInsertPoint(exitBlock);
555        return exitBlock;
556    }
557
558
559
560    void LZ4IndexBuilderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
561                                                             const string &outputBufferName, Type *pointerType,
562                                                             Value *value) {
563        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
564        Value* SIZE_ZERO = iBuilder->getSize(0);
565        Value* SIZE_ONE = iBuilder->getSize(1);
566
567        Value* previousProduced = previousProducedMap.find(outputBufferName)->second;
568
569        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
570        Value* outputOffset = iBuilder->getProducedItemCount(outputBufferName);
571        Value* blockIndex = iBuilder->CreateUDiv(outputOffset, SIZE_BIT_BLOCK_WIDTH);
572
573        Value* blockOffset = iBuilder->CreateURem(outputOffset, SIZE_BIT_BLOCK_WIDTH);
574
575        // i8, [8 x <4 x i64>]*
576        // i64, [64 x <4 x i64>]*
577        Value* ptr = iBuilder->getOutputStreamBlockPtr(outputBufferName, SIZE_ZERO, iBuilder->CreateSub(blockIndex, blockIndexBase));
578        ptr = iBuilder->CreatePointerCast(ptr, pointerType);
579        // GEP here is safe
580        iBuilder->CreateStore(value, iBuilder->CreateGEP(ptr, blockOffset));
581
582        iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, SIZE_ONE));
583    }
584
585
586    void LZ4IndexBuilderKernel::resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder,
587                                                            std::vector<std::string> outputList) {
588        previousProducedMap.clear();
589        for (auto iter = outputList.begin(); iter != outputList.end(); ++iter) {
590            previousProducedMap.insert(std::make_pair(*iter, iBuilder->getProducedItemCount(*iter)));
591        }
592    }
593}
Note: See TracBrowser for help on using the repository browser.