source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp @ 5939

Last change on this file since 5939 was 5939, checked in by cameron, 13 months ago

Some cleanups

File size: 30.1 KB
Line 
1//
2// Created by wxy325 on 2018/3/16.
3//
4
5#include "lz4_index_builder.h"
6
7
8#include <kernels/kernel_builder.h>
9#include <iostream>
10#include <string>
11#include <llvm/Support/raw_ostream.h>
12#include <kernels/streamset.h>
13
14using namespace llvm;
15using namespace kernel;
16using namespace std;
17namespace kernel{
18    LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
19            : MultiBlockKernel("LZ4IndexBuilderKernel",
20            // Inputs
21                               {
22                                       Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
23                                       Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
24                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xFX", RateEqualTo("byteStream")},
25                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xXF", RateEqualTo("byteStream")},
26
27                                       // block data
28                                       Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1),
29                                               ConstantStrideLengthOne()},
30                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1),
31                                               ConstantStrideLengthOne()},
32                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1),
33                                               ConstantStrideLengthOne()}
34
35                               },
36            //Outputs
37                               {
38                                       // Uncompressed_data
39                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos",
40                                               BoundedRate(0, 1)},
41                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength",
42                                               BoundedRate(0, 1)},
43                                       Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos",
44                                               BoundedRate(0, 1)},
45
46                                       Binding{iBuilder->getStreamSetTy(1, 1), "e1Marker", BoundedRate(0, 1)},
47                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
48                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
49                                       Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)}
50                               },
51            //Arguments
52                               {},
53                               {},
54            //Internal states:
55                               {
56                                       Binding{iBuilder->getSizeTy(), "blockDataIndex"},
57                                       Binding{iBuilder->getInt64Ty(), "m0OutputPos"}
58                               }) {
59//        addAttribute(MustExplicitlyTerminate());
60    }
61
62    void LZ4IndexBuilderKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *const numOfStrides) {
63//        iBuilder->CallPrintInt("entry", iBuilder->getSize(0));
64//        iBuilder->CallPrintInt("aaa", iBuilder->getProducedItemCount("e1Marker"));
65
66
67
68        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
69        BasicBlock* blockEndConBlock = iBuilder->CreateBasicBlock("blockEndConBlock");
70
71        this->resetPreviousProducedMap(iBuilder, {"e1Marker", "m0Start", "m0End", "matchOffset"});
72
73        Value* blockDataIndex = iBuilder->getScalarField("blockDataIndex");
74
75        Value* totalNumber = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("blockEnd"), iBuilder->getProcessedItemCount("blockEnd"));
76
77//        iBuilder->CallPrintInt("blockDataIndex", blockDataIndex);
78//        iBuilder->CallPrintInt("totalNumber", totalNumber);
79//        iBuilder->setTerminationSignal(iBuilder->CreateICmpEQ(availableBlockEnd, iBuilder->getSize(0)));
80
81        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock);
82
83        iBuilder->SetInsertPoint(blockEndConBlock);
84        Value* blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex);
85        iBuilder->CallPrintInt("blockEnd", blockEnd);
86
87        Value* totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));
88//        iBuilder->CallPrintInt("totalExtender", totalExtender);
89
90//        iBuilder->CallPrintInt("processByteStream", iBuilder->getProcessedItemCount("byteStream"));
91//        iBuilder->CallPrintInt("availableByteStream", iBuilder->getAvailableItemCount("byteStream"));
92
93
94//        iBuilder->CallPrintInt("consumedExtender", iBuilder->getConsumedItemCount("extender"));
95//        iBuilder->CallPrintInt("processExtender", iBuilder->getProcessedItemCount("extender"));
96//        iBuilder->CallPrintInt("availableExtender", iBuilder->getAvailableItemCount("extender"));
97//        iBuilder->CallPrintInt("blockDataIndex", blockDataIndex);
98
99
100        Value* blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex);
101
102        BasicBlock* processBlock = iBuilder->CreateBasicBlock("processBlock");
103//        iBuilder->CallPrintInt("----totalExtender", totalExtender);
104//        iBuilder->CallPrintInt("----blockStart", blockStart);
105//        iBuilder->CallPrintInt("----blockEnd", blockEnd);
106
107        iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
108
109        iBuilder->SetInsertPoint(processBlock);
110
111
112        //TODO handle uncompressed block
113        this->generateProcessCompressedBlock(iBuilder, blockStart, blockEnd);
114
115
116
117        Value* newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));
118        iBuilder->setScalarField("blockDataIndex", newBlockDataIndex);
119        iBuilder->setProcessedItemCount("blockEnd", newBlockDataIndex);
120        iBuilder->setProcessedItemCount("blockStart", newBlockDataIndex);
121        iBuilder->setProcessedItemCount("isCompressed", newBlockDataIndex);
122
123
124        iBuilder->setProcessedItemCount("byteStream", blockEnd);
125//        iBuilder->setProcessedItemCount("extender", blockEnd);
126//        iBuilder->setProcessedItemCount("CC_0xFX", blockEnd);
127//        iBuilder->setProcessedItemCount("CC_0xXF", blockEnd);
128
129        iBuilder->CreateBr(exitBlock);
130
131        iBuilder->SetInsertPoint(exitBlock);
132    }
133
134    Value* LZ4IndexBuilderKernel::processLiteral(const std::unique_ptr<KernelBuilder> &iBuilder, Value* token, Value* tokenPos, Value* blockEnd) {
135        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
136
137        Value* extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
138//        iBuilder->CallPrintInt("token", token);
139
140        BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
141        BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
142
143        iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthBody, extendLiteralLengthExit);
144
145        iBuilder->SetInsertPoint(extendLiteralLengthBody);
146        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), blockEnd);
147        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
148
149        iBuilder->CreateBr(extendLiteralLengthExit);
150
151        iBuilder->SetInsertPoint(extendLiteralLengthExit);
152
153        PHINode* phiCursorPosAfterLiteral = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
154        phiCursorPosAfterLiteral->addIncoming(newCursorPos, advanceFinishBlock);
155        phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock);
156
157        Value* literalExtensionSize = iBuilder->CreateSub(phiCursorPosAfterLiteral, tokenPos);
158//        iBuilder->CallPrintInt("literalExtensionSize", literalExtensionSize);
159        Value* finalLengthByte = this->generateLoadSourceInputByte(iBuilder, phiCursorPosAfterLiteral);
160        finalLengthByte = iBuilder->CreateZExt(finalLengthByte, iBuilder->getInt64Ty());
161        Value* literalLengthExtendValue = iBuilder->CreateSelect(
162                iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)),
163                iBuilder->CreateAdd(
164                        iBuilder->CreateMul(
165                                iBuilder->CreateSub(literalExtensionSize, iBuilder->getSize(1)),
166                                iBuilder->getSize(255)
167                        ),
168                        finalLengthByte
169                ),
170                iBuilder->getSize(0)
171        );
172        literalLengthExtendValue = iBuilder->CreateZExt(literalLengthExtendValue, iBuilder->getInt64Ty());
173        Value* literalLengthBase = iBuilder->CreateLShr(iBuilder->CreateZExt(token, iBuilder->getInt64Ty()), iBuilder->getInt64(4));
174        Value* literalLength = iBuilder->CreateAdd(literalLengthBase, literalLengthExtendValue);
175
176        Value* offsetPos = iBuilder->CreateAdd(
177                iBuilder->CreateAdd(
178                        phiCursorPosAfterLiteral,
179                        literalLength),
180                iBuilder->getSize(1));
181
182        // TODO Clear Output Buffer at the beginning instead of marking 0
183        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->getProducedItemCount("e1Marker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), false);
184        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), offsetPos, true);
185        this->increaseScalarField(iBuilder, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields
186        return offsetPos;
187    }
188
189    Value* LZ4IndexBuilderKernel::processMatch(const std::unique_ptr<KernelBuilder> &iBuilder, Value* offsetPos, Value* token, Value* blockEnd) {
190        Constant* INT64_ONE = iBuilder->getInt64(1);
191
192        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
193
194        Value* matchLengthStartPos = iBuilder->CreateAdd(offsetPos, INT64_ONE);
195        Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
196
197        BasicBlock* extendMatchBodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_body");
198        BasicBlock* extendMatchExitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_exit");
199
200        iBuilder->CreateCondBr(extendedMatchValue, extendMatchBodyBlock, extendMatchExitBlock);
201
202        iBuilder->SetInsertPoint(extendMatchBodyBlock);
203
204        //ExtendMatchBodyBlock
205        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(matchLengthStartPos, INT64_ONE), blockEnd);
206        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
207
208        // ----May be in a different segment now
209        iBuilder->CreateBr(extendMatchExitBlock);
210
211        //ExtendMatchExitBlock
212        iBuilder->SetInsertPoint(extendMatchExitBlock);
213        PHINode* phiCursorPosAfterMatch = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
214        phiCursorPosAfterMatch->addIncoming(newCursorPos, advanceFinishBlock);
215        phiCursorPosAfterMatch->addIncoming(matchLengthStartPos, entryBlock);
216
217        Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, matchLengthStartPos);
218//        iBuilder->CallPrintInt("totalExtender", iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender")));
219//        iBuilder->CallPrintInt("aaa", oldMatchExtensionSize);
220
221        extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
222        Value* matchExtensionSize = iBuilder->CreateSelect(
223                iBuilder->CreateICmpEQ(extendedMatchValue, iBuilder->getInt1(true)),
224                oldMatchExtensionSize,
225                iBuilder->getSize(0)
226        );
227        Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
228        Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
229
230
231        Value* extensionLastBitPos = iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1));
232        extensionLastBitPos = iBuilder->CreateAdd(extensionLastBitPos, matchExtensionSize);
233
234        Value* extensionLastBitValue = this->generateLoadSourceInputByte(iBuilder, extensionLastBitPos);
235        extensionLastBitValue = iBuilder->CreateZExt(extensionLastBitValue, iBuilder->getSizeTy());
236
237
238        Value* matchLengthAddValue = iBuilder->CreateSelect(
239                iBuilder->CreateICmpUGT(matchExtensionSize, iBuilder->getSize(0)),
240                iBuilder->CreateAdd(
241                        iBuilder->CreateMul(
242                                iBuilder->CreateSub(matchExtensionSize, iBuilder->getSize(1)),
243                                iBuilder->getSize(255)
244                        ),
245                        extensionLastBitValue
246                )
247                ,
248                iBuilder->getSize(0)
249        );
250        matchLengthAddValue = iBuilder->CreateZExt(matchLengthAddValue, iBuilder->getInt64Ty());
251
252        matchLength = iBuilder->CreateAdd(matchLength, matchLengthAddValue);
253
254        Value* outputPos = iBuilder->getScalarField("m0OutputPos");
255
256        Value* outputEndPos = iBuilder->CreateSub(
257                iBuilder->CreateAdd(outputPos, matchLength),
258                iBuilder->getInt64(1)
259        );
260
261        Value* matchOffset = iBuilder->CreateAdd(
262                iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, offsetPos), iBuilder->getSizeTy()),
263                iBuilder->CreateShl(iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1))), iBuilder->getSizeTy()), iBuilder->getSize(8))
264        );
265//        iBuilder->CallPrintInt("matchOffset", matchOffset);
266        this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
267//    iBuilder->CallPrintInt("m0Start", outputPos);
268        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
269//    iBuilder->CallPrintInt("m0End", outputEndPos);
270        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffset);
271//    iBuilder->CallPrintInt("matchOffset", matchOffset);
272        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
273        return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE);
274    }
275
276
277    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
278
279        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
280        //TODO use memset to clear output buffer
281        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("processCompressedExitBlock");
282
283        BasicBlock* processCon = iBuilder->CreateBasicBlock("processCompressedConBlock");
284        BasicBlock* processBody = iBuilder->CreateBasicBlock("processCompressedBodyBlock");
285
286        iBuilder->CreateBr(processCon);
287        iBuilder->SetInsertPoint(processCon);
288
289        PHINode* phiCursorValue = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3); // phiCursorValue should always be the position of next token except for the final sequence
290        phiCursorValue->addIncoming(blockStart, entryBlock);
291
292        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(phiCursorValue, blockEnd), processBody, exitBlock);
293
294        // Process Body
295        iBuilder->SetInsertPoint(processBody);
296
297        //TODO add acceleration here
298        Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue);
299//        iBuilder->CallPrintInt("tokenPos", phiCursorValue);
300//        iBuilder->CallPrintInt("token", token);
301
302        // Process Literal
303        BasicBlock* processLiteralBlock = iBuilder->CreateBasicBlock("processLiteralBlock");
304        iBuilder->CreateBr(processLiteralBlock);
305        iBuilder->SetInsertPoint(processLiteralBlock);
306
307        Value* offsetPos = this->processLiteral(iBuilder, token, phiCursorValue, blockEnd);
308//        iBuilder->CallPrintInt("offsetPos", offsetPos);
309        // Process Match
310        BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
311        BasicBlock* handleM0ElseBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_else");
312
313        iBuilder->CreateCondBr(
314                iBuilder->CreateICmpULT(offsetPos, blockEnd),
315                handleM0BodyBlock,
316                handleM0ElseBlock
317        );
318
319        // HandleM0Body
320        iBuilder->SetInsertPoint(handleM0BodyBlock);
321        Value* nextTokenPos = this->processMatch(iBuilder, offsetPos, token, blockEnd);
322//        iBuilder->CallPrintInt("nextTokenPos", nextTokenPos);
323        phiCursorValue->addIncoming(nextTokenPos, iBuilder->GetInsertBlock());
324
325        iBuilder->CreateBr(processCon);
326
327
328        // HandleM0Else
329        iBuilder->SetInsertPoint(handleM0ElseBlock);
330
331        phiCursorValue->addIncoming(offsetPos, handleM0ElseBlock);
332        // Store final M0 pos to make sure the bit stream will be long enough
333        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
334//        iBuilder->CallPrintInt("finalM0OutputPos", finalM0OutputPos);
335        this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
336        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
337        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), iBuilder->getInt64(0));
338
339        iBuilder->CreateBr(processCon);
340
341
342        iBuilder->SetInsertPoint(exitBlock);
343    }
344
345    Value *LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) {
346        return advanceUntilNextValue(iBuilder, inputName, startPos, true, maxPos);
347    }
348
349    Value *LZ4IndexBuilderKernel::advanceUntilNextOne(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) {
350        return advanceUntilNextValue(iBuilder, inputName, startPos, false, maxPos);
351    }
352
353    Value *LZ4IndexBuilderKernel::advanceUntilNextValue(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, bool isNextZero, Value* maxPos) {
354        unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
355        Constant* INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(bitBlockWidth);
356        Constant* SIZE_ZERO = iBuilder->getSize(0);
357        Type* bitBlockWidthIntTy = iBuilder->getIntNTy(bitBlockWidth);
358
359        Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputName), INT64_BIT_BLOCK_WIDTH);
360
361        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
362
363        BasicBlock* advanceConBlock = iBuilder->CreateBasicBlock("advanceConBlock");
364        BasicBlock* advanceBodyBlock = iBuilder->CreateBasicBlock("advanceBodyBlock");
365        BasicBlock* advanceExitBlock = iBuilder->CreateBasicBlock("advanceExitBlock");
366
367        iBuilder->CreateBr(advanceConBlock);
368        // TODO special handling for the first advance may have better performance
369        iBuilder->SetInsertPoint(advanceConBlock);
370
371        PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
372        phiCurrentPos->addIncoming(startPos, entryBlock);
373        PHINode* phiIsFinish = iBuilder->CreatePHI(iBuilder->getInt1Ty(), 2);
374        phiIsFinish->addIncoming(iBuilder->getInt1(false), entryBlock);
375        iBuilder->CreateCondBr(iBuilder->CreateNot(phiIsFinish), advanceBodyBlock, advanceExitBlock);
376
377        iBuilder->SetInsertPoint(advanceBodyBlock);
378
379        Value* currentPosBitBlockIndex = iBuilder->CreateSub(iBuilder->CreateUDiv(phiCurrentPos, INT64_BIT_BLOCK_WIDTH), baseInputBlockIndex);
380
381        Value* currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
382
383        Value* ptr = iBuilder->getInputStreamBlockPtr(inputName, SIZE_ZERO, currentPosBitBlockIndex);
384
385        Value* currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy);
386
387        currentBitValue = iBuilder->CreateLShr(currentBitValue, iBuilder->CreateZExt(currentPosBitBlockOffset, bitBlockWidthIntTy));
388        if (isNextZero) {
389            currentBitValue = iBuilder->CreateNot(currentBitValue);
390        }
391        Value* forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
392        Value* newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
393        newOffset = iBuilder->CreateUMin(newOffset, INT64_BIT_BLOCK_WIDTH);
394
395        Value* actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
396        Value* newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue);
397        if (maxPos) {
398            newPos = iBuilder->CreateUMin(maxPos, newPos);
399            actualAdvanceValue = iBuilder->CreateSub(newPos, phiCurrentPos);
400            newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, actualAdvanceValue);
401        }
402
403        phiIsFinish->addIncoming(iBuilder->CreateNot(iBuilder->CreateICmpEQ(newOffset, INT64_BIT_BLOCK_WIDTH)), iBuilder->GetInsertBlock());
404        phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock());
405        iBuilder->CreateBr(advanceConBlock);
406
407        iBuilder->SetInsertPoint(advanceExitBlock);
408        return phiCurrentPos;
409    }
410
411    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value *globalOffset) {
412        // Stride Size here is Constant 1 instead of BitBlockWidth
413        Constant* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
414        Constant* SIZE_ZERO = iBuilder->getSize(0);
415
416//        Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_BIT_BLOCK_WIDTH);
417
418        Value* offset = iBuilder->CreateSub(globalOffset, iBuilder->getProcessedItemCount(inputBufferName));
419
420        Value* targetBlockIndex = iBuilder->CreateUDiv(offset, SIZE_BIT_BLOCK_WIDTH);
421        Value* localOffset = iBuilder->CreateURem(offset, SIZE_BIT_BLOCK_WIDTH);
422
423        //[64 x <4 x i64>]*
424        Value* ptr = iBuilder->getInputStreamBlockPtr(inputBufferName, SIZE_ZERO, targetBlockIndex);
425        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt64Ty()->getPointerTo());
426        //GEP here is safe
427        Value* valuePtr = iBuilder->CreateGEP(ptr, localOffset);
428        return iBuilder->CreateLoad(valuePtr);
429    }
430
431    Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value *offset) {
432        // The external buffer is always linear accessible, so the GEP here is safe
433        Value *blockStartPtr = iBuilder->CreatePointerCast(
434                iBuilder->getRawInputPointer("byteStream", iBuilder->getInt32(0)),
435                iBuilder->getInt8PtrTy()
436        );
437        Value *ptr = iBuilder->CreateGEP(blockStartPtr, offset);
438        return iBuilder->CreateLoad(ptr);
439    }
440
441    void LZ4IndexBuilderKernel::increaseScalarField(const unique_ptr<KernelBuilder> &iBuilder, const string &fieldName, Value *value) {
442        Value *fieldValue = iBuilder->getScalarField(fieldName);
443        fieldValue = iBuilder->CreateAdd(fieldValue, value);
444        iBuilder->setScalarField(fieldName, fieldValue);
445    }
446
447    size_t LZ4IndexBuilderKernel::getOutputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, string bufferName) {
448        return this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
449    }
450
451    // Assume we have enough output buffer
452    llvm::BasicBlock *LZ4IndexBuilderKernel::markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
453                                                                    const std::string &bitstreamName,
454                                                                    llvm::Value *start, llvm::Value *end, bool isOne,
455                                                                    bool setProduced) {
456        const unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
457        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(bitBlockWidth);
458        Value* SIZE_ONE = iBuilder->getSize(1);
459        Value* SIZE_ZERO = iBuilder->getSize(0);
460        Type * const INT_BIT_BLOCK_TY = iBuilder->getIntNTy(bitBlockWidth);
461        Type * const BIT_BLOCK_TY = iBuilder->getBitBlockType();
462        Constant* INT_BIT_BLOCK_ONE = ConstantInt::get(INT_BIT_BLOCK_TY, 1);
463        Constant* INT_BIT_BLOCK_ZERO = ConstantInt::get(INT_BIT_BLOCK_TY, 0);
464
465        Value* previousProduced = this->previousProducedMap.find(bitstreamName)->second;
466        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
467
468        BasicBlock *entryBlock = iBuilder->GetInsertBlock();
469        BasicBlock *conBlock = iBuilder->CreateBasicBlock("mark_bit_one_con");
470        BasicBlock *bodyBlock = iBuilder->CreateBasicBlock("mark_bit_one_body");
471        BasicBlock *exitBlock = iBuilder->CreateBasicBlock("mark_bit_one_exit");
472
473        Value* startBlockLocalIndex = iBuilder->CreateSub(iBuilder->CreateUDiv(start, SIZE_BIT_BLOCK_WIDTH), blockIndexBase);
474
475        iBuilder->CreateBr(conBlock);
476
477        // Con
478        iBuilder->SetInsertPoint(conBlock);
479
480        PHINode *curBlockLocalIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
481        curBlockLocalIndex->addIncoming(startBlockLocalIndex, entryBlock);
482        iBuilder->CreateCondBr(
483                iBuilder->CreateICmpULT(iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH), end),
484                bodyBlock,
485                exitBlock
486        );
487
488        // Body
489        iBuilder->SetInsertPoint(bodyBlock);
490
491        Value *outputLowestBitValue = iBuilder->CreateSelect(
492                iBuilder->CreateICmpULE(
493                        iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH),
494                        start
495                ),
496                iBuilder->CreateShl(INT_BIT_BLOCK_ONE, iBuilder->CreateZExt(iBuilder->CreateURem(start, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY)),
497                INT_BIT_BLOCK_ONE
498        );
499
500        Value *hasNotReachEnd = iBuilder->CreateICmpULE(
501                iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_ONE), SIZE_BIT_BLOCK_WIDTH),
502                end
503        );
504        Value *producedItemsCount = iBuilder->CreateSelect(
505                hasNotReachEnd,
506                iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_ONE), SIZE_BIT_BLOCK_WIDTH),
507                end
508        );
509
510
511        Value *outputHighestBitValue = iBuilder->CreateSelect(
512                hasNotReachEnd,
513                INT_BIT_BLOCK_ZERO,
514                iBuilder->CreateShl(
515                        INT_BIT_BLOCK_ONE,
516                        iBuilder->CreateZExt(iBuilder->CreateURem(end, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY)
517                )
518        );
519
520
521        Value *bitMask = iBuilder->CreateSub(
522                outputHighestBitValue,
523                outputLowestBitValue
524        );
525
526        if (!isOne) {
527            bitMask = iBuilder->CreateNot(bitMask);
528        }
529
530        Value *targetPtr = iBuilder->getOutputStreamBlockPtr(bitstreamName, SIZE_ZERO, curBlockLocalIndex);
531        Value *oldValue = iBuilder->CreateLoad(targetPtr);
532        oldValue = iBuilder->CreateBitCast(oldValue, INT_BIT_BLOCK_TY);
533        Value *newValue = NULL;
534        if (isOne) {
535            newValue = iBuilder->CreateOr(oldValue, bitMask);
536        } else {
537            newValue = iBuilder->CreateAnd(oldValue, bitMask);
538        }
539
540        iBuilder->CreateStore(
541                iBuilder->CreateBitCast(newValue, BIT_BLOCK_TY),
542                targetPtr
543        );
544        if (setProduced) {
545            iBuilder->setProducedItemCount(bitstreamName, producedItemsCount);
546        }
547
548        curBlockLocalIndex->addIncoming(iBuilder->CreateAdd(curBlockLocalIndex, SIZE_ONE), bodyBlock);
549        iBuilder->CreateBr(conBlock);
550
551        // Exit
552        iBuilder->SetInsertPoint(exitBlock);
553        return exitBlock;
554    }
555
556
557
558    void LZ4IndexBuilderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
559                                                             const string &outputBufferName, Type *pointerType,
560                                                             Value *value) {
561        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
562        Value* SIZE_ZERO = iBuilder->getSize(0);
563        Value* SIZE_ONE = iBuilder->getSize(1);
564
565        Value* previousProduced = previousProducedMap.find(outputBufferName)->second;
566
567        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
568        Value* outputOffset = iBuilder->getProducedItemCount(outputBufferName);
569        Value* blockIndex = iBuilder->CreateUDiv(outputOffset, SIZE_BIT_BLOCK_WIDTH);
570
571        Value* blockOffset = iBuilder->CreateURem(outputOffset, SIZE_BIT_BLOCK_WIDTH);
572
573        // i8, [8 x <4 x i64>]*
574        // i64, [64 x <4 x i64>]*
575        Value* ptr = iBuilder->getOutputStreamBlockPtr(outputBufferName, SIZE_ZERO, iBuilder->CreateSub(blockIndex, blockIndexBase));
576        ptr = iBuilder->CreatePointerCast(ptr, pointerType);
577        // GEP here is safe
578        iBuilder->CreateStore(value, iBuilder->CreateGEP(ptr, blockOffset));
579
580        iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, SIZE_ONE));
581    }
582
583
584    void LZ4IndexBuilderKernel::resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder,
585                                                            std::vector<std::string> outputList) {
586        previousProducedMap.clear();
587        for (auto iter = outputList.begin(); iter != outputList.end(); ++iter) {
588            previousProducedMap.insert(std::make_pair(*iter, iBuilder->getProducedItemCount(*iter)));
589        }
590    }
591}
Note: See TracBrowser for help on using the repository browser.