source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp @ 5958

Last change on this file since 5958 was 5958, checked in by nmedfort, 12 months ago

made LZ4IndexBuilderKernel a segment-oriented kernel + code clean up.

File size: 24.1 KB
Line 
1//
2// Created by wxy325 on 2018/3/16.
3//
4
5#include "lz4_index_builder.h"
6
7
8#include <kernels/kernel_builder.h>
9#include <iostream>
10#include <string>
11#include <llvm/Support/raw_ostream.h>
12#include <kernels/streamset.h>
13
14using namespace llvm;
15using namespace kernel;
16using namespace std;
17
18namespace kernel{
19
20    LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
21    : SegmentOrientedKernel("LZ4IndexBuilderKernel",
22    // Inputs
23    {
24           Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
25           Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
26
27           // block data
28           Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1),
29                   AlwaysConsume()},
30           Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1),
31                   AlwaysConsume()},
32           Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1),
33                   AlwaysConsume()}
34
35    },
36    //Outputs
37    {
38           // Uncompressed_data
39           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos",
40                   BoundedRate(0, 1)},
41           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength",
42                   BoundedRate(0, 1)},
43           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos",
44                   BoundedRate(0, 1)},
45
46           Binding{iBuilder->getStreamSetTy(1, 1), "deletionMarker", BoundedRate(0, 1)},
47           Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
48           Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
49           Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)},
50           Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)}
51    },
52    //Arguments
53    {
54           Binding{iBuilder->getSizeTy(), "fileSize"}
55    },
56    {},
57    //Internal states:
58    {
59           Binding{iBuilder->getSizeTy(), "blockDataIndex"},
60           Binding{iBuilder->getInt64Ty(), "m0OutputPos"}
61    }) {
62        this->setStride(4 * 1024 * 1024);
63        addAttribute(MustExplicitlyTerminate());
64    }
65
66    void LZ4IndexBuilderKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
67
68        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
69        BasicBlock* blockEndConBlock = iBuilder->CreateBasicBlock("blockEndConBlock");
70
71        Value * blockDataIndex = iBuilder->getScalarField("blockDataIndex");
72
73        Value * totalNumber = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("blockEnd"), iBuilder->getProcessedItemCount("blockEnd"));
74        Value * totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));
75
76        Value * blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex);
77
78        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock);
79
80        iBuilder->SetInsertPoint(blockEndConBlock);
81        Value * blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex);
82        BasicBlock * processBlock = iBuilder->CreateBasicBlock("processBlock");
83        iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
84
85        iBuilder->SetInsertPoint(processBlock);
86
87        //TODO handle uncompressed block
88
89        this->generateProcessCompressedBlock(iBuilder, blockStart, blockEnd);
90
91        Value * newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));
92        iBuilder->setScalarField("blockDataIndex", newBlockDataIndex);
93        iBuilder->setProcessedItemCount("blockEnd", newBlockDataIndex);
94        iBuilder->setProcessedItemCount("blockStart", newBlockDataIndex);
95        iBuilder->setProcessedItemCount("isCompressed", newBlockDataIndex);
96
97        iBuilder->setProcessedItemCount("byteStream", blockEnd);
98        iBuilder->CreateBr(exitBlock);
99
100        iBuilder->SetInsertPoint(exitBlock);
101    }
102
103    Value* LZ4IndexBuilderKernel::processLiteral(const std::unique_ptr<KernelBuilder> &iBuilder, Value* token, Value* tokenPos, Value* blockEnd) {
104        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
105
106        Value * extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
107
108        BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
109        BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
110
111        iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthBody, extendLiteralLengthExit);
112
113        iBuilder->SetInsertPoint(extendLiteralLengthBody);
114        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), blockEnd);
115        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
116
117        iBuilder->CreateBr(extendLiteralLengthExit);
118
119        iBuilder->SetInsertPoint(extendLiteralLengthExit);
120
121        PHINode* phiCursorPosAfterLiteral = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
122        phiCursorPosAfterLiteral->addIncoming(newCursorPos, advanceFinishBlock);
123        phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock);
124
125        Value * literalExtensionSize = iBuilder->CreateSub(phiCursorPosAfterLiteral, tokenPos);
126        Value * finalLengthByte = this->generateLoadSourceInputByte(iBuilder, phiCursorPosAfterLiteral);
127        finalLengthByte = iBuilder->CreateZExt(finalLengthByte, iBuilder->getInt64Ty());
128        Value * literalLengthExtendValue = iBuilder->CreateSelect(
129                iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)),
130                iBuilder->CreateAdd(
131                        iBuilder->CreateMul(
132                                iBuilder->CreateSub(literalExtensionSize, iBuilder->getSize(1)),
133                                iBuilder->getSize(255)
134                        ),
135                        finalLengthByte
136                ),
137                iBuilder->getSize(0)
138        );
139        literalLengthExtendValue = iBuilder->CreateZExt(literalLengthExtendValue, iBuilder->getInt64Ty());
140        Value* literalLengthBase = iBuilder->CreateLShr(iBuilder->CreateZExt(token, iBuilder->getInt64Ty()), iBuilder->getInt64(4));
141        Value* literalLength = iBuilder->CreateAdd(literalLengthBase, literalLengthExtendValue);
142
143        Value* offsetPos = iBuilder->CreateAdd(
144                iBuilder->CreateAdd(
145                        phiCursorPosAfterLiteral,
146                        literalLength),
147                iBuilder->getSize(1));
148
149        // TODO Clear Output Buffer at the beginning instead of marking 0
150        this->markCircularOutputBitstream(iBuilder, "deletionMarker", iBuilder->getProducedItemCount("deletionMarker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), true);
151        this->markCircularOutputBitstream(iBuilder, "deletionMarker", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), offsetPos, false);
152        this->increaseScalarField(iBuilder, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields
153        return offsetPos;
154    }
155
156    Value* LZ4IndexBuilderKernel::processMatch(const std::unique_ptr<KernelBuilder> &iBuilder, Value* offsetPos, Value* token, Value* blockEnd) {
157        Constant* INT64_ONE = iBuilder->getInt64(1);
158
159        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
160
161        Value* matchLengthStartPos = iBuilder->CreateAdd(offsetPos, INT64_ONE);
162        Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
163
164        BasicBlock* extendMatchBodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_body");
165        BasicBlock* extendMatchExitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_exit");
166
167        iBuilder->CreateCondBr(extendedMatchValue, extendMatchBodyBlock, extendMatchExitBlock);
168
169        iBuilder->SetInsertPoint(extendMatchBodyBlock);
170
171        //ExtendMatchBodyBlock
172        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(matchLengthStartPos, INT64_ONE), blockEnd);
173        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
174
175        // ----May be in a different segment now
176        iBuilder->CreateBr(extendMatchExitBlock);
177
178        //ExtendMatchExitBlock
179        iBuilder->SetInsertPoint(extendMatchExitBlock);
180        PHINode* phiCursorPosAfterMatch = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
181        phiCursorPosAfterMatch->addIncoming(newCursorPos, advanceFinishBlock);
182        phiCursorPosAfterMatch->addIncoming(matchLengthStartPos, entryBlock);
183
184        Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, matchLengthStartPos);
185        extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
186        Value* matchExtensionSize = iBuilder->CreateSelect(
187                iBuilder->CreateICmpEQ(extendedMatchValue, iBuilder->getInt1(true)),
188                oldMatchExtensionSize,
189                iBuilder->getSize(0)
190        );
191        Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
192        Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
193
194
195        Value* extensionLastBitPos = iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1));
196        extensionLastBitPos = iBuilder->CreateAdd(extensionLastBitPos, matchExtensionSize);
197
198        Value* extensionLastBitValue = this->generateLoadSourceInputByte(iBuilder, extensionLastBitPos);
199        extensionLastBitValue = iBuilder->CreateZExt(extensionLastBitValue, iBuilder->getSizeTy());
200
201
202        Value* matchLengthAddValue = iBuilder->CreateSelect(
203                iBuilder->CreateICmpUGT(matchExtensionSize, iBuilder->getSize(0)),
204                iBuilder->CreateAdd(
205                        iBuilder->CreateMul(
206                                iBuilder->CreateSub(matchExtensionSize, iBuilder->getSize(1)),
207                                iBuilder->getSize(255)
208                        ),
209                        extensionLastBitValue
210                )
211                ,
212                iBuilder->getSize(0)
213        );
214        matchLengthAddValue = iBuilder->CreateZExt(matchLengthAddValue, iBuilder->getInt64Ty());
215
216        matchLength = iBuilder->CreateAdd(matchLength, matchLengthAddValue);
217
218        Value* outputPos = iBuilder->getScalarField("m0OutputPos");
219
220        Value* outputEndPos = iBuilder->CreateSub(
221                iBuilder->CreateAdd(outputPos, matchLength),
222                iBuilder->getInt64(1)
223        );
224
225        Value* matchOffset = iBuilder->CreateAdd(
226                iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, offsetPos), iBuilder->getSizeTy()),
227                iBuilder->CreateShl(iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1))), iBuilder->getSizeTy()), iBuilder->getSize(8))
228        );
229        this->generateStoreNumberOutput(iBuilder, "m0Start", outputPos);
230        this->generateStoreNumberOutput(iBuilder, "m0End", outputEndPos);
231        this->generateStoreNumberOutput(iBuilder, "matchOffset", matchOffset);
232        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
233        this->markCircularOutputBitstream(iBuilder, "M0Marker", outputPos, outputEndPos, true, false);
234
235        return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE);
236    }
237
238    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
239        // Constant
240        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
241
242        Value* m0OutputBlockPtr = iBuilder->getOutputStreamBlockPtr("M0Marker", iBuilder->getSize(0));
243        iBuilder->CreateMemSet(m0OutputBlockPtr, iBuilder->getInt8(0), 4 * 1024 * 1024 / 8, true);
244
245
246        Value* isTerminal = iBuilder->CreateICmpEQ(blockEnd, iBuilder->getScalarField("fileSize"));
247        iBuilder->setTerminationSignal(isTerminal);
248
249        //TODO use memset to clear output buffer for extract marker
250
251        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("processCompressedExitBlock");
252
253        BasicBlock* processCon = iBuilder->CreateBasicBlock("processCompressedConBlock");
254        BasicBlock* processBody = iBuilder->CreateBasicBlock("processCompressedBodyBlock");
255
256        iBuilder->CreateBr(processCon);
257        iBuilder->SetInsertPoint(processCon);
258
259        PHINode* phiCursorValue = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3); // phiCursorValue should always be the position of next token except for the final sequence
260        phiCursorValue->addIncoming(blockStart, entryBlock);
261
262        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(phiCursorValue, blockEnd), processBody, exitBlock);
263
264        // Process Body
265        iBuilder->SetInsertPoint(processBody);
266
267        //TODO add acceleration here
268        Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue);
269        // Process Literal
270        BasicBlock* processLiteralBlock = iBuilder->CreateBasicBlock("processLiteralBlock");
271        iBuilder->CreateBr(processLiteralBlock);
272        iBuilder->SetInsertPoint(processLiteralBlock);
273
274        Value* offsetPos = this->processLiteral(iBuilder, token, phiCursorValue, blockEnd);
275        // Process Match
276        BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
277        BasicBlock* handleM0ElseBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_else");
278
279        iBuilder->CreateCondBr(
280                iBuilder->CreateICmpULT(offsetPos, blockEnd),
281                handleM0BodyBlock,
282                handleM0ElseBlock
283        );
284
285        // HandleM0Body
286        iBuilder->SetInsertPoint(handleM0BodyBlock);
287        Value* nextTokenPos = this->processMatch(iBuilder, offsetPos, token, blockEnd);
288        phiCursorValue->addIncoming(nextTokenPos, iBuilder->GetInsertBlock());
289
290        iBuilder->CreateBr(processCon);
291
292
293        // HandleM0Else
294        iBuilder->SetInsertPoint(handleM0ElseBlock);
295
296        phiCursorValue->addIncoming(offsetPos, handleM0ElseBlock);
297        // Store final M0 pos to make sure the bit stream will be long enough
298        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
299        this->generateStoreNumberOutput(iBuilder, "m0Start", finalM0OutputPos);
300        this->generateStoreNumberOutput(iBuilder, "m0End", finalM0OutputPos);
301        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64(0));
302        iBuilder->setProducedItemCount("M0Marker", finalM0OutputPos);
303        // finalM0OutputPos should always be 4MB * n except for the final block
304
305        iBuilder->CreateBr(processCon);
306
307
308        iBuilder->SetInsertPoint(exitBlock);
309    }
310
311    Value * LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value * startPos, Value * maxPos) {
312
313        unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
314        Constant* INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(bitBlockWidth);
315        Type* bitBlockType = iBuilder->getBitBlockType();
316        Type* bitBlockWidthIntTy = iBuilder->getIntNTy(bitBlockWidth);
317
318        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
319
320        BasicBlock* advanceConBlock = iBuilder->CreateBasicBlock("advanceConBlock");
321        BasicBlock* advanceBodyBlock = iBuilder->CreateBasicBlock("advanceBodyBlock");
322        BasicBlock* advanceExitBlock = iBuilder->CreateBasicBlock("advanceExitBlock");
323
324        iBuilder->CreateBr(advanceConBlock);
325        // TODO special handling for the first advance may have better performance
326        iBuilder->SetInsertPoint(advanceConBlock);
327
328        PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
329        phiCurrentPos->addIncoming(startPos, entryBlock);
330        PHINode* phiIsFinish = iBuilder->CreatePHI(iBuilder->getInt1Ty(), 2);
331        phiIsFinish->addIncoming(iBuilder->getInt1(false), entryBlock);
332        iBuilder->CreateCondBr(iBuilder->CreateNot(phiIsFinish), advanceBodyBlock, advanceExitBlock);
333
334        iBuilder->SetInsertPoint(advanceBodyBlock);
335
336        Value * currentBlockGlobalPos = iBuilder->CreateAnd(phiCurrentPos, ConstantExpr::getNeg(INT64_BIT_BLOCK_WIDTH));
337        Value * currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
338
339        Value * ptr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, currentBlockGlobalPos), bitBlockType->getPointerTo());
340
341        Value * currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy);
342        currentBitValue = iBuilder->CreateLShr(currentBitValue, iBuilder->CreateZExt(currentPosBitBlockOffset, bitBlockWidthIntTy));
343        currentBitValue = iBuilder->CreateNot(currentBitValue);
344
345        Value * forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
346        Value * newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
347        newOffset = iBuilder->CreateUMin(newOffset, INT64_BIT_BLOCK_WIDTH);
348
349        Value * actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
350        Value * newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue);
351        if (maxPos) {
352            newPos = iBuilder->CreateUMin(maxPos, newPos);
353            actualAdvanceValue = iBuilder->CreateSub(newPos, phiCurrentPos);
354            newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, actualAdvanceValue);
355        }
356
357        phiIsFinish->addIncoming(iBuilder->CreateICmpNE(newOffset, INT64_BIT_BLOCK_WIDTH), iBuilder->GetInsertBlock());
358        phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock());
359        iBuilder->CreateBr(advanceConBlock);
360
361        iBuilder->SetInsertPoint(advanceExitBlock);
362        return phiCurrentPos;
363    }
364
365    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value * globalOffset) {
366        Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(getStride());
367        Value * processed = iBuilder->getProcessedItemCount(inputBufferName);
368        processed = iBuilder->CreateAnd(processed, ConstantExpr::getNeg(SIZE_STRIDE_SIZE));
369        Value * offset = iBuilder->CreateSub(globalOffset, processed);
370        Value * valuePtr = iBuilder->getRawInputPointer(inputBufferName, offset);
371        return iBuilder->CreateLoad(valuePtr);
372    }
373
374    Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value * offset) {
375        Value * ptr = iBuilder->getRawInputPointer("byteStream", offset);
376        return iBuilder->CreateLoad(ptr);
377    }
378
379    void LZ4IndexBuilderKernel::increaseScalarField(const unique_ptr<KernelBuilder> &iBuilder, const string &fieldName, Value *value) {
380        Value *fieldValue = iBuilder->getScalarField(fieldName);
381        fieldValue = iBuilder->CreateAdd(fieldValue, value);
382        iBuilder->setScalarField(fieldName, fieldValue);
383    }
384
385    size_t LZ4IndexBuilderKernel::getOutputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, string bufferName) {
386        return this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
387    }
388
389    // Assume we have enough output buffer
390    llvm::BasicBlock *LZ4IndexBuilderKernel::markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
391                                                                         const std::string &bitstreamName,
392                                                                         llvm::Value *start, llvm::Value *end, bool isOne,
393                                                                         bool setProduced) {
394        const unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
395        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(bitBlockWidth);
396        Value* SIZE_ONE = iBuilder->getSize(1);
397        Type * const INT_BIT_BLOCK_TY = iBuilder->getIntNTy(bitBlockWidth);
398        Type * const BIT_BLOCK_TY = iBuilder->getBitBlockType();
399        Constant* INT_BIT_BLOCK_ONE = ConstantInt::get(INT_BIT_BLOCK_TY, 1);
400        Constant* INT_BIT_BLOCK_ZERO = ConstantInt::get(INT_BIT_BLOCK_TY, 0);
401
402        BasicBlock *entryBlock = iBuilder->GetInsertBlock();
403        BasicBlock *conBlock = iBuilder->CreateBasicBlock("mark_bit_one_con");
404        BasicBlock *bodyBlock = iBuilder->CreateBasicBlock("mark_bit_one_body");
405        BasicBlock *exitBlock = iBuilder->CreateBasicBlock("mark_bit_one_exit");
406
407        Value * startBlockLocalIndex = iBuilder->CreateUDiv(start, SIZE_BIT_BLOCK_WIDTH);
408
409        iBuilder->CreateBr(conBlock);
410
411        // Con
412        iBuilder->SetInsertPoint(conBlock);
413
414        PHINode *curBlockLocalIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
415        curBlockLocalIndex->addIncoming(startBlockLocalIndex, entryBlock);
416
417
418        iBuilder->CreateCondBr(
419                iBuilder->CreateICmpULT(iBuilder->CreateMul(curBlockLocalIndex, SIZE_BIT_BLOCK_WIDTH), end),
420                bodyBlock,
421                exitBlock
422        );
423
424        // Body
425        iBuilder->SetInsertPoint(bodyBlock);
426
427        Value * const currentPosition = iBuilder->CreateMul(curBlockLocalIndex, SIZE_BIT_BLOCK_WIDTH);
428        Value * lowestBitPosition = iBuilder->CreateURem(start, SIZE_BIT_BLOCK_WIDTH);
429        lowestBitPosition = iBuilder->CreateZExt(lowestBitPosition, INT_BIT_BLOCK_TY);
430        Value * outputLowestBitValue = iBuilder->CreateShl(INT_BIT_BLOCK_ONE, lowestBitPosition);
431        Value * const hasNotReachedStart = iBuilder->CreateICmpULE(currentPosition, start);
432        outputLowestBitValue = iBuilder->CreateSelect(hasNotReachedStart, outputLowestBitValue, INT_BIT_BLOCK_ONE);
433
434        Value * const nextPosition = iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, SIZE_ONE), SIZE_BIT_BLOCK_WIDTH);
435        Value * const hasNotReachEnd = iBuilder->CreateICmpULE(nextPosition, end);
436        Value * producedItemsCount = iBuilder->CreateSelect(hasNotReachEnd, nextPosition, end);
437        Value * highestBitPosition = iBuilder->CreateURem(end, SIZE_BIT_BLOCK_WIDTH);
438        highestBitPosition = iBuilder->CreateZExt(highestBitPosition, INT_BIT_BLOCK_TY);
439        Value * outputHighestBitValue = iBuilder->CreateShl(INT_BIT_BLOCK_ONE, highestBitPosition);
440        outputHighestBitValue = iBuilder->CreateSelect(hasNotReachEnd, INT_BIT_BLOCK_ZERO, outputHighestBitValue);
441        Value * bitMask = iBuilder->CreateSub(outputHighestBitValue, outputLowestBitValue);
442        bitMask = iBuilder->CreateBitCast(bitMask, BIT_BLOCK_TY);
443
444        Value * targetPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, currentPosition), iBuilder->getBitBlockType()->getPointerTo());
445        Value * oldValue = iBuilder->CreateBlockAlignedLoad(targetPtr);
446        Value * newValue = nullptr;
447        if (isOne) {
448            newValue = iBuilder->CreateOr(oldValue, bitMask);
449        } else {
450            newValue = iBuilder->CreateAnd(oldValue, iBuilder->CreateNot(bitMask));
451        }
452        iBuilder->CreateStore(newValue, targetPtr);
453
454        if (setProduced) {
455            iBuilder->setProducedItemCount(bitstreamName, producedItemsCount);
456        }
457
458        curBlockLocalIndex->addIncoming(iBuilder->CreateAdd(curBlockLocalIndex, SIZE_ONE), bodyBlock);
459        iBuilder->CreateBr(conBlock);
460
461        // Exit
462        iBuilder->SetInsertPoint(exitBlock);
463        return exitBlock;
464    }
465
466
467    void LZ4IndexBuilderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
468                                                          const string & outputBufferName,
469                                                          Value * value) {
470
471        Value * outputOffset = iBuilder->getProducedItemCount(outputBufferName);
472        Value * outputRawPtr = iBuilder->getRawOutputPointer(outputBufferName, outputOffset);
473        iBuilder->CreateStore(value, outputRawPtr);
474        iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, iBuilder->getSize(1)));
475    }
476
477}
Note: See TracBrowser for help on using the repository browser.