source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp @ 5967

Last change on this file since 5967 was 5967, checked in by nmedfort, 13 months ago

Updated LZ4SwizzledMatchCopy + minor changes

File size: 29.4 KB
Line 
1//
2// Created by wxy325 on 2018/3/16.
3//
4
5#include "lz4_index_builder.h"
6
7
8#include <kernels/kernel_builder.h>
9#include <iostream>
10#include <string>
11#include <llvm/Support/raw_ostream.h>
12#include <kernels/streamset.h>
13
14using namespace llvm;
15using namespace kernel;
16using namespace std;
17
18namespace kernel{
19
20    LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
21    : SegmentOrientedKernel("LZ4IndexBuilderKernel",
22    // Inputs
23    {
24           Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
25           Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
26
27           // block data
28           Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1),
29                   AlwaysConsume()},
30           Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1),
31                   AlwaysConsume()},
32           Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1),
33                   AlwaysConsume()}
34
35    },
36    //Outputs
37    {
38           // Uncompressed_data
39           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos",
40                   BoundedRate(0, 1)},
41           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength",
42                   BoundedRate(0, 1)},
43           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos",
44                   BoundedRate(0, 1)},
45
46           Binding{iBuilder->getStreamSetTy(1, 1), "deletionMarker", BoundedRate(0, 1)},
47           Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
48           Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
49           Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)},
50           Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)}
51    },
52    //Arguments
53    {
54           Binding{iBuilder->getSizeTy(), "fileSize"}
55    },
56    {},
57    //Internal states:
58    {
59           Binding{iBuilder->getSizeTy(), "blockDataIndex"},
60           Binding{iBuilder->getInt64Ty(), "m0OutputPos"}
61    }) {
62        this->setStride(4 * 1024 * 1024);
63        addAttribute(MustExplicitlyTerminate());
64    }
65
66    void LZ4IndexBuilderKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
67
68        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
69        BasicBlock* blockEndConBlock = iBuilder->CreateBasicBlock("blockEndConBlock");
70
71        Value * blockDataIndex = iBuilder->getScalarField("blockDataIndex");
72
73        // In MultiblockKernel, availableItemCount + processedItemCount == producedItemCount from previous kernel
74        // While in SegmentOrigentedKernel, availableItemCount == producedItemCount from previous kernel
75        Value * totalNumber = iBuilder->getAvailableItemCount("blockEnd");
76        Value * totalExtender = iBuilder->getAvailableItemCount("extender");
77
78        Value * blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex);
79
80        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock);
81
82        iBuilder->SetInsertPoint(blockEndConBlock);
83        Value * blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex);
84        BasicBlock * processBlock = iBuilder->CreateBasicBlock("processBlock");
85        iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
86
87        iBuilder->SetInsertPoint(processBlock);
88
89        //TODO handle uncompressed block
90
91        this->generateProcessCompressedBlock(iBuilder, blockStart, blockEnd);
92
93        Value * newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));
94        iBuilder->setScalarField("blockDataIndex", newBlockDataIndex);
95        iBuilder->setProcessedItemCount("blockEnd", newBlockDataIndex);
96        iBuilder->setProcessedItemCount("blockStart", newBlockDataIndex);
97        iBuilder->setProcessedItemCount("isCompressed", newBlockDataIndex);
98
99        iBuilder->setProcessedItemCount("byteStream", blockEnd);
100        iBuilder->CreateBr(exitBlock);
101
102        iBuilder->SetInsertPoint(exitBlock);
103    }
104
105    Value* LZ4IndexBuilderKernel::processLiteral(const std::unique_ptr<KernelBuilder> &iBuilder, Value* token, Value* tokenPos, Value* blockEnd) {
106        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
107
108        Value * extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
109
110        BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
111        BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
112
113        iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthBody, extendLiteralLengthExit);
114
115        iBuilder->SetInsertPoint(extendLiteralLengthBody);
116        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), blockEnd);
117        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
118
119        iBuilder->CreateBr(extendLiteralLengthExit);
120
121        iBuilder->SetInsertPoint(extendLiteralLengthExit);
122
123        PHINode* phiCursorPosAfterLiteral = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
124        phiCursorPosAfterLiteral->addIncoming(newCursorPos, advanceFinishBlock);
125        phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock);
126
127        Value * literalExtensionSize = iBuilder->CreateSub(phiCursorPosAfterLiteral, tokenPos);
128        Value * finalLengthByte = this->generateLoadSourceInputByte(iBuilder, phiCursorPosAfterLiteral);
129        finalLengthByte = iBuilder->CreateZExt(finalLengthByte, iBuilder->getInt64Ty());
130        Value * literalLengthExtendValue = iBuilder->CreateSelect(
131                iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)),
132                iBuilder->CreateAdd(
133                        iBuilder->CreateMul(
134                                iBuilder->CreateSub(literalExtensionSize, iBuilder->getSize(1)),
135                                iBuilder->getSize(255)
136                        ),
137                        finalLengthByte
138                ),
139                iBuilder->getSize(0)
140        );
141        literalLengthExtendValue = iBuilder->CreateZExt(literalLengthExtendValue, iBuilder->getInt64Ty());
142        Value* literalLengthBase = iBuilder->CreateLShr(iBuilder->CreateZExt(token, iBuilder->getInt64Ty()), iBuilder->getInt64(4));
143        Value* literalLength = iBuilder->CreateAdd(literalLengthBase, literalLengthExtendValue);
144
145        Value* offsetPos = iBuilder->CreateAdd(
146                iBuilder->CreateAdd(
147                        phiCursorPosAfterLiteral,
148                        literalLength),
149                iBuilder->getSize(1));
150
151        this->setCircularOutputBitstream(iBuilder, "deletionMarker", iBuilder->getProducedItemCount("deletionMarker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)));
152
153        iBuilder->setProducedItemCount("deletionMarker", offsetPos);
154        this->increaseScalarField(iBuilder, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields
155        return offsetPos;
156    }
157
158    Value* LZ4IndexBuilderKernel::processMatch(const std::unique_ptr<KernelBuilder> &iBuilder, Value* offsetPos, Value* token, Value* blockEnd) {
159        Constant* INT64_ONE = iBuilder->getInt64(1);
160
161        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
162
163        Value* matchLengthStartPos = iBuilder->CreateAdd(offsetPos, INT64_ONE);
164        Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
165
166        BasicBlock* extendMatchBodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_body");
167        BasicBlock* extendMatchExitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_exit");
168
169        iBuilder->CreateCondBr(extendedMatchValue, extendMatchBodyBlock, extendMatchExitBlock);
170
171        iBuilder->SetInsertPoint(extendMatchBodyBlock);
172
173        //ExtendMatchBodyBlock
174        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(matchLengthStartPos, INT64_ONE), blockEnd);
175        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
176
177        // ----May be in a different segment now
178        iBuilder->CreateBr(extendMatchExitBlock);
179
180        //ExtendMatchExitBlock
181        iBuilder->SetInsertPoint(extendMatchExitBlock);
182        PHINode* phiCursorPosAfterMatch = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
183        phiCursorPosAfterMatch->addIncoming(newCursorPos, advanceFinishBlock);
184        phiCursorPosAfterMatch->addIncoming(matchLengthStartPos, entryBlock);
185
186        Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, matchLengthStartPos);
187        extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
188        Value* matchExtensionSize = iBuilder->CreateSelect(
189                iBuilder->CreateICmpEQ(extendedMatchValue, iBuilder->getInt1(true)),
190                oldMatchExtensionSize,
191                iBuilder->getSize(0)
192        );
193        Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
194        Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
195
196
197        Value* extensionLastBitPos = iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1));
198        extensionLastBitPos = iBuilder->CreateAdd(extensionLastBitPos, matchExtensionSize);
199
200        Value* extensionLastBitValue = this->generateLoadSourceInputByte(iBuilder, extensionLastBitPos);
201        extensionLastBitValue = iBuilder->CreateZExt(extensionLastBitValue, iBuilder->getSizeTy());
202
203
204        Value* matchLengthAddValue = iBuilder->CreateSelect(
205                iBuilder->CreateICmpUGT(matchExtensionSize, iBuilder->getSize(0)),
206                iBuilder->CreateAdd(
207                        iBuilder->CreateMul(
208                                iBuilder->CreateSub(matchExtensionSize, iBuilder->getSize(1)),
209                                iBuilder->getSize(255)
210                        ),
211                        extensionLastBitValue
212                )
213                ,
214                iBuilder->getSize(0)
215        );
216        matchLengthAddValue = iBuilder->CreateZExt(matchLengthAddValue, iBuilder->getInt64Ty());
217
218        matchLength = iBuilder->CreateAdd(matchLength, matchLengthAddValue);
219
220        Value* outputPos = iBuilder->getScalarField("m0OutputPos");
221
222        Value* outputEndPos = iBuilder->CreateSub(
223                iBuilder->CreateAdd(outputPos, matchLength),
224                iBuilder->getInt64(1)
225        );
226
227        Value* matchOffset = iBuilder->CreateAdd(
228                iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, offsetPos), iBuilder->getSizeTy()),
229                iBuilder->CreateShl(iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1))), iBuilder->getSizeTy()), iBuilder->getSize(8))
230        );
231        this->generateStoreNumberOutput(iBuilder, "m0Start", outputPos);
232        this->generateStoreNumberOutput(iBuilder, "m0End", outputEndPos);
233        this->generateStoreNumberOutput(iBuilder, "matchOffset", matchOffset);
234        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
235        this->setCircularOutputBitstream(iBuilder, "M0Marker", outputPos, outputEndPos);
236
237        return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE);
238    }
239
240    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
241        // Constant
242
243        this->clearCircularOutputBitstream(iBuilder, "deletionMarker", blockStart, blockEnd);
244
245        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
246
247        Value* m0OutputBlockPtr = iBuilder->getOutputStreamBlockPtr("M0Marker", iBuilder->getSize(0));
248        iBuilder->CreateMemSet(m0OutputBlockPtr, iBuilder->getInt8(0), 4 * 1024 * 1024 / 8, true);
249
250
251        Value* isTerminal = iBuilder->CreateICmpEQ(blockEnd, iBuilder->getScalarField("fileSize"));
252        iBuilder->setTerminationSignal(isTerminal);
253
254        //TODO use memset to clear output buffer for extract marker
255
256        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("processCompressedExitBlock");
257
258        BasicBlock* processCon = iBuilder->CreateBasicBlock("processCompressedConBlock");
259        BasicBlock* processBody = iBuilder->CreateBasicBlock("processCompressedBodyBlock");
260
261        iBuilder->CreateBr(processCon);
262        iBuilder->SetInsertPoint(processCon);
263
264        PHINode* phiCursorValue = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3); // phiCursorValue should always be the position of next token except for the final sequence
265        phiCursorValue->addIncoming(blockStart, entryBlock);
266
267        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(phiCursorValue, blockEnd), processBody, exitBlock);
268
269        // Process Body
270        iBuilder->SetInsertPoint(processBody);
271
272        //TODO add acceleration here
273        Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue);
274        // Process Literal
275        BasicBlock* processLiteralBlock = iBuilder->CreateBasicBlock("processLiteralBlock");
276        iBuilder->CreateBr(processLiteralBlock);
277        iBuilder->SetInsertPoint(processLiteralBlock);
278
279        Value* offsetPos = this->processLiteral(iBuilder, token, phiCursorValue, blockEnd);
280        // Process Match
281        BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
282        BasicBlock* handleM0ElseBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_else");
283
284        iBuilder->CreateCondBr(
285                iBuilder->CreateICmpULT(offsetPos, blockEnd),
286                handleM0BodyBlock,
287                handleM0ElseBlock
288        );
289
290        // HandleM0Body
291        iBuilder->SetInsertPoint(handleM0BodyBlock);
292        Value* nextTokenPos = this->processMatch(iBuilder, offsetPos, token, blockEnd);
293        phiCursorValue->addIncoming(nextTokenPos, iBuilder->GetInsertBlock());
294
295        iBuilder->CreateBr(processCon);
296
297
298        // HandleM0Else
299        iBuilder->SetInsertPoint(handleM0ElseBlock);
300
301        phiCursorValue->addIncoming(offsetPos, handleM0ElseBlock);
302        // Store final M0 pos to make sure the bit stream will be long enough
303        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
304        this->generateStoreNumberOutput(iBuilder, "m0Start", finalM0OutputPos);
305        this->generateStoreNumberOutput(iBuilder, "m0End", finalM0OutputPos);
306        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64(0));
307        iBuilder->setProducedItemCount("M0Marker", finalM0OutputPos);
308        // finalM0OutputPos should always be 4MB * n except for the final block
309
310        iBuilder->CreateBr(processCon);
311
312
313        iBuilder->SetInsertPoint(exitBlock);
314    }
315
316    Value * LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value * startPos, Value * maxPos) {
317
318        unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
319        Constant* INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(bitBlockWidth);
320        Type* bitBlockType = iBuilder->getBitBlockType();
321        Type* bitBlockWidthIntTy = iBuilder->getIntNTy(bitBlockWidth);
322
323        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
324
325        BasicBlock* advanceConBlock = iBuilder->CreateBasicBlock("advanceConBlock");
326        BasicBlock* advanceBodyBlock = iBuilder->CreateBasicBlock("advanceBodyBlock");
327        BasicBlock* advanceExitBlock = iBuilder->CreateBasicBlock("advanceExitBlock");
328
329        iBuilder->CreateBr(advanceConBlock);
330        // TODO special handling for the first advance may have better performance
331        iBuilder->SetInsertPoint(advanceConBlock);
332
333        PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
334        phiCurrentPos->addIncoming(startPos, entryBlock);
335        PHINode* phiIsFinish = iBuilder->CreatePHI(iBuilder->getInt1Ty(), 2);
336        phiIsFinish->addIncoming(iBuilder->getInt1(false), entryBlock);
337        iBuilder->CreateCondBr(iBuilder->CreateNot(phiIsFinish), advanceBodyBlock, advanceExitBlock);
338
339        iBuilder->SetInsertPoint(advanceBodyBlock);
340
341        Value * currentBlockGlobalPos = iBuilder->CreateAnd(phiCurrentPos, ConstantExpr::getNeg(INT64_BIT_BLOCK_WIDTH));
342        Value * currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
343
344        Value * ptr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, currentBlockGlobalPos), bitBlockType->getPointerTo());
345
346        Value * currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy);
347        currentBitValue = iBuilder->CreateLShr(currentBitValue, iBuilder->CreateZExt(currentPosBitBlockOffset, bitBlockWidthIntTy));
348        currentBitValue = iBuilder->CreateNot(currentBitValue);
349
350        Value * forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
351        Value * newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
352        newOffset = iBuilder->CreateUMin(newOffset, INT64_BIT_BLOCK_WIDTH);
353
354        Value * actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
355        Value * newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue);
356        if (maxPos) {
357            newPos = iBuilder->CreateUMin(maxPos, newPos);
358            actualAdvanceValue = iBuilder->CreateSub(newPos, phiCurrentPos);
359            newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, actualAdvanceValue);
360        }
361
362        phiIsFinish->addIncoming(iBuilder->CreateICmpNE(newOffset, INT64_BIT_BLOCK_WIDTH), iBuilder->GetInsertBlock());
363        phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock());
364        iBuilder->CreateBr(advanceConBlock);
365
366        iBuilder->SetInsertPoint(advanceExitBlock);
367        return phiCurrentPos;
368    }
369
370    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value * globalOffset) {
371        Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(getStride());
372        Value * processed = iBuilder->getProcessedItemCount(inputBufferName);
373        processed = iBuilder->CreateAnd(processed, ConstantExpr::getNeg(SIZE_STRIDE_SIZE));
374        Value * offset = iBuilder->CreateSub(globalOffset, processed);
375        Value * valuePtr = iBuilder->getRawInputPointer(inputBufferName, offset);
376        return iBuilder->CreateLoad(valuePtr);
377    }
378
379    Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value * offset) {
380        Value * ptr = iBuilder->getRawInputPointer("byteStream", offset);
381        return iBuilder->CreateLoad(ptr);
382    }
383
384    void LZ4IndexBuilderKernel::increaseScalarField(const unique_ptr<KernelBuilder> &iBuilder, const string &fieldName, Value *value) {
385        Value *fieldValue = iBuilder->getScalarField(fieldName);
386        fieldValue = iBuilder->CreateAdd(fieldValue, value);
387        iBuilder->setScalarField(fieldName, fieldValue);
388    }
389
390    void LZ4IndexBuilderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
391                                                          const string & outputBufferName,
392                                                          Value * value) {
393
394        Value * outputOffset = iBuilder->getProducedItemCount(outputBufferName);
395        Value * outputRawPtr = iBuilder->getRawOutputPointer(outputBufferName, outputOffset);
396        iBuilder->CreateStore(value, outputRawPtr);
397        iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, iBuilder->getSize(1)));
398    }
399
400
401    void LZ4IndexBuilderKernel::clearCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
402                                                             const std::string &bitstreamName,
403                                                             llvm::Value *start, llvm::Value *end) {
404        //TODO currently we assume that start/end pos is not in the same byte
405        Value* SIZE_0 = iBuilder->getSize(0);
406        Value* SIZE_8 = iBuilder->getSize(8);
407        Value* INT8_0 = iBuilder->getInt8(0);
408        Type* INT8_PTR_TY = iBuilder->getInt8PtrTy();
409
410        Value* outputBufferBytes = iBuilder->CreateUDiv(iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth()), SIZE_8);
411        Value* rawOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), INT8_PTR_TY);
412
413        Value* startRemain = iBuilder->CreateURem(start, SIZE_8);
414        Value* startBytePos = iBuilder->CreateUDiv(start, SIZE_8);
415        Value* endRemain = iBuilder->CreateURem(end, SIZE_8);
416        Value* endBytePos = iBuilder->CreateUDiv(end, SIZE_8);
417
418        BasicBlock* startByteCpyBlock = iBuilder->CreateBasicBlock("startByteCpyBlock");
419        BasicBlock* endByteCpyConBlock = iBuilder->CreateBasicBlock("endByteCpyConBlock");
420        BasicBlock* endByteCpyBlock = iBuilder->CreateBasicBlock("endByteCpyBlock");
421        BasicBlock* memsetBlock = iBuilder->CreateBasicBlock("memsetBlock");
422
423        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(startRemain, SIZE_0), startByteCpyBlock, endByteCpyConBlock);
424
425        // Clear highest {startShiftAmount} bits
426        iBuilder->SetInsertPoint(startByteCpyBlock);
427        Value* startPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
428        Value* startValue = iBuilder->CreateLoad(startPtr);
429
430        Value* startShiftAmount = iBuilder->CreateSub(SIZE_8, startRemain);
431        startShiftAmount = iBuilder->CreateZExtOrTrunc(startShiftAmount, startValue->getType());
432        startValue = iBuilder->CreateLShr(iBuilder->CreateShl(startValue, startShiftAmount), startShiftAmount);
433
434        iBuilder->CreateStore(startValue, startPtr);
435        iBuilder->CreateBr(endByteCpyConBlock);
436
437        iBuilder->SetInsertPoint(endByteCpyConBlock);
438        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(endBytePos, SIZE_0), endByteCpyBlock, memsetBlock);
439
440        // Clear lowest {endRemain} bits
441        iBuilder->SetInsertPoint(endByteCpyBlock);
442        Value* endPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(endBytePos, outputBufferBytes));
443        Value* endValue = iBuilder->CreateLoad(endPtr);
444        endRemain = iBuilder->CreateZExtOrTrunc(endRemain, endValue->getType());
445        endValue = iBuilder->CreateShl(iBuilder->CreateLShr(endValue, endRemain), endRemain);
446        iBuilder->CreateStore(endValue, endPtr);
447        iBuilder->CreateBr(memsetBlock);
448
449        iBuilder->SetInsertPoint(memsetBlock);
450        Value* memsetStartByte = iBuilder->CreateUDivCeil(start, SIZE_8);
451        Value* memsetEndByte = endBytePos;
452
453        Value* memsetSize = iBuilder->CreateSub(memsetEndByte, memsetStartByte);
454
455        memsetSize = iBuilder->CreateUMin(memsetSize, outputBufferBytes);
456        // We always assume that  (memsetEndByte - memsetStartByte) < outputBufferBytes
457
458        Value* memsetStartByteRem = iBuilder->CreateURem(memsetStartByte, outputBufferBytes);
459
460        Value* memsetSize1 = iBuilder->CreateUMin(iBuilder->CreateSub(outputBufferBytes, memsetStartByteRem), memsetSize);
461        Value* memsetSize2 = iBuilder->CreateSub(memsetSize, memsetSize1);
462
463        iBuilder->CreateMemSet(iBuilder->CreateGEP(rawOutputPtr, memsetStartByteRem), INT8_0, memsetSize1, true);
464        iBuilder->CreateMemSet(rawOutputPtr, INT8_0, memsetSize2, true);
465    }
466
467    void LZ4IndexBuilderKernel::setCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
468                                                             const std::string &bitstreamName,
469                                                             llvm::Value *start, llvm::Value *end) {
470        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
471
472        Value* SIZE_0 = iBuilder->getSize(0);
473        Value* SIZE_1 = iBuilder->getSize(1);
474        Value* SIZE_8 = iBuilder->getSize(8);
475//        Value* INT8_0 = iBuilder->getInt8(0);
476//        Value* INT8_1 = iBuilder->getInt8(1);
477        Type* INT8_PTR_TY = iBuilder->getInt8PtrTy();
478
479        Value* outputBufferBytes = iBuilder->CreateUDiv(iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth()), SIZE_8);
480        Value* rawOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), INT8_PTR_TY);
481
482        Value* startRemain = iBuilder->CreateURem(start, SIZE_8);
483        Value* startBytePos = iBuilder->CreateUDiv(start, SIZE_8);
484        Value* endRemain = iBuilder->CreateURem(end, SIZE_8);
485        Value* endBytePos = iBuilder->CreateUDiv(end, SIZE_8);
486        Value* startShiftAmount = iBuilder->CreateSub(SIZE_8, startRemain);
487
488        BasicBlock* shortSetBlock = iBuilder->CreateBasicBlock("shortSetBlock");
489        BasicBlock* longSetBlock = iBuilder->CreateBasicBlock("longSetBlock");
490
491//        iBuilder->CreateBr(startByteCpyBlock);
492        iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(startBytePos, endBytePos), shortSetBlock, longSetBlock);
493
494        // When startPos and endPos are in the same byte
495        iBuilder->SetInsertPoint(shortSetBlock);
496        Value* targetPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
497        Value* targetValue = iBuilder->CreateLoad(targetPtr);
498        Value* rangeMask = iBuilder->CreateSub(iBuilder->CreateShl(SIZE_1, endRemain), iBuilder->CreateShl(SIZE_1, startRemain));
499        rangeMask = iBuilder->CreateZExtOrTrunc(rangeMask, targetValue->getType());
500        targetValue = iBuilder->CreateOr(rangeMask, targetValue);
501
502//        targetValue = iBuilder->CreateNot(iBuilder->CreateLShr(iBuilder->CreateShl(iBuilder->CreateNot(targetValue), startShiftAmount), startShiftAmount));
503//        targetValue = iBuilder->CreateShl(iBuilder->CreateLShr(targetValue, endRemain), endRemain);
504        iBuilder->CreateStore(targetValue, targetPtr);
505        iBuilder->CreateBr(exitBlock);
506
507        iBuilder->SetInsertPoint(longSetBlock);
508
509        BasicBlock* startByteCpyBlock = iBuilder->CreateBasicBlock("startByteCpyBlock");
510        BasicBlock* endByteCpyConBlock = iBuilder->CreateBasicBlock("endByteCpyConBlock");
511        BasicBlock* endByteCpyBlock = iBuilder->CreateBasicBlock("endByteCpyBlock");
512        BasicBlock* memsetBlock = iBuilder->CreateBasicBlock("memsetBlock");
513
514        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(startRemain, SIZE_0), startByteCpyBlock, endByteCpyConBlock);
515        // Clear highest {startShiftAmount} bits
516        iBuilder->SetInsertPoint(startByteCpyBlock);
517        Value* startPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
518        Value* startValue = iBuilder->CreateLoad(startPtr);
519
520        Value* startShiftAmount2 = iBuilder->CreateZExtOrTrunc(startShiftAmount, startValue->getType());
521        startValue = iBuilder->CreateNot(iBuilder->CreateLShr(iBuilder->CreateShl(iBuilder->CreateNot(startValue), startShiftAmount2), startShiftAmount2));
522
523        iBuilder->CreateStore(startValue, startPtr);
524        iBuilder->CreateBr(endByteCpyConBlock);
525
526        iBuilder->SetInsertPoint(endByteCpyConBlock);
527        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(endBytePos, SIZE_0), endByteCpyBlock, memsetBlock);
528
529        // Clear lowest {endRemain} bits
530        iBuilder->SetInsertPoint(endByteCpyBlock);
531        Value* endPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(endBytePos, outputBufferBytes));
532        Value* endValue = iBuilder->CreateLoad(endPtr);
533        Value* endRemain2 = iBuilder->CreateZExtOrTrunc(endRemain, endValue->getType());
534        endValue = iBuilder->CreateNot(iBuilder->CreateShl(iBuilder->CreateLShr(iBuilder->CreateNot(endValue), endRemain2), endRemain2));
535        iBuilder->CreateStore(endValue, endPtr);
536        iBuilder->CreateBr(memsetBlock);
537
538        iBuilder->SetInsertPoint(memsetBlock);
539        Value* memsetStartByte = iBuilder->CreateUDivCeil(start, SIZE_8);
540        Value* memsetEndByte = endBytePos;
541
542        Value* memsetSize = iBuilder->CreateSub(memsetEndByte, memsetStartByte);
543        // TODO bug here when start end in the same byte
544//        iBuilder->CallPrintInt("memsetEndByte", memsetEndByte);
545//        iBuilder->CallPrintInt("memsetStartByte", memsetStartByte);
546//        iBuilder->CallPrintInt("memsetSize1_1", memsetSize);
547
548
549        memsetSize = iBuilder->CreateUMin(memsetSize, outputBufferBytes);
550
551        // We always assume that  (memsetEndByte - memsetStartByte) < outputBufferBytes
552
553        Value* memsetStartByteRem = iBuilder->CreateURem(memsetStartByte, outputBufferBytes);
554
555        Value* memsetSize1 = iBuilder->CreateUMin(iBuilder->CreateSub(outputBufferBytes, memsetStartByteRem), memsetSize);
556        Value* memsetSize2 = iBuilder->CreateSub(memsetSize, memsetSize1);
557//        iBuilder->CallPrintInt("memset1Ptr", iBuilder->CreateGEP(rawOutputPtr, memsetStartByteRem));
558//        iBuilder->CallPrintInt("memsetSize1", memsetSize1);
559
560//        iBuilder->CallPrintInt("memset2Ptr", rawOutputPtr);
561//        iBuilder->CallPrintInt("memsetSize2", memsetSize2);
562        iBuilder->CreateMemSet(iBuilder->CreateGEP(rawOutputPtr, memsetStartByteRem), iBuilder->getInt8(0xff), memsetSize1, true);
563        iBuilder->CreateMemSet(rawOutputPtr, iBuilder->getInt8(0xff), memsetSize2, true);
564        iBuilder->CreateBr(exitBlock);
565
566        iBuilder->SetInsertPoint(exitBlock);
567    }
568
569}
Note: See TracBrowser for help on using the repository browser.