source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp @ 6020

Last change on this file since 6020 was 6020, checked in by xwa163, 3 months ago
  1. New version of lz4_swizzled_match_copy kernel with higher performance
  2. Adjust related pipeline code
  3. Remove legacy comments
File size: 29.5 KB
Line 
1
2#include "lz4_index_builder.h"
3
4
5#include <kernels/kernel_builder.h>
6#include <iostream>
7#include <string>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/streamset.h>
10
11using namespace llvm;
12using namespace kernel;
13using namespace std;
14
15namespace kernel{
16
17    LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
18    : SegmentOrientedKernel("LZ4IndexBuilderKernel",
19    // Inputs
20    {
21           Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
22           Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
23
24           // block data
25           Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1), AlwaysConsume()},
26           Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", RateEqualTo("isCompressed"), AlwaysConsume()},
27           Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", RateEqualTo("isCompressed"), AlwaysConsume()}
28
29    },
30    //Outputs
31    {
32           // Uncompressed_data
33           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos",
34                   BoundedRate(0, 1)},
35           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength",
36                   BoundedRate(0, 1)},
37           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos",
38                   BoundedRate(0, 1)},
39
40           Binding{iBuilder->getStreamSetTy(1, 1), "deletionMarker", BoundedRate(0, 1)},
41           Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)},
42           Binding{iBuilder->getStreamSetTy(1, 1), "MatchOffsetMarker", RateEqualTo("byteStream")}
43    },
44    //Arguments
45    {
46           Binding{iBuilder->getSizeTy(), "fileSize"}
47    },
48    {},
49    //Internal states:
50    {
51           Binding{iBuilder->getSizeTy(), "blockDataIndex"},
52           Binding{iBuilder->getInt64Ty(), "m0OutputPos"},
53           Binding{iBuilder->getInt64Ty(), "compressedSpaceClearPos"}
54    }) {
55        this->setStride(4 * 1024 * 1024);
56        addAttribute(MustExplicitlyTerminate());
57    }
58
59    void LZ4IndexBuilderKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
60        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
61        BasicBlock* blockEndConBlock = iBuilder->CreateBasicBlock("blockEndConBlock");
62
63        Value * blockDataIndex = iBuilder->getScalarField("blockDataIndex");
64
65        // In MultiblockKernel, availableItemCount + processedItemCount == producedItemCount from previous kernel
66        // While in SegmentOrigentedKernel, availableItemCount == producedItemCount from previous kernel
67        Value * totalNumber = iBuilder->getAvailableItemCount("blockEnd");
68        Value * totalExtender = iBuilder->getAvailableItemCount("extender");
69
70        Value * blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex);
71
72        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock);
73
74        iBuilder->SetInsertPoint(blockEndConBlock);
75        Value * blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex);
76        BasicBlock * processBlock = iBuilder->CreateBasicBlock("processBlock");
77        iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
78
79        iBuilder->SetInsertPoint(processBlock);
80
81        //TODO handle uncompressed block
82
83        this->generateProcessCompressedBlock(iBuilder, blockStart, blockEnd);
84
85        Value * newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));
86        iBuilder->setScalarField("blockDataIndex", newBlockDataIndex);
87        iBuilder->setProcessedItemCount("isCompressed", newBlockDataIndex);
88//        iBuilder->setProcessedItemCount("blockEnd", newBlockDataIndex);
89//        iBuilder->setProcessedItemCount("blockStart", newBlockDataIndex);
90
91        iBuilder->setProcessedItemCount("byteStream", blockEnd);
92        iBuilder->CreateBr(exitBlock);
93
94        iBuilder->SetInsertPoint(exitBlock);
95    }
96
97    Value* LZ4IndexBuilderKernel::processLiteral(const std::unique_ptr<KernelBuilder> &iBuilder, Value* token, Value* tokenPos, Value* blockEnd) {
98//        iBuilder->CallPrintInt("blockEnd", blockEnd);
99        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
100
101        Value * extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
102
103        BasicBlock* extendLiteralLengthCon = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_con");
104        BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
105        BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
106
107        iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthCon, extendLiteralLengthExit);
108
109        iBuilder->SetInsertPoint(extendLiteralLengthCon);
110
111        Value * const nextTokenPos = iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1));
112        Value * const nextToken = iBuilder->CreateLoad(iBuilder->getRawInputPointer("byteStream", nextTokenPos));
113        Value * const isExitToken = iBuilder->CreateICmpNE(nextToken, iBuilder->getInt8(0xff));
114        iBuilder->CreateLikelyCondBr(isExitToken, extendLiteralLengthExit, extendLiteralLengthBody);
115
116
117        iBuilder->SetInsertPoint(extendLiteralLengthBody);
118        Value* newCursorPos2 = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), blockEnd);
119        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
120
121
122        iBuilder->CreateBr(extendLiteralLengthExit);
123
124        iBuilder->SetInsertPoint(extendLiteralLengthExit);
125        PHINode* phiCursorPosAfterLiteral = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
126        phiCursorPosAfterLiteral->addIncoming(nextTokenPos, extendLiteralLengthCon);
127        phiCursorPosAfterLiteral->addIncoming(newCursorPos2, advanceFinishBlock);
128        phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock);
129
130        Value * literalExtensionSize = iBuilder->CreateSub(phiCursorPosAfterLiteral, tokenPos);
131        Value * finalLengthByte = this->generateLoadSourceInputByte(iBuilder, phiCursorPosAfterLiteral);
132        finalLengthByte = iBuilder->CreateZExt(finalLengthByte, iBuilder->getInt64Ty());
133        Value * literalLengthExtendValue = iBuilder->CreateSelect(
134                iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)),
135                iBuilder->CreateAdd(
136                        iBuilder->CreateMul(
137                                iBuilder->CreateSub(literalExtensionSize, iBuilder->getSize(1)),
138                                iBuilder->getSize(255)
139                        ),
140                        finalLengthByte
141                ),
142                iBuilder->getSize(0)
143        );
144        literalLengthExtendValue = iBuilder->CreateZExt(literalLengthExtendValue, iBuilder->getInt64Ty());
145        Value* literalLengthBase = iBuilder->CreateLShr(iBuilder->CreateZExt(token, iBuilder->getInt64Ty()), iBuilder->getInt64(4));
146        Value* literalLength = iBuilder->CreateAdd(literalLengthBase, literalLengthExtendValue);
147
148        Value* offsetPos = iBuilder->CreateAdd(
149                iBuilder->CreateAdd(
150                        phiCursorPosAfterLiteral,
151                        literalLength),
152                iBuilder->getSize(1));
153
154        this->setCircularOutputBitstream(iBuilder, "deletionMarker", iBuilder->getProducedItemCount("deletionMarker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)));
155
156        iBuilder->setProducedItemCount("deletionMarker", offsetPos);
157        this->increaseScalarField(iBuilder, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields
158        return offsetPos;
159    }
160
161    Value* LZ4IndexBuilderKernel::processMatch(const std::unique_ptr<KernelBuilder> &iBuilder, Value* offsetPos, Value* token, Value* blockEnd) {
162        Constant* INT64_ONE = iBuilder->getInt64(1);
163
164        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
165
166        Value* extendMatchStartPos = iBuilder->CreateAdd(offsetPos, INT64_ONE);
167        Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
168
169        BasicBlock* extendMatchBodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_body");
170        BasicBlock* extendMatchExitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_exit");
171
172        iBuilder->CreateCondBr(extendedMatchValue, extendMatchBodyBlock, extendMatchExitBlock);
173
174        iBuilder->SetInsertPoint(extendMatchBodyBlock);
175
176        //ExtendMatchBodyBlock
177        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(extendMatchStartPos, INT64_ONE), blockEnd);
178        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
179
180        iBuilder->CreateBr(extendMatchExitBlock);
181
182        //ExtendMatchExitBlock
183        iBuilder->SetInsertPoint(extendMatchExitBlock);
184        PHINode* phiCursorPosAfterMatch = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
185        phiCursorPosAfterMatch->addIncoming(newCursorPos, advanceFinishBlock);
186        phiCursorPosAfterMatch->addIncoming(extendMatchStartPos, entryBlock);
187
188        Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, extendMatchStartPos);
189//        extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
190        Value* matchExtensionSize = iBuilder->CreateSelect(extendedMatchValue, oldMatchExtensionSize, iBuilder->getSize(0));
191        Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
192        Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
193
194
195        Value* extensionLastBitPos = iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1));
196        extensionLastBitPos = iBuilder->CreateAdd(extensionLastBitPos, matchExtensionSize);
197
198        Value* extensionLastBitValue = this->generateLoadSourceInputByte(iBuilder, extensionLastBitPos);
199        extensionLastBitValue = iBuilder->CreateZExt(extensionLastBitValue, iBuilder->getSizeTy());
200
201
202        Value* matchLengthAddValue = iBuilder->CreateSelect(
203                iBuilder->CreateICmpUGT(matchExtensionSize, iBuilder->getSize(0)),
204                iBuilder->CreateAdd(
205                        iBuilder->CreateMul(
206                                iBuilder->CreateSub(matchExtensionSize, iBuilder->getSize(1)),
207                                iBuilder->getSize(255)
208                        ),
209                        extensionLastBitValue
210                )
211                ,
212                iBuilder->getSize(0)
213        );
214        matchLengthAddValue = iBuilder->CreateZExt(matchLengthAddValue, iBuilder->getInt64Ty());
215
216        matchLength = iBuilder->CreateAdd(matchLength, matchLengthAddValue);
217
218        Value* outputPos = iBuilder->getScalarField("m0OutputPos");
219
220        Value* outputEndPos = iBuilder->CreateSub(
221                iBuilder->CreateAdd(outputPos, matchLength),
222                iBuilder->getInt64(1)
223        );
224
225
226
227        this->markCircularOutputBitstream(iBuilder, "MatchOffsetMarker", offsetPos);
228        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
229        this->setCircularOutputBitstream(iBuilder, "M0Marker", outputPos, outputEndPos);
230
231        return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE);
232    }
233
234    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
235        Value* clearPos = iBuilder->getScalarField("compressedSpaceClearPos");
236        // We can not only clear [blockStart, blockEnd), since there are 4 bytes between blockEnd and nextBlockStart
237        this->clearCircularOutputBitstream(iBuilder, "deletionMarker", clearPos, blockEnd);
238        this->clearCircularOutputBitstream(iBuilder, "MatchOffsetMarker", clearPos, blockEnd);
239        iBuilder->setScalarField("compressedSpaceClearPos", blockEnd);
240
241        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
242
243        Value* m0OutputBlockPtr = iBuilder->getOutputStreamBlockPtr("M0Marker", iBuilder->getSize(0));
244        iBuilder->CreateMemSet(m0OutputBlockPtr, iBuilder->getInt8(0), 4 * 1024 * 1024 / 8, true);
245
246
247        Value* isTerminal = iBuilder->CreateICmpEQ(blockEnd, iBuilder->getScalarField("fileSize"));
248        iBuilder->setTerminationSignal(isTerminal);
249
250        //TODO use memset to clear output buffer for extract marker
251
252        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("processCompressedExitBlock");
253
254        BasicBlock* processCon = iBuilder->CreateBasicBlock("processCompressedConBlock");
255        BasicBlock* processBody = iBuilder->CreateBasicBlock("processCompressedBodyBlock");
256
257        iBuilder->CreateBr(processCon);
258        iBuilder->SetInsertPoint(processCon);
259
260        PHINode* phiCursorValue = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3); // phiCursorValue should always be the position of next token except for the final sequence
261        phiCursorValue->addIncoming(blockStart, entryBlock);
262
263        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(phiCursorValue, blockEnd), processBody, exitBlock);
264
265        // Process Body
266        iBuilder->SetInsertPoint(processBody);
267
268        //TODO add acceleration here
269        Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue);
270        // Process Literal
271        BasicBlock* processLiteralBlock = iBuilder->CreateBasicBlock("processLiteralBlock");
272        iBuilder->CreateBr(processLiteralBlock);
273        iBuilder->SetInsertPoint(processLiteralBlock);
274
275        Value* offsetPos = this->processLiteral(iBuilder, token, phiCursorValue, blockEnd);
276        // Process Match
277        BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
278        BasicBlock* handleM0ElseBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_else");
279
280        iBuilder->CreateCondBr(
281                iBuilder->CreateICmpULT(offsetPos, blockEnd),
282                handleM0BodyBlock,
283                handleM0ElseBlock
284        );
285
286        // HandleM0Body
287        iBuilder->SetInsertPoint(handleM0BodyBlock);
288        Value* nextTokenPos = this->processMatch(iBuilder, offsetPos, token, blockEnd);
289        phiCursorValue->addIncoming(nextTokenPos, iBuilder->GetInsertBlock());
290
291        iBuilder->CreateBr(processCon);
292
293
294        // HandleM0Else
295        iBuilder->SetInsertPoint(handleM0ElseBlock);
296
297        phiCursorValue->addIncoming(offsetPos, handleM0ElseBlock);
298        // Store final M0 pos to make sure the bit stream will be long enough
299        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
300        iBuilder->setProducedItemCount("M0Marker", finalM0OutputPos);
301        // finalM0OutputPos should always be 4MB * n except for the final block
302
303        iBuilder->CreateBr(processCon);
304
305
306        iBuilder->SetInsertPoint(exitBlock);
307    }
308
309    Value * LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value * startPos, Value * maxPos) {
310
311        Constant* SIZE_64 = iBuilder->getSize(64);
312
313        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
314
315        BasicBlock* advanceConBlock = iBuilder->CreateBasicBlock("advanceConBlock");
316        BasicBlock* advanceBodyBlock = iBuilder->CreateBasicBlock("advanceBodyBlock");
317        BasicBlock* advanceExitBlock = iBuilder->CreateBasicBlock("advanceExitBlock");
318
319        iBuilder->CreateBr(advanceConBlock);
320        // TODO special handling for the first advance may have better performance
321        iBuilder->SetInsertPoint(advanceConBlock);
322
323        PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
324        phiCurrentPos->addIncoming(startPos, entryBlock);
325        PHINode* phiIsFinish = iBuilder->CreatePHI(iBuilder->getInt1Ty(), 2);
326        phiIsFinish->addIncoming(iBuilder->getInt1(false), entryBlock);
327        iBuilder->CreateCondBr(iBuilder->CreateNot(phiIsFinish), advanceBodyBlock, advanceExitBlock);
328
329        iBuilder->SetInsertPoint(advanceBodyBlock);
330
331        Value * currentBlockGlobalPos = iBuilder->CreateUDiv(phiCurrentPos, SIZE_64);
332        Value * currentBlockLocalPos = iBuilder->CreateURem(currentBlockGlobalPos, iBuilder->getSize(this->getAnyStreamSetBuffer(inputName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 64));
333        Value * currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, SIZE_64);
334
335        Value * ptr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
336        Value * currentBitValue = iBuilder->CreateLoad(iBuilder->CreateGEP(ptr, currentBlockLocalPos));
337
338        currentBitValue = iBuilder->CreateLShr(currentBitValue, currentPosBitBlockOffset);
339        currentBitValue = iBuilder->CreateNot(currentBitValue);
340
341        Value * forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
342        Value * newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
343        newOffset = iBuilder->CreateUMin(newOffset, iBuilder->getSize(64));
344
345        Value * actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
346        Value * newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue);
347        if (maxPos) {
348            newPos = iBuilder->CreateUMin(maxPos, newPos);
349            actualAdvanceValue = iBuilder->CreateSub(newPos, phiCurrentPos);
350            newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, actualAdvanceValue);
351        }
352
353        phiIsFinish->addIncoming(iBuilder->CreateICmpNE(newOffset, iBuilder->getSize(64)), iBuilder->GetInsertBlock());
354        phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock());
355        iBuilder->CreateBr(advanceConBlock);
356
357        iBuilder->SetInsertPoint(advanceExitBlock);
358        return phiCurrentPos;
359    }
360
361    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value * globalOffset) {
362//        Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(getStride());
363        Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(this->getInputStreamSetBuffer(inputBufferName)->getBufferBlocks() * iBuilder->getBitBlockWidth());
364        Value * processed = iBuilder->getProcessedItemCount(inputBufferName);
365        processed = iBuilder->CreateAnd(processed, ConstantExpr::getNeg(SIZE_STRIDE_SIZE));
366        Value * offset = iBuilder->CreateSub(globalOffset, processed);
367        Value * valuePtr = iBuilder->getRawInputPointer(inputBufferName, offset);
368        return iBuilder->CreateLoad(valuePtr);
369    }
370
371    Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value * offset) {
372        Value * ptr = iBuilder->getRawInputPointer("byteStream", offset);
373        return iBuilder->CreateLoad(ptr);
374    }
375
376    void LZ4IndexBuilderKernel::increaseScalarField(const unique_ptr<KernelBuilder> &iBuilder, const string &fieldName, Value *value) {
377        Value *fieldValue = iBuilder->getScalarField(fieldName);
378        fieldValue = iBuilder->CreateAdd(fieldValue, value);
379        iBuilder->setScalarField(fieldName, fieldValue);
380    }
381
382
383    void LZ4IndexBuilderKernel::clearCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
384                                                             const std::string &bitstreamName,
385                                                             llvm::Value *start, llvm::Value *end) {
386        //TODO currently we assume that start/end pos is not in the same byte because of the requirement of the LZ4 format
387        Value* SIZE_0 = iBuilder->getSize(0);
388        Value* SIZE_8 = iBuilder->getSize(8);
389        Value* INT8_0 = iBuilder->getInt8(0);
390        Type* INT8_PTR_TY = iBuilder->getInt8PtrTy();
391
392        Value* outputBufferBytes = iBuilder->CreateUDiv(iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth()), SIZE_8);
393        Value* rawOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), INT8_PTR_TY);
394
395        Value* startRemain = iBuilder->CreateURem(start, SIZE_8);
396        Value* startBytePos = iBuilder->CreateUDiv(start, SIZE_8);
397        Value* endRemain = iBuilder->CreateURem(end, SIZE_8);
398        Value* endBytePos = iBuilder->CreateUDiv(end, SIZE_8);
399
400        BasicBlock* startByteCpyBlock = iBuilder->CreateBasicBlock("startByteCpyBlock");
401        BasicBlock* endByteCpyConBlock = iBuilder->CreateBasicBlock("endByteCpyConBlock");
402        BasicBlock* endByteCpyBlock = iBuilder->CreateBasicBlock("endByteCpyBlock");
403        BasicBlock* memsetBlock = iBuilder->CreateBasicBlock("memsetBlock");
404
405        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(startRemain, SIZE_0), startByteCpyBlock, endByteCpyConBlock);
406
407        // Clear highest {startShiftAmount} bits
408        iBuilder->SetInsertPoint(startByteCpyBlock);
409        Value* startPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
410        Value* startValue = iBuilder->CreateLoad(startPtr);
411
412        Value* startShiftAmount = iBuilder->CreateSub(SIZE_8, startRemain);
413        startShiftAmount = iBuilder->CreateZExtOrTrunc(startShiftAmount, startValue->getType());
414        startValue = iBuilder->CreateLShr(iBuilder->CreateShl(startValue, startShiftAmount), startShiftAmount);
415
416        iBuilder->CreateStore(startValue, startPtr);
417        iBuilder->CreateBr(endByteCpyConBlock);
418
419        iBuilder->SetInsertPoint(endByteCpyConBlock);
420        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(endBytePos, SIZE_0), endByteCpyBlock, memsetBlock);
421
422        // Clear lowest {endRemain} bits
423        iBuilder->SetInsertPoint(endByteCpyBlock);
424        Value* endPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(endBytePos, outputBufferBytes));
425        Value* endValue = iBuilder->CreateLoad(endPtr);
426        endRemain = iBuilder->CreateZExtOrTrunc(endRemain, endValue->getType());
427        endValue = iBuilder->CreateShl(iBuilder->CreateLShr(endValue, endRemain), endRemain);
428        iBuilder->CreateStore(endValue, endPtr);
429        iBuilder->CreateBr(memsetBlock);
430
431        iBuilder->SetInsertPoint(memsetBlock);
432        Value* memsetStartByte = iBuilder->CreateUDivCeil(start, SIZE_8);
433        Value* memsetEndByte = endBytePos;
434
435        Value* memsetSize = iBuilder->CreateSub(memsetEndByte, memsetStartByte);
436
437        memsetSize = iBuilder->CreateUMin(memsetSize, outputBufferBytes);
438        // We always assume that  (memsetEndByte - memsetStartByte) < outputBufferBytes
439
440        Value* memsetStartByteRem = iBuilder->CreateURem(memsetStartByte, outputBufferBytes);
441
442        Value* memsetSize1 = iBuilder->CreateUMin(iBuilder->CreateSub(outputBufferBytes, memsetStartByteRem), memsetSize);
443        Value* memsetSize2 = iBuilder->CreateSub(memsetSize, memsetSize1);
444
445        iBuilder->CreateMemSet(iBuilder->CreateGEP(rawOutputPtr, memsetStartByteRem), INT8_0, memsetSize1, true);
446        iBuilder->CreateMemSet(rawOutputPtr, INT8_0, memsetSize2, true);
447    }
448
449    void LZ4IndexBuilderKernel::setCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
450                                                             const std::string &bitstreamName,
451                                                             llvm::Value *start, llvm::Value *end) {
452        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
453
454        Value* SIZE_0 = iBuilder->getSize(0);
455        Value* SIZE_1 = iBuilder->getSize(1);
456        Value* SIZE_8 = iBuilder->getSize(8);
457//        Value* INT8_0 = iBuilder->getInt8(0);
458//        Value* INT8_1 = iBuilder->getInt8(1);
459        Type* INT8_PTR_TY = iBuilder->getInt8PtrTy();
460
461        Value* outputBufferBytes = iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 8);
462        Value* rawOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), INT8_PTR_TY);
463
464        Value* startRemain = iBuilder->CreateURem(start, SIZE_8);
465        Value* startBytePos = iBuilder->CreateUDiv(start, SIZE_8);
466        Value* endRemain = iBuilder->CreateURem(end, SIZE_8);
467        Value* endBytePos = iBuilder->CreateUDiv(end, SIZE_8);
468        Value* startShiftAmount = iBuilder->CreateSub(SIZE_8, startRemain);
469
470        BasicBlock* shortSetBlock = iBuilder->CreateBasicBlock("shortSetBlock");
471        BasicBlock* longSetBlock = iBuilder->CreateBasicBlock("longSetBlock");
472
473//        iBuilder->CreateBr(startByteCpyBlock);
474        iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(startBytePos, endBytePos), shortSetBlock, longSetBlock);
475
476        // When startPos and endPos are in the same byte
477        iBuilder->SetInsertPoint(shortSetBlock);
478        Value* targetPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
479        Value* targetValue = iBuilder->CreateLoad(targetPtr);
480        Value* rangeMask = iBuilder->CreateSub(iBuilder->CreateShl(SIZE_1, endRemain), iBuilder->CreateShl(SIZE_1, startRemain));
481        rangeMask = iBuilder->CreateZExtOrTrunc(rangeMask, targetValue->getType());
482        targetValue = iBuilder->CreateOr(rangeMask, targetValue);
483
484//        targetValue = iBuilder->CreateNot(iBuilder->CreateLShr(iBuilder->CreateShl(iBuilder->CreateNot(targetValue), startShiftAmount), startShiftAmount));
485//        targetValue = iBuilder->CreateShl(iBuilder->CreateLShr(targetValue, endRemain), endRemain);
486        iBuilder->CreateStore(targetValue, targetPtr);
487        iBuilder->CreateBr(exitBlock);
488
489        iBuilder->SetInsertPoint(longSetBlock);
490
491        BasicBlock* startByteCpyBlock = iBuilder->CreateBasicBlock("startByteCpyBlock");
492        BasicBlock* endByteCpyConBlock = iBuilder->CreateBasicBlock("endByteCpyConBlock");
493        BasicBlock* endByteCpyBlock = iBuilder->CreateBasicBlock("endByteCpyBlock");
494        BasicBlock* memsetBlock = iBuilder->CreateBasicBlock("memsetBlock");
495
496        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(startRemain, SIZE_0), startByteCpyBlock, endByteCpyConBlock);
497        // Clear highest {startShiftAmount} bits
498        iBuilder->SetInsertPoint(startByteCpyBlock);
499        Value* startPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
500        Value* startValue = iBuilder->CreateLoad(startPtr);
501
502        Value* startShiftAmount2 = iBuilder->CreateZExtOrTrunc(startShiftAmount, startValue->getType());
503        startValue = iBuilder->CreateNot(iBuilder->CreateLShr(iBuilder->CreateShl(iBuilder->CreateNot(startValue), startShiftAmount2), startShiftAmount2));
504
505        iBuilder->CreateStore(startValue, startPtr);
506        iBuilder->CreateBr(endByteCpyConBlock);
507
508        iBuilder->SetInsertPoint(endByteCpyConBlock);
509        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(endBytePos, SIZE_0), endByteCpyBlock, memsetBlock);
510
511        // Clear lowest {endRemain} bits
512        iBuilder->SetInsertPoint(endByteCpyBlock);
513        Value* endPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(endBytePos, outputBufferBytes));
514        Value* endValue = iBuilder->CreateLoad(endPtr);
515        Value* endRemain2 = iBuilder->CreateZExtOrTrunc(endRemain, endValue->getType());
516        endValue = iBuilder->CreateNot(iBuilder->CreateShl(iBuilder->CreateLShr(iBuilder->CreateNot(endValue), endRemain2), endRemain2));
517        iBuilder->CreateStore(endValue, endPtr);
518        iBuilder->CreateBr(memsetBlock);
519
520        iBuilder->SetInsertPoint(memsetBlock);
521        Value* memsetStartByte = iBuilder->CreateUDivCeil(start, SIZE_8);
522        Value* memsetEndByte = endBytePos;
523
524        Value* memsetSize = iBuilder->CreateSub(memsetEndByte, memsetStartByte);
525
526        memsetSize = iBuilder->CreateUMin(memsetSize, outputBufferBytes);
527
528        // We always assume that  (memsetEndByte - memsetStartByte) < outputBufferBytes
529
530        Value* memsetStartByteRem = iBuilder->CreateURem(memsetStartByte, outputBufferBytes);
531
532        Value* memsetSize1 = iBuilder->CreateUMin(iBuilder->CreateSub(outputBufferBytes, memsetStartByteRem), memsetSize);
533        Value* memsetSize2 = iBuilder->CreateSub(memsetSize, memsetSize1);
534
535        iBuilder->CreateMemSet(iBuilder->CreateGEP(rawOutputPtr, memsetStartByteRem), iBuilder->getInt8(0xff), memsetSize1, true);
536        iBuilder->CreateMemSet(rawOutputPtr, iBuilder->getInt8(0xff), memsetSize2, true);
537        iBuilder->CreateBr(exitBlock);
538
539        iBuilder->SetInsertPoint(exitBlock);
540    }
541
542    void LZ4IndexBuilderKernel::markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder, const string &bitstreamName, Value *pos) {
543        Value* SIZE_0 = iBuilder->getSize(0);
544        Value* SIZE_8 = iBuilder->getSize(8);
545        Value* INT8_1 = iBuilder->getInt8(1);
546        Type* bytePtrType = iBuilder->getInt8PtrTy();
547
548        Value* outputBufferBytes = iBuilder->getSize(this->getOutputStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 8);
549
550        Value* bytePos = iBuilder->CreateUDiv(pos, SIZE_8);
551        bytePos = iBuilder->CreateURem(bytePos, outputBufferBytes);
552        Value* byteOffset = iBuilder->CreateTrunc(iBuilder->CreateURem(pos, SIZE_8), iBuilder->getInt8Ty());
553
554        Value* outputRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), bytePtrType);
555        Value* outputTargetPtr = iBuilder->CreateGEP(outputRawPtr, bytePos);
556
557        Value* targetValue = iBuilder->CreateLoad(outputTargetPtr);
558        targetValue = iBuilder->CreateOr(targetValue, iBuilder->CreateShl(INT8_1, byteOffset));
559        iBuilder->CreateStore(targetValue, outputTargetPtr);
560    }
561
562}
Note: See TracBrowser for help on using the repository browser.