source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp @ 5985

Last change on this file since 5985 was 5985, checked in by nmedfort, 4 months ago

Restructured MultiBlock kernel. Removal of Swizzled buffers. Inclusion of PopCount rates / non-linear access. Modifications to several kernels to better align them with the kernel and pipeline changes.

File size: 30.5 KB
Line 
1//
2// Created by wxy325 on 2018/3/16.
3//
4
5#include "lz4_index_builder.h"
6
7
8#include <kernels/kernel_builder.h>
9#include <iostream>
10#include <string>
11#include <llvm/Support/raw_ostream.h>
12#include <kernels/streamset.h>
13
14using namespace llvm;
15using namespace kernel;
16using namespace std;
17
18namespace kernel{
19
20    LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
21    : SegmentOrientedKernel("LZ4IndexBuilderKernel",
22    // Inputs
23    {
24           Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
25           Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
26
27           // block data
28           Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1), AlwaysConsume()},
29           Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", RateEqualTo("isCompressed"), AlwaysConsume()},
30           Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", RateEqualTo("isCompressed"), AlwaysConsume()}
31
32    },
33    //Outputs
34    {
35           // Uncompressed_data
36           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos",
37                   BoundedRate(0, 1)},
38           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength",
39                   BoundedRate(0, 1)},
40           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos",
41                   BoundedRate(0, 1)},
42
43           Binding{iBuilder->getStreamSetTy(1, 1), "deletionMarker", BoundedRate(0, 1)},
44           Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)},
45           Binding{iBuilder->getStreamSetTy(1, 1), "M0CountMarker", BoundedRate(0, 1)},
46           Binding{iBuilder->getStreamSetTy(1, 1), "MatchOffsetMarker", RateEqualTo("byteStream")}
47    },
48    //Arguments
49    {
50           Binding{iBuilder->getSizeTy(), "fileSize"}
51    },
52    {},
53    //Internal states:
54    {
55           Binding{iBuilder->getSizeTy(), "blockDataIndex"},
56           Binding{iBuilder->getInt64Ty(), "m0OutputPos"},
57           Binding{iBuilder->getInt64Ty(), "compressedSpaceClearPos"}
58    }) {
59        this->setStride(4 * 1024 * 1024);
60        addAttribute(MustExplicitlyTerminate());
61    }
62
63    void LZ4IndexBuilderKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
64//        iBuilder->CallPrintInt("IndexBuilder:entry", iBuilder->getSize(0));
65
66        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
67        BasicBlock* blockEndConBlock = iBuilder->CreateBasicBlock("blockEndConBlock");
68
69        Value * blockDataIndex = iBuilder->getScalarField("blockDataIndex");
70
71        // In MultiblockKernel, availableItemCount + processedItemCount == producedItemCount from previous kernel
72        // While in SegmentOrigentedKernel, availableItemCount == producedItemCount from previous kernel
73        Value * totalNumber = iBuilder->getAvailableItemCount("blockEnd");
74        Value * totalExtender = iBuilder->getAvailableItemCount("extender");
75
76        Value * blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex);
77
78        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock);
79
80        iBuilder->SetInsertPoint(blockEndConBlock);
81        Value * blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex);
82        BasicBlock * processBlock = iBuilder->CreateBasicBlock("processBlock");
83        iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
84
85        iBuilder->SetInsertPoint(processBlock);
86
87        //TODO handle uncompressed block
88
89        this->generateProcessCompressedBlock(iBuilder, blockStart, blockEnd);
90
91        Value * newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));
92        iBuilder->setScalarField("blockDataIndex", newBlockDataIndex);
93        iBuilder->setProcessedItemCount("isCompressed", newBlockDataIndex);
94//        iBuilder->setProcessedItemCount("blockEnd", newBlockDataIndex);
95//        iBuilder->setProcessedItemCount("blockStart", newBlockDataIndex);
96
97        iBuilder->setProcessedItemCount("byteStream", blockEnd);
98        iBuilder->CreateBr(exitBlock);
99
100        iBuilder->SetInsertPoint(exitBlock);
101    }
102
103    Value* LZ4IndexBuilderKernel::processLiteral(const std::unique_ptr<KernelBuilder> &iBuilder, Value* token, Value* tokenPos, Value* blockEnd) {
104//        iBuilder->CallPrintInt("blockEnd", blockEnd);
105        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
106
107        Value * extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
108
109        BasicBlock* extendLiteralLengthCon = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_con");
110        BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
111        BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
112
113        iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthCon, extendLiteralLengthExit);
114
115        iBuilder->SetInsertPoint(extendLiteralLengthCon);
116
117        Value * const nextTokenPos = iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1));
118        Value * const nextToken = iBuilder->CreateLoad(iBuilder->getRawInputPointer("byteStream", nextTokenPos));
119        Value * const isExitToken = iBuilder->CreateICmpNE(nextToken, iBuilder->getInt8(0xff));
120        iBuilder->CreateLikelyCondBr(isExitToken, extendLiteralLengthExit, extendLiteralLengthBody);
121
122
123        iBuilder->SetInsertPoint(extendLiteralLengthBody);
124        Value* newCursorPos2 = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), blockEnd);
125        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
126
127
128        iBuilder->CreateBr(extendLiteralLengthExit);
129
130        iBuilder->SetInsertPoint(extendLiteralLengthExit);
131        PHINode* phiCursorPosAfterLiteral = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
132        phiCursorPosAfterLiteral->addIncoming(nextTokenPos, extendLiteralLengthCon);
133        phiCursorPosAfterLiteral->addIncoming(newCursorPos2, advanceFinishBlock);
134        phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock);
135
136        Value * literalExtensionSize = iBuilder->CreateSub(phiCursorPosAfterLiteral, tokenPos);
137        Value * finalLengthByte = this->generateLoadSourceInputByte(iBuilder, phiCursorPosAfterLiteral);
138        finalLengthByte = iBuilder->CreateZExt(finalLengthByte, iBuilder->getInt64Ty());
139        Value * literalLengthExtendValue = iBuilder->CreateSelect(
140                iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)),
141                iBuilder->CreateAdd(
142                        iBuilder->CreateMul(
143                                iBuilder->CreateSub(literalExtensionSize, iBuilder->getSize(1)),
144                                iBuilder->getSize(255)
145                        ),
146                        finalLengthByte
147                ),
148                iBuilder->getSize(0)
149        );
150        literalLengthExtendValue = iBuilder->CreateZExt(literalLengthExtendValue, iBuilder->getInt64Ty());
151        Value* literalLengthBase = iBuilder->CreateLShr(iBuilder->CreateZExt(token, iBuilder->getInt64Ty()), iBuilder->getInt64(4));
152        Value* literalLength = iBuilder->CreateAdd(literalLengthBase, literalLengthExtendValue);
153
154        Value* offsetPos = iBuilder->CreateAdd(
155                iBuilder->CreateAdd(
156                        phiCursorPosAfterLiteral,
157                        literalLength),
158                iBuilder->getSize(1));
159
160        this->setCircularOutputBitstream(iBuilder, "deletionMarker", iBuilder->getProducedItemCount("deletionMarker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)));
161
162        iBuilder->setProducedItemCount("deletionMarker", offsetPos);
163        this->increaseScalarField(iBuilder, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields
164        return offsetPos;
165    }
166
167    Value* LZ4IndexBuilderKernel::processMatch(const std::unique_ptr<KernelBuilder> &iBuilder, Value* offsetPos, Value* token, Value* blockEnd) {
168        Constant* INT64_ONE = iBuilder->getInt64(1);
169
170        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
171
172        Value* matchLengthStartPos = iBuilder->CreateAdd(offsetPos, INT64_ONE);
173        Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
174
175        BasicBlock* extendMatchBodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_body");
176        BasicBlock* extendMatchExitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_exit");
177
178        iBuilder->CreateCondBr(extendedMatchValue, extendMatchBodyBlock, extendMatchExitBlock);
179
180        iBuilder->SetInsertPoint(extendMatchBodyBlock);
181
182        //ExtendMatchBodyBlock
183        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(matchLengthStartPos, INT64_ONE), blockEnd);
184        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
185
186        // ----May be in a different segment now
187        iBuilder->CreateBr(extendMatchExitBlock);
188
189        //ExtendMatchExitBlock
190        iBuilder->SetInsertPoint(extendMatchExitBlock);
191        PHINode* phiCursorPosAfterMatch = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
192        phiCursorPosAfterMatch->addIncoming(newCursorPos, advanceFinishBlock);
193        phiCursorPosAfterMatch->addIncoming(matchLengthStartPos, entryBlock);
194
195        Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, matchLengthStartPos);
196        extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
197        Value* matchExtensionSize = iBuilder->CreateSelect(
198                iBuilder->CreateICmpEQ(extendedMatchValue, iBuilder->getInt1(true)),
199                oldMatchExtensionSize,
200                iBuilder->getSize(0)
201        );
202        Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
203        Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
204
205
206        Value* extensionLastBitPos = iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1));
207        extensionLastBitPos = iBuilder->CreateAdd(extensionLastBitPos, matchExtensionSize);
208
209        Value* extensionLastBitValue = this->generateLoadSourceInputByte(iBuilder, extensionLastBitPos);
210        extensionLastBitValue = iBuilder->CreateZExt(extensionLastBitValue, iBuilder->getSizeTy());
211
212
213        Value* matchLengthAddValue = iBuilder->CreateSelect(
214                iBuilder->CreateICmpUGT(matchExtensionSize, iBuilder->getSize(0)),
215                iBuilder->CreateAdd(
216                        iBuilder->CreateMul(
217                                iBuilder->CreateSub(matchExtensionSize, iBuilder->getSize(1)),
218                                iBuilder->getSize(255)
219                        ),
220                        extensionLastBitValue
221                )
222                ,
223                iBuilder->getSize(0)
224        );
225        matchLengthAddValue = iBuilder->CreateZExt(matchLengthAddValue, iBuilder->getInt64Ty());
226
227        matchLength = iBuilder->CreateAdd(matchLength, matchLengthAddValue);
228
229        Value* outputPos = iBuilder->getScalarField("m0OutputPos");
230
231        Value* outputEndPos = iBuilder->CreateSub(
232                iBuilder->CreateAdd(outputPos, matchLength),
233                iBuilder->getInt64(1)
234        );
235
236
237        iBuilder->setProducedItemCount("M0CountMarker", iBuilder->CreateAdd(iBuilder->getProducedItemCount("M0CountMarker"), iBuilder->getSize(1)));
238        this->markCircularOutputBitstream(iBuilder, "MatchOffsetMarker", offsetPos);
239        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
240        this->setCircularOutputBitstream(iBuilder, "M0Marker", outputPos, outputEndPos);
241
242        return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE);
243    }
244
245    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
246        // Constant
247        Value* clearPos = iBuilder->getScalarField("compressedSpaceClearPos");
248        // We can not only clear [blockStart, blockEnd), since there are 4 bytes between blockEnd and nextBlockStart
249        this->clearCircularOutputBitstream(iBuilder, "deletionMarker", clearPos, blockEnd);
250        this->clearCircularOutputBitstream(iBuilder, "MatchOffsetMarker", clearPos, blockEnd);
251        iBuilder->setScalarField("compressedSpaceClearPos", blockEnd);
252
253        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
254
255        Value* m0OutputBlockPtr = iBuilder->getOutputStreamBlockPtr("M0Marker", iBuilder->getSize(0));
256        iBuilder->CreateMemSet(m0OutputBlockPtr, iBuilder->getInt8(0), 4 * 1024 * 1024 / 8, true);
257
258
259        Value* isTerminal = iBuilder->CreateICmpEQ(blockEnd, iBuilder->getScalarField("fileSize"));
260        iBuilder->setTerminationSignal(isTerminal);
261
262        //TODO use memset to clear output buffer for extract marker
263
264        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("processCompressedExitBlock");
265
266        BasicBlock* processCon = iBuilder->CreateBasicBlock("processCompressedConBlock");
267        BasicBlock* processBody = iBuilder->CreateBasicBlock("processCompressedBodyBlock");
268
269        iBuilder->CreateBr(processCon);
270        iBuilder->SetInsertPoint(processCon);
271
272        PHINode* phiCursorValue = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3); // phiCursorValue should always be the position of next token except for the final sequence
273        phiCursorValue->addIncoming(blockStart, entryBlock);
274
275        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(phiCursorValue, blockEnd), processBody, exitBlock);
276
277        // Process Body
278        iBuilder->SetInsertPoint(processBody);
279
280        //TODO add acceleration here
281        Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue);
282        // Process Literal
283        BasicBlock* processLiteralBlock = iBuilder->CreateBasicBlock("processLiteralBlock");
284        iBuilder->CreateBr(processLiteralBlock);
285        iBuilder->SetInsertPoint(processLiteralBlock);
286
287        Value* offsetPos = this->processLiteral(iBuilder, token, phiCursorValue, blockEnd);
288        // Process Match
289        BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
290        BasicBlock* handleM0ElseBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_else");
291
292        iBuilder->CreateCondBr(
293                iBuilder->CreateICmpULT(offsetPos, blockEnd),
294                handleM0BodyBlock,
295                handleM0ElseBlock
296        );
297
298        // HandleM0Body
299        iBuilder->SetInsertPoint(handleM0BodyBlock);
300        Value* nextTokenPos = this->processMatch(iBuilder, offsetPos, token, blockEnd);
301        phiCursorValue->addIncoming(nextTokenPos, iBuilder->GetInsertBlock());
302
303        iBuilder->CreateBr(processCon);
304
305
306        // HandleM0Else
307        iBuilder->SetInsertPoint(handleM0ElseBlock);
308
309        phiCursorValue->addIncoming(offsetPos, handleM0ElseBlock);
310        // Store final M0 pos to make sure the bit stream will be long enough
311        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
312        iBuilder->setProducedItemCount("M0Marker", finalM0OutputPos);
313        // finalM0OutputPos should always be 4MB * n except for the final block
314
315        iBuilder->CreateBr(processCon);
316
317
318        iBuilder->SetInsertPoint(exitBlock);
319    }
320
321    Value * LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value * startPos, Value * maxPos) {
322
323        Constant* SIZE_64 = iBuilder->getSize(64);
324
325        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
326
327        BasicBlock* advanceConBlock = iBuilder->CreateBasicBlock("advanceConBlock");
328        BasicBlock* advanceBodyBlock = iBuilder->CreateBasicBlock("advanceBodyBlock");
329        BasicBlock* advanceExitBlock = iBuilder->CreateBasicBlock("advanceExitBlock");
330
331        iBuilder->CreateBr(advanceConBlock);
332        // TODO special handling for the first advance may have better performance
333        iBuilder->SetInsertPoint(advanceConBlock);
334
335        PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
336        phiCurrentPos->addIncoming(startPos, entryBlock);
337        PHINode* phiIsFinish = iBuilder->CreatePHI(iBuilder->getInt1Ty(), 2);
338        phiIsFinish->addIncoming(iBuilder->getInt1(false), entryBlock);
339        iBuilder->CreateCondBr(iBuilder->CreateNot(phiIsFinish), advanceBodyBlock, advanceExitBlock);
340
341        iBuilder->SetInsertPoint(advanceBodyBlock);
342
343        Value * currentBlockGlobalPos = iBuilder->CreateUDiv(phiCurrentPos, SIZE_64);
344        Value * currentBlockLocalPos = iBuilder->CreateURem(currentBlockGlobalPos, iBuilder->getSize(this->getAnyStreamSetBuffer(inputName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 64));
345        Value * currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, SIZE_64);
346
347        Value * ptr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
348        Value * currentBitValue = iBuilder->CreateLoad(iBuilder->CreateGEP(ptr, currentBlockLocalPos));
349
350        currentBitValue = iBuilder->CreateLShr(currentBitValue, currentPosBitBlockOffset);
351        currentBitValue = iBuilder->CreateNot(currentBitValue);
352
353        Value * forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
354        Value * newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
355        newOffset = iBuilder->CreateUMin(newOffset, iBuilder->getSize(64));
356
357        Value * actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
358        Value * newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue);
359        if (maxPos) {
360            newPos = iBuilder->CreateUMin(maxPos, newPos);
361            actualAdvanceValue = iBuilder->CreateSub(newPos, phiCurrentPos);
362            newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, actualAdvanceValue);
363        }
364
365        phiIsFinish->addIncoming(iBuilder->CreateICmpNE(newOffset, iBuilder->getSize(64)), iBuilder->GetInsertBlock());
366        phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock());
367        iBuilder->CreateBr(advanceConBlock);
368
369        iBuilder->SetInsertPoint(advanceExitBlock);
370        return phiCurrentPos;
371    }
372
373    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value * globalOffset) {
374        Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(getStride());
375        Value * processed = iBuilder->getProcessedItemCount(inputBufferName);
376        processed = iBuilder->CreateAnd(processed, ConstantExpr::getNeg(SIZE_STRIDE_SIZE));
377        Value * offset = iBuilder->CreateSub(globalOffset, processed);
378        Value * valuePtr = iBuilder->getRawInputPointer(inputBufferName, offset);
379        return iBuilder->CreateLoad(valuePtr);
380    }
381
382    Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value * offset) {
383        Value * ptr = iBuilder->getRawInputPointer("byteStream", offset);
384        return iBuilder->CreateLoad(ptr);
385    }
386
387    void LZ4IndexBuilderKernel::increaseScalarField(const unique_ptr<KernelBuilder> &iBuilder, const string &fieldName, Value *value) {
388        Value *fieldValue = iBuilder->getScalarField(fieldName);
389        fieldValue = iBuilder->CreateAdd(fieldValue, value);
390        iBuilder->setScalarField(fieldName, fieldValue);
391    }
392
393    void LZ4IndexBuilderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
394                                                          const string & outputBufferName,
395                                                          Value * value) {
396
397        Value * outputOffset = iBuilder->getProducedItemCount(outputBufferName);
398        Value * outputRawPtr = iBuilder->getRawOutputPointer(outputBufferName, outputOffset);
399        iBuilder->CreateStore(value, outputRawPtr);
400        iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, iBuilder->getSize(1)));
401    }
402
403
404    void LZ4IndexBuilderKernel::clearCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
405                                                             const std::string &bitstreamName,
406                                                             llvm::Value *start, llvm::Value *end) {
407        //TODO currently we assume that start/end pos is not in the same byte because of the requirement of the LZ4 format
408        Value* SIZE_0 = iBuilder->getSize(0);
409        Value* SIZE_8 = iBuilder->getSize(8);
410        Value* INT8_0 = iBuilder->getInt8(0);
411        Type* INT8_PTR_TY = iBuilder->getInt8PtrTy();
412
413        Value* outputBufferBytes = iBuilder->CreateUDiv(iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth()), SIZE_8);
414        Value* rawOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), INT8_PTR_TY);
415
416        Value* startRemain = iBuilder->CreateURem(start, SIZE_8);
417        Value* startBytePos = iBuilder->CreateUDiv(start, SIZE_8);
418        Value* endRemain = iBuilder->CreateURem(end, SIZE_8);
419        Value* endBytePos = iBuilder->CreateUDiv(end, SIZE_8);
420
421        BasicBlock* startByteCpyBlock = iBuilder->CreateBasicBlock("startByteCpyBlock");
422        BasicBlock* endByteCpyConBlock = iBuilder->CreateBasicBlock("endByteCpyConBlock");
423        BasicBlock* endByteCpyBlock = iBuilder->CreateBasicBlock("endByteCpyBlock");
424        BasicBlock* memsetBlock = iBuilder->CreateBasicBlock("memsetBlock");
425
426        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(startRemain, SIZE_0), startByteCpyBlock, endByteCpyConBlock);
427
428        // Clear highest {startShiftAmount} bits
429        iBuilder->SetInsertPoint(startByteCpyBlock);
430        Value* startPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
431        Value* startValue = iBuilder->CreateLoad(startPtr);
432
433        Value* startShiftAmount = iBuilder->CreateSub(SIZE_8, startRemain);
434        startShiftAmount = iBuilder->CreateZExtOrTrunc(startShiftAmount, startValue->getType());
435        startValue = iBuilder->CreateLShr(iBuilder->CreateShl(startValue, startShiftAmount), startShiftAmount);
436
437        iBuilder->CreateStore(startValue, startPtr);
438        iBuilder->CreateBr(endByteCpyConBlock);
439
440        iBuilder->SetInsertPoint(endByteCpyConBlock);
441        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(endBytePos, SIZE_0), endByteCpyBlock, memsetBlock);
442
443        // Clear lowest {endRemain} bits
444        iBuilder->SetInsertPoint(endByteCpyBlock);
445        Value* endPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(endBytePos, outputBufferBytes));
446        Value* endValue = iBuilder->CreateLoad(endPtr);
447        endRemain = iBuilder->CreateZExtOrTrunc(endRemain, endValue->getType());
448        endValue = iBuilder->CreateShl(iBuilder->CreateLShr(endValue, endRemain), endRemain);
449        iBuilder->CreateStore(endValue, endPtr);
450        iBuilder->CreateBr(memsetBlock);
451
452        iBuilder->SetInsertPoint(memsetBlock);
453        Value* memsetStartByte = iBuilder->CreateUDivCeil(start, SIZE_8);
454        Value* memsetEndByte = endBytePos;
455
456        Value* memsetSize = iBuilder->CreateSub(memsetEndByte, memsetStartByte);
457
458        memsetSize = iBuilder->CreateUMin(memsetSize, outputBufferBytes);
459        // We always assume that  (memsetEndByte - memsetStartByte) < outputBufferBytes
460
461        Value* memsetStartByteRem = iBuilder->CreateURem(memsetStartByte, outputBufferBytes);
462
463        Value* memsetSize1 = iBuilder->CreateUMin(iBuilder->CreateSub(outputBufferBytes, memsetStartByteRem), memsetSize);
464        Value* memsetSize2 = iBuilder->CreateSub(memsetSize, memsetSize1);
465
466        iBuilder->CreateMemSet(iBuilder->CreateGEP(rawOutputPtr, memsetStartByteRem), INT8_0, memsetSize1, true);
467        iBuilder->CreateMemSet(rawOutputPtr, INT8_0, memsetSize2, true);
468    }
469
470    void LZ4IndexBuilderKernel::setCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
471                                                             const std::string &bitstreamName,
472                                                             llvm::Value *start, llvm::Value *end) {
473        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
474
475        Value* SIZE_0 = iBuilder->getSize(0);
476        Value* SIZE_1 = iBuilder->getSize(1);
477        Value* SIZE_8 = iBuilder->getSize(8);
478//        Value* INT8_0 = iBuilder->getInt8(0);
479//        Value* INT8_1 = iBuilder->getInt8(1);
480        Type* INT8_PTR_TY = iBuilder->getInt8PtrTy();
481
482        Value* outputBufferBytes = iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 8);
483        Value* rawOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), INT8_PTR_TY);
484
485        Value* startRemain = iBuilder->CreateURem(start, SIZE_8);
486        Value* startBytePos = iBuilder->CreateUDiv(start, SIZE_8);
487        Value* endRemain = iBuilder->CreateURem(end, SIZE_8);
488        Value* endBytePos = iBuilder->CreateUDiv(end, SIZE_8);
489        Value* startShiftAmount = iBuilder->CreateSub(SIZE_8, startRemain);
490
491        BasicBlock* shortSetBlock = iBuilder->CreateBasicBlock("shortSetBlock");
492        BasicBlock* longSetBlock = iBuilder->CreateBasicBlock("longSetBlock");
493
494//        iBuilder->CreateBr(startByteCpyBlock);
495        iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(startBytePos, endBytePos), shortSetBlock, longSetBlock);
496
497        // When startPos and endPos are in the same byte
498        iBuilder->SetInsertPoint(shortSetBlock);
499        Value* targetPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
500        Value* targetValue = iBuilder->CreateLoad(targetPtr);
501        Value* rangeMask = iBuilder->CreateSub(iBuilder->CreateShl(SIZE_1, endRemain), iBuilder->CreateShl(SIZE_1, startRemain));
502        rangeMask = iBuilder->CreateZExtOrTrunc(rangeMask, targetValue->getType());
503        targetValue = iBuilder->CreateOr(rangeMask, targetValue);
504
505//        targetValue = iBuilder->CreateNot(iBuilder->CreateLShr(iBuilder->CreateShl(iBuilder->CreateNot(targetValue), startShiftAmount), startShiftAmount));
506//        targetValue = iBuilder->CreateShl(iBuilder->CreateLShr(targetValue, endRemain), endRemain);
507        iBuilder->CreateStore(targetValue, targetPtr);
508        iBuilder->CreateBr(exitBlock);
509
510        iBuilder->SetInsertPoint(longSetBlock);
511
512        BasicBlock* startByteCpyBlock = iBuilder->CreateBasicBlock("startByteCpyBlock");
513        BasicBlock* endByteCpyConBlock = iBuilder->CreateBasicBlock("endByteCpyConBlock");
514        BasicBlock* endByteCpyBlock = iBuilder->CreateBasicBlock("endByteCpyBlock");
515        BasicBlock* memsetBlock = iBuilder->CreateBasicBlock("memsetBlock");
516
517        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(startRemain, SIZE_0), startByteCpyBlock, endByteCpyConBlock);
518        // Clear highest {startShiftAmount} bits
519        iBuilder->SetInsertPoint(startByteCpyBlock);
520        Value* startPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
521        Value* startValue = iBuilder->CreateLoad(startPtr);
522
523        Value* startShiftAmount2 = iBuilder->CreateZExtOrTrunc(startShiftAmount, startValue->getType());
524        startValue = iBuilder->CreateNot(iBuilder->CreateLShr(iBuilder->CreateShl(iBuilder->CreateNot(startValue), startShiftAmount2), startShiftAmount2));
525
526        iBuilder->CreateStore(startValue, startPtr);
527        iBuilder->CreateBr(endByteCpyConBlock);
528
529        iBuilder->SetInsertPoint(endByteCpyConBlock);
530        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(endBytePos, SIZE_0), endByteCpyBlock, memsetBlock);
531
532        // Clear lowest {endRemain} bits
533        iBuilder->SetInsertPoint(endByteCpyBlock);
534        Value* endPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(endBytePos, outputBufferBytes));
535        Value* endValue = iBuilder->CreateLoad(endPtr);
536        Value* endRemain2 = iBuilder->CreateZExtOrTrunc(endRemain, endValue->getType());
537        endValue = iBuilder->CreateNot(iBuilder->CreateShl(iBuilder->CreateLShr(iBuilder->CreateNot(endValue), endRemain2), endRemain2));
538        iBuilder->CreateStore(endValue, endPtr);
539        iBuilder->CreateBr(memsetBlock);
540
541        iBuilder->SetInsertPoint(memsetBlock);
542        Value* memsetStartByte = iBuilder->CreateUDivCeil(start, SIZE_8);
543        Value* memsetEndByte = endBytePos;
544
545        Value* memsetSize = iBuilder->CreateSub(memsetEndByte, memsetStartByte);
546
547        memsetSize = iBuilder->CreateUMin(memsetSize, outputBufferBytes);
548
549        // We always assume that  (memsetEndByte - memsetStartByte) < outputBufferBytes
550
551        Value* memsetStartByteRem = iBuilder->CreateURem(memsetStartByte, outputBufferBytes);
552
553        Value* memsetSize1 = iBuilder->CreateUMin(iBuilder->CreateSub(outputBufferBytes, memsetStartByteRem), memsetSize);
554        Value* memsetSize2 = iBuilder->CreateSub(memsetSize, memsetSize1);
555
556        iBuilder->CreateMemSet(iBuilder->CreateGEP(rawOutputPtr, memsetStartByteRem), iBuilder->getInt8(0xff), memsetSize1, true);
557        iBuilder->CreateMemSet(rawOutputPtr, iBuilder->getInt8(0xff), memsetSize2, true);
558        iBuilder->CreateBr(exitBlock);
559
560        iBuilder->SetInsertPoint(exitBlock);
561    }
562
563    void LZ4IndexBuilderKernel::markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder, const string &bitstreamName, Value *pos) {
564        Value* SIZE_0 = iBuilder->getSize(0);
565        Value* SIZE_8 = iBuilder->getSize(8);
566        Value* INT8_1 = iBuilder->getInt8(1);
567        Type* bytePtrType = iBuilder->getInt8PtrTy();
568
569        Value* outputBufferBytes = iBuilder->getSize(this->getOutputStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 8);
570
571        Value* bytePos = iBuilder->CreateUDiv(pos, SIZE_8);
572        bytePos = iBuilder->CreateURem(bytePos, outputBufferBytes);
573        Value* byteOffset = iBuilder->CreateTrunc(iBuilder->CreateURem(pos, SIZE_8), iBuilder->getInt8Ty());
574
575        Value* outputRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), bytePtrType);
576        Value* outputTargetPtr = iBuilder->CreateGEP(outputRawPtr, bytePos);
577
578        Value* targetValue = iBuilder->CreateLoad(outputTargetPtr);
579        targetValue = iBuilder->CreateOr(targetValue, iBuilder->CreateShl(INT8_1, byteOffset));
580        iBuilder->CreateStore(targetValue, outputTargetPtr);
581    }
582
583}
Note: See TracBrowser for help on using the repository browser.