source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp @ 5974

Last change on this file since 5974 was 5974, checked in by xwa163, 17 months ago
  1. Use i1 bit stream instead of i64 number stream in M0 related streams and Match Offset related stream
  2. Improve the performance of lz4_index_builder
File size: 31.7 KB
Line 
1//
2// Created by wxy325 on 2018/3/16.
3//
4
5#include "lz4_index_builder.h"
6
7
8#include <kernels/kernel_builder.h>
9#include <iostream>
10#include <string>
11#include <llvm/Support/raw_ostream.h>
12#include <kernels/streamset.h>
13
14using namespace llvm;
15using namespace kernel;
16using namespace std;
17
18namespace kernel{
19
20    LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
21    : SegmentOrientedKernel("LZ4IndexBuilderKernel",
22    // Inputs
23    {
24           Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
25           Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
26
27           // block data
28           Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1),
29                   AlwaysConsume()},
30           Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1),
31                   AlwaysConsume()},
32           Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1),
33                   AlwaysConsume()}
34
35    },
36    //Outputs
37    {
38           // Uncompressed_data
39           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos",
40                   BoundedRate(0, 1)},
41           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength",
42                   BoundedRate(0, 1)},
43           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos",
44                   BoundedRate(0, 1)},
45
46           Binding{iBuilder->getStreamSetTy(1, 1), "deletionMarker", BoundedRate(0, 1)},
47           Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)},
48           Binding{iBuilder->getStreamSetTy(1, 1), "M0CountMarker", BoundedRate(0, 1)},
49           Binding{iBuilder->getStreamSetTy(1, 1), "MatchOffsetMarker", RateEqualTo("byteStream")}
50    },
51    //Arguments
52    {
53           Binding{iBuilder->getSizeTy(), "fileSize"}
54    },
55    {},
56    //Internal states:
57    {
58           Binding{iBuilder->getSizeTy(), "blockDataIndex"},
59           Binding{iBuilder->getInt64Ty(), "m0OutputPos"},
60           Binding{iBuilder->getInt64Ty(), "compressedSpaceClearPos"}
61    }) {
62        this->setStride(4 * 1024 * 1024);
63        addAttribute(MustExplicitlyTerminate());
64    }
65
66    void LZ4IndexBuilderKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
67//        iBuilder->CallPrintInt("IndexBuilder:entry", iBuilder->getSize(0));
68
69        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
70        BasicBlock* blockEndConBlock = iBuilder->CreateBasicBlock("blockEndConBlock");
71
72        Value * blockDataIndex = iBuilder->getScalarField("blockDataIndex");
73
74        // In MultiblockKernel, availableItemCount + processedItemCount == producedItemCount from previous kernel
75        // While in SegmentOrigentedKernel, availableItemCount == producedItemCount from previous kernel
76        Value * totalNumber = iBuilder->getAvailableItemCount("blockEnd");
77        Value * totalExtender = iBuilder->getAvailableItemCount("extender");
78
79        Value * blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex);
80
81        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock);
82
83        iBuilder->SetInsertPoint(blockEndConBlock);
84        Value * blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex);
85        BasicBlock * processBlock = iBuilder->CreateBasicBlock("processBlock");
86        iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
87
88        iBuilder->SetInsertPoint(processBlock);
89
90        //TODO handle uncompressed block
91
92        this->generateProcessCompressedBlock(iBuilder, blockStart, blockEnd);
93
94        Value * newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));
95        iBuilder->setScalarField("blockDataIndex", newBlockDataIndex);
96        iBuilder->setProcessedItemCount("blockEnd", newBlockDataIndex);
97        iBuilder->setProcessedItemCount("blockStart", newBlockDataIndex);
98        iBuilder->setProcessedItemCount("isCompressed", newBlockDataIndex);
99
100        iBuilder->setProcessedItemCount("byteStream", blockEnd);
101        iBuilder->CreateBr(exitBlock);
102
103        iBuilder->SetInsertPoint(exitBlock);
104    }
105
106    Value* LZ4IndexBuilderKernel::processLiteral(const std::unique_ptr<KernelBuilder> &iBuilder, Value* token, Value* tokenPos, Value* blockEnd) {
107//        iBuilder->CallPrintInt("blockEnd", blockEnd);
108        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
109
110        Value * extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
111
112        BasicBlock* extendLiteralLengthCon = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_con");
113        BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
114        BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
115
116        iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthCon, extendLiteralLengthExit);
117
118        iBuilder->SetInsertPoint(extendLiteralLengthCon);
119
120        iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpNE(iBuilder->CreateLoad(
121                iBuilder->getRawInputPointer("byteStream", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)))),
122                                                            iBuilder->getInt8(0xff)), extendLiteralLengthExit,
123                                     extendLiteralLengthBody);
124
125
126        iBuilder->SetInsertPoint(extendLiteralLengthBody);
127        Value* newCursorPos2 = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), blockEnd);
128        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
129
130        iBuilder->CreateBr(extendLiteralLengthExit);
131
132        iBuilder->SetInsertPoint(extendLiteralLengthExit);
133//        PHINode* newCursorPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
134//        newCursorPos->addIncoming(a, extendLiteralLengthCon);
135//        newCursorPos->addIncoming(newCursorPos2, advanceFinishBlock);
136
137        PHINode* phiCursorPosAfterLiteral = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
138        phiCursorPosAfterLiteral->addIncoming(iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), extendLiteralLengthCon);
139        phiCursorPosAfterLiteral->addIncoming(newCursorPos2, advanceFinishBlock);
140        phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock);
141
142        Value * literalExtensionSize = iBuilder->CreateSub(phiCursorPosAfterLiteral, tokenPos);
143        Value * finalLengthByte = this->generateLoadSourceInputByte(iBuilder, phiCursorPosAfterLiteral);
144        finalLengthByte = iBuilder->CreateZExt(finalLengthByte, iBuilder->getInt64Ty());
145        Value * literalLengthExtendValue = iBuilder->CreateSelect(
146                iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)),
147                iBuilder->CreateAdd(
148                        iBuilder->CreateMul(
149                                iBuilder->CreateSub(literalExtensionSize, iBuilder->getSize(1)),
150                                iBuilder->getSize(255)
151                        ),
152                        finalLengthByte
153                ),
154                iBuilder->getSize(0)
155        );
156        literalLengthExtendValue = iBuilder->CreateZExt(literalLengthExtendValue, iBuilder->getInt64Ty());
157        Value* literalLengthBase = iBuilder->CreateLShr(iBuilder->CreateZExt(token, iBuilder->getInt64Ty()), iBuilder->getInt64(4));
158        Value* literalLength = iBuilder->CreateAdd(literalLengthBase, literalLengthExtendValue);
159
160        Value* offsetPos = iBuilder->CreateAdd(
161                iBuilder->CreateAdd(
162                        phiCursorPosAfterLiteral,
163                        literalLength),
164                iBuilder->getSize(1));
165
166        this->setCircularOutputBitstream(iBuilder, "deletionMarker", iBuilder->getProducedItemCount("deletionMarker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)));
167
168        iBuilder->setProducedItemCount("deletionMarker", offsetPos);
169        this->increaseScalarField(iBuilder, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields
170        return offsetPos;
171    }
172
173    Value* LZ4IndexBuilderKernel::processMatch(const std::unique_ptr<KernelBuilder> &iBuilder, Value* offsetPos, Value* token, Value* blockEnd) {
174        Constant* INT64_ONE = iBuilder->getInt64(1);
175
176        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
177
178        Value* matchLengthStartPos = iBuilder->CreateAdd(offsetPos, INT64_ONE);
179        Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
180
181        BasicBlock* extendMatchBodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_body");
182        BasicBlock* extendMatchExitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_exit");
183
184        iBuilder->CreateCondBr(extendedMatchValue, extendMatchBodyBlock, extendMatchExitBlock);
185
186        iBuilder->SetInsertPoint(extendMatchBodyBlock);
187
188        //ExtendMatchBodyBlock
189        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(matchLengthStartPos, INT64_ONE), blockEnd);
190        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
191
192        // ----May be in a different segment now
193        iBuilder->CreateBr(extendMatchExitBlock);
194
195        //ExtendMatchExitBlock
196        iBuilder->SetInsertPoint(extendMatchExitBlock);
197        PHINode* phiCursorPosAfterMatch = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
198        phiCursorPosAfterMatch->addIncoming(newCursorPos, advanceFinishBlock);
199        phiCursorPosAfterMatch->addIncoming(matchLengthStartPos, entryBlock);
200
201        Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, matchLengthStartPos);
202        extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
203        Value* matchExtensionSize = iBuilder->CreateSelect(
204                iBuilder->CreateICmpEQ(extendedMatchValue, iBuilder->getInt1(true)),
205                oldMatchExtensionSize,
206                iBuilder->getSize(0)
207        );
208        Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
209        Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
210
211
212        Value* extensionLastBitPos = iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1));
213        extensionLastBitPos = iBuilder->CreateAdd(extensionLastBitPos, matchExtensionSize);
214
215        Value* extensionLastBitValue = this->generateLoadSourceInputByte(iBuilder, extensionLastBitPos);
216        extensionLastBitValue = iBuilder->CreateZExt(extensionLastBitValue, iBuilder->getSizeTy());
217
218
219        Value* matchLengthAddValue = iBuilder->CreateSelect(
220                iBuilder->CreateICmpUGT(matchExtensionSize, iBuilder->getSize(0)),
221                iBuilder->CreateAdd(
222                        iBuilder->CreateMul(
223                                iBuilder->CreateSub(matchExtensionSize, iBuilder->getSize(1)),
224                                iBuilder->getSize(255)
225                        ),
226                        extensionLastBitValue
227                )
228                ,
229                iBuilder->getSize(0)
230        );
231        matchLengthAddValue = iBuilder->CreateZExt(matchLengthAddValue, iBuilder->getInt64Ty());
232
233        matchLength = iBuilder->CreateAdd(matchLength, matchLengthAddValue);
234
235        Value* outputPos = iBuilder->getScalarField("m0OutputPos");
236
237        Value* outputEndPos = iBuilder->CreateSub(
238                iBuilder->CreateAdd(outputPos, matchLength),
239                iBuilder->getInt64(1)
240        );
241
242        Value* matchOffset = iBuilder->CreateAdd(
243                iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, offsetPos), iBuilder->getSizeTy()),
244                iBuilder->CreateShl(iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1))), iBuilder->getSizeTy()), iBuilder->getSize(8))
245        );
246        iBuilder->setProducedItemCount("M0CountMarker", iBuilder->CreateAdd(iBuilder->getProducedItemCount("M0CountMarker"), iBuilder->getSize(1)));
247        this->markCircularOutputBitstream(iBuilder, "MatchOffsetMarker", offsetPos);
248//        iBuilder->CallPrintInt("offsetPos", offsetPos);
249//        iBuilder->CallPrintInt("matchOffset", matchOffset);
250
251
252        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
253        this->setCircularOutputBitstream(iBuilder, "M0Marker", outputPos, outputEndPos);
254
255        return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE);
256    }
257
258    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
259        // Constant
260        Value* clearPos = iBuilder->getScalarField("compressedSpaceClearPos");
261        // We can not only clear [blockStart, blockEnd), since there are 4 bytes between blockEnd and nextBlockStart
262        this->clearCircularOutputBitstream(iBuilder, "deletionMarker", clearPos, blockEnd);
263        this->clearCircularOutputBitstream(iBuilder, "MatchOffsetMarker", clearPos, blockEnd);
264        iBuilder->setScalarField("compressedSpaceClearPos", blockEnd);
265
266        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
267
268        Value* m0OutputBlockPtr = iBuilder->getOutputStreamBlockPtr("M0Marker", iBuilder->getSize(0));
269        iBuilder->CreateMemSet(m0OutputBlockPtr, iBuilder->getInt8(0), 4 * 1024 * 1024 / 8, true);
270
271
272        Value* isTerminal = iBuilder->CreateICmpEQ(blockEnd, iBuilder->getScalarField("fileSize"));
273        iBuilder->setTerminationSignal(isTerminal);
274
275        //TODO use memset to clear output buffer for extract marker
276
277        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("processCompressedExitBlock");
278
279        BasicBlock* processCon = iBuilder->CreateBasicBlock("processCompressedConBlock");
280        BasicBlock* processBody = iBuilder->CreateBasicBlock("processCompressedBodyBlock");
281
282        iBuilder->CreateBr(processCon);
283        iBuilder->SetInsertPoint(processCon);
284
285        PHINode* phiCursorValue = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3); // phiCursorValue should always be the position of next token except for the final sequence
286        phiCursorValue->addIncoming(blockStart, entryBlock);
287
288        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(phiCursorValue, blockEnd), processBody, exitBlock);
289
290        // Process Body
291        iBuilder->SetInsertPoint(processBody);
292
293        //TODO add acceleration here
294        Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue);
295        // Process Literal
296        BasicBlock* processLiteralBlock = iBuilder->CreateBasicBlock("processLiteralBlock");
297        iBuilder->CreateBr(processLiteralBlock);
298        iBuilder->SetInsertPoint(processLiteralBlock);
299
300        Value* offsetPos = this->processLiteral(iBuilder, token, phiCursorValue, blockEnd);
301        // Process Match
302        BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
303        BasicBlock* handleM0ElseBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_else");
304
305        iBuilder->CreateCondBr(
306                iBuilder->CreateICmpULT(offsetPos, blockEnd),
307                handleM0BodyBlock,
308                handleM0ElseBlock
309        );
310
311        // HandleM0Body
312        iBuilder->SetInsertPoint(handleM0BodyBlock);
313        Value* nextTokenPos = this->processMatch(iBuilder, offsetPos, token, blockEnd);
314        phiCursorValue->addIncoming(nextTokenPos, iBuilder->GetInsertBlock());
315
316        iBuilder->CreateBr(processCon);
317
318
319        // HandleM0Else
320        iBuilder->SetInsertPoint(handleM0ElseBlock);
321
322        phiCursorValue->addIncoming(offsetPos, handleM0ElseBlock);
323        // Store final M0 pos to make sure the bit stream will be long enough
324        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
325        iBuilder->setProducedItemCount("M0Marker", finalM0OutputPos);
326        // finalM0OutputPos should always be 4MB * n except for the final block
327
328        iBuilder->CreateBr(processCon);
329
330
331        iBuilder->SetInsertPoint(exitBlock);
332    }
333
334    Value * LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value * startPos, Value * maxPos) {
335
336        Constant* SIZE_64 = iBuilder->getSize(64);
337
338        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
339
340        BasicBlock* advanceConBlock = iBuilder->CreateBasicBlock("advanceConBlock");
341        BasicBlock* advanceBodyBlock = iBuilder->CreateBasicBlock("advanceBodyBlock");
342        BasicBlock* advanceExitBlock = iBuilder->CreateBasicBlock("advanceExitBlock");
343
344        iBuilder->CreateBr(advanceConBlock);
345        // TODO special handling for the first advance may have better performance
346        iBuilder->SetInsertPoint(advanceConBlock);
347
348        PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
349        phiCurrentPos->addIncoming(startPos, entryBlock);
350        PHINode* phiIsFinish = iBuilder->CreatePHI(iBuilder->getInt1Ty(), 2);
351        phiIsFinish->addIncoming(iBuilder->getInt1(false), entryBlock);
352        iBuilder->CreateCondBr(iBuilder->CreateNot(phiIsFinish), advanceBodyBlock, advanceExitBlock);
353
354        iBuilder->SetInsertPoint(advanceBodyBlock);
355
356        Value * currentBlockGlobalPos = iBuilder->CreateUDiv(phiCurrentPos, SIZE_64);
357        Value * currentBlockLocalPos = iBuilder->CreateURem(currentBlockGlobalPos, iBuilder->getSize(this->getAnyStreamSetBuffer(inputName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 64));
358        Value * currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, SIZE_64);
359
360        Value * ptr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
361        Value * currentBitValue = iBuilder->CreateLoad(iBuilder->CreateGEP(ptr, currentBlockLocalPos));
362
363        currentBitValue = iBuilder->CreateLShr(currentBitValue, currentPosBitBlockOffset);
364        currentBitValue = iBuilder->CreateNot(currentBitValue);
365
366        Value * forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
367        Value * newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
368        newOffset = iBuilder->CreateUMin(newOffset, iBuilder->getSize(64));
369
370        Value * actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
371        Value * newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue);
372        if (maxPos) {
373            newPos = iBuilder->CreateUMin(maxPos, newPos);
374            actualAdvanceValue = iBuilder->CreateSub(newPos, phiCurrentPos);
375            newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, actualAdvanceValue);
376        }
377
378        phiIsFinish->addIncoming(iBuilder->CreateICmpNE(newOffset, iBuilder->getSize(64)), iBuilder->GetInsertBlock());
379        phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock());
380        iBuilder->CreateBr(advanceConBlock);
381
382        iBuilder->SetInsertPoint(advanceExitBlock);
383        return phiCurrentPos;
384    }
385
386    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value * globalOffset) {
387        Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(getStride());
388        Value * processed = iBuilder->getProcessedItemCount(inputBufferName);
389        processed = iBuilder->CreateAnd(processed, ConstantExpr::getNeg(SIZE_STRIDE_SIZE));
390        Value * offset = iBuilder->CreateSub(globalOffset, processed);
391        Value * valuePtr = iBuilder->getRawInputPointer(inputBufferName, offset);
392        return iBuilder->CreateLoad(valuePtr);
393    }
394
395    Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value * offset) {
396        Value * ptr = iBuilder->getRawInputPointer("byteStream", offset);
397        return iBuilder->CreateLoad(ptr);
398    }
399
400    void LZ4IndexBuilderKernel::increaseScalarField(const unique_ptr<KernelBuilder> &iBuilder, const string &fieldName, Value *value) {
401        Value *fieldValue = iBuilder->getScalarField(fieldName);
402        fieldValue = iBuilder->CreateAdd(fieldValue, value);
403        iBuilder->setScalarField(fieldName, fieldValue);
404    }
405
406    void LZ4IndexBuilderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
407                                                          const string & outputBufferName,
408                                                          Value * value) {
409
410        Value * outputOffset = iBuilder->getProducedItemCount(outputBufferName);
411        Value * outputRawPtr = iBuilder->getRawOutputPointer(outputBufferName, outputOffset);
412        iBuilder->CreateStore(value, outputRawPtr);
413        iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, iBuilder->getSize(1)));
414    }
415
416
417    void LZ4IndexBuilderKernel::clearCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
418                                                             const std::string &bitstreamName,
419                                                             llvm::Value *start, llvm::Value *end) {
420        //TODO currently we assume that start/end pos is not in the same byte because of the requirement of the LZ4 format
421        Value* SIZE_0 = iBuilder->getSize(0);
422        Value* SIZE_8 = iBuilder->getSize(8);
423        Value* INT8_0 = iBuilder->getInt8(0);
424        Type* INT8_PTR_TY = iBuilder->getInt8PtrTy();
425
426        Value* outputBufferBytes = iBuilder->CreateUDiv(iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth()), SIZE_8);
427        Value* rawOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), INT8_PTR_TY);
428
429        Value* startRemain = iBuilder->CreateURem(start, SIZE_8);
430        Value* startBytePos = iBuilder->CreateUDiv(start, SIZE_8);
431        Value* endRemain = iBuilder->CreateURem(end, SIZE_8);
432        Value* endBytePos = iBuilder->CreateUDiv(end, SIZE_8);
433
434        BasicBlock* startByteCpyBlock = iBuilder->CreateBasicBlock("startByteCpyBlock");
435        BasicBlock* endByteCpyConBlock = iBuilder->CreateBasicBlock("endByteCpyConBlock");
436        BasicBlock* endByteCpyBlock = iBuilder->CreateBasicBlock("endByteCpyBlock");
437        BasicBlock* memsetBlock = iBuilder->CreateBasicBlock("memsetBlock");
438
439        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(startRemain, SIZE_0), startByteCpyBlock, endByteCpyConBlock);
440
441        // Clear highest {startShiftAmount} bits
442        iBuilder->SetInsertPoint(startByteCpyBlock);
443        Value* startPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
444        Value* startValue = iBuilder->CreateLoad(startPtr);
445
446        Value* startShiftAmount = iBuilder->CreateSub(SIZE_8, startRemain);
447        startShiftAmount = iBuilder->CreateZExtOrTrunc(startShiftAmount, startValue->getType());
448        startValue = iBuilder->CreateLShr(iBuilder->CreateShl(startValue, startShiftAmount), startShiftAmount);
449
450        iBuilder->CreateStore(startValue, startPtr);
451        iBuilder->CreateBr(endByteCpyConBlock);
452
453        iBuilder->SetInsertPoint(endByteCpyConBlock);
454        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(endBytePos, SIZE_0), endByteCpyBlock, memsetBlock);
455
456        // Clear lowest {endRemain} bits
457        iBuilder->SetInsertPoint(endByteCpyBlock);
458        Value* endPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(endBytePos, outputBufferBytes));
459        Value* endValue = iBuilder->CreateLoad(endPtr);
460        endRemain = iBuilder->CreateZExtOrTrunc(endRemain, endValue->getType());
461        endValue = iBuilder->CreateShl(iBuilder->CreateLShr(endValue, endRemain), endRemain);
462        iBuilder->CreateStore(endValue, endPtr);
463        iBuilder->CreateBr(memsetBlock);
464
465        iBuilder->SetInsertPoint(memsetBlock);
466        Value* memsetStartByte = iBuilder->CreateUDivCeil(start, SIZE_8);
467        Value* memsetEndByte = endBytePos;
468
469        Value* memsetSize = iBuilder->CreateSub(memsetEndByte, memsetStartByte);
470
471        memsetSize = iBuilder->CreateUMin(memsetSize, outputBufferBytes);
472        // We always assume that  (memsetEndByte - memsetStartByte) < outputBufferBytes
473
474        Value* memsetStartByteRem = iBuilder->CreateURem(memsetStartByte, outputBufferBytes);
475
476        Value* memsetSize1 = iBuilder->CreateUMin(iBuilder->CreateSub(outputBufferBytes, memsetStartByteRem), memsetSize);
477        Value* memsetSize2 = iBuilder->CreateSub(memsetSize, memsetSize1);
478
479        iBuilder->CreateMemSet(iBuilder->CreateGEP(rawOutputPtr, memsetStartByteRem), INT8_0, memsetSize1, true);
480        iBuilder->CreateMemSet(rawOutputPtr, INT8_0, memsetSize2, true);
481    }
482
483    void LZ4IndexBuilderKernel::setCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
484                                                             const std::string &bitstreamName,
485                                                             llvm::Value *start, llvm::Value *end) {
486        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
487
488        Value* SIZE_0 = iBuilder->getSize(0);
489        Value* SIZE_1 = iBuilder->getSize(1);
490        Value* SIZE_8 = iBuilder->getSize(8);
491//        Value* INT8_0 = iBuilder->getInt8(0);
492//        Value* INT8_1 = iBuilder->getInt8(1);
493        Type* INT8_PTR_TY = iBuilder->getInt8PtrTy();
494
495        Value* outputBufferBytes = iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 8);
496        Value* rawOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), INT8_PTR_TY);
497
498        Value* startRemain = iBuilder->CreateURem(start, SIZE_8);
499        Value* startBytePos = iBuilder->CreateUDiv(start, SIZE_8);
500        Value* endRemain = iBuilder->CreateURem(end, SIZE_8);
501        Value* endBytePos = iBuilder->CreateUDiv(end, SIZE_8);
502        Value* startShiftAmount = iBuilder->CreateSub(SIZE_8, startRemain);
503
504        BasicBlock* shortSetBlock = iBuilder->CreateBasicBlock("shortSetBlock");
505        BasicBlock* longSetBlock = iBuilder->CreateBasicBlock("longSetBlock");
506
507//        iBuilder->CreateBr(startByteCpyBlock);
508        iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(startBytePos, endBytePos), shortSetBlock, longSetBlock);
509
510        // When startPos and endPos are in the same byte
511        iBuilder->SetInsertPoint(shortSetBlock);
512        Value* targetPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
513        Value* targetValue = iBuilder->CreateLoad(targetPtr);
514        Value* rangeMask = iBuilder->CreateSub(iBuilder->CreateShl(SIZE_1, endRemain), iBuilder->CreateShl(SIZE_1, startRemain));
515        rangeMask = iBuilder->CreateZExtOrTrunc(rangeMask, targetValue->getType());
516        targetValue = iBuilder->CreateOr(rangeMask, targetValue);
517
518//        targetValue = iBuilder->CreateNot(iBuilder->CreateLShr(iBuilder->CreateShl(iBuilder->CreateNot(targetValue), startShiftAmount), startShiftAmount));
519//        targetValue = iBuilder->CreateShl(iBuilder->CreateLShr(targetValue, endRemain), endRemain);
520        iBuilder->CreateStore(targetValue, targetPtr);
521        iBuilder->CreateBr(exitBlock);
522
523        iBuilder->SetInsertPoint(longSetBlock);
524
525        BasicBlock* startByteCpyBlock = iBuilder->CreateBasicBlock("startByteCpyBlock");
526        BasicBlock* endByteCpyConBlock = iBuilder->CreateBasicBlock("endByteCpyConBlock");
527        BasicBlock* endByteCpyBlock = iBuilder->CreateBasicBlock("endByteCpyBlock");
528        BasicBlock* memsetBlock = iBuilder->CreateBasicBlock("memsetBlock");
529
530        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(startRemain, SIZE_0), startByteCpyBlock, endByteCpyConBlock);
531        // Clear highest {startShiftAmount} bits
532        iBuilder->SetInsertPoint(startByteCpyBlock);
533        Value* startPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
534        Value* startValue = iBuilder->CreateLoad(startPtr);
535
536        Value* startShiftAmount2 = iBuilder->CreateZExtOrTrunc(startShiftAmount, startValue->getType());
537        startValue = iBuilder->CreateNot(iBuilder->CreateLShr(iBuilder->CreateShl(iBuilder->CreateNot(startValue), startShiftAmount2), startShiftAmount2));
538
539        iBuilder->CreateStore(startValue, startPtr);
540        iBuilder->CreateBr(endByteCpyConBlock);
541
542        iBuilder->SetInsertPoint(endByteCpyConBlock);
543        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(endBytePos, SIZE_0), endByteCpyBlock, memsetBlock);
544
545        // Clear lowest {endRemain} bits
546        iBuilder->SetInsertPoint(endByteCpyBlock);
547        Value* endPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(endBytePos, outputBufferBytes));
548        Value* endValue = iBuilder->CreateLoad(endPtr);
549        Value* endRemain2 = iBuilder->CreateZExtOrTrunc(endRemain, endValue->getType());
550        endValue = iBuilder->CreateNot(iBuilder->CreateShl(iBuilder->CreateLShr(iBuilder->CreateNot(endValue), endRemain2), endRemain2));
551        iBuilder->CreateStore(endValue, endPtr);
552        iBuilder->CreateBr(memsetBlock);
553
554        iBuilder->SetInsertPoint(memsetBlock);
555        Value* memsetStartByte = iBuilder->CreateUDivCeil(start, SIZE_8);
556        Value* memsetEndByte = endBytePos;
557
558        Value* memsetSize = iBuilder->CreateSub(memsetEndByte, memsetStartByte);
559
560        memsetSize = iBuilder->CreateUMin(memsetSize, outputBufferBytes);
561
562        // We always assume that  (memsetEndByte - memsetStartByte) < outputBufferBytes
563
564        Value* memsetStartByteRem = iBuilder->CreateURem(memsetStartByte, outputBufferBytes);
565
566        Value* memsetSize1 = iBuilder->CreateUMin(iBuilder->CreateSub(outputBufferBytes, memsetStartByteRem), memsetSize);
567        Value* memsetSize2 = iBuilder->CreateSub(memsetSize, memsetSize1);
568
569        iBuilder->CreateMemSet(iBuilder->CreateGEP(rawOutputPtr, memsetStartByteRem), iBuilder->getInt8(0xff), memsetSize1, true);
570        iBuilder->CreateMemSet(rawOutputPtr, iBuilder->getInt8(0xff), memsetSize2, true);
571        iBuilder->CreateBr(exitBlock);
572
573        iBuilder->SetInsertPoint(exitBlock);
574    }
575
576    void LZ4IndexBuilderKernel::markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder, const string &bitstreamName, Value *pos) {
577        Value* SIZE_0 = iBuilder->getSize(0);
578        Value* SIZE_8 = iBuilder->getSize(8);
579        Value* INT8_1 = iBuilder->getInt8(1);
580        Type* bytePtrType = iBuilder->getInt8PtrTy();
581
582        Value* outputBufferBytes = iBuilder->getSize(this->getOutputStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 8);
583
584        Value* bytePos = iBuilder->CreateUDiv(pos, SIZE_8);
585        bytePos = iBuilder->CreateURem(bytePos, outputBufferBytes);
586        Value* byteOffset = iBuilder->CreateTrunc(iBuilder->CreateURem(pos, SIZE_8), iBuilder->getInt8Ty());
587
588        Value* outputRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), bytePtrType);
589        Value* outputTargetPtr = iBuilder->CreateGEP(outputRawPtr, bytePos);
590
591        Value* targetValue = iBuilder->CreateLoad(outputTargetPtr);
592        targetValue = iBuilder->CreateOr(targetValue, iBuilder->CreateShl(INT8_1, byteOffset));
593        iBuilder->CreateStore(targetValue, outputTargetPtr);
594
595        Value* a = iBuilder->CreateURem(iBuilder->CreateUDiv(pos, iBuilder->getSize(iBuilder->getBitBlockWidth())), iBuilder->getSize(this->getOutputStreamSetBuffer(bitstreamName)->getBufferBlocks()));
596        Value* p = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), iBuilder->getBitBlockType()->getPointerTo());
597//        iBuilder->CallPrintInt("--pos", pos);
598//        iBuilder->CallPrintRegister("aa", iBuilder->CreateLoad(iBuilder->CreateGEP(p, a)));
599
600    }
601
602}
Note: See TracBrowser for help on using the repository browser.