source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp @ 6047

Last change on this file since 6047 was 6047, checked in by nmedfort, 12 months ago

Major refactoring of buffer types. Static buffers replace Circular and CircularCopyback. External buffers unify Source/External.

File size: 30.7 KB
Line 
1
2#include "lz4_index_builder.h"
3
4
5#include <kernels/kernel_builder.h>
6#include <iostream>
7#include <string>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/streamset.h>
10
11using namespace llvm;
12using namespace kernel;
13using namespace std;
14
15namespace kernel{
16
    // TODO: IndexBuilderKernel is responsible for clearing the rest of the output buffer after the final produced block,
    // e.g. when the produced item count is 0x120, IndexBuilderKernel needs to set positions 0x121 ~ 0x200 to 0.
19
20    LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &b)
21    : SegmentOrientedKernel("LZ4IndexBuilderKernel",
22    // Inputs
23    {
24           Binding{b->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
25           Binding{b->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
26
27           // block data
28           Binding{b->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1), AlwaysConsume()},
29           Binding{b->getStreamSetTy(1, 64), "blockStart", RateEqualTo("isCompressed"), AlwaysConsume()},
30           Binding{b->getStreamSetTy(1, 64), "blockEnd", RateEqualTo("isCompressed"), AlwaysConsume()}
31
32    },
33    //Outputs
34    {
35           // Uncompressed_data
36           Binding{b->getStreamSetTy(1, 64), "uncompressedStartPos",
37                   BoundedRate(0, 1)},
38           Binding{b->getStreamSetTy(1, 64), "uncompressedLength",
39                   BoundedRate(0, 1)},
40           Binding{b->getStreamSetTy(1, 64), "uncompressedOutputPos",
41                   BoundedRate(0, 1)},
42
43           Binding{b->getStreamSetTy(1, 1), "deletionMarker", BoundedRate(0, 1)},
44           Binding{b->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)},
45           Binding{b->getStreamSetTy(1, 1), "MatchOffsetMarker", RateEqualTo("byteStream")}
46    },
47    //Arguments
48    {
49           Binding{b->getSizeTy(), "fileSize"}
50    },
51    {},
52    //Internal states:
53    {
54           Binding{b->getSizeTy(), "blockDataIndex"},
55           Binding{b->getInt64Ty(), "m0OutputPos"},
56
57           // For MatchOffset Output
58           Binding{b->getIntNTy(64), "pendingMatchOffsetMarkerBits"},
59           Binding{b->getInt64Ty(), "pendingMarchOffsetMarkerIndex"},
60
61           // For deletionMarker output
62           Binding{b->getIntNTy(64), "pendingDeletionMarkerStartBits"},
63           Binding{b->getIntNTy(64), "pendingDeletionMarkerEndBits"},
64           Binding{b->getIntNTy(64), "pendingDeletionMarkerCarryBit"},
65           Binding{b->getInt64Ty(), "pendingDeletionMarkerIndex"},
66
67           // For M0 Output
68           Binding{b->getIntNTy(64), "pendingM0StartBits"},
69           Binding{b->getIntNTy(64), "pendingM0EndBits"},
70           Binding{b->getIntNTy(64), "pendingM0CarryBit"},
71           Binding{b->getInt64Ty(), "pendingM0Index"},
72
73
74    }) {
75        this->setStride(4 * 1024 * 1024);
76        addAttribute(MustExplicitlyTerminate());
77    }
78
79    void LZ4IndexBuilderKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &b) {
80        BasicBlock* exitBlock = b->CreateBasicBlock("exitBlock");
81        BasicBlock* blockEndConBlock = b->CreateBasicBlock("blockEndConBlock");
82
83        Value * blockDataIndex = b->getScalarField("blockDataIndex");
84
85        // In MultiblockKernel, availableItemCount + processedItemCount == producedItemCount from previous kernel
86        // While in SegmentOrigentedKernel, availableItemCount == producedItemCount from previous kernel
87        Value * totalNumber = b->getAvailableItemCount("blockEnd");
88        Value * totalExtender = b->getAvailableItemCount("extender");
89
90        Value * blockEnd = this->generateLoadInt64NumberInput(b, "blockEnd", blockDataIndex);
91
92        b->CreateCondBr(b->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock);
93
94        b->SetInsertPoint(blockEndConBlock);
95        Value * blockStart = this->generateLoadInt64NumberInput(b, "blockStart", blockDataIndex);
96        BasicBlock * processBlock = b->CreateBasicBlock("processBlock");
97        b->CreateCondBr(b->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
98
99        b->SetInsertPoint(processBlock);
100
101        //TODO handle uncompressed block
102
103        this->generateProcessCompressedBlock(b, blockStart, blockEnd);
104        this->storePendingM0(b);
105        this->storePendingDeletionMarker(b);
106        this->storePendingMatchOffsetMarker(b);
107        Value * newBlockDataIndex = b->CreateAdd(blockDataIndex, b->getInt64(1));
108        b->setScalarField("blockDataIndex", newBlockDataIndex);
109        b->setProcessedItemCount("isCompressed", newBlockDataIndex);
110
111        b->setProcessedItemCount("byteStream", blockEnd);
112        b->CreateBr(exitBlock);
113
114        b->SetInsertPoint(exitBlock);
115    }
116
117    Value* LZ4IndexBuilderKernel::processLiteral(const std::unique_ptr<KernelBuilder> &b, Value* token, Value* tokenPos, Value* blockEnd) {
118        BasicBlock* entryBlock = b->GetInsertBlock();
119
120        Value * extendedLiteralValue = b->CreateICmpEQ(b->CreateAnd(token, b->getInt8(0xf0)), b->getInt8(0xf0));
121
122        BasicBlock* extendLiteralLengthCon = b->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_con");
123        BasicBlock* extendLiteralLengthBody = b->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
124        BasicBlock* extendLiteralLengthExit = b->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
125
126        b->CreateCondBr(extendedLiteralValue, extendLiteralLengthCon, extendLiteralLengthExit);
127
128        b->SetInsertPoint(extendLiteralLengthCon);
129
130        Value * const nextTokenPos = b->CreateAdd(tokenPos, b->getInt64(1));
131        Value * const nextToken = b->CreateLoad(b->getRawInputPointer("byteStream", nextTokenPos));
132        Value * const isExitToken = b->CreateICmpNE(nextToken, b->getInt8(0xff));
133        b->CreateLikelyCondBr(isExitToken, extendLiteralLengthExit, extendLiteralLengthBody);
134
135
136        b->SetInsertPoint(extendLiteralLengthBody);
137        Value* newCursorPos2 = this->advanceUntilNextZero(b, "extender", b->CreateAdd(tokenPos, b->getInt64(1)), blockEnd);
138        BasicBlock* advanceFinishBlock = b->GetInsertBlock();
139
140
141        b->CreateBr(extendLiteralLengthExit);
142
143        b->SetInsertPoint(extendLiteralLengthExit);
144        PHINode* phiCursorPosAfterLiteral = b->CreatePHI(b->getInt64Ty(), 3);
145        phiCursorPosAfterLiteral->addIncoming(nextTokenPos, extendLiteralLengthCon);
146        phiCursorPosAfterLiteral->addIncoming(newCursorPos2, advanceFinishBlock);
147        phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock);
148
149        Value * literalExtensionSize = b->CreateSub(phiCursorPosAfterLiteral, tokenPos);
150        Value * finalLengthByte = this->generateLoadSourceInputByte(b, phiCursorPosAfterLiteral);
151        finalLengthByte = b->CreateZExt(finalLengthByte, b->getInt64Ty());
152        Value * literalLengthExtendValue = b->CreateSelect(
153                b->CreateICmpUGT(literalExtensionSize, b->getSize(0)),
154                b->CreateAdd(
155                        b->CreateMul(
156                                b->CreateSub(literalExtensionSize, b->getSize(1)),
157                                b->getSize(255)
158                        ),
159                        finalLengthByte
160                ),
161                b->getSize(0)
162        );
163        literalLengthExtendValue = b->CreateZExt(literalLengthExtendValue, b->getInt64Ty());
164        Value* literalLengthBase = b->CreateLShr(b->CreateZExt(token, b->getInt64Ty()), b->getInt64(4));
165        Value* literalLength = b->CreateAdd(literalLengthBase, literalLengthExtendValue);
166
167        Value* offsetPos = b->CreateAdd(
168                b->CreateAdd(
169                        phiCursorPosAfterLiteral,
170                        literalLength),
171                b->getSize(1));
172
173        this->appendDeletionMarkerOutput(b, b->getProducedItemCount("deletionMarker"), b->CreateAdd(phiCursorPosAfterLiteral, b->getSize(1)));
174
175        b->setProducedItemCount("deletionMarker", offsetPos);
176        this->increaseScalarField(b, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields
177        return offsetPos;
178    }
179
180    Value* LZ4IndexBuilderKernel::processMatch(const std::unique_ptr<KernelBuilder> &iBuilder, Value* offsetPos, Value* token, Value* blockEnd) {
181        Constant* INT64_ONE = iBuilder->getInt64(1);
182
183        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
184
185        Value* extendMatchStartPos = iBuilder->CreateAdd(offsetPos, INT64_ONE);
186        Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
187
188        BasicBlock* extendMatchBodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_body");
189        BasicBlock* extendMatchExitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_exit");
190
191        iBuilder->CreateCondBr(extendedMatchValue, extendMatchBodyBlock, extendMatchExitBlock);
192
193        iBuilder->SetInsertPoint(extendMatchBodyBlock);
194
195        //ExtendMatchBodyBlock
196        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(extendMatchStartPos, INT64_ONE), blockEnd);
197        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
198
199        iBuilder->CreateBr(extendMatchExitBlock);
200
201        //ExtendMatchExitBlock
202        iBuilder->SetInsertPoint(extendMatchExitBlock);
203        PHINode* phiCursorPosAfterMatch = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
204        phiCursorPosAfterMatch->addIncoming(newCursorPos, advanceFinishBlock);
205        phiCursorPosAfterMatch->addIncoming(extendMatchStartPos, entryBlock);
206
207        Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, extendMatchStartPos);
208        Value* matchExtensionSize = iBuilder->CreateSelect(extendedMatchValue, oldMatchExtensionSize, iBuilder->getSize(0));
209        Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
210        Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
211
212
213        Value* extensionLastBitPos = iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1));
214        extensionLastBitPos = iBuilder->CreateAdd(extensionLastBitPos, matchExtensionSize);
215
216        Value* extensionLastBitValue = this->generateLoadSourceInputByte(iBuilder, extensionLastBitPos);
217        extensionLastBitValue = iBuilder->CreateZExt(extensionLastBitValue, iBuilder->getSizeTy());
218
219
220        Value* matchLengthAddValue = iBuilder->CreateSelect(
221                iBuilder->CreateICmpUGT(matchExtensionSize, iBuilder->getSize(0)),
222                iBuilder->CreateAdd(
223                        iBuilder->CreateMul(
224                                iBuilder->CreateSub(matchExtensionSize, iBuilder->getSize(1)),
225                                iBuilder->getSize(255)
226                        ),
227                        extensionLastBitValue
228                )
229                ,
230                iBuilder->getSize(0)
231        );
232        matchLengthAddValue = iBuilder->CreateZExt(matchLengthAddValue, iBuilder->getInt64Ty());
233
234        matchLength = iBuilder->CreateAdd(matchLength, matchLengthAddValue);
235
236        Value* outputPos = iBuilder->getScalarField("m0OutputPos");
237
238        Value* outputEndPos = iBuilder->CreateSub(
239                iBuilder->CreateAdd(outputPos, matchLength),
240                iBuilder->getInt64(1)
241        );
242
243
244
245        this->appendMatchOffsetMarkerOutput(iBuilder, offsetPos);
246        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
247        this->appendM0Output(iBuilder, outputPos, outputEndPos);
248
249        return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE);
250    }
251
252    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
253        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
254
255
256        Value* isTerminal = iBuilder->CreateICmpEQ(blockEnd, iBuilder->getScalarField("fileSize"));
257        iBuilder->setTerminationSignal(isTerminal);
258
259        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("processCompressedExitBlock");
260
261        BasicBlock* processCon = iBuilder->CreateBasicBlock("processCompressedConBlock");
262        BasicBlock* processBody = iBuilder->CreateBasicBlock("processCompressedBodyBlock");
263
264        iBuilder->CreateBr(processCon);
265        iBuilder->SetInsertPoint(processCon);
266
267        PHINode* phiCursorValue = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3); // phiCursorValue should always be the position of next token except for the final sequence
268        phiCursorValue->addIncoming(blockStart, entryBlock);
269
270        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(phiCursorValue, blockEnd), processBody, exitBlock);
271
272        // Process Body
273        iBuilder->SetInsertPoint(processBody);
274
275        //TODO add acceleration here
276        Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue);
277        // Process Literal
278        BasicBlock* processLiteralBlock = iBuilder->CreateBasicBlock("processLiteralBlock");
279        iBuilder->CreateBr(processLiteralBlock);
280        iBuilder->SetInsertPoint(processLiteralBlock);
281
282        Value* offsetPos = this->processLiteral(iBuilder, token, phiCursorValue, blockEnd);
283        // Process Match
284        BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
285        BasicBlock* handleM0ElseBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_else");
286
287        iBuilder->CreateCondBr(
288                iBuilder->CreateICmpULT(offsetPos, blockEnd),
289                handleM0BodyBlock,
290                handleM0ElseBlock
291        );
292
293        // HandleM0Body
294        iBuilder->SetInsertPoint(handleM0BodyBlock);
295        Value* nextTokenPos = this->processMatch(iBuilder, offsetPos, token, blockEnd);
296        phiCursorValue->addIncoming(nextTokenPos, iBuilder->GetInsertBlock());
297
298        iBuilder->CreateBr(processCon);
299
300
301        // HandleM0Else
302        iBuilder->SetInsertPoint(handleM0ElseBlock);
303
304        phiCursorValue->addIncoming(offsetPos, handleM0ElseBlock);
305        // Store final M0 pos to make sure the bit stream will be long enough
306        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
307        iBuilder->setProducedItemCount("M0Marker", finalM0OutputPos);
308        // finalM0OutputPos should always be 4MB * n except for the final block
309
310        iBuilder->CreateBr(processCon);
311
312
313        iBuilder->SetInsertPoint(exitBlock);
314    }
315
316    Value * LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value * startPos, Value * maxPos) {
317
318        Constant* SIZE_64 = iBuilder->getSize(64);
319
320        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
321
322        BasicBlock* advanceConBlock = iBuilder->CreateBasicBlock("advanceConBlock");
323        BasicBlock* advanceBodyBlock = iBuilder->CreateBasicBlock("advanceBodyBlock");
324        BasicBlock* advanceExitBlock = iBuilder->CreateBasicBlock("advanceExitBlock");
325
326        iBuilder->CreateBr(advanceConBlock);
327        // TODO special handling for the first advance may have better performance
328        iBuilder->SetInsertPoint(advanceConBlock);
329
330        PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
331        phiCurrentPos->addIncoming(startPos, entryBlock);
332        PHINode* phiIsFinish = iBuilder->CreatePHI(iBuilder->getInt1Ty(), 2);
333        phiIsFinish->addIncoming(iBuilder->getInt1(false), entryBlock);
334        iBuilder->CreateCondBr(iBuilder->CreateNot(phiIsFinish), advanceBodyBlock, advanceExitBlock);
335
336        iBuilder->SetInsertPoint(advanceBodyBlock);
337
338        Value * currentBlockGlobalPos = iBuilder->CreateUDiv(phiCurrentPos, SIZE_64);
339        Value * currentBlockCapacity = iBuilder->CreateUDiv(iBuilder->getCapacity(inputName), iBuilder->getSize(64));
340        Value * currentBlockLocalPos = iBuilder->CreateURem(currentBlockGlobalPos, currentBlockCapacity);
341        Value * currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, SIZE_64);
342
343        Value * ptr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
344        Value * currentBitValue = iBuilder->CreateLoad(iBuilder->CreateGEP(ptr, currentBlockLocalPos));
345
346        currentBitValue = iBuilder->CreateLShr(currentBitValue, currentPosBitBlockOffset);
347        currentBitValue = iBuilder->CreateNot(currentBitValue);
348
349        Value * forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
350        Value * newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
351        newOffset = iBuilder->CreateUMin(newOffset, iBuilder->getSize(64));
352
353        Value * actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
354        Value * newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue);
355        if (maxPos) {
356            newPos = iBuilder->CreateUMin(maxPos, newPos);
357            actualAdvanceValue = iBuilder->CreateSub(newPos, phiCurrentPos);
358            newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, actualAdvanceValue);
359        }
360
361        phiIsFinish->addIncoming(iBuilder->CreateICmpNE(newOffset, iBuilder->getSize(64)), iBuilder->GetInsertBlock());
362        phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock());
363        iBuilder->CreateBr(advanceConBlock);
364
365        iBuilder->SetInsertPoint(advanceExitBlock);
366        return phiCurrentPos;
367    }
368
369    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value * globalOffset) {
370        Value * capacity = iBuilder->getCapacity(inputBufferName);
371        Value * processed = iBuilder->getProcessedItemCount(inputBufferName);
372        processed = iBuilder->CreateAnd(processed, iBuilder->CreateNeg(capacity));
373        Value * offset = iBuilder->CreateSub(globalOffset, processed);
374        Value * valuePtr = iBuilder->getRawInputPointer(inputBufferName, offset);
375        return iBuilder->CreateLoad(valuePtr);
376    }
377
378    Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value * offset) {
379        Value * ptr = iBuilder->getRawInputPointer("byteStream", offset);
380        return iBuilder->CreateLoad(ptr);
381    }
382
383    void LZ4IndexBuilderKernel::increaseScalarField(const unique_ptr<KernelBuilder> &iBuilder, const string &fieldName, Value *value) {
384        Value *fieldValue = iBuilder->getScalarField(fieldName);
385        fieldValue = iBuilder->CreateAdd(fieldValue, value);
386        iBuilder->setScalarField(fieldName, fieldValue);
387    }
388
389    void LZ4IndexBuilderKernel::appendM0Output(const std::unique_ptr<KernelBuilder> &b, llvm::Value *start, llvm::Value *end) {
390        // ---- Entry
391        // Constant
392
393        int fw = 64;
394        BasicBlock* entryBlock = b->GetInsertBlock();
395        Value* SIZE_1 = b->getSize(1);
396        Value* SIZE_256 = b->getSize(fw);
397        Value* INT256_0 = b->getIntN(fw, 0);
398        Value* INT256_1 = b->getIntN(fw, 1);
399
400        Value* startBlockIndex = b->CreateUDiv(start, SIZE_256);
401        Value* startOffset = b->CreateZExt(b->CreateURem(start, SIZE_256), b->getIntNTy(fw));
402        Value* endBlockIndex = b->CreateUDiv(end, SIZE_256);
403        Value* endOffset = b->CreateZExt(b->CreateURem(end, SIZE_256), b->getIntNTy(fw));
404
405
406        BasicBlock* appendM0Con = b->CreateBasicBlock("appendM0Con");
407        BasicBlock* appendM0Body = b->CreateBasicBlock("appendM0Body");
408        BasicBlock* appendM0Exit = b->CreateBasicBlock("appendM0Exit");
409
410        Value* pendingM0Index = b->getScalarField("pendingM0Index");
411        Value* pendingM0StartBits = b->getScalarField("pendingM0StartBits");
412        Value* pendingM0EndBits = b->getScalarField("pendingM0EndBits");
413        Value* pendingM0CarryBit = b->getScalarField("pendingM0CarryBit");
414
415        b->CreateBr(appendM0Con);
416
417        // ---- AppendM0Con
418        b->SetInsertPoint(appendM0Con);
419        PHINode* phiCurrentIndex = b->CreatePHI(b->getSizeTy(), 2);
420        phiCurrentIndex->addIncoming(pendingM0Index, entryBlock);
421        PHINode* phiStartBits = b->CreatePHI(b->getIntNTy(fw), 2);
422        phiStartBits->addIncoming(pendingM0StartBits, entryBlock);
423        PHINode* phiEndBits = b->CreatePHI(b->getIntNTy(fw), 2);
424        phiEndBits->addIncoming(pendingM0EndBits, entryBlock);
425        PHINode* phiCarryBit = b->CreatePHI(b->getIntNTy(fw), 2);
426        phiCarryBit->addIncoming(pendingM0CarryBit, entryBlock);
427
428
429        b->CreateUnlikelyCondBr(b->CreateICmpULT(phiCurrentIndex, endBlockIndex), appendM0Body, appendM0Exit);
430        // ---- AppendM0Body
431        b->SetInsertPoint(appendM0Body);
432        Value* actualStartBits = b->CreateSelect(b->CreateICmpEQ(phiCurrentIndex, startBlockIndex), b->CreateOr(phiStartBits, b->CreateShl(INT256_1, startOffset)), phiStartBits);
433        Value* outputValue = b->CreateSub(b->CreateSub(phiEndBits, actualStartBits), phiCarryBit);
434        Value* newCarryBit = b->CreateZExt(b->CreateICmpUGT(b->CreateAdd(actualStartBits, phiCarryBit), phiEndBits), b->getIntNTy(fw));
435
436        this->storeM0(b, phiCurrentIndex, outputValue);
437
438        phiCurrentIndex->addIncoming(b->CreateAdd(phiCurrentIndex, SIZE_1), b->GetInsertBlock());
439        phiStartBits->addIncoming(INT256_0, b->GetInsertBlock());
440        phiEndBits->addIncoming(INT256_0, b->GetInsertBlock());
441        phiCarryBit->addIncoming(newCarryBit, b->GetInsertBlock());
442
443        b->CreateBr(appendM0Con);
444
445        // ---- AppendM0Exit
446        b->SetInsertPoint(appendM0Exit);
447        Value* finalStartBits = b->CreateSelect(b->CreateICmpEQ(phiCurrentIndex, startBlockIndex), b->CreateOr(phiStartBits, b->CreateShl(INT256_1, startOffset)), phiStartBits);
448        Value* finalEndBits = b->CreateOr(phiEndBits, b->CreateShl(INT256_1, endOffset));
449        b->setScalarField("pendingM0Index", phiCurrentIndex);
450        b->setScalarField("pendingM0StartBits", finalStartBits);
451        b->setScalarField("pendingM0EndBits", finalEndBits);
452        b->setScalarField("pendingM0CarryBit", phiCarryBit);
453    }
454
455    void LZ4IndexBuilderKernel::storeM0(const std::unique_ptr<KernelBuilder> &b, llvm::Value* blockIndex, llvm::Value* value) {
456        int fw = 64;
457        Value* m0BufferBlocks = b->CreateUDiv(b->getCapacity("M0Marker"), b->getSize(fw));
458        Value* indexRem = b->CreateURem(blockIndex, m0BufferBlocks);
459        Value* outputBasePtr = b->CreatePointerCast(b->getRawOutputPointer("M0Marker", b->getSize(0)), b->getIntNTy(fw)->getPointerTo());
460        b->CreateStore(value, b->CreateGEP(outputBasePtr, indexRem));
461    }
462
463    void LZ4IndexBuilderKernel::storePendingM0(const std::unique_ptr<KernelBuilder> &b) {
464        Value* outputValue = b->CreateSub(
465                b->CreateSub(
466                        b->getScalarField("pendingM0EndBits"),
467                        b->getScalarField("pendingM0StartBits")
468                ),
469                b->getScalarField("pendingM0CarryBit")
470        );
471        this->storeM0(b, b->getScalarField("pendingM0Index"), outputValue);
472    }
473
474    void LZ4IndexBuilderKernel::appendDeletionMarkerOutput(const std::unique_ptr<KernelBuilder> &b,
475                                                           llvm::Value *start, llvm::Value *end) {
476        // ---- Entry
477        // Constant
478
479        int fw = 64;
480        BasicBlock* entryBlock = b->GetInsertBlock();
481        Value* SIZE_1 = b->getSize(1);
482        Value* SIZE_256 = b->getSize(fw);
483        Value* INT256_0 = b->getIntN(fw, 0);
484        Value* INT256_1 = b->getIntN(fw, 1);
485
486        Value* startBlockIndex = b->CreateUDiv(start, SIZE_256);
487        Value* startOffset = b->CreateZExt(b->CreateURem(start, SIZE_256), b->getIntNTy(fw));
488        Value* endBlockIndex = b->CreateUDiv(end, SIZE_256);
489        Value* endOffset = b->CreateZExt(b->CreateURem(end, SIZE_256), b->getIntNTy(fw));
490
491
492        BasicBlock* appendDeletionMarkerCon = b->CreateBasicBlock("appendDeletionMarkerCon");
493        BasicBlock* appendDeletionMarkerBody = b->CreateBasicBlock("appendDeletionMarkerBody");
494        BasicBlock* appendDeletionMarkerExit = b->CreateBasicBlock("appendDeletionMarkerExit");
495
496        Value* pendingDeletionMarkerIndex = b->getScalarField("pendingDeletionMarkerIndex");
497        Value* pendingDeletionMarkerStartBits = b->getScalarField("pendingDeletionMarkerStartBits");
498        Value* pendingDeletionMarkerEndBits = b->getScalarField("pendingDeletionMarkerEndBits");
499        Value* pendingDeletionMarkerCarryBit = b->getScalarField("pendingDeletionMarkerCarryBit");
500
501        b->CreateBr(appendDeletionMarkerCon);
502
503        // ---- AppendM0Con
504        b->SetInsertPoint(appendDeletionMarkerCon);
505        PHINode* phiCurrentIndex = b->CreatePHI(b->getSizeTy(), 2);
506        phiCurrentIndex->addIncoming(pendingDeletionMarkerIndex, entryBlock);
507        PHINode* phiStartBits = b->CreatePHI(b->getIntNTy(fw), 2);
508        phiStartBits->addIncoming(pendingDeletionMarkerStartBits, entryBlock);
509        PHINode* phiEndBits = b->CreatePHI(b->getIntNTy(fw), 2);
510        phiEndBits->addIncoming(pendingDeletionMarkerEndBits, entryBlock);
511        PHINode* phiCarryBit = b->CreatePHI(b->getIntNTy(fw), 2);
512        phiCarryBit->addIncoming(pendingDeletionMarkerCarryBit, entryBlock);
513
514
515        b->CreateUnlikelyCondBr(b->CreateICmpULT(phiCurrentIndex, endBlockIndex), appendDeletionMarkerBody, appendDeletionMarkerExit);
516        // ---- AppendM0Body
517        b->SetInsertPoint(appendDeletionMarkerBody);
518        Value* actualStartBits = b->CreateSelect(b->CreateICmpEQ(phiCurrentIndex, startBlockIndex), b->CreateOr(phiStartBits, b->CreateShl(INT256_1, startOffset)), phiStartBits);
519        Value* outputValue = b->CreateSub(b->CreateSub(phiEndBits, actualStartBits), phiCarryBit);
520        Value* newCarryBit = b->CreateZExt(b->CreateICmpUGT(b->CreateAdd(actualStartBits, phiCarryBit), phiEndBits), b->getIntNTy(fw));
521
522        this->storeDeletionMarker(b, phiCurrentIndex, outputValue);
523
524        phiCurrentIndex->addIncoming(b->CreateAdd(phiCurrentIndex, SIZE_1), b->GetInsertBlock());
525        phiStartBits->addIncoming(INT256_0, b->GetInsertBlock());
526        phiEndBits->addIncoming(INT256_0, b->GetInsertBlock());
527        phiCarryBit->addIncoming(newCarryBit, b->GetInsertBlock());
528
529        b->CreateBr(appendDeletionMarkerCon);
530
531        // ---- AppendM0Exit
532        b->SetInsertPoint(appendDeletionMarkerExit);
533        Value* finalStartBits = b->CreateSelect(b->CreateICmpEQ(phiCurrentIndex, startBlockIndex), b->CreateOr(phiStartBits, b->CreateShl(INT256_1, startOffset)), phiStartBits);
534        Value* finalEndBits = b->CreateOr(phiEndBits, b->CreateShl(INT256_1, endOffset));
535        b->setScalarField("pendingDeletionMarkerIndex", phiCurrentIndex);
536        b->setScalarField("pendingDeletionMarkerStartBits", finalStartBits);
537        b->setScalarField("pendingDeletionMarkerEndBits", finalEndBits);
538        b->setScalarField("pendingDeletionMarkerCarryBit", phiCarryBit);
539    }
540
541    void
542    LZ4IndexBuilderKernel::storeDeletionMarker(const std::unique_ptr<KernelBuilder> &b, llvm::Value *blockIndex, llvm::Value *value) {
543        int fw = 64;
544        Value* m0BufferBlocks = b->CreateUDiv(b->getCapacity("deletionMarker"), b->getSize(fw));
545        Value* indexRem = b->CreateURem(blockIndex, m0BufferBlocks);
546        Value* outputBasePtr = b->CreatePointerCast(b->getRawOutputPointer("deletionMarker", b->getSize(0)), b->getIntNTy(fw)->getPointerTo());
547        b->CreateStore(value, b->CreateGEP(outputBasePtr, indexRem));
548    }
549
550    void LZ4IndexBuilderKernel::storePendingDeletionMarker(const std::unique_ptr<KernelBuilder> &b) {
551        Value* outputValue = b->CreateSub(
552                b->CreateSub(
553                        b->getScalarField("pendingDeletionMarkerEndBits"),
554                        b->getScalarField("pendingDeletionMarkerStartBits")
555                ),
556                b->getScalarField("pendingDeletionMarkerCarryBit")
557        );
558        this->storeDeletionMarker(b, b->getScalarField("pendingDeletionMarkerIndex"), outputValue);
559    }
560
561    void LZ4IndexBuilderKernel::appendMatchOffsetMarkerOutput(const std::unique_ptr<KernelBuilder> &b,
562                                                              llvm::Value *position) {
563        // ---- Entry
564        // Constant
565        int fw = 64;
566        BasicBlock* entryBlock = b->GetInsertBlock();
567        Value* SIZE_1 = b->getSize(1);
568        Value* SIZE_256 = b->getSize(fw);
569        Value* INT256_0 = b->getIntN(fw, 0);
570        Value* INT256_1 = b->getIntN(fw, 1);
571
572        Value* endBlockIndex = b->CreateUDiv(position, SIZE_256);
573        Value* endOffset = b->CreateZExt(b->CreateURem(position, SIZE_256), b->getIntNTy(fw));
574
575        BasicBlock* appendMatchOffsetMarkerCon = b->CreateBasicBlock("appendMatchOffsetMarkerCon");
576        BasicBlock* appendMatchOffsetMarkerBody = b->CreateBasicBlock("appendMatchOffsetMarkerBody");
577        BasicBlock* appendMatchOffsetMarkerExit = b->CreateBasicBlock("appendMatchOffsetMarkerExit");
578
579        Value* pendingMatchOffsetMarkerIndex = b->getScalarField("pendingMarchOffsetMarkerIndex");
580        Value* pendingMatchOffsetMarkerEndBits = b->getScalarField("pendingMatchOffsetMarkerBits");
581
582        b->CreateBr(appendMatchOffsetMarkerCon);
583
584        // ---- AppendM0Con
585        b->SetInsertPoint(appendMatchOffsetMarkerCon);
586        PHINode* phiCurrentIndex = b->CreatePHI(b->getSizeTy(), 2);
587        phiCurrentIndex->addIncoming(pendingMatchOffsetMarkerIndex, entryBlock);
588        PHINode* phiEndBits = b->CreatePHI(b->getIntNTy(fw), 2);
589        phiEndBits->addIncoming(pendingMatchOffsetMarkerEndBits, entryBlock);
590
591        b->CreateUnlikelyCondBr(b->CreateICmpULT(phiCurrentIndex, endBlockIndex), appendMatchOffsetMarkerBody, appendMatchOffsetMarkerExit);
592        // ---- AppendM0Body
593        b->SetInsertPoint(appendMatchOffsetMarkerBody);
594        this->storeMatchOffsetMarker(b, phiCurrentIndex, phiEndBits);
595        phiCurrentIndex->addIncoming(b->CreateAdd(phiCurrentIndex, SIZE_1), b->GetInsertBlock());
596        phiEndBits->addIncoming(INT256_0, b->GetInsertBlock());
597
598        b->CreateBr(appendMatchOffsetMarkerCon);
599
600        // ---- AppendM0Exit
601        b->SetInsertPoint(appendMatchOffsetMarkerExit);
602        Value* finalEndBits = b->CreateOr(phiEndBits, b->CreateShl(INT256_1, endOffset));
603        b->setScalarField("pendingMarchOffsetMarkerIndex", phiCurrentIndex);
604        b->setScalarField("pendingMatchOffsetMarkerBits", finalEndBits);
605    }
606
607    void LZ4IndexBuilderKernel::storeMatchOffsetMarker(const std::unique_ptr<KernelBuilder> &b, llvm::Value *blockIndex, llvm::Value *value) {
608        int fw = 64;
609        Value* m0BufferBlocks = b->CreateUDiv(b->getCapacity("MatchOffsetMarker"), b->getSize(fw));
610        Value* indexRem = b->CreateURem(blockIndex, m0BufferBlocks);
611        Value* outputBasePtr = b->CreatePointerCast(b->getRawOutputPointer("MatchOffsetMarker", b->getSize(0)), b->getIntNTy(fw)->getPointerTo());
612        b->CreateStore(value, b->CreateGEP(outputBasePtr, indexRem));
613    }
614
615    void LZ4IndexBuilderKernel::storePendingMatchOffsetMarker(const std::unique_ptr<KernelBuilder> &b) {
616        this->storeMatchOffsetMarker(
617                b,
618                b->getScalarField("pendingMarchOffsetMarkerIndex"),
619                b->getScalarField("pendingMatchOffsetMarkerBits")
620        );
621    }
622}
Note: See TracBrowser for help on using the repository browser.