source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_extract_e_m0.cpp @ 5921

Last change on this file since 5921 was 5921, checked in by xwa163, 13 months ago
  1. Initial checkin for new approach for lz4 index decoder that always use 4MB buffer
  2. Add test case for new approach (for now test cases will fail when test file is larger than 4MB)
File size: 31.5 KB
Line 
1
2#include "lz4_extract_e_m0.h"
3#include <kernels/kernel_builder.h>
4#include <toolchain/toolchain.h>
5#include <kernels/streamset.h>
6#include <iostream>
7
8//#define APPLY_64PACK_ACCELERATION
9// TODO: Maybe we can change this to 256-pack acceleration based on SIMD instructions
10
11#define ACCELERATION_LOOP_COUNT (20)
12
13using namespace llvm;
14using namespace kernel;
15using namespace std;
16
17void LZ4ExtractEM0Kernel::generateDoSequentialSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
18    BasicBlock* entryBlock = iBuilder->GetInsertBlock();
19
20    BasicBlock* blockDataLoopCon = iBuilder->CreateBasicBlock("block_data_loop_con");
21    BasicBlock* blockDataLoopProcess = iBuilder->CreateBasicBlock("block_data_loop_process");
22    BasicBlock* blockDataLoopCompressed = iBuilder->CreateBasicBlock("block_data_loop_compressed");
23    BasicBlock* blockDataLoopUncompressed = iBuilder->CreateBasicBlock("block_data_loop_uncompressed");
24
25    BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exit_block");
26
27    iBuilder->CreateBr(blockDataLoopCon);
28
29    // blockDataLoopCon
30    iBuilder->SetInsertPoint(blockDataLoopCon);
31    Value* blockDataIndex = iBuilder->getScalarField("blockDataIndex");
32    Value* availableBlockData = iBuilder->getAvailableItemCount("blockStart");
33    iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, availableBlockData), blockDataLoopProcess, exitBlock);
34
35    // blockDataLoopProcess
36    iBuilder->SetInsertPoint(blockDataLoopProcess);
37    Value* isCompressed = this->generateLoadCircularInput(iBuilder, "isCompressed", blockDataIndex, iBuilder->getInt1Ty()->getPointerTo());
38    iBuilder->CreateCondBr(isCompressed, blockDataLoopCompressed, blockDataLoopUncompressed);
39
40    // blockDataLoop Compressed
41    iBuilder->SetInsertPoint(blockDataLoopCompressed);
42    this->generateHandleCompressedBlock(iBuilder);
43
44
45    this->generateIncreaseBlockDataIndex(iBuilder);
46    iBuilder->CreateBr(blockDataLoopCon);
47
48
49    // blockDataLoop Uncompressed
50    iBuilder->SetInsertPoint(blockDataLoopUncompressed);
51    //handle uncompressed block
52    this->generateRecordUncompressedBlock(iBuilder);
53//    iBuilder->setProducedItemCount("e1", this->loadCurrentBlockData(iBuilder, "blockEnd"));
54    this->generateIncreaseBlockDataIndex(iBuilder);
55    iBuilder->CreateBr(blockDataLoopCon);
56
57    //Exit
58    iBuilder->SetInsertPoint(exitBlock);
59
60}
61
62BasicBlock* LZ4ExtractEM0Kernel::generateHandleCompressedBlock(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
63    BasicBlock* entryBlock = iBuilder->GetInsertBlock();
64    BasicBlock* exitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_exit");
65
66    Value* blockStart = this->loadCurrentBlockData(iBuilder, "blockStart");
67
68    this->advanceCursorUntilPos(iBuilder, "extender", iBuilder->CreateZExtOrTrunc(blockStart, iBuilder->getSizeTy()));
69
70    BasicBlock* compressedBlockLoopCon = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_con");
71    BasicBlock* compressedBlockLoopBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_body");
72    BasicBlock* compressedBlockLoopFinal = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_final");
73
74    iBuilder->CreateBr(compressedBlockLoopCon);
75
76    // compressedBlockLoopCon
77    iBuilder->SetInsertPoint(compressedBlockLoopCon);
78    Value* cursorValue = this->getCursorValue(iBuilder, "extender");
79    Value* blockEndPos = this->loadCurrentBlockData(iBuilder, "blockEnd");
80    iBuilder->CreateCondBr(iBuilder->CreateICmpULT(cursorValue, blockEndPos), compressedBlockLoopBody, exitBlock);
81
82    // body
83    iBuilder->SetInsertPoint(compressedBlockLoopBody);
84
85#ifdef APPLY_64PACK_ACCELERATION
86    BasicBlock* accelerationEndBlock = iBuilder->CreateBasicBlock("acceleration_end_block");
87    BasicBlock* accelerationFinishBlock = iBuilder->CreateBasicBlock("acceleration_finish_block");
88
89    iBuilder->SetInsertPoint(accelerationEndBlock);
90
91    PHINode* phiTokenMarkers = iBuilder->CreatePHI(iBuilder->getInt64Ty(), ACCELERATION_LOOP_COUNT + 1);
92    PHINode* phiE1FinalValue = iBuilder->CreatePHI(iBuilder->getInt64Ty(), ACCELERATION_LOOP_COUNT + 1);
93    Value* extenderOffset = this->getCursorValue(iBuilder, "extender");
94    Value* packBaseOffset = this->offsetToPackBaseOffset(iBuilder, extenderOffset);
95
96    //    Value* tempTokenMarkers = iBuilder->getScalarField("temp_TokenMarkers");
97    Value *tokenPackPos = iBuilder->CreateSub(
98            iBuilder->CreateSub(
99                    iBuilder->getSize(64),
100                    iBuilder->CreateCountReverseZeroes(phiTokenMarkers)
101            ),
102            iBuilder->getSize(1)
103    );
104
105    Value *tokenActualPos = iBuilder->CreateAdd(packBaseOffset, tokenPackPos);
106
107    //TODO output here
108    {
109        Value* targetPackIndex = iBuilder->CreateSub(iBuilder->getSize(64), iBuilder->CreateCountReverseZeroes(phiE1FinalValue));
110        Value* targetActualIndex = iBuilder->CreateAdd(targetPackIndex, packBaseOffset);
111        Value* preActualIndex = iBuilder->getProducedItemCount("e1Marker");
112        targetActualIndex = iBuilder->CreateSelect(
113                iBuilder->CreateICmpUGE(targetActualIndex, preActualIndex),
114                targetActualIndex,
115                preActualIndex
116        );
117        this->markCircularOutputBitstream(iBuilder, "e1Marker", preActualIndex, tokenActualPos, false, false);
118
119        Value* extenderOffset = this->getCursorValue(iBuilder, "extender");
120        Value* targetOutputIndex = iBuilder->CreateLShr(extenderOffset, iBuilder->getSize(std::log2(64)));
121        size_t packNum = this->getOutputBufferSize(iBuilder, "e1Marker") / 64;
122        Value* maskedPackIndex = iBuilder->CreateAnd(targetOutputIndex, iBuilder->getSize(packNum - 1));
123        Value* accelerationOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("e1Marker", iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
124        accelerationOutputPtr = iBuilder->CreateGEP(accelerationOutputPtr, maskedPackIndex);
125
126//        iBuilder->CallPrintInt("expected", iBuilder->CreateLoad(accelerationOutputPtr));
127
128        Value* v = iBuilder->CreateOr(iBuilder->CreateLoad(accelerationOutputPtr), phiE1FinalValue);
129        // TODO phiE1FinalValue
130        iBuilder->CreateStore(v, accelerationOutputPtr);
131//        iBuilder->CallPrintInt("actual", iBuilder->CreateLoad(accelerationOutputPtr));
132
133        iBuilder->setProducedItemCount("e1Marker", targetActualIndex);
134    }
135
136
137
138    this->advanceCursorUntilPos(iBuilder, "extender", tokenActualPos);
139    iBuilder->CreateBr(accelerationFinishBlock);
140
141
142    iBuilder->SetInsertPoint(compressedBlockLoopBody);
143    //------------------------------------ 64 Pack Acceleration Start
144    this->waitCursorUntilInputAvailable(iBuilder, "extender", "extender");
145
146    Value* currentBlockEnd = this->loadCurrentBlockData(iBuilder, "blockEnd");
147
148    extenderOffset = this->getCursorValue(iBuilder, "extender");
149    Value* targetOutputIndex = iBuilder->CreateLShr(extenderOffset, iBuilder->getSize(std::log2(64)));
150    size_t packNum = this->getOutputBufferSize(iBuilder, "e1Marker") / 64;
151    Value* maskedPackIndex = iBuilder->CreateAnd(targetOutputIndex, iBuilder->getSize(packNum - 1));
152    Value* accelerationOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("e1Marker", iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
153    accelerationOutputPtr = iBuilder->CreateGEP(accelerationOutputPtr, maskedPackIndex);
154
155
156
157//    Value* accelerationPackIndex = this->offsetToPackIndex(iBuilder, extenderOffset);
158//    Value* maskedAccelerationPackIndex = this->generateLoadCircularInputPack()
159
160
161    packBaseOffset = this->offsetToPackBaseOffset(iBuilder, extenderOffset);
162    Value* packOffset = this->offsetToPackOffset(iBuilder, extenderOffset);
163
164    Value* extenderPackData = this->generateLoadCircularInputPack(iBuilder, "extender", extenderOffset);
165    Value* CC_0xFXPackData = this->generateLoadCircularInputPack(iBuilder, "CC_0xFX", extenderOffset);
166    Value* CC_0xXFPackData = this->generateLoadCircularInputPack(iBuilder, "CC_0xXF", extenderOffset);
167
168    // extend_match_only_byte = CC_0xFX &~ CC_0xXF
169    Value* extendMatchOnlyBytePack = iBuilder->CreateAnd(
170            CC_0xXFPackData,
171            iBuilder->CreateNot(
172                    CC_0xFXPackData
173            )
174    );
175
176    Value* tokenMarkers = iBuilder->CreateShl(iBuilder->getInt64(1), packOffset);
177//    Value* outputInitValue = iBuilder->CreateLoad(accelerationOutputPtr);
178    Value* outputInitValue = iBuilder->getInt64(0);
179
180
181//    iBuilder->setScalarField("temp_TokenMarkers", tokenMarkers);
182
183    for (int i = 0; i < ACCELERATION_LOOP_COUNT; i++) {
184        Value *tokenPackPos = iBuilder->CreateSub(
185                iBuilder->CreateSub(
186                        iBuilder->getSize(64),
187                        iBuilder->CreateCountReverseZeroes(tokenMarkers)
188                ),
189                iBuilder->getSize(1)
190        );
191        Value *tokenActualPos = iBuilder->CreateAdd(packBaseOffset, tokenPackPos);
192
193        Value *tokenValue = iBuilder->CreateZExt(
194                this->generateLoadSourceInputByte(iBuilder, "byteStream", tokenActualPos),
195                iBuilder->getSizeTy()
196        );
197
198        Value *literalLengthBase =
199                iBuilder->CreateLShr(
200                        tokenValue,
201                        iBuilder->getSize(4)
202                );
203
204        Value* matchLengthBase = iBuilder->CreateAnd(
205                tokenValue,
206                iBuilder->getSize(0x0f)
207        );
208        matchLengthBase = iBuilder->CreateAdd(matchLengthBase, iBuilder->getSize(0x4));
209
210        Value *tokenMarker = iBuilder->CreateShl(iBuilder->getInt64(1), tokenPackPos);
211        Value * notExtendLiteralMarker = iBuilder->CreateAnd(
212                tokenMarker,
213                iBuilder->CreateNot(CC_0xFXPackData)
214        );
215        ////
216        Value* expectedNotExtendNextTokenMarker = iBuilder->CreateShl(
217                notExtendLiteralMarker,
218                iBuilder->CreateAdd(literalLengthBase, iBuilder->getSize(3))
219        );  // If not extend literal or match, next token pos will be here, 1 (token) + literalLengthBase + 2 (match offset)
220
221
222        Value *matchExtendOnlyMarker = iBuilder->CreateAnd(
223                tokenMarker,
224                extendMatchOnlyBytePack
225        );
226
227        Value *extenderMarker = iBuilder->CreateShl(
228                matchExtendOnlyMarker,
229                iBuilder->CreateAdd(literalLengthBase, iBuilder->getSize(2))
230        );  // Match offset
231
232        //ScanThru
233        ////
234        Value *expectedNextTokenMarker = iBuilder->CreateAnd(
235                iBuilder->CreateAdd(
236                        extenderMarker,
237                        iBuilder->CreateOr(
238                                extenderMarker,
239                                extenderPackData
240                        )
241                ),
242                iBuilder->CreateNot(extenderPackData)
243        );
244        expectedNextTokenMarker = iBuilder->CreateShl(
245                expectedNextTokenMarker,
246                iBuilder->getSize(1)
247        );
248
249        Value* needExtendMatch = iBuilder->CreateNot(iBuilder->CreateICmpEQ(matchExtendOnlyMarker, iBuilder->getSize(0)));
250
251        expectedNextTokenMarker = iBuilder->CreateSelect(
252                needExtendMatch,
253                expectedNextTokenMarker,
254                expectedNotExtendNextTokenMarker
255        );
256
257        Value *expectedNextTokenPos = iBuilder->CreateAdd(
258                packBaseOffset,
259                iBuilder->CreateCountForwardZeroes(expectedNextTokenMarker)
260        );
261        Value *reachBlockEnd = iBuilder->CreateICmpUGE(
262                expectedNextTokenPos,
263                currentBlockEnd
264        );
265
266        expectedNextTokenMarker = iBuilder->CreateSelect(
267                reachBlockEnd,
268                iBuilder->getInt64(0),
269                expectedNextTokenMarker
270        );
271
272        tokenMarkers = iBuilder->CreateOr(tokenMarkers, expectedNextTokenMarker, "tokenMarkers");
273//        iBuilder->setScalarField("temp_TokenMarkers", tokenMarkers);
274
275        Value* matchOffsetActualPos = iBuilder->CreateAdd(
276                tokenActualPos,
277                iBuilder->CreateAdd(
278                        iBuilder->getSize(1),
279                        literalLengthBase
280                )
281        );
282
283        Value* matchOffsetValue = iBuilder->CreateAdd(
284                iBuilder->CreateZExt(
285                        this->generateLoadSourceInputByte(iBuilder, "byteStream", matchOffsetActualPos),
286                        iBuilder->getSizeTy()
287                ),
288                iBuilder->CreateShl(
289                        iBuilder->CreateZExt(this->generateLoadSourceInputByte(
290                                iBuilder,
291                                "byteStream",
292                                iBuilder->CreateAdd(matchOffsetActualPos , iBuilder->getSize(1))),
293                                             iBuilder->getSizeTy()
294                        ),
295                        iBuilder->getSize(8)
296                )
297        );
298
299
300        Value* shouldEndAcceleration = iBuilder->CreateOr(
301                iBuilder->CreateICmpEQ(expectedNextTokenMarker, iBuilder->getSize(0)),
302                reachBlockEnd
303        );
304
305
306        BasicBlock *pack64AcceMatchExtendOnlyExitBlock = iBuilder->CreateBasicBlock(
307                "pack64AcceMatchExtendOnlyExitBlock");
308
309        phiTokenMarkers->addIncoming(tokenMarkers, iBuilder->GetInsertBlock());
310        phiE1FinalValue->addIncoming(outputInitValue, iBuilder->GetInsertBlock());
311
312        iBuilder->CreateUnlikelyCondBr(
313                shouldEndAcceleration,
314                accelerationEndBlock,
315                pack64AcceMatchExtendOnlyExitBlock
316        );
317
318        iBuilder->SetInsertPoint(pack64AcceMatchExtendOnlyExitBlock);
319
320        // ------------------
321        // Yellow Logic Start
322        Value *nextTokenPos = iBuilder->CreateAdd(
323                packBaseOffset,
324                iBuilder->CreateCountForwardZeroes(expectedNextTokenMarker)
325        );
326
327        Value *matchExtendLastBitPos = iBuilder->CreateSub(
328                nextTokenPos,
329                iBuilder->getSize(1)
330        );
331
332        Value *matchExtendLastBitValue = this->generateLoadSourceInputByte(iBuilder, "byteStream",
333                                                                           matchExtendLastBitPos);
334        matchExtendLastBitValue = iBuilder->CreateZExt(matchExtendLastBitValue, iBuilder->getSizeTy());
335        Value *matchLength = iBuilder->CreateAdd(
336                matchExtendLastBitValue,
337                matchLengthBase
338        );
339
340        Value *matchExtendLength = iBuilder->CreateSub(
341                iBuilder->CreateCountForwardZeroes(expectedNextTokenMarker),
342                iBuilder->CreateCountForwardZeroes(extenderMarker)
343        );
344
345        matchLength = iBuilder->CreateSelect(
346                needExtendMatch,
347                iBuilder->CreateAdd(
348                        matchLength,
349                        iBuilder->CreateMul(
350                                iBuilder->CreateSub(matchExtendLength, iBuilder->getSize(2)),
351                                iBuilder->getSize(0xff)
352                        )
353                ),
354                matchLengthBase
355        );
356
357        Value* oldM0OutputPos = iBuilder->getScalarField("m0OutputPos");
358
359
360        // Mark E1
361        Value *expectedNewOffsetPos = iBuilder->CreateAdd(
362                tokenActualPos,
363                iBuilder->CreateAdd(
364                        literalLengthBase,
365                        iBuilder->getSize(1)
366                )
367        );
368        Value *newOffsetPos = expectedNewOffsetPos;
369        iBuilder->setScalarField("offsetPos", newOffsetPos);
370
371        // Yellow Logic End
372        // ------------------------------
373        // Blue Logic Start
374
375        // e1 start:tokenActualPos, e1 end:expectedNewOffsetPos,
376//        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->getProducedItemCount("e1Marker"), tokenActualPos, false);
377//        this->markCircularOutputBitstream(iBuilder, "e1Marker", tokenActualPos, expectedNewOffsetPos, true);
378
379
380        Value* newMask = iBuilder->CreateSub(
381                iBuilder->CreateShl(iBuilder->getInt64(1), iBuilder->CreateSub(expectedNewOffsetPos, packBaseOffset)),
382                iBuilder->CreateShl(iBuilder->getInt64(1), iBuilder->CreateAdd(iBuilder->CreateSub(tokenActualPos, packBaseOffset), iBuilder->getSize(1)))
383        );
384
385        outputInitValue = iBuilder->CreateOr(outputInitValue, newMask);
386
387        Value* basePtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("e1Marker", iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
388
389
390        Value* m0OutputPos = iBuilder->CreateAdd(oldM0OutputPos, literalLengthBase);
391
392        // Mark M0
393        Value *outputEndPos = iBuilder->CreateSub(
394                iBuilder->CreateAdd(m0OutputPos, matchLength),
395                iBuilder->getInt64(1)
396        );
397
398        this->generateStoreCircularOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), m0OutputPos);
399//        iBuilder->CallPrintInt("m0Start", m0OutputPos);
400        this->generateStoreCircularOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
401//        iBuilder->CallPrintInt("m0End", outputEndPos);
402        this->generateStoreCircularOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffsetValue);
403//        iBuilder->CallPrintInt("matchOffset1", matchOffsetValue);
404
405        m0OutputPos = iBuilder->CreateAdd(m0OutputPos, matchLength);
406        iBuilder->setScalarField("m0OutputPos", m0OutputPos);
407
408        // Blue Logic End
409        // ---------------------------
410    }
411    phiTokenMarkers->addIncoming(tokenMarkers, iBuilder->GetInsertBlock());
412    phiE1FinalValue->addIncoming(outputInitValue, iBuilder->GetInsertBlock());
413
414    //------------------------------------ 64 Pack Acceleration End
415    // Config Extender Cursor after Acceleration
416    iBuilder->CreateBr(accelerationEndBlock);
417
418    iBuilder->SetInsertPoint(accelerationFinishBlock);
419
420    //------------------------------------- Finish
421#endif
422
423//    iBuilder->CallPrintInt("tokenPos", this->getCursorValue(iBuilder, "extender"));
424    Value* token = this->generateLoadSourceInputByte(iBuilder, "byteStream", this->getCursorValue(iBuilder, "extender"));
425
426    iBuilder->CallPrintInt("tokenPos", this->getCursorValue(iBuilder, "extender"));
427//    iBuilder->CallPrintInt("token", token);
428
429//    iBuilder->CreateAssert(iBuilder->CreateICmpULT(this->getCursorValue(iBuilder, "extender"), iBuilder->getSize(0xcb32a)), "ee");
430    iBuilder->setScalarField("token", token);
431
432    Value* extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
433
434    // TokenPos can not be refered by next few statements since they may be in different segment
435    iBuilder->setScalarField("tokenPos", this->getCursorValue(iBuilder, "extender"));
436
437    BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
438    BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
439
440    iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthBody, extendLiteralLengthExit);
441
442    iBuilder->SetInsertPoint(extendLiteralLengthBody);
443
444    this->advanceCursor(iBuilder, "extender", iBuilder->getSize(1));
445
446    this->advanceCursorUntilNextZero(iBuilder, "extender", "extender", this->loadCurrentBlockData(iBuilder, "blockEnd"));
447
448    iBuilder->CreateBr(extendLiteralLengthExit);
449
450    iBuilder->SetInsertPoint(extendLiteralLengthExit);
451    // ----May be in a different segment now
452    Value* literalLengthEndPos = this->getCursorValue(iBuilder, "extender");
453    Value* literalExtensionSize = iBuilder->CreateSub(literalLengthEndPos, iBuilder->getScalarField("tokenPos"));
454//    iBuilder->CallPrintInt("literalExtensionSize", literalExtensionSize);
455    Value* finalLengthByte = this->generateLoadSourceInputByte(iBuilder, "byteStream", this->getCursorValue(iBuilder, "extender"));
456
457    finalLengthByte = iBuilder->CreateZExt(finalLengthByte, iBuilder->getSizeTy());
458    Value* literalLengthExtendValue = iBuilder->CreateSelect(
459            iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)),
460            iBuilder->CreateAdd(
461                    iBuilder->CreateMul(
462                            iBuilder->CreateSub(literalExtensionSize, iBuilder->getSize(1)),
463                            iBuilder->getSize(255)
464                    ),
465                    finalLengthByte
466            ),
467            iBuilder->getSize(0)
468    );
469    literalLengthExtendValue = iBuilder->CreateZExt(literalLengthExtendValue, iBuilder->getInt64Ty());
470    token = iBuilder->getScalarField("token");
471    Value* literalLengthBase = iBuilder->CreateLShr(iBuilder->CreateZExt(token, iBuilder->getInt64Ty()), iBuilder->getInt64(4));
472    Value* literalLength = iBuilder->CreateAdd(literalLengthBase, literalLengthExtendValue);
473
474
475    Value* offsetPos = iBuilder->CreateAdd(
476            iBuilder->CreateAdd(
477                    literalLengthEndPos,
478                    literalLength),
479            iBuilder->getSize(1));
480    iBuilder->setScalarField("offsetPos", offsetPos);
481//    iBuilder->CallPrintInt("offsetPos", offsetPos);
482//    iBuilder->CallPrintInt("literalStart", iBuilder->CreateAdd(literalLengthEndPos, iBuilder->getSize(1)));
483//    iBuilder->CallPrintInt("literalLength", literalLength);
484    this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->getProducedItemCount("e1Marker"), iBuilder->CreateAdd(literalLengthEndPos, iBuilder->getSize(1)), false);
485    this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->CreateAdd(literalLengthEndPos, iBuilder->getSize(1)), offsetPos, true);
486
487    Value* basePtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("e1Marker", iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
488
489    this->increaseScalarField(iBuilder, "m0OutputPos", literalLength);
490
491
492    BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
493    BasicBlock* handleM0ElseBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_else");
494
495    iBuilder->CreateCondBr(
496            iBuilder->CreateICmpULT(offsetPos, this->loadCurrentBlockData(iBuilder, "blockEnd")),
497            handleM0BodyBlock,
498            handleM0ElseBlock
499    );
500    // HandleM0Body
501
502    iBuilder->SetInsertPoint(handleM0BodyBlock);
503
504    Value* matchLengthStartPos = iBuilder->CreateAdd(iBuilder->getScalarField("offsetPos"), iBuilder->getSize(1));
505    iBuilder->setScalarField("matchLengthStartPos", matchLengthStartPos);
506    this->advanceCursorUntilPos(iBuilder, "extender", matchLengthStartPos);
507
508
509    token = iBuilder->getScalarField("token");
510
511    Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf), "extendedMatchValue");
512
513    BasicBlock* extendMatchBodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_body");
514    BasicBlock* extendMatchExitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_exit");
515
516    iBuilder->CreateCondBr(extendedMatchValue, extendMatchBodyBlock, extendMatchExitBlock);
517
518    iBuilder->SetInsertPoint(extendMatchBodyBlock);
519
520    //ExtendMatchBodyBlock
521    this->advanceCursor(iBuilder, "extender", iBuilder->getSize(1));
522    this->advanceCursorUntilNextZero(iBuilder, "extender", "extender", this->loadCurrentBlockData(iBuilder, "blockEnd"));
523
524    // ----May be in a different segment now
525    iBuilder->CreateBr(extendMatchExitBlock);
526
527    //ExtendMatchExitBlock
528    iBuilder->SetInsertPoint(extendMatchExitBlock);
529    matchLengthStartPos = iBuilder->getScalarField("matchLengthStartPos");
530    Value* oldMatchExtensionSize = iBuilder->CreateSub(this->getCursorValue(iBuilder, "extender"), matchLengthStartPos);
531    iBuilder->CallPrintInt("aaa", oldMatchExtensionSize);
532
533    token = iBuilder->getScalarField("token");
534    extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
535
536    Value* matchExtensionSize = iBuilder->CreateSelect(
537            iBuilder->CreateICmpEQ(extendedMatchValue, iBuilder->getInt1(true)),
538            oldMatchExtensionSize,
539            iBuilder->getSize(0)
540    );
541
542
543    Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
544    Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
545
546
547    Value* extensionLastBitPos = iBuilder->CreateAdd(iBuilder->getScalarField("offsetPos"), iBuilder->getSize(1));
548    extensionLastBitPos = iBuilder->CreateAdd(extensionLastBitPos, matchExtensionSize);
549    Value* extensionLastBitValue = this->generateLoadSourceInputByte(iBuilder, "byteStream", extensionLastBitPos);
550    extensionLastBitValue = iBuilder->CreateZExt(extensionLastBitValue, iBuilder->getSizeTy());
551
552
553    Value* matchLengthAddValue = iBuilder->CreateSelect(
554            iBuilder->CreateICmpUGT(matchExtensionSize, iBuilder->getSize(0)),
555            iBuilder->CreateAdd(
556                    iBuilder->CreateMul(
557                            iBuilder->CreateSub(matchExtensionSize, iBuilder->getSize(1)),
558                            iBuilder->getSize(255)
559                    ),
560                    extensionLastBitValue
561            )
562            ,
563            iBuilder->getSize(0)
564    );
565    matchLengthAddValue = iBuilder->CreateZExt(matchLengthAddValue, iBuilder->getInt64Ty());
566
567    matchLength = iBuilder->CreateAdd(matchLength, matchLengthAddValue);
568
569    Value* outputPos = iBuilder->getScalarField("m0OutputPos");
570
571    Value* outputEndPos = iBuilder->CreateSub(
572            iBuilder->CreateAdd(outputPos, matchLength),
573            iBuilder->getInt64(1)
574    );
575
576    Value* matchOffset = iBuilder->CreateAdd(
577            iBuilder->CreateZExt(
578                    this->generateLoadSourceInputByte(iBuilder, "byteStream", iBuilder->getScalarField("offsetPos")),
579                    iBuilder->getSizeTy()
580            ),
581            iBuilder->CreateShl(
582                    iBuilder->CreateZExt(this->generateLoadSourceInputByte(
583                            iBuilder,
584                            "byteStream",
585                            iBuilder->CreateAdd(iBuilder->getScalarField("offsetPos"), iBuilder->getSize(1))),
586                                         iBuilder->getSizeTy()
587                    ),
588                    iBuilder->getSize(8)
589            )
590    );
591//    iBuilder->CallPrintInt("matchOffset", matchOffset);
592
593
594    this->generateStoreCircularOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
595//    iBuilder->CallPrintInt("m0Start", outputPos);
596    this->generateStoreCircularOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
597//    iBuilder->CallPrintInt("m0End", outputEndPos);
598    this->generateStoreCircularOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffset);
599//    iBuilder->CallPrintInt("matchOffset", matchOffset);
600
601
602    this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
603    this->advanceCursor(iBuilder, "extender", iBuilder->getSize(1));
604//    iBuilder->CallPrintInt("bbb", this->getCursorValue(iBuilder, "extender"));
605
606    iBuilder->CreateBr(compressedBlockLoopFinal);
607
608    // HandleM0Else
609    iBuilder->SetInsertPoint(handleM0ElseBlock);
610    this->advanceCursorUntilPos(iBuilder, "extender", iBuilder->getScalarField("offsetPos"));
611
612    // Store final M0 pos to make sure the bit stream will be long enough
613    Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
614    this->generateStoreCircularOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
615    this->generateStoreCircularOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
616    this->generateStoreCircularOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), iBuilder->getInt64(0));
617
618    iBuilder->CreateBr(compressedBlockLoopFinal);
619
620    // final
621    iBuilder->SetInsertPoint(compressedBlockLoopFinal);
622    iBuilder->CreateBr(compressedBlockLoopCon);
623
624    // Exit
625    iBuilder->SetInsertPoint(exitBlock);
626
627    return exitBlock;
628}
629
630void LZ4ExtractEM0Kernel::generateRecordUncompressedBlock(const unique_ptr<kernel::KernelBuilder> & iBuilder) {
631    Value* blockStart = this->loadCurrentBlockData(iBuilder, "blockStart");
632    Value* blockEnd = this->loadCurrentBlockData(iBuilder, "blockEnd");
633    Value* length = iBuilder->CreateSub(blockEnd, blockStart);
634    Value* outputPos = iBuilder->getScalarField("m0OutputPos");
635    this->increaseScalarField(iBuilder, "m0OutputPos", length);
636
637    // Store Uncompressed Data
638    this->generateStoreCircularOutput(iBuilder, "uncompressedStartPos", iBuilder->getInt64Ty()->getPointerTo(), blockStart);
639    this->generateStoreCircularOutput(iBuilder, "uncompressedLength", iBuilder->getInt64Ty()->getPointerTo(), length);
640    this->generateStoreCircularOutput(iBuilder, "uncompressedOutputPos", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
641}
642
643void LZ4ExtractEM0Kernel::generateIncreaseBlockDataIndex(const unique_ptr<kernel::KernelBuilder> & iBuilder) {
644    this->increaseScalarField(iBuilder, "blockDataIndex", iBuilder->getSize(1));
645}
646
647Value* LZ4ExtractEM0Kernel::loadCurrentBlockData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const std::string& name) {
648    Value* blockDataIndex = iBuilder->getScalarField("blockDataIndex");
649    return this->generateLoadCircularInput(iBuilder, name, blockDataIndex, iBuilder->getInt64Ty()->getPointerTo());
650}
651
652LZ4ExtractEM0Kernel::LZ4ExtractEM0Kernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, const std::map<std::string, size_t>& inputIndexMap):
653        SequentialKernel(
654                iBuilder,
655                "lz4_extract_e_m0_kernel",
656                {//Inputs
657                        Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"},
658                        Binding{iBuilder->getStreamSetTy(1, 1), "extender"},
659                        Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xFX"},
660                        Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xXF"},
661
662                        // block data
663                        Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1)},
664                        Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1)},
665                        Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1)}
666                },
667                {//Outputs
668                        // Uncompressed_data
669                        Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos", BoundedRate(0, 1)},
670                        Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength", BoundedRate(0, 1)},
671                        Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos", BoundedRate(0, 1)},
672
673                        Binding{iBuilder->getStreamSetTy(1, 1), "e1Marker", BoundedRate(0, 1)},
674                        Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
675                        Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
676                        Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)}
677                },
678                {//Arguments
679                },
680                {},
681                {//Internal States
682                        Binding{iBuilder->getSizeTy(), "blockDataIndex"},
683                        Binding{iBuilder->getInt64Ty(), "m0OutputPos"},
684                        Binding{iBuilder->getSizeTy(), "tokenPos"},
685                        Binding{iBuilder->getInt8Ty(), "token"},
686                        Binding{iBuilder->getSizeTy(), "matchLengthStartPos"},
687                        Binding{iBuilder->getSizeTy(), "offsetPos"}
688//                        Binding{iBuilder->getInt64Ty(), "temp_TokenMarkers"}
689                }
690        ) {
691    this->initBufferCursor(iBuilder, {"extender"});
692}
Note: See TracBrowser for help on using the repository browser.