source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_extract_e_m0.cpp @ 5922

Last change on this file since 5922 was 5922, checked in by xwa163, 16 months ago

Remove some debug printing in lz4_extract_e_m0_kernel

File size: 31.5 KB
Line 
1
2#include "lz4_extract_e_m0.h"
3#include <kernels/kernel_builder.h>
4#include <toolchain/toolchain.h>
5#include <kernels/streamset.h>
6#include <iostream>
7
8//#define APPLY_64PACK_ACCELERATION
9// TODO: Maybe we can change this to a 256-pack acceleration based on SIMD instructions
10
11#define ACCELERATION_LOOP_COUNT (20)
12
13using namespace llvm;
14using namespace kernel;
15using namespace std;
16
17void LZ4ExtractEM0Kernel::generateDoSequentialSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
18    BasicBlock* entryBlock = iBuilder->GetInsertBlock();
19
20    BasicBlock* blockDataLoopCon = iBuilder->CreateBasicBlock("block_data_loop_con");
21    BasicBlock* blockDataLoopProcess = iBuilder->CreateBasicBlock("block_data_loop_process");
22    BasicBlock* blockDataLoopCompressed = iBuilder->CreateBasicBlock("block_data_loop_compressed");
23    BasicBlock* blockDataLoopUncompressed = iBuilder->CreateBasicBlock("block_data_loop_uncompressed");
24
25    BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exit_block");
26
27    iBuilder->CreateBr(blockDataLoopCon);
28
29    // blockDataLoopCon
30    iBuilder->SetInsertPoint(blockDataLoopCon);
31    Value* blockDataIndex = iBuilder->getScalarField("blockDataIndex");
32    Value* availableBlockData = iBuilder->getAvailableItemCount("blockStart");
33    iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, availableBlockData), blockDataLoopProcess, exitBlock);
34
35    // blockDataLoopProcess
36    iBuilder->SetInsertPoint(blockDataLoopProcess);
37    Value* isCompressed = this->generateLoadCircularInput(iBuilder, "isCompressed", blockDataIndex, iBuilder->getInt1Ty()->getPointerTo());
38    iBuilder->CreateCondBr(isCompressed, blockDataLoopCompressed, blockDataLoopUncompressed);
39
40    // blockDataLoop Compressed
41    iBuilder->SetInsertPoint(blockDataLoopCompressed);
42    this->generateHandleCompressedBlock(iBuilder);
43
44
45    this->generateIncreaseBlockDataIndex(iBuilder);
46    iBuilder->CreateBr(blockDataLoopCon);
47
48
49    // blockDataLoop Uncompressed
50    iBuilder->SetInsertPoint(blockDataLoopUncompressed);
51    //handle uncompressed block
52    this->generateRecordUncompressedBlock(iBuilder);
53//    iBuilder->setProducedItemCount("e1", this->loadCurrentBlockData(iBuilder, "blockEnd"));
54    this->generateIncreaseBlockDataIndex(iBuilder);
55    iBuilder->CreateBr(blockDataLoopCon);
56
57    //Exit
58    iBuilder->SetInsertPoint(exitBlock);
59
60}
61
62BasicBlock* LZ4ExtractEM0Kernel::generateHandleCompressedBlock(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
63    BasicBlock* entryBlock = iBuilder->GetInsertBlock();
64    BasicBlock* exitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_exit");
65
66    Value* blockStart = this->loadCurrentBlockData(iBuilder, "blockStart");
67
68    this->advanceCursorUntilPos(iBuilder, "extender", iBuilder->CreateZExtOrTrunc(blockStart, iBuilder->getSizeTy()));
69
70    BasicBlock* compressedBlockLoopCon = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_con");
71    BasicBlock* compressedBlockLoopBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_body");
72    BasicBlock* compressedBlockLoopFinal = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_final");
73
74    iBuilder->CreateBr(compressedBlockLoopCon);
75
76    // compressedBlockLoopCon
77    iBuilder->SetInsertPoint(compressedBlockLoopCon);
78    Value* cursorValue = this->getCursorValue(iBuilder, "extender");
79    Value* blockEndPos = this->loadCurrentBlockData(iBuilder, "blockEnd");
80    iBuilder->CreateCondBr(iBuilder->CreateICmpULT(cursorValue, blockEndPos), compressedBlockLoopBody, exitBlock);
81
82    // body
83    iBuilder->SetInsertPoint(compressedBlockLoopBody);
84
85#ifdef APPLY_64PACK_ACCELERATION
86    BasicBlock* accelerationEndBlock = iBuilder->CreateBasicBlock("acceleration_end_block");
87    BasicBlock* accelerationFinishBlock = iBuilder->CreateBasicBlock("acceleration_finish_block");
88
89    iBuilder->SetInsertPoint(accelerationEndBlock);
90
91    PHINode* phiTokenMarkers = iBuilder->CreatePHI(iBuilder->getInt64Ty(), ACCELERATION_LOOP_COUNT + 1);
92    PHINode* phiE1FinalValue = iBuilder->CreatePHI(iBuilder->getInt64Ty(), ACCELERATION_LOOP_COUNT + 1);
93    Value* extenderOffset = this->getCursorValue(iBuilder, "extender");
94    Value* packBaseOffset = this->offsetToPackBaseOffset(iBuilder, extenderOffset);
95
96    //    Value* tempTokenMarkers = iBuilder->getScalarField("temp_TokenMarkers");
97    Value *tokenPackPos = iBuilder->CreateSub(
98            iBuilder->CreateSub(
99                    iBuilder->getSize(64),
100                    iBuilder->CreateCountReverseZeroes(phiTokenMarkers)
101            ),
102            iBuilder->getSize(1)
103    );
104
105    Value *tokenActualPos = iBuilder->CreateAdd(packBaseOffset, tokenPackPos);
106
107    //TODO output here
108    {
109        Value* targetPackIndex = iBuilder->CreateSub(iBuilder->getSize(64), iBuilder->CreateCountReverseZeroes(phiE1FinalValue));
110        Value* targetActualIndex = iBuilder->CreateAdd(targetPackIndex, packBaseOffset);
111        Value* preActualIndex = iBuilder->getProducedItemCount("e1Marker");
112        targetActualIndex = iBuilder->CreateSelect(
113                iBuilder->CreateICmpUGE(targetActualIndex, preActualIndex),
114                targetActualIndex,
115                preActualIndex
116        );
117        this->markCircularOutputBitstream(iBuilder, "e1Marker", preActualIndex, tokenActualPos, false, false);
118
119        Value* extenderOffset = this->getCursorValue(iBuilder, "extender");
120        Value* targetOutputIndex = iBuilder->CreateLShr(extenderOffset, iBuilder->getSize(std::log2(64)));
121        size_t packNum = this->getOutputBufferSize(iBuilder, "e1Marker") / 64;
122        Value* maskedPackIndex = iBuilder->CreateAnd(targetOutputIndex, iBuilder->getSize(packNum - 1));
123        Value* accelerationOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("e1Marker", iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
124        accelerationOutputPtr = iBuilder->CreateGEP(accelerationOutputPtr, maskedPackIndex);
125
126//        iBuilder->CallPrintInt("expected", iBuilder->CreateLoad(accelerationOutputPtr));
127
128        Value* v = iBuilder->CreateOr(iBuilder->CreateLoad(accelerationOutputPtr), phiE1FinalValue);
129        // TODO phiE1FinalValue
130        iBuilder->CreateStore(v, accelerationOutputPtr);
131//        iBuilder->CallPrintInt("actual", iBuilder->CreateLoad(accelerationOutputPtr));
132
133        iBuilder->setProducedItemCount("e1Marker", targetActualIndex);
134    }
135
136
137
138    this->advanceCursorUntilPos(iBuilder, "extender", tokenActualPos);
139    iBuilder->CreateBr(accelerationFinishBlock);
140
141
142    iBuilder->SetInsertPoint(compressedBlockLoopBody);
143    //------------------------------------ 64 Pack Acceleration Start
144    this->waitCursorUntilInputAvailable(iBuilder, "extender", "extender");
145
146    Value* currentBlockEnd = this->loadCurrentBlockData(iBuilder, "blockEnd");
147
148    extenderOffset = this->getCursorValue(iBuilder, "extender");
149    Value* targetOutputIndex = iBuilder->CreateLShr(extenderOffset, iBuilder->getSize(std::log2(64)));
150    size_t packNum = this->getOutputBufferSize(iBuilder, "e1Marker") / 64;
151    Value* maskedPackIndex = iBuilder->CreateAnd(targetOutputIndex, iBuilder->getSize(packNum - 1));
152    Value* accelerationOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("e1Marker", iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
153    accelerationOutputPtr = iBuilder->CreateGEP(accelerationOutputPtr, maskedPackIndex);
154
155
156
157//    Value* accelerationPackIndex = this->offsetToPackIndex(iBuilder, extenderOffset);
158//    Value* maskedAccelerationPackIndex = this->generateLoadCircularInputPack()
159
160
161    packBaseOffset = this->offsetToPackBaseOffset(iBuilder, extenderOffset);
162    Value* packOffset = this->offsetToPackOffset(iBuilder, extenderOffset);
163
164    Value* extenderPackData = this->generateLoadCircularInputPack(iBuilder, "extender", extenderOffset);
165    Value* CC_0xFXPackData = this->generateLoadCircularInputPack(iBuilder, "CC_0xFX", extenderOffset);
166    Value* CC_0xXFPackData = this->generateLoadCircularInputPack(iBuilder, "CC_0xXF", extenderOffset);
167
168    // extend_match_only_byte = CC_0xFX &~ CC_0xXF
169    Value* extendMatchOnlyBytePack = iBuilder->CreateAnd(
170            CC_0xXFPackData,
171            iBuilder->CreateNot(
172                    CC_0xFXPackData
173            )
174    );
175
176    Value* tokenMarkers = iBuilder->CreateShl(iBuilder->getInt64(1), packOffset);
177//    Value* outputInitValue = iBuilder->CreateLoad(accelerationOutputPtr);
178    Value* outputInitValue = iBuilder->getInt64(0);
179
180
181//    iBuilder->setScalarField("temp_TokenMarkers", tokenMarkers);
182
183    for (int i = 0; i < ACCELERATION_LOOP_COUNT; i++) {
184        Value *tokenPackPos = iBuilder->CreateSub(
185                iBuilder->CreateSub(
186                        iBuilder->getSize(64),
187                        iBuilder->CreateCountReverseZeroes(tokenMarkers)
188                ),
189                iBuilder->getSize(1)
190        );
191        Value *tokenActualPos = iBuilder->CreateAdd(packBaseOffset, tokenPackPos);
192
193        Value *tokenValue = iBuilder->CreateZExt(
194                this->generateLoadSourceInputByte(iBuilder, "byteStream", tokenActualPos),
195                iBuilder->getSizeTy()
196        );
197
198        Value *literalLengthBase =
199                iBuilder->CreateLShr(
200                        tokenValue,
201                        iBuilder->getSize(4)
202                );
203
204        Value* matchLengthBase = iBuilder->CreateAnd(
205                tokenValue,
206                iBuilder->getSize(0x0f)
207        );
208        matchLengthBase = iBuilder->CreateAdd(matchLengthBase, iBuilder->getSize(0x4));
209
210        Value *tokenMarker = iBuilder->CreateShl(iBuilder->getInt64(1), tokenPackPos);
211        Value * notExtendLiteralMarker = iBuilder->CreateAnd(
212                tokenMarker,
213                iBuilder->CreateNot(CC_0xFXPackData)
214        );
215        ////
216        Value* expectedNotExtendNextTokenMarker = iBuilder->CreateShl(
217                notExtendLiteralMarker,
218                iBuilder->CreateAdd(literalLengthBase, iBuilder->getSize(3))
219        );  // If not extend literal or match, next token pos will be here, 1 (token) + literalLengthBase + 2 (match offset)
220
221
222        Value *matchExtendOnlyMarker = iBuilder->CreateAnd(
223                tokenMarker,
224                extendMatchOnlyBytePack
225        );
226
227        Value *extenderMarker = iBuilder->CreateShl(
228                matchExtendOnlyMarker,
229                iBuilder->CreateAdd(literalLengthBase, iBuilder->getSize(2))
230        );  // Match offset
231
232        //ScanThru
233        ////
234        Value *expectedNextTokenMarker = iBuilder->CreateAnd(
235                iBuilder->CreateAdd(
236                        extenderMarker,
237                        iBuilder->CreateOr(
238                                extenderMarker,
239                                extenderPackData
240                        )
241                ),
242                iBuilder->CreateNot(extenderPackData)
243        );
244        expectedNextTokenMarker = iBuilder->CreateShl(
245                expectedNextTokenMarker,
246                iBuilder->getSize(1)
247        );
248
249        Value* needExtendMatch = iBuilder->CreateNot(iBuilder->CreateICmpEQ(matchExtendOnlyMarker, iBuilder->getSize(0)));
250
251        expectedNextTokenMarker = iBuilder->CreateSelect(
252                needExtendMatch,
253                expectedNextTokenMarker,
254                expectedNotExtendNextTokenMarker
255        );
256
257        Value *expectedNextTokenPos = iBuilder->CreateAdd(
258                packBaseOffset,
259                iBuilder->CreateCountForwardZeroes(expectedNextTokenMarker)
260        );
261        Value *reachBlockEnd = iBuilder->CreateICmpUGE(
262                expectedNextTokenPos,
263                currentBlockEnd
264        );
265
266        expectedNextTokenMarker = iBuilder->CreateSelect(
267                reachBlockEnd,
268                iBuilder->getInt64(0),
269                expectedNextTokenMarker
270        );
271
272        tokenMarkers = iBuilder->CreateOr(tokenMarkers, expectedNextTokenMarker, "tokenMarkers");
273//        iBuilder->setScalarField("temp_TokenMarkers", tokenMarkers);
274
275        Value* matchOffsetActualPos = iBuilder->CreateAdd(
276                tokenActualPos,
277                iBuilder->CreateAdd(
278                        iBuilder->getSize(1),
279                        literalLengthBase
280                )
281        );
282
283        Value* matchOffsetValue = iBuilder->CreateAdd(
284                iBuilder->CreateZExt(
285                        this->generateLoadSourceInputByte(iBuilder, "byteStream", matchOffsetActualPos),
286                        iBuilder->getSizeTy()
287                ),
288                iBuilder->CreateShl(
289                        iBuilder->CreateZExt(this->generateLoadSourceInputByte(
290                                iBuilder,
291                                "byteStream",
292                                iBuilder->CreateAdd(matchOffsetActualPos , iBuilder->getSize(1))),
293                                             iBuilder->getSizeTy()
294                        ),
295                        iBuilder->getSize(8)
296                )
297        );
298
299
300        Value* shouldEndAcceleration = iBuilder->CreateOr(
301                iBuilder->CreateICmpEQ(expectedNextTokenMarker, iBuilder->getSize(0)),
302                reachBlockEnd
303        );
304
305
306        BasicBlock *pack64AcceMatchExtendOnlyExitBlock = iBuilder->CreateBasicBlock(
307                "pack64AcceMatchExtendOnlyExitBlock");
308
309        phiTokenMarkers->addIncoming(tokenMarkers, iBuilder->GetInsertBlock());
310        phiE1FinalValue->addIncoming(outputInitValue, iBuilder->GetInsertBlock());
311
312        iBuilder->CreateUnlikelyCondBr(
313                shouldEndAcceleration,
314                accelerationEndBlock,
315                pack64AcceMatchExtendOnlyExitBlock
316        );
317
318        iBuilder->SetInsertPoint(pack64AcceMatchExtendOnlyExitBlock);
319
320        // ------------------
321        // Yellow Logic Start
322        Value *nextTokenPos = iBuilder->CreateAdd(
323                packBaseOffset,
324                iBuilder->CreateCountForwardZeroes(expectedNextTokenMarker)
325        );
326
327        Value *matchExtendLastBitPos = iBuilder->CreateSub(
328                nextTokenPos,
329                iBuilder->getSize(1)
330        );
331
332        Value *matchExtendLastBitValue = this->generateLoadSourceInputByte(iBuilder, "byteStream",
333                                                                           matchExtendLastBitPos);
334        matchExtendLastBitValue = iBuilder->CreateZExt(matchExtendLastBitValue, iBuilder->getSizeTy());
335        Value *matchLength = iBuilder->CreateAdd(
336                matchExtendLastBitValue,
337                matchLengthBase
338        );
339
340        Value *matchExtendLength = iBuilder->CreateSub(
341                iBuilder->CreateCountForwardZeroes(expectedNextTokenMarker),
342                iBuilder->CreateCountForwardZeroes(extenderMarker)
343        );
344
345        matchLength = iBuilder->CreateSelect(
346                needExtendMatch,
347                iBuilder->CreateAdd(
348                        matchLength,
349                        iBuilder->CreateMul(
350                                iBuilder->CreateSub(matchExtendLength, iBuilder->getSize(2)),
351                                iBuilder->getSize(0xff)
352                        )
353                ),
354                matchLengthBase
355        );
356
357        Value* oldM0OutputPos = iBuilder->getScalarField("m0OutputPos");
358
359
360        // Mark E1
361        Value *expectedNewOffsetPos = iBuilder->CreateAdd(
362                tokenActualPos,
363                iBuilder->CreateAdd(
364                        literalLengthBase,
365                        iBuilder->getSize(1)
366                )
367        );
368        Value *newOffsetPos = expectedNewOffsetPos;
369        iBuilder->setScalarField("offsetPos", newOffsetPos);
370
371        // Yellow Logic End
372        // ------------------------------
373        // Blue Logic Start
374
375        // e1 start:tokenActualPos, e1 end:expectedNewOffsetPos,
376//        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->getProducedItemCount("e1Marker"), tokenActualPos, false);
377//        this->markCircularOutputBitstream(iBuilder, "e1Marker", tokenActualPos, expectedNewOffsetPos, true);
378
379
380        Value* newMask = iBuilder->CreateSub(
381                iBuilder->CreateShl(iBuilder->getInt64(1), iBuilder->CreateSub(expectedNewOffsetPos, packBaseOffset)),
382                iBuilder->CreateShl(iBuilder->getInt64(1), iBuilder->CreateAdd(iBuilder->CreateSub(tokenActualPos, packBaseOffset), iBuilder->getSize(1)))
383        );
384
385        outputInitValue = iBuilder->CreateOr(outputInitValue, newMask);
386
387        Value* basePtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("e1Marker", iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
388
389
390        Value* m0OutputPos = iBuilder->CreateAdd(oldM0OutputPos, literalLengthBase);
391
392        // Mark M0
393        Value *outputEndPos = iBuilder->CreateSub(
394                iBuilder->CreateAdd(m0OutputPos, matchLength),
395                iBuilder->getInt64(1)
396        );
397
398        this->generateStoreCircularOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), m0OutputPos);
399//        iBuilder->CallPrintInt("m0Start", m0OutputPos);
400        this->generateStoreCircularOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
401//        iBuilder->CallPrintInt("m0End", outputEndPos);
402        this->generateStoreCircularOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffsetValue);
403//        iBuilder->CallPrintInt("matchOffset1", matchOffsetValue);
404
405        m0OutputPos = iBuilder->CreateAdd(m0OutputPos, matchLength);
406        iBuilder->setScalarField("m0OutputPos", m0OutputPos);
407
408        // Blue Logic End
409        // ---------------------------
410    }
411    phiTokenMarkers->addIncoming(tokenMarkers, iBuilder->GetInsertBlock());
412    phiE1FinalValue->addIncoming(outputInitValue, iBuilder->GetInsertBlock());
413
414    //------------------------------------ 64 Pack Acceleration End
415    // Config Extender Cursor after Acceleration
416    iBuilder->CreateBr(accelerationEndBlock);
417
418    iBuilder->SetInsertPoint(accelerationFinishBlock);
419
420    //------------------------------------- Finish
421#endif
422
423//    iBuilder->CallPrintInt("tokenPos", this->getCursorValue(iBuilder, "extender"));
424    Value* token = this->generateLoadSourceInputByte(iBuilder, "byteStream", this->getCursorValue(iBuilder, "extender"));
425
426//    iBuilder->CallPrintInt("tokenPos", this->getCursorValue(iBuilder, "extender"));
427//    iBuilder->CallPrintInt("token", token);
428
429//    iBuilder->CreateAssert(iBuilder->CreateICmpULT(this->getCursorValue(iBuilder, "extender"), iBuilder->getSize(0xcb32a)), "ee");
430    iBuilder->setScalarField("token", token);
431
432    Value* extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
433
434    // TokenPos can not be refered by next few statements since they may be in different segment
435    iBuilder->setScalarField("tokenPos", this->getCursorValue(iBuilder, "extender"));
436
437    BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
438    BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
439
440    iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthBody, extendLiteralLengthExit);
441
442    iBuilder->SetInsertPoint(extendLiteralLengthBody);
443
444    this->advanceCursor(iBuilder, "extender", iBuilder->getSize(1));
445
446    this->advanceCursorUntilNextZero(iBuilder, "extender", "extender", this->loadCurrentBlockData(iBuilder, "blockEnd"));
447
448    iBuilder->CreateBr(extendLiteralLengthExit);
449
450    iBuilder->SetInsertPoint(extendLiteralLengthExit);
451    // ----May be in a different segment now
452    Value* literalLengthEndPos = this->getCursorValue(iBuilder, "extender");
453    Value* literalExtensionSize = iBuilder->CreateSub(literalLengthEndPos, iBuilder->getScalarField("tokenPos"));
454//    iBuilder->CallPrintInt("literalExtensionSize", literalExtensionSize);
455    Value* finalLengthByte = this->generateLoadSourceInputByte(iBuilder, "byteStream", this->getCursorValue(iBuilder, "extender"));
456
457    finalLengthByte = iBuilder->CreateZExt(finalLengthByte, iBuilder->getSizeTy());
458    Value* literalLengthExtendValue = iBuilder->CreateSelect(
459            iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)),
460            iBuilder->CreateAdd(
461                    iBuilder->CreateMul(
462                            iBuilder->CreateSub(literalExtensionSize, iBuilder->getSize(1)),
463                            iBuilder->getSize(255)
464                    ),
465                    finalLengthByte
466            ),
467            iBuilder->getSize(0)
468    );
469    literalLengthExtendValue = iBuilder->CreateZExt(literalLengthExtendValue, iBuilder->getInt64Ty());
470    token = iBuilder->getScalarField("token");
471    Value* literalLengthBase = iBuilder->CreateLShr(iBuilder->CreateZExt(token, iBuilder->getInt64Ty()), iBuilder->getInt64(4));
472    Value* literalLength = iBuilder->CreateAdd(literalLengthBase, literalLengthExtendValue);
473
474
475    Value* offsetPos = iBuilder->CreateAdd(
476            iBuilder->CreateAdd(
477                    literalLengthEndPos,
478                    literalLength),
479            iBuilder->getSize(1));
480    iBuilder->setScalarField("offsetPos", offsetPos);
481//    iBuilder->CallPrintInt("offsetPos", offsetPos);
482//    iBuilder->CallPrintInt("literalStart", iBuilder->CreateAdd(literalLengthEndPos, iBuilder->getSize(1)));
483//    iBuilder->CallPrintInt("literalLength", literalLength);
484    this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->getProducedItemCount("e1Marker"), iBuilder->CreateAdd(literalLengthEndPos, iBuilder->getSize(1)), false);
485    this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->CreateAdd(literalLengthEndPos, iBuilder->getSize(1)), offsetPos, true);
486
487    Value* basePtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("e1Marker", iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
488
489    this->increaseScalarField(iBuilder, "m0OutputPos", literalLength);
490
491
492    BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
493    BasicBlock* handleM0ElseBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_else");
494
495    iBuilder->CreateCondBr(
496            iBuilder->CreateICmpULT(offsetPos, this->loadCurrentBlockData(iBuilder, "blockEnd")),
497            handleM0BodyBlock,
498            handleM0ElseBlock
499    );
500    // HandleM0Body
501
502    iBuilder->SetInsertPoint(handleM0BodyBlock);
503
504    Value* matchLengthStartPos = iBuilder->CreateAdd(iBuilder->getScalarField("offsetPos"), iBuilder->getSize(1));
505    iBuilder->setScalarField("matchLengthStartPos", matchLengthStartPos);
506    this->advanceCursorUntilPos(iBuilder, "extender", matchLengthStartPos);
507
508
509    token = iBuilder->getScalarField("token");
510
511    Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf), "extendedMatchValue");
512
513    BasicBlock* extendMatchBodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_body");
514    BasicBlock* extendMatchExitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_exit");
515
516    iBuilder->CreateCondBr(extendedMatchValue, extendMatchBodyBlock, extendMatchExitBlock);
517
518    iBuilder->SetInsertPoint(extendMatchBodyBlock);
519
520    //ExtendMatchBodyBlock
521    this->advanceCursor(iBuilder, "extender", iBuilder->getSize(1));
522    this->advanceCursorUntilNextZero(iBuilder, "extender", "extender", this->loadCurrentBlockData(iBuilder, "blockEnd"));
523
524    // ----May be in a different segment now
525    iBuilder->CreateBr(extendMatchExitBlock);
526
527    //ExtendMatchExitBlock
528    iBuilder->SetInsertPoint(extendMatchExitBlock);
529    matchLengthStartPos = iBuilder->getScalarField("matchLengthStartPos");
530    Value* oldMatchExtensionSize = iBuilder->CreateSub(this->getCursorValue(iBuilder, "extender"), matchLengthStartPos);
531//    iBuilder->CallPrintInt("aaa", oldMatchExtensionSize);
532
533    token = iBuilder->getScalarField("token");
534    extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
535
536    Value* matchExtensionSize = iBuilder->CreateSelect(
537            iBuilder->CreateICmpEQ(extendedMatchValue, iBuilder->getInt1(true)),
538            oldMatchExtensionSize,
539            iBuilder->getSize(0)
540    );
541
542
543    Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
544    Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
545
546
547    Value* extensionLastBitPos = iBuilder->CreateAdd(iBuilder->getScalarField("offsetPos"), iBuilder->getSize(1));
548    extensionLastBitPos = iBuilder->CreateAdd(extensionLastBitPos, matchExtensionSize);
549    Value* extensionLastBitValue = this->generateLoadSourceInputByte(iBuilder, "byteStream", extensionLastBitPos);
550    extensionLastBitValue = iBuilder->CreateZExt(extensionLastBitValue, iBuilder->getSizeTy());
551
552
553    Value* matchLengthAddValue = iBuilder->CreateSelect(
554            iBuilder->CreateICmpUGT(matchExtensionSize, iBuilder->getSize(0)),
555            iBuilder->CreateAdd(
556                    iBuilder->CreateMul(
557                            iBuilder->CreateSub(matchExtensionSize, iBuilder->getSize(1)),
558                            iBuilder->getSize(255)
559                    ),
560                    extensionLastBitValue
561            )
562            ,
563            iBuilder->getSize(0)
564    );
565    matchLengthAddValue = iBuilder->CreateZExt(matchLengthAddValue, iBuilder->getInt64Ty());
566
567    matchLength = iBuilder->CreateAdd(matchLength, matchLengthAddValue);
568
569    Value* outputPos = iBuilder->getScalarField("m0OutputPos");
570
571    Value* outputEndPos = iBuilder->CreateSub(
572            iBuilder->CreateAdd(outputPos, matchLength),
573            iBuilder->getInt64(1)
574    );
575
576    Value* matchOffset = iBuilder->CreateAdd(
577            iBuilder->CreateZExt(
578                    this->generateLoadSourceInputByte(iBuilder, "byteStream", iBuilder->getScalarField("offsetPos")),
579                    iBuilder->getSizeTy()
580            ),
581            iBuilder->CreateShl(
582                    iBuilder->CreateZExt(this->generateLoadSourceInputByte(
583                            iBuilder,
584                            "byteStream",
585                            iBuilder->CreateAdd(iBuilder->getScalarField("offsetPos"), iBuilder->getSize(1))),
586                                         iBuilder->getSizeTy()
587                    ),
588                    iBuilder->getSize(8)
589            )
590    );
591//    iBuilder->CallPrintInt("matchOffset", matchOffset);
592
593
594    this->generateStoreCircularOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
595//    iBuilder->CallPrintInt("m0Start", outputPos);
596    this->generateStoreCircularOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
597//    iBuilder->CallPrintInt("m0End", outputEndPos);
598    this->generateStoreCircularOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffset);
599//    iBuilder->CallPrintInt("matchOffset", matchOffset);
600
601
602    this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
603    this->advanceCursor(iBuilder, "extender", iBuilder->getSize(1));
604//    iBuilder->CallPrintInt("bbb", this->getCursorValue(iBuilder, "extender"));
605
606    iBuilder->CreateBr(compressedBlockLoopFinal);
607
608    // HandleM0Else
609    iBuilder->SetInsertPoint(handleM0ElseBlock);
610    this->advanceCursorUntilPos(iBuilder, "extender", iBuilder->getScalarField("offsetPos"));
611
612    // Store final M0 pos to make sure the bit stream will be long enough
613    Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
614    this->generateStoreCircularOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
615    this->generateStoreCircularOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
616    this->generateStoreCircularOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), iBuilder->getInt64(0));
617
618    iBuilder->CreateBr(compressedBlockLoopFinal);
619
620    // final
621    iBuilder->SetInsertPoint(compressedBlockLoopFinal);
622    iBuilder->CreateBr(compressedBlockLoopCon);
623
624    // Exit
625    iBuilder->SetInsertPoint(exitBlock);
626
627    return exitBlock;
628}
629
630void LZ4ExtractEM0Kernel::generateRecordUncompressedBlock(const unique_ptr<kernel::KernelBuilder> & iBuilder) {
631    Value* blockStart = this->loadCurrentBlockData(iBuilder, "blockStart");
632    Value* blockEnd = this->loadCurrentBlockData(iBuilder, "blockEnd");
633    Value* length = iBuilder->CreateSub(blockEnd, blockStart);
634    Value* outputPos = iBuilder->getScalarField("m0OutputPos");
635    this->increaseScalarField(iBuilder, "m0OutputPos", length);
636
637    // Store Uncompressed Data
638    this->generateStoreCircularOutput(iBuilder, "uncompressedStartPos", iBuilder->getInt64Ty()->getPointerTo(), blockStart);
639    this->generateStoreCircularOutput(iBuilder, "uncompressedLength", iBuilder->getInt64Ty()->getPointerTo(), length);
640    this->generateStoreCircularOutput(iBuilder, "uncompressedOutputPos", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
641}
642
643void LZ4ExtractEM0Kernel::generateIncreaseBlockDataIndex(const unique_ptr<kernel::KernelBuilder> & iBuilder) {
644    this->increaseScalarField(iBuilder, "blockDataIndex", iBuilder->getSize(1));
645}
646
647Value* LZ4ExtractEM0Kernel::loadCurrentBlockData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const std::string& name) {
648    Value* blockDataIndex = iBuilder->getScalarField("blockDataIndex");
649    return this->generateLoadCircularInput(iBuilder, name, blockDataIndex, iBuilder->getInt64Ty()->getPointerTo());
650}
651
652LZ4ExtractEM0Kernel::LZ4ExtractEM0Kernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, const std::map<std::string, size_t>& inputIndexMap):
653        SequentialKernel(
654                iBuilder,
655                "lz4_extract_e_m0_kernel",
656                {//Inputs
657                        Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"},
658                        Binding{iBuilder->getStreamSetTy(1, 1), "extender"},
659                        Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xFX"},
660                        Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xXF"},
661
662                        // block data
663                        Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1)},
664                        Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1)},
665                        Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1)}
666                },
667                {//Outputs
668                        // Uncompressed_data
669                        Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos", BoundedRate(0, 1)},
670                        Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength", BoundedRate(0, 1)},
671                        Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos", BoundedRate(0, 1)},
672
673                        Binding{iBuilder->getStreamSetTy(1, 1), "e1Marker", BoundedRate(0, 1)},
674                        Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
675                        Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
676                        Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)}
677                },
678                {//Arguments
679                },
680                {},
681                {//Internal States
682                        Binding{iBuilder->getSizeTy(), "blockDataIndex"},
683                        Binding{iBuilder->getInt64Ty(), "m0OutputPos"},
684                        Binding{iBuilder->getSizeTy(), "tokenPos"},
685                        Binding{iBuilder->getInt8Ty(), "token"},
686                        Binding{iBuilder->getSizeTy(), "matchLengthStartPos"},
687                        Binding{iBuilder->getSizeTy(), "offsetPos"}
688//                        Binding{iBuilder->getInt64Ty(), "temp_TokenMarkers"}
689                }
690        ) {
691    this->initBufferCursor(iBuilder, {"extender"});
692}
Note: See TracBrowser for help on using the repository browser.