source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_extract_e_m0.cpp @ 5905

Last change on this file since 5905 was 5905, checked in by xwa163, 16 months ago
  1. Fix some bugs in match copy kernel
  2. Remove some legacy codes from match copy kernel and sequential kernel
File size: 31.3 KB
Line 
1
2#include "lz4_extract_e_m0.h"
3#include <kernels/kernel_builder.h>
4#include <toolchain/toolchain.h>
5#include <kernels/streamset.h>
6#include <iostream>
7
8#define APPLY_64PACK_ACCELERATION
9// TODO: Maybe this can be changed to 256-pack acceleration based on SIMD instructions.
10
11#define ACCELERATION_LOOP_COUNT (20)
12
13using namespace llvm;
14using namespace kernel;
15using namespace std;
16
17void LZ4ExtractEM0Kernel::generateDoSequentialSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
18    BasicBlock* entryBlock = iBuilder->GetInsertBlock();
19
20    BasicBlock* blockDataLoopCon = iBuilder->CreateBasicBlock("block_data_loop_con");
21    BasicBlock* blockDataLoopProcess = iBuilder->CreateBasicBlock("block_data_loop_process");
22    BasicBlock* blockDataLoopCompressed = iBuilder->CreateBasicBlock("block_data_loop_compressed");
23    BasicBlock* blockDataLoopUncompressed = iBuilder->CreateBasicBlock("block_data_loop_uncompressed");
24
25    BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exit_block");
26
27    iBuilder->CreateBr(blockDataLoopCon);
28
29    // blockDataLoopCon
30    iBuilder->SetInsertPoint(blockDataLoopCon);
31    Value* blockDataIndex = iBuilder->getScalarField("blockDataIndex");
32    Value* availableBlockData = iBuilder->getAvailableItemCount("blockStart");
33    iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, availableBlockData), blockDataLoopProcess, exitBlock);
34
35    // blockDataLoopProcess
36    iBuilder->SetInsertPoint(blockDataLoopProcess);
37    Value* isCompressed = this->generateLoadCircularInput(iBuilder, "isCompressed", blockDataIndex, iBuilder->getInt1Ty()->getPointerTo());
38    iBuilder->CreateCondBr(isCompressed, blockDataLoopCompressed, blockDataLoopUncompressed);
39
40    // blockDataLoop Compressed
41    iBuilder->SetInsertPoint(blockDataLoopCompressed);
42    this->generateHandleCompressedBlock(iBuilder);
43
44
45    this->generateIncreaseBlockDataIndex(iBuilder);
46    iBuilder->CreateBr(blockDataLoopCon);
47
48
49    // blockDataLoop Uncompressed
50    iBuilder->SetInsertPoint(blockDataLoopUncompressed);
51    //handle uncompressed block
52    this->generateRecordUncompressedBlock(iBuilder);
53//    iBuilder->setProducedItemCount("e1", this->loadCurrentBlockData(iBuilder, "blockEnd"));
54    this->generateIncreaseBlockDataIndex(iBuilder);
55    iBuilder->CreateBr(blockDataLoopCon);
56
57    //Exit
58    iBuilder->SetInsertPoint(exitBlock);
59
60}
61
62BasicBlock* LZ4ExtractEM0Kernel::generateHandleCompressedBlock(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
63    BasicBlock* entryBlock = iBuilder->GetInsertBlock();
64    BasicBlock* exitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_exit");
65
66    Value* blockStart = this->loadCurrentBlockData(iBuilder, "blockStart");
67
68    this->advanceCursorUntilPos(iBuilder, "extender", iBuilder->CreateZExtOrTrunc(blockStart, iBuilder->getSizeTy()));
69
70    BasicBlock* compressedBlockLoopCon = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_con");
71    BasicBlock* compressedBlockLoopBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_body");
72    BasicBlock* compressedBlockLoopFinal = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_final");
73
74    iBuilder->CreateBr(compressedBlockLoopCon);
75
76    // compressedBlockLoopCon
77    iBuilder->SetInsertPoint(compressedBlockLoopCon);
78    Value* cursorValue = this->getCursorValue(iBuilder, "extender");
79    Value* blockEndPos = this->loadCurrentBlockData(iBuilder, "blockEnd");
80    iBuilder->CreateCondBr(iBuilder->CreateICmpULT(cursorValue, blockEndPos), compressedBlockLoopBody, exitBlock);
81
82    // body
83    iBuilder->SetInsertPoint(compressedBlockLoopBody);
84
85#ifdef APPLY_64PACK_ACCELERATION
86    BasicBlock* accelerationEndBlock = iBuilder->CreateBasicBlock("acceleration_end_block");
87    BasicBlock* accelerationFinishBlock = iBuilder->CreateBasicBlock("acceleration_finish_block");
88
89    iBuilder->SetInsertPoint(accelerationEndBlock);
90
91    PHINode* phiTokenMarkers = iBuilder->CreatePHI(iBuilder->getInt64Ty(), ACCELERATION_LOOP_COUNT + 1);
92    PHINode* phiE1FinalValue = iBuilder->CreatePHI(iBuilder->getInt64Ty(), ACCELERATION_LOOP_COUNT + 1);
93    Value* extenderOffset = this->getCursorValue(iBuilder, "extender");
94    Value* packBaseOffset = this->offsetToPackBaseOffset(iBuilder, extenderOffset);
95
96    //    Value* tempTokenMarkers = iBuilder->getScalarField("temp_TokenMarkers");
97    Value *tokenPackPos = iBuilder->CreateSub(
98            iBuilder->CreateSub(
99                    iBuilder->getSize(64),
100                    iBuilder->CreateCountReverseZeroes(phiTokenMarkers)
101            ),
102            iBuilder->getSize(1)
103    );
104
105    Value *tokenActualPos = iBuilder->CreateAdd(packBaseOffset, tokenPackPos);
106
107    //TODO output here
108    {
109        Value* targetPackIndex = iBuilder->CreateSub(iBuilder->getSize(64), iBuilder->CreateCountReverseZeroes(phiE1FinalValue));
110        Value* targetActualIndex = iBuilder->CreateAdd(targetPackIndex, packBaseOffset);
111        Value* preActualIndex = iBuilder->getProducedItemCount("e1Marker");
112        targetActualIndex = iBuilder->CreateSelect(
113                iBuilder->CreateICmpUGE(targetActualIndex, preActualIndex),
114                targetActualIndex,
115                preActualIndex
116        );
117        this->markCircularOutputBitstream(iBuilder, "e1Marker", preActualIndex, tokenActualPos, false, false);
118
119        Value* extenderOffset = this->getCursorValue(iBuilder, "extender");
120        Value* targetOutputIndex = iBuilder->CreateLShr(extenderOffset, iBuilder->getSize(std::log2(64)));
121        size_t packNum = this->getOutputBufferSize(iBuilder, "e1Marker") / 64;
122        Value* maskedPackIndex = iBuilder->CreateAnd(targetOutputIndex, iBuilder->getSize(packNum - 1));
123        Value* accelerationOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("e1Marker", iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
124        accelerationOutputPtr = iBuilder->CreateGEP(accelerationOutputPtr, maskedPackIndex);
125
126//        iBuilder->CallPrintInt("expected", iBuilder->CreateLoad(accelerationOutputPtr));
127
128        Value* v = iBuilder->CreateOr(iBuilder->CreateLoad(accelerationOutputPtr), phiE1FinalValue);
129        // TODO phiE1FinalValue
130        iBuilder->CreateStore(v, accelerationOutputPtr);
131//        iBuilder->CallPrintInt("actual", iBuilder->CreateLoad(accelerationOutputPtr));
132
133        iBuilder->setProducedItemCount("e1Marker", targetActualIndex);
134    }
135
136
137
138    this->advanceCursorUntilPos(iBuilder, "extender", tokenActualPos);
139    iBuilder->CreateBr(accelerationFinishBlock);
140
141
142    iBuilder->SetInsertPoint(compressedBlockLoopBody);
143    //------------------------------------ 64 Pack Acceleration Start
144    this->waitCursorUntilInputAvailable(iBuilder, "extender", "extender");
145
146    Value* currentBlockEnd = this->loadCurrentBlockData(iBuilder, "blockEnd");
147
148    extenderOffset = this->getCursorValue(iBuilder, "extender");
149    Value* targetOutputIndex = iBuilder->CreateLShr(extenderOffset, iBuilder->getSize(std::log2(64)));
150    size_t packNum = this->getOutputBufferSize(iBuilder, "e1Marker") / 64;
151    Value* maskedPackIndex = iBuilder->CreateAnd(targetOutputIndex, iBuilder->getSize(packNum - 1));
152    Value* accelerationOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("e1Marker", iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
153    accelerationOutputPtr = iBuilder->CreateGEP(accelerationOutputPtr, maskedPackIndex);
154
155
156
157//    Value* accelerationPackIndex = this->offsetToPackIndex(iBuilder, extenderOffset);
158//    Value* maskedAccelerationPackIndex = this->generateLoadCircularInputPack()
159
160
161    packBaseOffset = this->offsetToPackBaseOffset(iBuilder, extenderOffset);
162    Value* packOffset = this->offsetToPackOffset(iBuilder, extenderOffset);
163
164    Value* extenderPackData = this->generateLoadCircularInputPack(iBuilder, "extender", extenderOffset);
165    Value* CC_0xFXPackData = this->generateLoadCircularInputPack(iBuilder, "CC_0xFX", extenderOffset);
166    Value* CC_0xXFPackData = this->generateLoadCircularInputPack(iBuilder, "CC_0xXF", extenderOffset);
167
168    // extend_match_only_byte = CC_0xFX &~ CC_0xXF
169    Value* extendMatchOnlyBytePack = iBuilder->CreateAnd(
170            CC_0xXFPackData,
171            iBuilder->CreateNot(
172                    CC_0xFXPackData
173            )
174    );
175
176    Value* tokenMarkers = iBuilder->CreateShl(iBuilder->getInt64(1), packOffset);
177//    Value* outputInitValue = iBuilder->CreateLoad(accelerationOutputPtr);
178    Value* outputInitValue = iBuilder->getInt64(0);
179
180
181//    iBuilder->setScalarField("temp_TokenMarkers", tokenMarkers);
182
183    for (int i = 0; i < ACCELERATION_LOOP_COUNT; i++) {
184        Value *tokenPackPos = iBuilder->CreateSub(
185                iBuilder->CreateSub(
186                        iBuilder->getSize(64),
187                        iBuilder->CreateCountReverseZeroes(tokenMarkers)
188                ),
189                iBuilder->getSize(1)
190        );
191        Value *tokenActualPos = iBuilder->CreateAdd(packBaseOffset, tokenPackPos);
192
193        Value *tokenValue = iBuilder->CreateZExt(
194                this->generateLoadSourceInputByte(iBuilder, "byteStream", tokenActualPos),
195                iBuilder->getSizeTy()
196        );
197
198        Value *literalLengthBase =
199                iBuilder->CreateLShr(
200                        tokenValue,
201                        iBuilder->getSize(4)
202                );
203
204        Value* matchLengthBase = iBuilder->CreateAnd(
205                tokenValue,
206                iBuilder->getSize(0x0f)
207        );
208        matchLengthBase = iBuilder->CreateAdd(matchLengthBase, iBuilder->getSize(0x4));
209
210        Value *tokenMarker = iBuilder->CreateShl(iBuilder->getInt64(1), tokenPackPos);
211        Value * notExtendLiteralMarker = iBuilder->CreateAnd(
212                tokenMarker,
213                iBuilder->CreateNot(CC_0xFXPackData)
214        );
215        ////
216        Value* expectedNotExtendNextTokenMarker = iBuilder->CreateShl(
217                notExtendLiteralMarker,
218                iBuilder->CreateAdd(literalLengthBase, iBuilder->getSize(3))
219        );  // If not extend literal or match, next token pos will be here, 1 (token) + literalLengthBase + 2 (match offset)
220
221
222        Value *matchExtendOnlyMarker = iBuilder->CreateAnd(
223                tokenMarker,
224                extendMatchOnlyBytePack
225        );
226
227        Value *extenderMarker = iBuilder->CreateShl(
228                matchExtendOnlyMarker,
229                iBuilder->CreateAdd(literalLengthBase, iBuilder->getSize(2))
230        );  // Match offset
231
232        //ScanThru
233        ////
234        Value *expectedNextTokenMarker = iBuilder->CreateAnd(
235                iBuilder->CreateAdd(
236                        extenderMarker,
237                        iBuilder->CreateOr(
238                                extenderMarker,
239                                extenderPackData
240                        )
241                ),
242                iBuilder->CreateNot(extenderPackData)
243        );
244        expectedNextTokenMarker = iBuilder->CreateShl(
245                expectedNextTokenMarker,
246                iBuilder->getSize(1)
247        );
248
249        Value* needExtendMatch = iBuilder->CreateNot(iBuilder->CreateICmpEQ(matchExtendOnlyMarker, iBuilder->getSize(0)));
250
251        expectedNextTokenMarker = iBuilder->CreateSelect(
252                needExtendMatch,
253                expectedNextTokenMarker,
254                expectedNotExtendNextTokenMarker
255        );
256
257        Value *expectedNextTokenPos = iBuilder->CreateAdd(
258                packBaseOffset,
259                iBuilder->CreateCountForwardZeroes(expectedNextTokenMarker)
260        );
261        Value *reachBlockEnd = iBuilder->CreateICmpUGE(
262                expectedNextTokenPos,
263                currentBlockEnd
264        );
265
266        expectedNextTokenMarker = iBuilder->CreateSelect(
267                reachBlockEnd,
268                iBuilder->getInt64(0),
269                expectedNextTokenMarker
270        );
271
272        tokenMarkers = iBuilder->CreateOr(tokenMarkers, expectedNextTokenMarker, "tokenMarkers");
273//        iBuilder->setScalarField("temp_TokenMarkers", tokenMarkers);
274
275        Value* matchOffsetActualPos = iBuilder->CreateAdd(
276                tokenActualPos,
277                iBuilder->CreateAdd(
278                        iBuilder->getSize(1),
279                        literalLengthBase
280                )
281        );
282
283        Value* matchOffsetValue = iBuilder->CreateAdd(
284                iBuilder->CreateZExt(
285                        this->generateLoadSourceInputByte(iBuilder, "byteStream", matchOffsetActualPos),
286                        iBuilder->getSizeTy()
287                ),
288                iBuilder->CreateShl(
289                        iBuilder->CreateZExt(this->generateLoadSourceInputByte(
290                                iBuilder,
291                                "byteStream",
292                                iBuilder->CreateAdd(matchOffsetActualPos , iBuilder->getSize(1))),
293                                             iBuilder->getSizeTy()
294                        ),
295                        iBuilder->getSize(8)
296                )
297        );
298
299
300        Value* shouldEndAcceleration = iBuilder->CreateOr(
301                iBuilder->CreateICmpEQ(expectedNextTokenMarker, iBuilder->getSize(0)),
302                reachBlockEnd
303        );
304
305
306        BasicBlock *pack64AcceMatchExtendOnlyExitBlock = iBuilder->CreateBasicBlock(
307                "pack64AcceMatchExtendOnlyExitBlock");
308
309        phiTokenMarkers->addIncoming(tokenMarkers, iBuilder->GetInsertBlock());
310        phiE1FinalValue->addIncoming(outputInitValue, iBuilder->GetInsertBlock());
311
312        iBuilder->CreateUnlikelyCondBr(
313                shouldEndAcceleration,
314                accelerationEndBlock,
315                pack64AcceMatchExtendOnlyExitBlock
316        );
317
318        iBuilder->SetInsertPoint(pack64AcceMatchExtendOnlyExitBlock);
319
320        // ------------------
321        // Yellow Logic Start
322        Value *nextTokenPos = iBuilder->CreateAdd(
323                packBaseOffset,
324                iBuilder->CreateCountForwardZeroes(expectedNextTokenMarker)
325        );
326
327        Value *matchExtendLastBitPos = iBuilder->CreateSub(
328                nextTokenPos,
329                iBuilder->getSize(1)
330        );
331
332        Value *matchExtendLastBitValue = this->generateLoadSourceInputByte(iBuilder, "byteStream",
333                                                                           matchExtendLastBitPos);
334        matchExtendLastBitValue = iBuilder->CreateZExt(matchExtendLastBitValue, iBuilder->getSizeTy());
335        Value *matchLength = iBuilder->CreateAdd(
336                matchExtendLastBitValue,
337                matchLengthBase
338        );
339
340        Value *matchExtendLength = iBuilder->CreateSub(
341                iBuilder->CreateCountForwardZeroes(expectedNextTokenMarker),
342                iBuilder->CreateCountForwardZeroes(extenderMarker)
343        );
344
345        matchLength = iBuilder->CreateSelect(
346                needExtendMatch,
347                iBuilder->CreateAdd(
348                        matchLength,
349                        iBuilder->CreateMul(
350                                iBuilder->CreateSub(matchExtendLength, iBuilder->getSize(2)),
351                                iBuilder->getSize(0xff)
352                        )
353                ),
354                matchLengthBase
355        );
356
357        Value* oldM0OutputPos = iBuilder->getScalarField("m0OutputPos");
358
359
360        // Mark E1
361        Value *expectedNewOffsetPos = iBuilder->CreateAdd(
362                tokenActualPos,
363                iBuilder->CreateAdd(
364                        literalLengthBase,
365                        iBuilder->getSize(1)
366                )
367        );
368        Value *newOffsetPos = expectedNewOffsetPos;
369        iBuilder->setScalarField("offsetPos", newOffsetPos);
370
371        // Yellow Logic End
372        // ------------------------------
373        // Blue Logic Start
374
375        // e1 start:tokenActualPos, e1 end:expectedNewOffsetPos,
376//        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->getProducedItemCount("e1Marker"), tokenActualPos, false);
377//        this->markCircularOutputBitstream(iBuilder, "e1Marker", tokenActualPos, expectedNewOffsetPos, true);
378
379
380        Value* newMask = iBuilder->CreateSub(
381                iBuilder->CreateShl(iBuilder->getInt64(1), iBuilder->CreateSub(expectedNewOffsetPos, packBaseOffset)),
382                iBuilder->CreateShl(iBuilder->getInt64(1), iBuilder->CreateAdd(iBuilder->CreateSub(tokenActualPos, packBaseOffset), iBuilder->getSize(1)))
383        );
384
385        outputInitValue = iBuilder->CreateOr(outputInitValue, newMask);
386
387        Value* basePtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("e1Marker", iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
388
389
390        Value* m0OutputPos = iBuilder->CreateAdd(oldM0OutputPos, literalLengthBase);
391
392        // Mark M0
393        Value *outputEndPos = iBuilder->CreateSub(
394                iBuilder->CreateAdd(m0OutputPos, matchLength),
395                iBuilder->getInt64(1)
396        );
397
398        this->generateStoreCircularOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), m0OutputPos);
399//        iBuilder->CallPrintInt("m0Start", m0OutputPos);
400        this->generateStoreCircularOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
401//        iBuilder->CallPrintInt("m0End", outputEndPos);
402        this->generateStoreCircularOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffsetValue);
403//        iBuilder->CallPrintInt("matchOffset1", matchOffsetValue);
404
405        m0OutputPos = iBuilder->CreateAdd(m0OutputPos, matchLength);
406        iBuilder->setScalarField("m0OutputPos", m0OutputPos);
407
408        // Blue Logic End
409        // ---------------------------
410    }
411    phiTokenMarkers->addIncoming(tokenMarkers, iBuilder->GetInsertBlock());
412    phiE1FinalValue->addIncoming(outputInitValue, iBuilder->GetInsertBlock());
413
414    //------------------------------------ 64 Pack Acceleration End
415    // Config Extender Cursor after Acceleration
416    iBuilder->CreateBr(accelerationEndBlock);
417
418    iBuilder->SetInsertPoint(accelerationFinishBlock);
419
420    //------------------------------------- Finish
421#endif
422
423//    iBuilder->CallPrintInt("tokenPos", this->getCursorValue(iBuilder, "extender"));
424    Value* token = this->generateLoadSourceInputByte(iBuilder, "byteStream", this->getCursorValue(iBuilder, "extender"));
425//    iBuilder->CallPrintInt("token", token);
426//    iBuilder->CallPrintInt("tokenPos", this->getCursorValue(iBuilder, "extender"));
427
428//    iBuilder->CreateAssert(iBuilder->CreateICmpULT(this->getCursorValue(iBuilder, "extender"), iBuilder->getSize(0xcb32a)), "ee");
429    iBuilder->setScalarField("token", token);
430
431    Value* extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
432
433    // TokenPos can not be refered by next few statements since they may be in different segment
434    iBuilder->setScalarField("tokenPos", this->getCursorValue(iBuilder, "extender"));
435
436    BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
437    BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
438
439    iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthBody, extendLiteralLengthExit);
440
441    iBuilder->SetInsertPoint(extendLiteralLengthBody);
442
443    this->advanceCursor(iBuilder, "extender", iBuilder->getSize(1));
444
445    this->advanceCursorUntilNextZero(iBuilder, "extender", "extender", this->loadCurrentBlockData(iBuilder, "blockEnd"));
446
447    iBuilder->CreateBr(extendLiteralLengthExit);
448
449    iBuilder->SetInsertPoint(extendLiteralLengthExit);
450    // ----May be in a different segment now
451    Value* literalLengthEndPos = this->getCursorValue(iBuilder, "extender");
452    Value* literalExtensionSize = iBuilder->CreateSub(literalLengthEndPos, iBuilder->getScalarField("tokenPos"));
453    Value* finalLengthByte = this->generateLoadSourceInputByte(iBuilder, "byteStream", this->getCursorValue(iBuilder, "extender"));
454
455    finalLengthByte = iBuilder->CreateZExt(finalLengthByte, iBuilder->getSizeTy());
456    Value* literalLengthExtendValue = iBuilder->CreateSelect(
457            iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)),
458            iBuilder->CreateAdd(
459                    iBuilder->CreateMul(
460                            iBuilder->CreateSub(literalExtensionSize, iBuilder->getSize(1)),
461                            iBuilder->getSize(255)
462                    ),
463                    finalLengthByte
464            ),
465            iBuilder->getSize(0)
466    );
467    literalLengthExtendValue = iBuilder->CreateZExt(literalLengthExtendValue, iBuilder->getInt64Ty());
468    token = iBuilder->getScalarField("token");
469    Value* literalLengthBase = iBuilder->CreateLShr(iBuilder->CreateZExt(token, iBuilder->getInt64Ty()), iBuilder->getInt64(4));
470    Value* literalLength = iBuilder->CreateAdd(literalLengthBase, literalLengthExtendValue);
471
472
473    Value* previousOffsetPos = iBuilder->getScalarField("offsetPos");
474
475    Value* offsetPos = iBuilder->CreateAdd(
476            iBuilder->CreateAdd(
477                    literalLengthEndPos,
478                    literalLength),
479            iBuilder->getSize(1));
480    iBuilder->setScalarField("offsetPos", offsetPos);
481//    iBuilder->CallPrintInt("literalStart", iBuilder->CreateAdd(literalLengthEndPos, iBuilder->getSize(1)));
482//    iBuilder->CallPrintInt("literalLength", literalLength);
483    this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->getProducedItemCount("e1Marker"), iBuilder->CreateAdd(literalLengthEndPos, iBuilder->getSize(1)), false);
484    this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->CreateAdd(literalLengthEndPos, iBuilder->getSize(1)), offsetPos, true);
485
486    Value* basePtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("e1Marker", iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
487
488    this->increaseScalarField(iBuilder, "m0OutputPos", literalLength);
489
490
491    BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
492    BasicBlock* handleM0ElseBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_else");
493
494    iBuilder->CreateCondBr(
495            iBuilder->CreateICmpULT(offsetPos, this->loadCurrentBlockData(iBuilder, "blockEnd")),
496            handleM0BodyBlock,
497            handleM0ElseBlock
498    );
499    // HandleM0Body
500
501    iBuilder->SetInsertPoint(handleM0BodyBlock);
502
503    Value* matchLengthStartPos = iBuilder->CreateAdd(iBuilder->getScalarField("offsetPos"), iBuilder->getSize(1));
504    iBuilder->setScalarField("matchLengthStartPos", matchLengthStartPos);
505    this->advanceCursorUntilPos(iBuilder, "extender", matchLengthStartPos);
506
507
508    token = iBuilder->getScalarField("token");
509
510    Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf), "extendedMatchValue");
511
512    BasicBlock* extendMatchBodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_body");
513    BasicBlock* extendMatchExitBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_extend_match_exit");
514
515    iBuilder->CreateCondBr(extendedMatchValue, extendMatchBodyBlock, extendMatchExitBlock);
516
517    iBuilder->SetInsertPoint(extendMatchBodyBlock);
518
519    //ExtendMatchBodyBlock
520    this->advanceCursor(iBuilder, "extender", iBuilder->getSize(1));
521    this->advanceCursorUntilNextZero(iBuilder, "extender", "extender", this->loadCurrentBlockData(iBuilder, "blockEnd"));
522
523    // ----May be in a different segment now
524    iBuilder->CreateBr(extendMatchExitBlock);
525
526    //ExtendMatchExitBlock
527    iBuilder->SetInsertPoint(extendMatchExitBlock);
528    matchLengthStartPos = iBuilder->getScalarField("matchLengthStartPos");
529    Value* oldMatchExtensionSize = iBuilder->CreateSub(this->getCursorValue(iBuilder, "extender"), matchLengthStartPos);
530
531    token = iBuilder->getScalarField("token");
532    extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
533
534    Value* matchExtensionSize = iBuilder->CreateSelect(
535            iBuilder->CreateICmpEQ(extendedMatchValue, iBuilder->getInt1(true)),
536            oldMatchExtensionSize,
537            iBuilder->getSize(0)
538    );
539
540
541    Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
542    Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
543
544
545    Value* extensionLastBitPos = iBuilder->CreateAdd(iBuilder->getScalarField("offsetPos"), iBuilder->getSize(1));
546    extensionLastBitPos = iBuilder->CreateAdd(extensionLastBitPos, matchExtensionSize);
547    Value* extensionLastBitValue = this->generateLoadSourceInputByte(iBuilder, "byteStream", extensionLastBitPos);
548    extensionLastBitValue = iBuilder->CreateZExt(extensionLastBitValue, iBuilder->getSizeTy());
549
550
551    Value* matchLengthAddValue = iBuilder->CreateSelect(
552            iBuilder->CreateICmpUGT(matchExtensionSize, iBuilder->getSize(0)),
553            iBuilder->CreateAdd(
554                    iBuilder->CreateMul(
555                            iBuilder->CreateSub(matchExtensionSize, iBuilder->getSize(1)),
556                            iBuilder->getSize(255)
557                    ),
558                    extensionLastBitValue
559            )
560            ,
561            iBuilder->getSize(0)
562    );
563    matchLengthAddValue = iBuilder->CreateZExt(matchLengthAddValue, iBuilder->getInt64Ty());
564
565    matchLength = iBuilder->CreateAdd(matchLength, matchLengthAddValue);
566
567    Value* outputPos = iBuilder->getScalarField("m0OutputPos");
568
569    Value* outputEndPos = iBuilder->CreateSub(
570            iBuilder->CreateAdd(outputPos, matchLength),
571            iBuilder->getInt64(1)
572    );
573
574    Value* matchOffset = iBuilder->CreateAdd(
575            iBuilder->CreateZExt(
576                    this->generateLoadSourceInputByte(iBuilder, "byteStream", iBuilder->getScalarField("offsetPos")),
577                    iBuilder->getSizeTy()
578            ),
579            iBuilder->CreateShl(
580                    iBuilder->CreateZExt(this->generateLoadSourceInputByte(
581                            iBuilder,
582                            "byteStream",
583                            iBuilder->CreateAdd(iBuilder->getScalarField("offsetPos"), iBuilder->getSize(1))),
584                                         iBuilder->getSizeTy()
585                    ),
586                    iBuilder->getSize(8)
587            )
588    );
589//    iBuilder->CallPrintInt("matchOffset", matchOffset);
590
591
592    this->generateStoreCircularOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
593//    iBuilder->CallPrintInt("m0Start", outputPos);
594    this->generateStoreCircularOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
595//    iBuilder->CallPrintInt("m0End", outputEndPos);
596    this->generateStoreCircularOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffset);
597//    iBuilder->CallPrintInt("matchOffset", matchOffset);
598
599
600    this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
601    this->advanceCursor(iBuilder, "extender", iBuilder->getSize(1));
602
603    iBuilder->CreateBr(compressedBlockLoopFinal);
604
605    // HandleM0Else
606    iBuilder->SetInsertPoint(handleM0ElseBlock);
607    this->advanceCursorUntilPos(iBuilder, "extender", iBuilder->getScalarField("offsetPos"));
608
609    // Store final M0 pos to make sure the bit stream will be long enough
610    Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
611    this->generateStoreCircularOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
612    this->generateStoreCircularOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
613    this->generateStoreCircularOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), iBuilder->getInt64(0));
614
615    iBuilder->CreateBr(compressedBlockLoopFinal);
616
617    // final
618    iBuilder->SetInsertPoint(compressedBlockLoopFinal);
619    iBuilder->CreateBr(compressedBlockLoopCon);
620
621    // Exit
622    iBuilder->SetInsertPoint(exitBlock);
623
624    return exitBlock;
625}
626
627void LZ4ExtractEM0Kernel::generateRecordUncompressedBlock(const unique_ptr<kernel::KernelBuilder> & iBuilder) {
628    Value* blockStart = this->loadCurrentBlockData(iBuilder, "blockStart");
629    Value* blockEnd = this->loadCurrentBlockData(iBuilder, "blockEnd");
630    Value* length = iBuilder->CreateSub(blockEnd, blockStart);
631    Value* outputPos = iBuilder->getScalarField("m0OutputPos");
632    this->increaseScalarField(iBuilder, "m0OutputPos", length);
633
634    // Store Uncompressed Data
635    this->generateStoreCircularOutput(iBuilder, "uncompressedStartPos", iBuilder->getInt64Ty()->getPointerTo(), blockStart);
636    this->generateStoreCircularOutput(iBuilder, "uncompressedLength", iBuilder->getInt64Ty()->getPointerTo(), length);
637    this->generateStoreCircularOutput(iBuilder, "uncompressedOutputPos", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
638}
639
640void LZ4ExtractEM0Kernel::generateIncreaseBlockDataIndex(const unique_ptr<kernel::KernelBuilder> & iBuilder) {
641    this->increaseScalarField(iBuilder, "blockDataIndex", iBuilder->getSize(1));
642}
643
644Value* LZ4ExtractEM0Kernel::loadCurrentBlockData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const std::string& name) {
645    Value* blockDataIndex = iBuilder->getScalarField("blockDataIndex");
646    return this->generateLoadCircularInput(iBuilder, name, blockDataIndex, iBuilder->getInt64Ty()->getPointerTo());
647}
648
649LZ4ExtractEM0Kernel::LZ4ExtractEM0Kernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, const std::map<std::string, size_t>& inputIndexMap):
650        SequentialKernel(
651                iBuilder,
652                "lz4_extract_e_m0_kernel",
653                {//Inputs
654                        Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"},
655                        Binding{iBuilder->getStreamSetTy(1, 1), "extender"},
656                        Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xFX"},
657                        Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xXF"},
658
659                        // block data
660                        Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1)},
661                        Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1)},
662                        Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1)}
663                },
664                {//Outputs
665                        // Uncompressed_data
666                        Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos", BoundedRate(0, 1)},
667                        Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength", BoundedRate(0, 1)},
668                        Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos", BoundedRate(0, 1)},
669
670                        Binding{iBuilder->getStreamSetTy(1, 1), "e1Marker", BoundedRate(0, 1)},
671                        Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
672                        Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
673                        Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)}
674                },
675                {//Arguments
676                },
677                {},
678                {//Internal States
679                        Binding{iBuilder->getSizeTy(), "blockDataIndex"},
680                        Binding{iBuilder->getInt64Ty(), "m0OutputPos"},
681                        Binding{iBuilder->getSizeTy(), "tokenPos"},
682                        Binding{iBuilder->getInt8Ty(), "token"},
683                        Binding{iBuilder->getSizeTy(), "matchLengthStartPos"},
684                        Binding{iBuilder->getSizeTy(), "offsetPos"}
685//                        Binding{iBuilder->getInt64Ty(), "temp_TokenMarkers"}
686                }
687        ) {
688    this->initBufferCursor(iBuilder, {"extender"});
689}
Note: See TracBrowser for help on using the repository browser.