source: icGREP/icgrep-devel/icgrep/kernels/lz4_index_decoder.cpp @ 5436

Last change on this file since 5436 was 5436, checked in by nmedfort, 2 years ago

Continued refactoring work. PabloKernel is now an abstract base type with a 'generatePabloMethod' hook to generate Pablo code.

File size: 31.5 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "lz4_index_decoder.h"
8#include <kernels/kernel_builder.h>
9 
10using namespace llvm;
11using namespace kernel;
12
// Runtime debug tracing is enabled only when NDEBUG is not defined.
#ifdef NDEBUG
#define DEBUG_RT_PRINT 0
#else
#define DEBUG_RT_PRINT 1
#endif

// Emits a CallPrintMsgToStderr(MSG) call through the kernel's iBuilder
// when tracing is enabled; otherwise emits nothing.
#define printRTDebugMsg(MSG) \
    do { \
        if (DEBUG_RT_PRINT) { \
            iBuilder->CallPrintMsgToStderr(MSG); \
        } \
    } while (0)

// Emits a CallPrintIntToStderr(NAME, X) call through the kernel's iBuilder
// when tracing is enabled.  Note that X is still evaluated as a C++ expression
// only inside the guarded branch.
#define printRTDebugInt(NAME, X) \
    do { \
        if (DEBUG_RT_PRINT) { \
            iBuilder->CallPrintIntToStderr(NAME, X); \
        } \
    } while (0)

// Traces blockStartPos + the current value of the sOffset stack variable.
#define printGlobalPos() \
    printRTDebugInt("GlobalPos", iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset)))
27
28namespace {
29
30Value * generateBitswap(IDISA::IDISA_Builder * const iBuilder, Value * v) {
31    Value * bswapFunc = Intrinsic::getDeclaration(iBuilder->getModule(),
32            Intrinsic::bswap, v->getType());
33    return iBuilder->CreateCall(bswapFunc, {v});
34}
35
36Value * selectMin(IDISA::IDISA_Builder * const iBuilder, Value * a, Value * b) {
37    return iBuilder->CreateSelect(iBuilder->CreateICmpULT(a, b), a, b);
38}
39
40Value * createStackVar(IDISA::IDISA_Builder * const iBuilder, Type * type, StringRef name, Value * initializer = nullptr) {
41    Value * var = iBuilder->CreateAlloca(type, nullptr, name);
42    if (initializer) {
43        iBuilder->CreateStore(initializer, var);
44    } else {
45        iBuilder->CreateStore(ConstantInt::get(type, 0), var);
46    }
47    return var;
48}
49
50void incStackVar(IDISA::IDISA_Builder * const iBuilder, Value * svar, Value * increment = nullptr) {
51    Value * value = iBuilder->CreateLoad(svar);
52    if (increment) {
53        value = iBuilder->CreateAdd(value, increment);
54    } else {
55        value = iBuilder->CreateAdd(value, ConstantInt::get(value->getType(), 1));
56    }
57    iBuilder->CreateStore(value, svar);
58}
59
60Value * getOutputPtr(IDISA::IDISA_Builder * const iBuilder, Value * blockStartPtr, Value * offset) {
61    return iBuilder->CreateGEP(
62            iBuilder->CreatePointerCast(blockStartPtr, iBuilder->getInt32Ty()->getPointerTo()),
63            offset
64            );
65}
66
67}       // anonymouse namespace
68
69
70/**
71 * In order to allow mem2reg to promote the stack variables, alloca's have
72 * to be in the entry block of a function. Thus, we need to disable indirect
73 * branching on this kernel to have a standalone DoMethod function.
74 */
75bool LZ4IndexDecoderKernel::useIndirectBr() const {
76    return false;
77}
78
79
80/**
81 * Get the offset within the current word.
82 */
83Value * LZ4IndexDecoderKernel::getWordOffset() {
84    Value * wordWidthMask = iBuilder->getInt32(wordWidth - 1);
85    return iBuilder->CreateAnd(
86            iBuilder->CreateLoad(sOffset),
87            wordWidthMask
88            );
89}
90
91
92/**
93 * Get the offset of the start of the current word.
94 */
95Value * LZ4IndexDecoderKernel::getWordStartOffset() {
96    Value * wordWidthMask = iBuilder->getInt32(wordWidth - 1);
97    return iBuilder->CreateAnd(
98            iBuilder->CreateLoad(sOffset),
99            iBuilder->CreateNot(wordWidthMask)
100            );
101}
102
103
104/**
105 * Load a raw byte from byteStream.
106 * If offset is not provided, load the current byte by default.
107 */
108Value * LZ4IndexDecoderKernel::loadRawByte(Value * offset = nullptr) {
109    Value * blockStartPtr = iBuilder->CreatePointerCast(
110            getInputStreamBlockPtr("byteStream", iBuilder->getInt32(0)),
111            iBuilder->getInt8PtrTy()
112            );
113    if (offset == nullptr) {
114        offset = iBuilder->CreateLoad(sOffset);
115    }
116    Value * ptr = iBuilder->CreateGEP(blockStartPtr, offset);
117    return iBuilder->CreateLoad(ptr);
118}
119
120
121/**
122 * Set the current extender word up until before the offset position.
123 * extender = .......  (little-endian, LSB on the right)
124 * offset   =    ^
125 * cleared  = ....111
126 */
127void LZ4IndexDecoderKernel::setExtenderUntilOffset() {
128    // Little-endian, offset counts from LSB
129    // extender = extender ^ ~((1 << offset) -1)
130    Value * extender = iBuilder->CreateLoad(sExtender);
131    Value * wordOffset = iBuilder->CreateZExt(
132            getWordOffset(),
133            iBuilder->getSizeTy()
134            );
135    Value * one = iBuilder->getSize(1);
136    Value * mask = iBuilder->CreateSub(
137            iBuilder->CreateShl(one, wordOffset),
138            one);
139    extender = iBuilder->CreateOr(extender, mask);
140    iBuilder->CreateStore(extender, sExtender);
141}
142
143
144/**
145 * Load the extender word at the current offset.
146 * Called when we potentially reach a new word.  Usually followed by setExtenderUntilOffset.
147 */
148void LZ4IndexDecoderKernel::loadCurrentExtender() {
149    iBuilder->CreateStore(
150            iBuilder->CreateExtractElement(extenders,
151                iBuilder->CreateLShr(
152                    iBuilder->CreateLoad(sOffset),
153                    iBuilder->getInt32(std::log2(wordWidth))
154                    )
155                ),
156            sExtender);
157}
158
159
160void LZ4IndexDecoderKernel::generateProduceOutput() {
161    Value * producedItem = getProducedItemCount("literalIndexes");
162
163#ifndef NDEBUG
164    iBuilder->CallPrintInt("ProducedItem", producedItem);
165    // LiteralStart is adjusted to be relative to the block start, so that
166    // the output can be compared against that of the reference implementation.
167    //iBuilder->CallPrintInt("LiteralStart", getScalarField("LiteralStart"));
168    iBuilder->CallPrintInt("LiteralStart", iBuilder->CreateSub(
169                getScalarField("LiteralStart"), getScalarField("LZ4BlockStart")));
170    iBuilder->CallPrintInt("LiteralLength", getScalarField("LiteralLength"));
171    iBuilder->CallPrintInt("MatchOffset", getScalarField("MatchOffset"));
172    iBuilder->CallPrintInt("MatchLength", getScalarField("MatchLength"));
173#endif
174    printRTDebugMsg("--------------");
175
176    Value * outputOffset = iBuilder->CreateAnd(
177            iBuilder->CreateTrunc(producedItem, iBuilder->getInt32Ty()),
178            iBuilder->getInt32(iBuilder->getBitBlockWidth() - 1)
179            );  // producedItem % blockWidth (as blockWidth is always a power of 2)
180    Value * literalStartPtr = getOutputPtr(iBuilder,
181            getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(0)), outputOffset);
182    Value * literalLengthPtr = getOutputPtr(iBuilder,
183            getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(1)), outputOffset);
184    Value * matchOffsetPtr = getOutputPtr(iBuilder,
185            getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(0)), outputOffset);
186    Value * matchLengthPtr = getOutputPtr(iBuilder,
187            getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(1)), outputOffset);
188    iBuilder->CreateStore(getScalarField("LiteralStart"), literalStartPtr);
189    iBuilder->CreateStore(getScalarField("LiteralLength"), literalLengthPtr);
190    iBuilder->CreateStore(getScalarField("MatchOffset"), matchOffsetPtr);
191    iBuilder->CreateStore(getScalarField("MatchLength"), matchLengthPtr);
192    setProducedItemCount("literalIndexes", iBuilder->CreateAdd(producedItem, iBuilder->getSize(1)));
193    // matchIndexes has a fixed ratio of 1:1 w.r.t. literalIndexes.
194}
195
196
197void LZ4IndexDecoderKernel::generateDoBlockMethod() {
198    BasicBlock * entry_block = iBuilder->GetInsertBlock();
199    BasicBlock * exit_block = CreateBasicBlock("exit");
200
201    // %entry
202    iBuilder->SetInsertPoint(entry_block);
203    printRTDebugMsg("entry");
204    // Global positions in the byte stream.
205    Value * blockNo = getScalarField("BlockNo");
206    blockStartPos = iBuilder->CreateMul(blockNo, iBuilder->getInt32(iBuilder->getBitBlockWidth()), "blockStartPos");
207    extenders = iBuilder->CreateBitCast(
208            loadInputStreamBlock("extenders", iBuilder->getInt32(0)),
209            VectorType::get(iBuilder->getSizeTy(), iBuilder->getBitBlockWidth() / wordWidth),
210            "extenders");
211    // Create a series of stack variables which will be promoted by mem2reg.
212    sOffset = createStackVar(iBuilder, iBuilder->getInt32Ty(), "offset");
213    // tempLength has different meanings in different states.
214    sTempLength = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempLength", getScalarField("TempLength"));
215    sTempCount = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempCount", getScalarField("TempCount"));
216    sState = createStackVar(iBuilder, iBuilder->getInt8Ty(), "state", getScalarField("State"));
217    sExtender = createStackVar(iBuilder, iBuilder->getSizeTy(), "extender",
218            iBuilder->CreateExtractElement(extenders, iBuilder->getInt32(0)));
219
220    BasicBlock * skippingBytes = CreateBasicBlock("skipping_bytes");
221    BasicBlock * dispatch = CreateBasicBlock("dispatch");
222
223    iBuilder->CreateCondBr(
224            iBuilder->CreateICmpUGT(getScalarField("BytesToSkip"), iBuilder->getInt32(0)),
225            skippingBytes, dispatch
226            );
227
228    // %skipping_bytes
229    generateSkippingBytes(skippingBytes, exit_block);
230    // Insert point is at the end of skippingBytes.
231    iBuilder->CreateBr(dispatch);
232
233    // %dispatch
234    // Indirect branching will be added to %dispatch at last.
235
236    // %at_block_checksum
237    BasicBlock * atBlockChecksum = CreateBasicBlock("at_block_checksum");
238    generateAtBlockChecksum(atBlockChecksum, skippingBytes);
239 
240    // %at_block_size
241    BasicBlock * atBlockSize = CreateBasicBlock("at_block_size");
242    generateAtBlockSize(atBlockSize, skippingBytes, exit_block);
243
244    // %at_token
245    BasicBlock * atToken = CreateBasicBlock("at_token");
246    generateAtToken(atToken, exit_block);
247
248    // %extending_literal_length
249    BasicBlock * extendingLiteralLen = CreateBasicBlock("extending_literal_length");
250    generateExtendingLiteralLen(extendingLiteralLen, exit_block);
251
252    // %at_literals
253    BasicBlock * atLiterals = CreateBasicBlock("at_literals");
254    generateAtLiterals(atLiterals);
255    iBuilder->CreateBr(skippingBytes);
256
257    // %at_first_offset
258    // Note that the last sequence is incomplete and ends with literals.
259    // If the whole LZ4 block is done, process the (optional) checksum.
260    // Otherwise, go around to process the next sequence.
261    BasicBlock * atOffset1 = CreateBasicBlock("at_first_offset");
262    iBuilder->SetInsertPoint(atOffset1);
263    Value * nowGlobalPos = iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset));
264    BasicBlock * blockEnd_else = CreateBasicBlock("block_end_else");
265    // Conditional branch inserted at the end of the last block.
266    iBuilder->CreateUnlikelyCondBr(
267            iBuilder->CreateICmpEQ(nowGlobalPos, getScalarField("LZ4BlockEnd")),
268            atBlockChecksum, blockEnd_else
269            );
270    generateAtFirstOffset(blockEnd_else, exit_block);
271
272    // %at_second_offset
273    BasicBlock * atOffset2 = CreateBasicBlock("at_second_offset");
274    generateAtSecondOffset(atOffset2, exit_block);
275
276    // %extending_match_length
277    BasicBlock * extendingMatchLen = CreateBasicBlock("extending_match_length");
278    generateExtendingMatchLen(extendingMatchLen, exit_block);
279    iBuilder->CreateBr(atToken);
280
281    // Indirect branching.
282    iBuilder->SetInsertPoint(dispatch);
283    printRTDebugMsg("dispatch");
284    // The order must comply with enum State.
285    Constant * labels = ConstantVector::get(
286            {BlockAddress::get(atBlockSize), BlockAddress::get(atToken), BlockAddress::get(extendingLiteralLen), BlockAddress::get(atLiterals),
287             BlockAddress::get(atOffset1), BlockAddress::get(atOffset2), BlockAddress::get(extendingMatchLen), BlockAddress::get(atBlockChecksum)}
288            );
289    Value * target = iBuilder->CreateExtractElement(labels, iBuilder->CreateLoad(sState));
290    IndirectBrInst * indirectBr = iBuilder->CreateIndirectBr(target);
291    indirectBr->addDestination(atBlockSize);
292    indirectBr->addDestination(atToken);
293    indirectBr->addDestination(extendingLiteralLen);
294    indirectBr->addDestination(atLiterals);
295    indirectBr->addDestination(atOffset1);
296    indirectBr->addDestination(atOffset2);
297    indirectBr->addDestination(extendingMatchLen);
298    indirectBr->addDestination(atBlockChecksum);
299
300    // %exit
301    iBuilder->SetInsertPoint(exit_block);
302    printRTDebugMsg("exit");
303    setScalarField("State", iBuilder->CreateLoad(sState));
304    setScalarField("TempLength", iBuilder->CreateLoad(sTempLength));
305    setScalarField("TempCount", iBuilder->CreateLoad(sTempCount));
306    setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getInt32(1)));
307    // When the kernel builder uses indirectbr, doBlock is not a separate function.
308    // Hence, we branch to a new basic block and fall through instead of returning.
309    BasicBlock * end_block = CreateBasicBlock("end_of_block");
310    iBuilder->CreateBr(end_block);
311    iBuilder->SetInsertPoint(end_block);
312}
313
314
315void LZ4IndexDecoderKernel::generateBoundaryDetection(State state, BasicBlock * exit_block, bool updateExtenderWord=false) {
316    if (updateExtenderWord) {
317        BasicBlock * wordBoundary_then = CreateBasicBlock("word_boundary_then-" + StateLabels.at(state));
318        BasicBlock * blockBoundary_else = CreateBasicBlock("block_boundary_else-" + StateLabels.at(state));
319        BasicBlock * wordBoundary_cont = CreateBasicBlock("word_boundary_cont-" + StateLabels.at(state));
320        iBuilder->CreateUnlikelyCondBr(
321                iBuilder->CreateICmpEQ(getWordOffset(), iBuilder->getInt32(0)),
322                wordBoundary_then, wordBoundary_cont
323                );
324
325        iBuilder->SetInsertPoint(wordBoundary_then);
326        iBuilder->CreateUnlikelyCondBr(
327                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth())),
328                exit_block, blockBoundary_else
329                );
330
331        // Reaching word boundary but not block boundary.  Update the extender word as requested.
332        iBuilder->SetInsertPoint(blockBoundary_else);
333        loadCurrentExtender();
334        iBuilder->CreateBr(wordBoundary_cont);
335
336        // Leave the insert point at the end and return.
337        iBuilder->SetInsertPoint(wordBoundary_cont);
338    } else {
339        BasicBlock * blockBoundary_cont = CreateBasicBlock("block_boundary_cont-" + StateLabels.at(state));
340        iBuilder->CreateUnlikelyCondBr(
341                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth())),
342                exit_block, blockBoundary_cont
343                );
344        // Leave the insert point at the end and return.
345        iBuilder->SetInsertPoint(blockBoundary_cont);
346    }
347}
348
349
350void LZ4IndexDecoderKernel::generateSkippingBytes(BasicBlock * bb, BasicBlock * exit_block) {
351    iBuilder->SetInsertPoint(bb);
352    printRTDebugMsg("skipping bytes");
353
354    Value * remainingBytesInBlock = iBuilder->CreateSub(
355            iBuilder->getInt32(iBuilder->getBitBlockWidth()), iBuilder->CreateLoad(sOffset)
356            );
357    Value * remainingBytesToSkip = getScalarField("BytesToSkip");
358    Value * advanceDist = selectMin(iBuilder, remainingBytesInBlock, remainingBytesToSkip);
359    remainingBytesToSkip = iBuilder->CreateSub(remainingBytesToSkip, advanceDist);
360    incStackVar(iBuilder, sOffset, advanceDist);
361    setScalarField("BytesToSkip", remainingBytesToSkip);
362
363    generateBoundaryDetection(State::SKIPPING_BYTES, exit_block);
364    // Falls through.
365}
366
367
368void LZ4IndexDecoderKernel::generateAtBlockSize(BasicBlock * bb, BasicBlock * skippingBytes, BasicBlock * exit_block) {
369    iBuilder->CreateBr(bb);
370    iBuilder->SetInsertPoint(bb);
371    printRTDebugMsg("scanning block size");
372    printGlobalPos();
373
374    // Use tempLength to hold the block size temporarily.
375    // Note that it is initially stored as big-endian (for the ease of reading) and will be "swapped" later.
376    // Use tempCount as the loop counter (0..3).
377    // Both variables are initialized from kernel states at %entry.
378
379    // A do-while loop.
380    BasicBlock * loopBody = CreateBasicBlock("blocksize_loop_body");
381    BasicBlock * loopExit = CreateBasicBlock("blocksize_loop_exit");
382    iBuilder->CreateBr(loopBody);
383
384    iBuilder->SetInsertPoint(loopBody);
385    Value * byte = loadRawByte();
386    Value * newTempLength = iBuilder->CreateAdd(
387            iBuilder->CreateShl(iBuilder->CreateLoad(sTempLength), iBuilder->getInt32(8)),
388            iBuilder->CreateZExt(byte, iBuilder->getInt32Ty())
389            );
390    iBuilder->CreateStore(newTempLength, sTempLength);
391    incStackVar(iBuilder, sTempCount);
392    incStackVar(iBuilder, sOffset);
393    // Stop when we read all four bytes or reach the end of the block.
394    iBuilder->CreateCondBr(
395            iBuilder->CreateOr(
396                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sTempCount), iBuilder->getInt32(4)),
397                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth()))
398                ),
399            loopExit, loopBody
400            );
401
402    iBuilder->SetInsertPoint(loopExit);
403    BasicBlock * blockSizeCompleted_then = CreateBasicBlock("blocksize_completed_then");
404    BasicBlock * blockSizeCompleted_cont = CreateBasicBlock("blocksize_completed_cont");
405    iBuilder->CreateLikelyCondBr(
406            iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sTempCount), iBuilder->getInt32(4)),
407            blockSizeCompleted_then, blockSizeCompleted_cont
408            );
409
410    // All four bytes of the block size are read in.
411    iBuilder->SetInsertPoint(blockSizeCompleted_then);
412    // Remember to swap the block size back to little-endian.
413    Value * blockSize = generateBitswap(iBuilder, iBuilder->CreateLoad(sTempLength));
414    Value * currentPos = iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset));
415    setScalarField("LZ4BlockStart", currentPos);
416    setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, blockSize));
417    printRTDebugInt("blockSize", blockSize);
418
419    BasicBlock * uncompressedBlock_then = CreateBasicBlock("uncompressed_block_then");
420    BasicBlock * uncompressedBlock_else = CreateBasicBlock("uncompressed_block_cont");
421    iBuilder->CreateUnlikelyCondBr(
422            iBuilder->CreateTrunc(
423                iBuilder->CreateLShr(blockSize, iBuilder->getInt32(31)),
424                iBuilder->getInt1Ty()
425                ),
426            uncompressedBlock_then,
427            uncompressedBlock_else
428            );
429
430    iBuilder->SetInsertPoint(uncompressedBlock_then);
431    Value * realBlockSize = iBuilder->CreateXor(blockSize, iBuilder->getInt32(1L << 31));
432    setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, realBlockSize));
433    setScalarField("BytesToSkip", realBlockSize);
434    setScalarField("LiteralStart", currentPos);
435    setScalarField("LiteralLength", realBlockSize);
436    // No need to set MatchLength/MatchOffset to 0, nor to produce output,
437    // because %atBlockChecksum will do so as the last sequence.
438    iBuilder->CreateStore(iBuilder->getInt8(State::AT_BLOCK_CHECKSUM), sState);
439    iBuilder->CreateBr(skippingBytes);
440
441    iBuilder->SetInsertPoint(uncompressedBlock_else);
442    // Reset these temporary values for later use.
443    iBuilder->CreateStore(iBuilder->getInt32(0), sTempLength);
444    iBuilder->CreateStore(iBuilder->getInt32(0), sTempCount);
445    iBuilder->CreateStore(iBuilder->getInt8(State::AT_TOKEN), sState);
446    // A block size of 0 is the end mark of the frame. Exit.
447    iBuilder->CreateUnlikelyCondBr(
448            iBuilder->CreateICmpEQ(blockSize, ConstantInt::getNullValue(blockSize->getType())),
449            exit_block,
450            blockSizeCompleted_cont
451            );
452
453    // We could be at the boundary no matter the block size is completed or not.
454    iBuilder->SetInsertPoint(blockSizeCompleted_cont);
455    generateBoundaryDetection(State::AT_BLOCK_SIZE, exit_block);
456    // Falls through to %at_token.
457}
458
459
460void LZ4IndexDecoderKernel::generateAtToken(BasicBlock * bb, BasicBlock * exit_block) {
461    iBuilder->CreateBr(bb);
462    iBuilder->SetInsertPoint(bb);
463    printRTDebugMsg("reading token");
464
465    Value * token = loadRawByte();
466    Value * literalLen = iBuilder->CreateZExt(
467        iBuilder->CreateLShr(token, iBuilder->getInt8(4)),
468        iBuilder->getInt32Ty()
469        );
470    Value * matchLen = iBuilder->CreateZExt(
471        iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)),
472        iBuilder->getInt32Ty()
473        );
474    incStackVar(iBuilder, sOffset);
475    // Prepare extender word for scanning.
476    loadCurrentExtender();
477    setExtenderUntilOffset();
478    // Store the (partial) match length to be extended later.
479    setScalarField("MatchLength", matchLen);
480    // Use tempLength to accumulate extended lengths (until at_literals).
481    iBuilder->CreateStore(literalLen, sTempLength);
482    iBuilder->CreateStore(iBuilder->getInt8(State::EXTENDING_LITERAL_LENGTH), sState);
483
484    generateBoundaryDetection(State::AT_TOKEN, exit_block);
485    // Falls through to %extending_literal_length.
486}
487
488
489void LZ4IndexDecoderKernel::generateExtendingLiteralLen(BasicBlock * bb, BasicBlock * exit_block) {
490    iBuilder->CreateBr(bb);
491    iBuilder->SetInsertPoint(bb);
492    printRTDebugMsg("extending literal len");
493
494    Value * wordOffset = getWordOffset();
495    Value * blockOffset = getWordStartOffset();
496    Value * literalLen = iBuilder->CreateLoad(sTempLength);
497    Value * literalExtEnd = iBuilder->CreateTrunc(
498                iBuilder->CreateCountForwardZeroes(iBuilder->CreateNot(iBuilder->CreateLoad(sExtender))),
499                iBuilder->getInt32Ty());
500    printRTDebugInt("wordOffset", wordOffset);
501    printRTDebugInt("literalExtEnd", literalExtEnd);
502    // number of extender = literalExtEnd - wordOffset
503    Value * numExtenders = iBuilder->CreateSub(literalExtEnd, wordOffset);
504    Value * literalExtReachBoundary =
505            iBuilder->CreateICmpEQ(literalExtEnd, iBuilder->getInt32(wordWidth));
506    // There are literalExtEnd forward zeroes, we load bytes[literalExtEnd]
507    // which is the first non-extender.  If literalExtEnd == 64, we force the
508    // load index to be 0 to avoid out-of-bound access, and lastByte will be 0.
509    Value * loadOffset = iBuilder->CreateSelect(literalExtReachBoundary,
510            ConstantInt::getNullValue(literalExtEnd->getType()),
511            literalExtEnd);
512    Value * lastByte = iBuilder->CreateSelect(literalExtReachBoundary,
513            iBuilder->getInt8(0),
514            loadRawByte(iBuilder->CreateAdd(blockOffset, loadOffset)));
515    Value * literalLenExted = iBuilder->CreateICmpUGE(literalLen, iBuilder->getInt32(0xf));
516    literalLen = iBuilder->CreateSelect(literalLenExted,
517            iBuilder->CreateAdd(
518                literalLen,
519                iBuilder->CreateAdd(
520                    iBuilder->CreateMul(numExtenders, iBuilder->getInt32(0xff)),
521                    iBuilder->CreateZExt(lastByte, iBuilder->getInt32Ty())
522                    )
523                ),      // literalLen + numExtenders * 255
524            literalLen);
525    wordOffset = iBuilder->CreateSelect(literalLenExted,
526            literalExtEnd,
527            wordOffset);
528    // If lastByte is truly the last length byte, we need to advance the cursor by 1.
529    wordOffset = iBuilder->CreateSelect(
530            iBuilder->CreateAnd(literalLenExted, iBuilder->CreateNot(literalExtReachBoundary)),
531            iBuilder->CreateAdd(wordOffset, iBuilder->getInt32(1)),
532            wordOffset
533            );
534    iBuilder->CreateStore(literalLen, sTempLength);
535    iBuilder->CreateStore(iBuilder->CreateAdd(blockOffset, wordOffset), sOffset);
536    Value * unfinished = iBuilder->CreateAnd(literalExtReachBoundary, literalLenExted);
537    Value * newState = iBuilder->CreateSelect(unfinished,
538            iBuilder->getInt8(State::EXTENDING_LITERAL_LENGTH),
539            iBuilder->getInt8(State::AT_LITERALS));
540    iBuilder->CreateStore(newState, sState);
541
542    generateBoundaryDetection(State::EXTENDING_LITERAL_LENGTH, exit_block, true);
543    BasicBlock * cont_block = CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_LITERAL_LENGTH));
544    // Insert point is still in wordBoundary block now.
545    // See if there are still more extenders.
546    iBuilder->CreateUnlikelyCondBr(unfinished, bb, cont_block);
547
548    iBuilder->SetInsertPoint(cont_block);
549    // Falls through to %at_literals.
550}
551
552
553void LZ4IndexDecoderKernel::generateAtLiterals(BasicBlock * bb) {
554    iBuilder->CreateBr(bb);
555    iBuilder->SetInsertPoint(bb);
556
557    setScalarField("LiteralStart", iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset)));
558    setScalarField("LiteralLength", iBuilder->CreateLoad(sTempLength));
559    setScalarField("BytesToSkip", iBuilder->CreateLoad(sTempLength));
560    iBuilder->CreateStore(iBuilder->getInt8(State::AT_FIRST_OFFSET), sState);
561
562    // No boundary detection here as we do not advance the cursor.
563    // Control flow will be redirected to %skipping_bytes later.
564}
565
566
567void LZ4IndexDecoderKernel::generateAtFirstOffset(BasicBlock * bb, BasicBlock * exit_block) {
568    iBuilder->SetInsertPoint(bb);
569    printRTDebugMsg("reading first offset");
570
571    Value * byte = iBuilder->CreateZExt(loadRawByte(), iBuilder->getInt32Ty());
572    // Use tempLength to store partial offset.
573    iBuilder->CreateStore(byte, sTempLength);
574    incStackVar(iBuilder, sOffset);
575    iBuilder->CreateStore(iBuilder->getInt8(State::AT_SECOND_OFFSET), sState);
576
577    generateBoundaryDetection(State::AT_FIRST_OFFSET, exit_block);
578    // Falls through to %at_second_offset.
579}
580
581
582void LZ4IndexDecoderKernel::generateAtSecondOffset(BasicBlock * bb, BasicBlock * exit_block) {
583    iBuilder->CreateBr(bb);
584    iBuilder->SetInsertPoint(bb);
585    printRTDebugMsg("reading second offset");
586
587    Value * byte1 = iBuilder->CreateLoad(sTempLength);
588    Value * byte2 = iBuilder->CreateZExt(loadRawByte(), iBuilder->getInt32Ty());
589    Value * offset = iBuilder->CreateAdd(
590            iBuilder->CreateShl(byte2, iBuilder->getInt32(8)),
591            byte1
592            );
593    setScalarField("MatchOffset", offset);
594    incStackVar(iBuilder, sOffset);
595    // Prepare extender word and tempLength for extending.
596    loadCurrentExtender();
597    setExtenderUntilOffset();
598    iBuilder->CreateStore(getScalarField("MatchLength"), sTempLength);
599    iBuilder->CreateStore(iBuilder->getInt8(State::EXTENDING_MATCH_LENGTH), sState);
600
601    generateBoundaryDetection(State::AT_SECOND_OFFSET, exit_block);
602    // Falls through to %extending_match_length.
603}
604
605
606void LZ4IndexDecoderKernel::generateExtendingMatchLen(BasicBlock * bb, BasicBlock * exit_block) {
607    iBuilder->CreateBr(bb);
608    iBuilder->SetInsertPoint(bb);
609    printRTDebugMsg("extending match length");
610    printGlobalPos();
611    printRTDebugInt("rawbyte", loadRawByte());
612    printRTDebugInt("extword", iBuilder->CreateLoad(sExtender));
613
614    Value * wordOffset = getWordOffset();
615    Value * blockOffset = getWordStartOffset();
616    Value * matchLen = iBuilder->CreateLoad(sTempLength);
617    Value * matchExtEnd = iBuilder->CreateTrunc(
618        iBuilder->CreateCountForwardZeroes(iBuilder->CreateNot(iBuilder->CreateLoad(sExtender))),
619        iBuilder->getInt32Ty()
620        );
621    printRTDebugInt("wordoffset", wordOffset);
622    printRTDebugInt("matchExtEnd", matchExtEnd);
623    // number of extender = matchExtEnd - wordOffset
624    Value * numExtenders = iBuilder->CreateSub(matchExtEnd, wordOffset);
625    Value * matchExtReachBoundary = 
626            iBuilder->CreateICmpEQ(matchExtEnd, iBuilder->getInt32(wordWidth));
627    // There are matchExtEnd forward zeroes, we load bytes[matchExtEnd]
628    // which is the first non-extender.  If matchExtEnd == 64, we force the
629    // load index to be 0 to avoid out-of-bound access, and lastByte will be 0.
630    Value * loadOffset = iBuilder->CreateSelect(matchExtReachBoundary,
631            ConstantInt::getNullValue(matchExtEnd->getType()),
632            matchExtEnd);
633    Value * lastByte = iBuilder->CreateSelect(matchExtReachBoundary,
634            iBuilder->getInt8(0),
635            loadRawByte(iBuilder->CreateAdd(blockOffset, loadOffset)));
636    Value * matchLenExted = iBuilder->CreateICmpUGE(matchLen, iBuilder->getInt32(0xf));
637    matchLen = iBuilder->CreateSelect(matchLenExted,
638            iBuilder->CreateAdd(
639                matchLen,
640                iBuilder->CreateAdd(
641                    iBuilder->CreateMul(numExtenders, iBuilder->getInt32(0xff)),
642                    iBuilder->CreateZExt(lastByte, iBuilder->getInt32Ty())
643                    )
644                ),      // matchLen + numExtenders * 255
645            matchLen);
646    wordOffset = iBuilder->CreateSelect(matchLenExted,
647            matchExtEnd,
648            wordOffset);
649    // If lastByte is truly the last length byte, we need to advance the cursor by 1.
650    wordOffset = iBuilder->CreateSelect(
651            iBuilder->CreateAnd(matchLenExted, iBuilder->CreateNot(matchExtReachBoundary)),
652            iBuilder->CreateAdd(wordOffset, iBuilder->getInt32(1)),
653            wordOffset
654            );
655    iBuilder->CreateStore(matchLen, sTempLength);
656    iBuilder->CreateStore(iBuilder->CreateAdd(blockOffset, wordOffset), sOffset);
657
658    Value * unfinished = iBuilder->CreateAnd(matchExtReachBoundary, matchLenExted);
659    BasicBlock * output_then = CreateBasicBlock("output_then");
660    BasicBlock * output_cont = CreateBasicBlock("output_cont");
661    iBuilder->CreateLikelyCondBr(
662            iBuilder->CreateNot(unfinished),
663            output_then, output_cont
664            );
665    iBuilder->SetInsertPoint(output_then);
666    iBuilder->CreateStore(iBuilder->getInt8(State::AT_TOKEN), sState);
667    matchLen = iBuilder->CreateAdd(matchLen, iBuilder->getInt32(4));    // Add the constant at the end.
668    setScalarField("MatchLength", matchLen);
669    generateProduceOutput();
670    iBuilder->CreateBr(output_cont);
671
672    iBuilder->SetInsertPoint(output_cont);
673    generateBoundaryDetection(State::EXTENDING_MATCH_LENGTH, exit_block, true);
674    BasicBlock * cont_block = CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_MATCH_LENGTH));
675    // Insert point is still in wordBoundary block now.
676    // See if there are still more extenders.
677    iBuilder->CreateUnlikelyCondBr(unfinished, bb, cont_block);
678
679    iBuilder->SetInsertPoint(cont_block);
680}
681
682
683void LZ4IndexDecoderKernel::generateAtBlockChecksum(BasicBlock * bb, BasicBlock * skippingBytes) {
684    // No branch here as we have made a conditional branch outside.
685    iBuilder->SetInsertPoint(bb);
686    printRTDebugMsg("processing block checksum");
687
688    // Produce the partial output (fill matchIndexes with 0).
689    setScalarField("MatchOffset", iBuilder->getInt32(0));
690    setScalarField("MatchLength", iBuilder->getInt32(0));
691    generateProduceOutput();
692
693    BasicBlock * hasChecksum_then = CreateBasicBlock("has_checksum_then");
694    BasicBlock * hasChecksum_cont = CreateBasicBlock("has_checksum_cont");
695
696    iBuilder->CreateStore(iBuilder->getInt8(State::AT_BLOCK_SIZE), sState);
697    iBuilder->CreateCondBr(getScalarField("hasBlockChecksum"), hasChecksum_then, hasChecksum_cont);
698
699    iBuilder->SetInsertPoint(hasChecksum_then);
700    setScalarField("BytesToSkip", iBuilder->getInt32(4));
701    iBuilder->CreateBr(skippingBytes);
702    // Boundary detection will be done in skipping_bytes.
703
704    iBuilder->SetInsertPoint(hasChecksum_cont);
705    // No checksum, offset not advanced.  Falls through to the next block (block_size).
706}
707
708LZ4IndexDecoderKernel::LZ4IndexDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
709: BlockOrientedKernel("lz4IndexDecoder",
710    // Inputs
711    {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"},
712     Binding{iBuilder->getStreamSetTy(1, 1), "extenders"}},
713    // Outputs: literal start, literal length, match offset, match length
714    {Binding{iBuilder->getStreamSetTy(2, 32), "literalIndexes", UnknownRate()},
715     Binding{iBuilder->getStreamSetTy(2, 32), "matchIndexes", FixedRatio(1, 1, "literalIndexes")}},
716    // Arguments
717    {Binding{iBuilder->getInt1Ty(), "hasBlockChecksum"}},
718    {},
719    // Internal states:
720    {Binding{iBuilder->getInt32Ty(), "BlockNo"},
721     Binding{iBuilder->getInt8Ty(), "State"},
722     Binding{iBuilder->getInt32Ty(), "LZ4BlockStart"},
723     Binding{iBuilder->getInt32Ty(), "LZ4BlockEnd"},
724     Binding{iBuilder->getInt32Ty(), "BytesToSkip"},
725     Binding{iBuilder->getInt32Ty(), "TempLength"},
726     Binding{iBuilder->getInt32Ty(), "TempCount"},
727     Binding{iBuilder->getInt32Ty(), "LiteralStart"},
728     Binding{iBuilder->getInt32Ty(), "LiteralLength"},
729     Binding{iBuilder->getInt32Ty(), "MatchOffset"},
730     Binding{iBuilder->getInt32Ty(), "MatchLength"}})
731, wordWidth{iBuilder->getSizeTy()->getBitWidth()} {
732    setNoTerminateAttribute(true);
733}
Note: See TracBrowser for help on using the repository browser.