source: icGREP/icgrep-devel/icgrep/kernels/lz4_index_decoder.cpp @ 5513

Last change on this file since 5513 was 5493, checked in by cameron, 2 years ago

Restore check-ins from the last several days

File size: 33.3 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "lz4_index_decoder.h"
8#include <kernels/kernel_builder.h>
9 
10using namespace llvm;
11using namespace kernel;
12
// Run-time debug tracing is enabled only when NDEBUG is not defined.
#ifdef NDEBUG
#define DEBUG_RT_PRINT 0
#else
#define DEBUG_RT_PRINT 1
#endif

// Calls iBuilder->CallPrintMsgToStderr(MSG) when tracing is enabled.
// Requires a variable named `iBuilder` in the expanding scope.
#define printRTDebugMsg(MSG) \
    do { if (DEBUG_RT_PRINT) iBuilder->CallPrintMsgToStderr(MSG); } while (0)

// Calls iBuilder->CallPrintIntToStderr(NAME, X) when tracing is enabled.
// X is only evaluated when tracing is enabled.
#define printRTDebugInt(NAME, X) \
    do { if (DEBUG_RT_PRINT) iBuilder->CallPrintIntToStderr(NAME, X); } while (0)

// Traces blockStartPos + the current value of the sOffset stack variable.
#define printGlobalPos() \
    printRTDebugInt("GlobalPos", iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset)))
27
28namespace {
29
30Value * generateBitswap(const std::unique_ptr<KernelBuilder> & iBuilder, Value * v) {
31    Value * bswapFunc = Intrinsic::getDeclaration(iBuilder->getModule(),
32            Intrinsic::bswap, v->getType());
33    return iBuilder->CreateCall(bswapFunc, {v});
34}
35
36Value * selectMin(const std::unique_ptr<KernelBuilder> & iBuilder, Value * a, Value * b) {
37    return iBuilder->CreateSelect(iBuilder->CreateICmpULT(a, b), a, b);
38}
39
40Value * createStackVar(const std::unique_ptr<KernelBuilder> & iBuilder, Type * type, StringRef name, Value * initializer = nullptr) {
41    Value * var = iBuilder->CreateAlloca(type, nullptr, name);
42    if (initializer) {
43        iBuilder->CreateStore(initializer, var);
44    } else {
45        iBuilder->CreateStore(ConstantInt::get(type, 0), var);
46    }
47    return var;
48}
49
50void incStackVar(const std::unique_ptr<KernelBuilder> & iBuilder, Value * svar, Value * increment = nullptr) {
51    Value * value = iBuilder->CreateLoad(svar);
52    if (increment) {
53        value = iBuilder->CreateAdd(value, increment);
54    } else {
55        value = iBuilder->CreateAdd(value, ConstantInt::get(value->getType(), 1));
56    }
57    iBuilder->CreateStore(value, svar);
58}
59
60Value * getOutputPtr(const std::unique_ptr<KernelBuilder> & iBuilder, Value * blockStartPtr, Value * offset) {
61    return iBuilder->CreateGEP(
62            iBuilder->CreatePointerCast(blockStartPtr, iBuilder->getInt32Ty()->getPointerTo()),
63            offset
64            );
65}
66
67}       // anonymous namespace
68
69/**
70 * Get the offset within the current word.
71 */
72Value * LZ4IndexDecoderKernel::getWordOffset(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
73    Value * offset = iBuilder->CreateLoad(sOffset);
74    IntegerType * type = cast<IntegerType>(offset->getType());
75    Constant * mask = ConstantInt::get(type, wordWidth - 1);
76    return iBuilder->CreateAnd(offset, mask);
77}
78
79/**
80 * Get the offset of the start of the current word.
81 */
82Value * LZ4IndexDecoderKernel::getWordStartOffset(const std::unique_ptr<KernelBuilder> & iBuilder) {
83    Value * offset = iBuilder->CreateLoad(sOffset);
84    IntegerType * type = cast<IntegerType>(offset->getType());
85    Constant * mask = ConstantExpr::getNeg(ConstantInt::get(type, wordWidth));
86    return iBuilder->CreateAnd(offset, mask);
87}
88
89/**
90 * Load a raw byte from byteStream.
91 * If offset is not provided, load the current byte by default.
92 */
93Value * LZ4IndexDecoderKernel::loadRawByte(const std::unique_ptr<KernelBuilder> & iBuilder, Value * offset) {
94    Value * blockStartPtr = iBuilder->CreatePointerCast(
95            iBuilder->getInputStreamBlockPtr("byteStream", iBuilder->getInt32(0)),
96            iBuilder->getInt8PtrTy()
97            );
98    if (offset == nullptr) {
99        offset = iBuilder->CreateLoad(sOffset);
100    }
101    Value * ptr = iBuilder->CreateGEP(blockStartPtr, offset);
102    return iBuilder->CreateLoad(ptr);
103}
104
105
106/**
107 * Set the current extender word up until before the offset position.
108 * extender = .......  (little-endian, LSB on the right)
109 * offset   =    ^
110 * cleared  = ....111
111 */
112void LZ4IndexDecoderKernel::setExtenderUntilOffset(const std::unique_ptr<KernelBuilder> & iBuilder) {
113    // Little-endian, offset counts from LSB
114    // extender = extender ^ ~((1 << offset) -1)
115    Value * extender = iBuilder->CreateLoad(sExtender);
116    Value * wordOffset = iBuilder->CreateZExt(
117            getWordOffset(iBuilder),
118            iBuilder->getSizeTy()
119            );
120    Value * one = iBuilder->getSize(1);
121    Value * mask = iBuilder->CreateSub(
122            iBuilder->CreateShl(one, wordOffset),
123            one);
124    extender = iBuilder->CreateOr(extender, mask);
125    iBuilder->CreateStore(extender, sExtender);
126}
127
128
129/**
130 * Load the extender word at the current offset.
131 * Called when we potentially reach a new word.  Usually followed by setExtenderUntilOffset.
132 */
133void LZ4IndexDecoderKernel::loadCurrentExtender(const std::unique_ptr<KernelBuilder> & iBuilder) {
134    Value * offset = iBuilder->CreateLoad(sOffset);
135    IntegerType * type = cast<IntegerType>(offset->getType());
136    ConstantInt * shift = ConstantInt::get(type, std::log2(wordWidth));
137    Value * shiftedOffset = iBuilder->CreateLShr(offset, shift);
138    Value * extender = iBuilder->CreateExtractElement(extenders, shiftedOffset);
139    iBuilder->CreateStore(extender, sExtender);
140}
141
142
143void LZ4IndexDecoderKernel::generateProduceOutput(const std::unique_ptr<KernelBuilder> &iBuilder) {
144    Value * producedItem = iBuilder->getProducedItemCount("literalIndexes");
145
146#ifndef NDEBUG
147    iBuilder->CallPrintInt("ProducedItem", producedItem);
148    // LiteralStart is adjusted to be relative to the block start, so that
149    // the output can be compared against that of the reference implementation.
150    Value * literalStart = iBuilder->CreateSub(iBuilder->getScalarField("LiteralStart"), iBuilder->getScalarField("LZ4BlockStart"));
151    iBuilder->CallPrintInt("LiteralStart", literalStart);
152    iBuilder->CallPrintInt("LiteralLength", iBuilder->getScalarField("LiteralLength"));
153    iBuilder->CallPrintInt("MatchOffset", iBuilder->getScalarField("MatchOffset"));
154    iBuilder->CallPrintInt("MatchLength", iBuilder->getScalarField("MatchLength"));
155#endif
156    printRTDebugMsg("--------------");
157
158    Value * outputOffset = iBuilder->CreateAnd(
159            iBuilder->CreateTrunc(producedItem, iBuilder->getInt32Ty()),
160            iBuilder->getInt32(iBuilder->getBitBlockWidth() - 1)
161            );  // producedItem % blockWidth (as blockWidth is always a power of 2)
162    Value * literalStartPtr = getOutputPtr(iBuilder,
163            iBuilder->getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(0)), outputOffset);
164    Value * literalLengthPtr = getOutputPtr(iBuilder,
165            iBuilder->getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(1)), outputOffset);
166    Value * matchOffsetPtr = getOutputPtr(iBuilder,
167            iBuilder->getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(0)), outputOffset);
168    Value * matchLengthPtr = getOutputPtr(iBuilder,
169            iBuilder->getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(1)), outputOffset);
170    iBuilder->CreateStore(iBuilder->getScalarField("LiteralStart"), literalStartPtr);
171    iBuilder->CreateStore(iBuilder->getScalarField("LiteralLength"), literalLengthPtr);
172    iBuilder->CreateStore(iBuilder->getScalarField("MatchOffset"), matchOffsetPtr);
173    iBuilder->CreateStore(iBuilder->getScalarField("MatchLength"), matchLengthPtr);
174    iBuilder->setProducedItemCount("literalIndexes", iBuilder->CreateAdd(producedItem, iBuilder->getSize(1)));
175    // matchIndexes has a fixed ratio of 1:1 w.r.t. literalIndexes.
176}
177
178
179void LZ4IndexDecoderKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
180    BasicBlock * entry_block = iBuilder->GetInsertBlock();
181    BasicBlock * exit_block = iBuilder->CreateBasicBlock("exit");
182
183    // %entry
184    iBuilder->SetInsertPoint(entry_block);
185    printRTDebugMsg("entry");
186    // Global positions in the byte stream.
187    Value * blockNo = iBuilder->getScalarField("BlockNo");
188    blockStartPos = iBuilder->CreateMul(blockNo, iBuilder->getInt32(iBuilder->getBitBlockWidth()), "blockStartPos");
189    extenders = iBuilder->CreateBitCast(
190            iBuilder->loadInputStreamBlock("extenders", iBuilder->getInt32(0)),
191            VectorType::get(iBuilder->getSizeTy(), iBuilder->getBitBlockWidth() / iBuilder->getSizeTy()->getBitWidth()),
192            "extenders");
193    // Create a series of stack variables which will be promoted by mem2reg.
194    sOffset = createStackVar(iBuilder, iBuilder->getInt32Ty(), "offset");
195    // tempLength has different meanings in different states.
196    sTempLength = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempLength", iBuilder->getScalarField("TempLength"));
197    sTempCount = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempCount", iBuilder->getScalarField("TempCount"));
198    sState = createStackVar(iBuilder, iBuilder->getInt8Ty(), "state", iBuilder->getScalarField("State"));
199    sExtender = createStackVar(iBuilder, iBuilder->getSizeTy(), "extender",
200            iBuilder->CreateExtractElement(extenders, iBuilder->getInt32(0)));
201
202    BasicBlock * skippingBytes = iBuilder->CreateBasicBlock("skipping_bytes");
203    BasicBlock * dispatch = iBuilder->CreateBasicBlock("dispatch");
204
205    iBuilder->CreateCondBr(
206            iBuilder->CreateICmpUGT(iBuilder->getScalarField("BytesToSkip"), iBuilder->getInt32(0)),
207            skippingBytes, dispatch
208            );
209
210    // %skipping_bytes
211    generateSkippingBytes(iBuilder, skippingBytes, exit_block);
212    // Insert point is at the end of skippingBytes.
213    iBuilder->CreateBr(dispatch);
214
215    // %dispatch
216    // Indirect branching will be added to %dispatch at last.
217
218    // %at_block_checksum
219    BasicBlock * atBlockChecksum = iBuilder->CreateBasicBlock("at_block_checksum");
220    generateAtBlockChecksum(iBuilder, atBlockChecksum, skippingBytes);
221 
222    // %at_block_size
223    BasicBlock * atBlockSize = iBuilder->CreateBasicBlock("at_block_size");
224    generateAtBlockSize(iBuilder, atBlockSize, skippingBytes, exit_block);
225
226    // %at_token
227    BasicBlock * atToken = iBuilder->CreateBasicBlock("at_token");
228    generateAtToken(iBuilder, atToken, exit_block);
229
230    // %extending_literal_length
231    BasicBlock * extendingLiteralLen = iBuilder->CreateBasicBlock("extending_literal_length");
232    generateExtendingLiteralLen(iBuilder, extendingLiteralLen, exit_block);
233
234    // %at_literals
235    BasicBlock * atLiterals = iBuilder->CreateBasicBlock("at_literals");
236    generateAtLiterals(iBuilder, atLiterals);
237    iBuilder->CreateBr(skippingBytes);
238
239    // %at_first_offset
240    // Note that the last sequence is incomplete and ends with literals.
241    // If the whole LZ4 block is done, process the (optional) checksum.
242    // Otherwise, go around to process the next sequence.
243    BasicBlock * atOffset1 = iBuilder->CreateBasicBlock("at_first_offset");
244    iBuilder->SetInsertPoint(atOffset1);
245    Value * nowGlobalPos = iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset));
246    BasicBlock * blockEnd_else = iBuilder->CreateBasicBlock("block_end_else");
247    // Conditional branch inserted at the end of the last block.
248    iBuilder->CreateUnlikelyCondBr(
249            iBuilder->CreateICmpEQ(nowGlobalPos, iBuilder->getScalarField("LZ4BlockEnd")),
250            atBlockChecksum, blockEnd_else
251            );
252    generateAtFirstOffset(iBuilder, blockEnd_else, exit_block);
253
254    // %at_second_offset
255    BasicBlock * atOffset2 = iBuilder->CreateBasicBlock("at_second_offset");
256    generateAtSecondOffset(iBuilder, atOffset2, exit_block);
257
258    // %extending_match_length
259    BasicBlock * extendingMatchLen = iBuilder->CreateBasicBlock("extending_match_length");
260    generateExtendingMatchLen(iBuilder, extendingMatchLen, exit_block);
261    iBuilder->CreateBr(atToken);
262
263    // Indirect branching.
264    iBuilder->SetInsertPoint(dispatch);
265    printRTDebugMsg("dispatch");
266    // The order must comply with enum State.
267    Constant * labels = ConstantVector::get(
268            {BlockAddress::get(atBlockSize), BlockAddress::get(atToken), BlockAddress::get(extendingLiteralLen), BlockAddress::get(atLiterals),
269             BlockAddress::get(atOffset1), BlockAddress::get(atOffset2), BlockAddress::get(extendingMatchLen), BlockAddress::get(atBlockChecksum)}
270            );
271    Value * target = iBuilder->CreateExtractElement(labels, iBuilder->CreateLoad(sState));
272    IndirectBrInst * indirectBr = iBuilder->CreateIndirectBr(target);
273    indirectBr->addDestination(atBlockSize);
274    indirectBr->addDestination(atToken);
275    indirectBr->addDestination(extendingLiteralLen);
276    indirectBr->addDestination(atLiterals);
277    indirectBr->addDestination(atOffset1);
278    indirectBr->addDestination(atOffset2);
279    indirectBr->addDestination(extendingMatchLen);
280    indirectBr->addDestination(atBlockChecksum);
281
282    // %exit
283    iBuilder->SetInsertPoint(exit_block);
284    printRTDebugMsg("exit");
285    iBuilder->setScalarField("State", iBuilder->CreateLoad(sState));
286    iBuilder->setScalarField("TempLength", iBuilder->CreateLoad(sTempLength));
287    iBuilder->setScalarField("TempCount", iBuilder->CreateLoad(sTempCount));
288    iBuilder->setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getInt32(1)));
289    // When the kernel builder uses indirectbr, doBlock is not a separate function.
290    // Hence, we branch to a new basic block and fall through instead of returning.
291    BasicBlock * end_block = iBuilder->CreateBasicBlock("end_of_block");
292    iBuilder->CreateBr(end_block);
293    iBuilder->SetInsertPoint(end_block);
294}
295
296
297void LZ4IndexDecoderKernel::generateBoundaryDetection(const std::unique_ptr<KernelBuilder> & iBuilder, State state, BasicBlock * exit_block, bool updateExtenderWord) {
298    if (updateExtenderWord) {
299        BasicBlock * wordBoundary_then = iBuilder->CreateBasicBlock("word_boundary_then-" + StateLabels.at(state));
300        BasicBlock * blockBoundary_else = iBuilder->CreateBasicBlock("block_boundary_else-" + StateLabels.at(state));
301        BasicBlock * wordBoundary_cont = iBuilder->CreateBasicBlock("word_boundary_cont-" + StateLabels.at(state));
302        iBuilder->CreateUnlikelyCondBr(
303                iBuilder->CreateICmpEQ(getWordOffset(iBuilder), iBuilder->getInt32(0)),
304                wordBoundary_then, wordBoundary_cont
305                );
306
307        iBuilder->SetInsertPoint(wordBoundary_then);
308        iBuilder->CreateUnlikelyCondBr(
309                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth())),
310                exit_block, blockBoundary_else
311                );
312
313        // Reaching word boundary but not block boundary.  Update the extender word as requested.
314        iBuilder->SetInsertPoint(blockBoundary_else);
315        loadCurrentExtender(iBuilder);
316        iBuilder->CreateBr(wordBoundary_cont);
317
318        // Leave the insert point at the end and return.
319        iBuilder->SetInsertPoint(wordBoundary_cont);
320    } else {
321        BasicBlock * blockBoundary_cont = iBuilder->CreateBasicBlock("block_boundary_cont-" + StateLabels.at(state));
322        iBuilder->CreateUnlikelyCondBr(
323                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth())),
324                exit_block, blockBoundary_cont
325                );
326        // Leave the insert point at the end and return.
327        iBuilder->SetInsertPoint(blockBoundary_cont);
328    }
329}
330
331
332void LZ4IndexDecoderKernel::generateSkippingBytes(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
333    iBuilder->SetInsertPoint(bb);
334    printRTDebugMsg("skipping bytes");
335
336    Value * remainingBytesInBlock = iBuilder->CreateSub(
337            iBuilder->getInt32(iBuilder->getBitBlockWidth()), iBuilder->CreateLoad(sOffset)
338            );
339    Value * remainingBytesToSkip = iBuilder->getScalarField("BytesToSkip");
340    Value * advanceDist = selectMin(iBuilder, remainingBytesInBlock, remainingBytesToSkip);
341    remainingBytesToSkip = iBuilder->CreateSub(remainingBytesToSkip, advanceDist);
342    incStackVar(iBuilder, sOffset, advanceDist);
343    iBuilder->setScalarField("BytesToSkip", remainingBytesToSkip);
344
345    generateBoundaryDetection(iBuilder, State::SKIPPING_BYTES, exit_block);
346    // Falls through.
347}
348
349
350void LZ4IndexDecoderKernel::generateAtBlockSize(const std::unique_ptr<KernelBuilder> &iBuilder, BasicBlock * bb, BasicBlock * skippingBytes, BasicBlock * exit_block) {
351    iBuilder->CreateBr(bb);
352    iBuilder->SetInsertPoint(bb);
353    printRTDebugMsg("scanning block size");
354    printGlobalPos();
355
356    // Use tempLength to hold the block size temporarily.
357    // Note that it is initially stored as big-endian (for the ease of reading) and will be "swapped" later.
358    // Use tempCount as the loop counter (0..3).
359    // Both variables are initialized from kernel states at %entry.
360
361    // A do-while loop.
362    BasicBlock * loopBody = iBuilder->CreateBasicBlock("blocksize_loop_body");
363    BasicBlock * loopExit = iBuilder->CreateBasicBlock("blocksize_loop_exit");
364    iBuilder->CreateBr(loopBody);
365
366    iBuilder->SetInsertPoint(loopBody);
367    Value * byte = loadRawByte(iBuilder);
368    Value * newTempLength = iBuilder->CreateAdd(
369            iBuilder->CreateShl(iBuilder->CreateLoad(sTempLength), iBuilder->getInt32(8)),
370            iBuilder->CreateZExt(byte, iBuilder->getInt32Ty())
371            );
372    iBuilder->CreateStore(newTempLength, sTempLength);
373    incStackVar(iBuilder, sTempCount);
374    incStackVar(iBuilder, sOffset);
375    // Stop when we read all four bytes or reach the end of the block.
376    iBuilder->CreateCondBr(
377            iBuilder->CreateOr(
378                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sTempCount), iBuilder->getInt32(4)),
379                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth()))
380                ),
381            loopExit, loopBody
382            );
383
384    iBuilder->SetInsertPoint(loopExit);
385    BasicBlock * blockSizeCompleted_then = iBuilder->CreateBasicBlock("blocksize_completed_then");
386    BasicBlock * blockSizeCompleted_cont = iBuilder->CreateBasicBlock("blocksize_completed_cont");
387    iBuilder->CreateLikelyCondBr(
388            iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sTempCount), iBuilder->getInt32(4)),
389            blockSizeCompleted_then, blockSizeCompleted_cont
390            );
391
392    // All four bytes of the block size are read in.
393    iBuilder->SetInsertPoint(blockSizeCompleted_then);
394    // Remember to swap the block size back to little-endian.
395    Value * blockSize = generateBitswap(iBuilder, iBuilder->CreateLoad(sTempLength));
396    Value * currentPos = iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset));
397    iBuilder->setScalarField("LZ4BlockStart", currentPos);
398    iBuilder->setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, blockSize));
399    printRTDebugInt("blockSize", blockSize);
400
401    BasicBlock * uncompressedBlock_then = iBuilder->CreateBasicBlock("uncompressed_block_then");
402    BasicBlock * uncompressedBlock_else = iBuilder->CreateBasicBlock("uncompressed_block_cont");
403    iBuilder->CreateUnlikelyCondBr(
404            iBuilder->CreateTrunc(
405                iBuilder->CreateLShr(blockSize, iBuilder->getInt32(31)),
406                iBuilder->getInt1Ty()
407                ),
408            uncompressedBlock_then,
409            uncompressedBlock_else
410            );
411
412    iBuilder->SetInsertPoint(uncompressedBlock_then);
413    Value * realBlockSize = iBuilder->CreateXor(blockSize, iBuilder->getInt32(1L << 31));
414    iBuilder->setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, realBlockSize));
415    iBuilder->setScalarField("BytesToSkip", realBlockSize);
416    iBuilder->setScalarField("LiteralStart", currentPos);
417    iBuilder->setScalarField("LiteralLength", realBlockSize);
418    // No need to set MatchLength/MatchOffset to 0, nor to produce output,
419    // because %atBlockChecksum will do so as the last sequence.
420    iBuilder->CreateStore(iBuilder->getInt8(State::AT_BLOCK_CHECKSUM), sState);
421    iBuilder->CreateBr(skippingBytes);
422
423    iBuilder->SetInsertPoint(uncompressedBlock_else);
424    // Reset these temporary values for later use.
425    iBuilder->CreateStore(iBuilder->getInt32(0), sTempLength);
426    iBuilder->CreateStore(iBuilder->getInt32(0), sTempCount);
427    iBuilder->CreateStore(iBuilder->getInt8(State::AT_TOKEN), sState);
428    // A block size of 0 is the end mark of the frame. Exit.
429    iBuilder->CreateUnlikelyCondBr(
430            iBuilder->CreateICmpEQ(blockSize, ConstantInt::getNullValue(blockSize->getType())),
431            exit_block,
432            blockSizeCompleted_cont
433            );
434
435    // We could be at the boundary no matter the block size is completed or not.
436    iBuilder->SetInsertPoint(blockSizeCompleted_cont);
437    generateBoundaryDetection(iBuilder, State::AT_BLOCK_SIZE, exit_block);
438    // Falls through to %at_token.
439}
440
441
442void LZ4IndexDecoderKernel::generateAtToken(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
443    iBuilder->CreateBr(bb);
444    iBuilder->SetInsertPoint(bb);
445    printRTDebugMsg("reading token");
446
447    Value * token = loadRawByte(iBuilder);
448    Value * literalLen = iBuilder->CreateZExt(
449        iBuilder->CreateLShr(token, iBuilder->getInt8(4)),
450        iBuilder->getInt32Ty()
451        );
452    Value * matchLen = iBuilder->CreateZExt(
453        iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)),
454        iBuilder->getInt32Ty()
455        );
456    incStackVar(iBuilder, sOffset);
457    // Prepare extender word for scanning.
458    loadCurrentExtender(iBuilder);
459    setExtenderUntilOffset(iBuilder);
460    // Store the (partial) match length to be extended later.
461    iBuilder->setScalarField("MatchLength", matchLen);
462    // Use tempLength to accumulate extended lengths (until at_literals).
463    iBuilder->CreateStore(literalLen, sTempLength);
464    iBuilder->CreateStore(iBuilder->getInt8(State::EXTENDING_LITERAL_LENGTH), sState);
465
466    generateBoundaryDetection(iBuilder, State::AT_TOKEN, exit_block);
467    // Falls through to %extending_literal_length.
468}
469
470
471void LZ4IndexDecoderKernel::generateExtendingLiteralLen(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
472    iBuilder->CreateBr(bb);
473    iBuilder->SetInsertPoint(bb);
474    printRTDebugMsg("extending literal len");
475
476    Value * wordOffset = getWordOffset(iBuilder);
477    Value * blockOffset = getWordStartOffset(iBuilder);
478    Value * literalLen = iBuilder->CreateLoad(sTempLength);
479    Value * literalExtEnd = iBuilder->CreateTrunc(
480                iBuilder->CreateCountForwardZeroes(iBuilder->CreateNot(iBuilder->CreateLoad(sExtender))),
481                iBuilder->getInt32Ty());
482    printRTDebugInt("wordOffset", wordOffset);
483    printRTDebugInt("literalExtEnd", literalExtEnd);
484    // number of extender = literalExtEnd - wordOffset
485    Value * numExtenders = iBuilder->CreateSub(literalExtEnd, wordOffset);
486    Value * literalExtReachBoundary =
487            iBuilder->CreateICmpEQ(literalExtEnd, iBuilder->getInt32(wordWidth));
488    // There are literalExtEnd forward zeroes, we load bytes[literalExtEnd]
489    // which is the first non-extender.  If literalExtEnd == 64, we force the
490    // load index to be 0 to avoid out-of-bound access, and lastByte will be 0.
491    Value * loadOffset = iBuilder->CreateSelect(literalExtReachBoundary,
492            ConstantInt::getNullValue(literalExtEnd->getType()),
493            literalExtEnd);
494    Value * lastByte = iBuilder->CreateSelect(literalExtReachBoundary,
495            iBuilder->getInt8(0),
496            loadRawByte(iBuilder, iBuilder->CreateAdd(blockOffset, loadOffset)));
497    Value * literalLenExted = iBuilder->CreateICmpUGE(literalLen, iBuilder->getInt32(0xf));
498    literalLen = iBuilder->CreateSelect(literalLenExted,
499            iBuilder->CreateAdd(
500                literalLen,
501                iBuilder->CreateAdd(
502                    iBuilder->CreateMul(numExtenders, iBuilder->getInt32(0xff)),
503                    iBuilder->CreateZExt(lastByte, iBuilder->getInt32Ty())
504                    )
505                ),      // literalLen + numExtenders * 255
506            literalLen);
507    wordOffset = iBuilder->CreateSelect(literalLenExted,
508            literalExtEnd,
509            wordOffset);
510    // If lastByte is truly the last length byte, we need to advance the cursor by 1.
511    wordOffset = iBuilder->CreateSelect(
512            iBuilder->CreateAnd(literalLenExted, iBuilder->CreateNot(literalExtReachBoundary)),
513            iBuilder->CreateAdd(wordOffset, iBuilder->getInt32(1)),
514            wordOffset
515            );
516    iBuilder->CreateStore(literalLen, sTempLength);
517    iBuilder->CreateStore(iBuilder->CreateAdd(blockOffset, wordOffset), sOffset);
518    Value * unfinished = iBuilder->CreateAnd(literalExtReachBoundary, literalLenExted);
519    Value * newState = iBuilder->CreateSelect(unfinished,
520            iBuilder->getInt8(State::EXTENDING_LITERAL_LENGTH),
521            iBuilder->getInt8(State::AT_LITERALS));
522    iBuilder->CreateStore(newState, sState);
523
524    generateBoundaryDetection(iBuilder, State::EXTENDING_LITERAL_LENGTH, exit_block, true);
525    BasicBlock * cont_block = iBuilder->CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_LITERAL_LENGTH));
526    // Insert point is still in wordBoundary block now.
527    // See if there are still more extenders.
528    iBuilder->CreateUnlikelyCondBr(unfinished, bb, cont_block);
529
530    iBuilder->SetInsertPoint(cont_block);
531    // Falls through to %at_literals.
532}
533
534
535void LZ4IndexDecoderKernel::generateAtLiterals(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb) {
536    iBuilder->CreateBr(bb);
537    iBuilder->SetInsertPoint(bb);
538
539    iBuilder->setScalarField("LiteralStart", iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset)));
540    iBuilder->setScalarField("LiteralLength", iBuilder->CreateLoad(sTempLength));
541    iBuilder->setScalarField("BytesToSkip", iBuilder->CreateLoad(sTempLength));
542    iBuilder->CreateStore(iBuilder->getInt8(State::AT_FIRST_OFFSET), sState);
543
544    // No boundary detection here as we do not advance the cursor.
545    // Control flow will be redirected to %skipping_bytes later.
546}
547
548
549void LZ4IndexDecoderKernel::generateAtFirstOffset(const std::unique_ptr<KernelBuilder> &iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
550    iBuilder->SetInsertPoint(bb);
551    printRTDebugMsg("reading first offset");
552
553    Value * byte = iBuilder->CreateZExt(loadRawByte(iBuilder), iBuilder->getInt32Ty());
554    // Use tempLength to store partial offset.
555    iBuilder->CreateStore(byte, sTempLength);
556    incStackVar(iBuilder, sOffset);
557    iBuilder->CreateStore(iBuilder->getInt8(State::AT_SECOND_OFFSET), sState);
558
559    generateBoundaryDetection(iBuilder, State::AT_FIRST_OFFSET, exit_block);
560    // Falls through to %at_second_offset.
561}
562
563
564void LZ4IndexDecoderKernel::generateAtSecondOffset(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
565    iBuilder->CreateBr(bb);
566    iBuilder->SetInsertPoint(bb);
567    printRTDebugMsg("reading second offset");
568
569    Value * byte1 = iBuilder->CreateLoad(sTempLength);
570    Value * byte2 = iBuilder->CreateZExt(loadRawByte(iBuilder), iBuilder->getInt32Ty());
571    Value * offset = iBuilder->CreateAdd(
572            iBuilder->CreateShl(byte2, iBuilder->getInt32(8)),
573            byte1
574            );
575    iBuilder->setScalarField("MatchOffset", offset);
576    incStackVar(iBuilder, sOffset);
577    // Prepare extender word and tempLength for extending.
578    loadCurrentExtender(iBuilder);
579    setExtenderUntilOffset(iBuilder);
580    iBuilder->CreateStore(iBuilder->getScalarField("MatchLength"), sTempLength);
581    iBuilder->CreateStore(iBuilder->getInt8(State::EXTENDING_MATCH_LENGTH), sState);
582
583    generateBoundaryDetection(iBuilder, State::AT_SECOND_OFFSET, exit_block);
584    // Falls through to %extending_match_length.
585}
586
587
588void LZ4IndexDecoderKernel::generateExtendingMatchLen(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
589    iBuilder->CreateBr(bb);
590    iBuilder->SetInsertPoint(bb);
591    printRTDebugMsg("extending match length");
592    printGlobalPos();
593    printRTDebugInt("rawbyte", loadRawByte(iBuilder));
594    printRTDebugInt("extword", iBuilder->CreateLoad(sExtender));
595
596    Value * wordOffset = getWordOffset(iBuilder);
597    Value * blockOffset = getWordStartOffset(iBuilder);
598    Value * matchLen = iBuilder->CreateLoad(sTempLength);
599    Value * matchExtEnd = iBuilder->CreateTrunc(
600        iBuilder->CreateCountForwardZeroes(iBuilder->CreateNot(iBuilder->CreateLoad(sExtender))),
601        iBuilder->getInt32Ty()
602        );
603    printRTDebugInt("wordoffset", wordOffset);
604    printRTDebugInt("matchExtEnd", matchExtEnd);
605    // number of extender = matchExtEnd - wordOffset
606    Value * numExtenders = iBuilder->CreateSub(matchExtEnd, wordOffset);
607    Value * matchExtReachBoundary = 
608            iBuilder->CreateICmpEQ(matchExtEnd, iBuilder->getInt32(wordWidth));
609    // There are matchExtEnd forward zeroes, we load bytes[matchExtEnd]
610    // which is the first non-extender.  If matchExtEnd == 64, we force the
611    // load index to be 0 to avoid out-of-bound access, and lastByte will be 0.
612    Value * loadOffset = iBuilder->CreateSelect(matchExtReachBoundary,
613            ConstantInt::getNullValue(matchExtEnd->getType()),
614            matchExtEnd);
615    Value * lastByte = iBuilder->CreateSelect(matchExtReachBoundary,
616            iBuilder->getInt8(0),
617            loadRawByte(iBuilder, iBuilder->CreateAdd(blockOffset, loadOffset)));
618    Value * matchLenExted = iBuilder->CreateICmpUGE(matchLen, iBuilder->getInt32(0xf));
619    matchLen = iBuilder->CreateSelect(matchLenExted,
620            iBuilder->CreateAdd(
621                matchLen,
622                iBuilder->CreateAdd(
623                    iBuilder->CreateMul(numExtenders, iBuilder->getInt32(0xff)),
624                    iBuilder->CreateZExt(lastByte, iBuilder->getInt32Ty())
625                    )
626                ),      // matchLen + numExtenders * 255
627            matchLen);
628    wordOffset = iBuilder->CreateSelect(matchLenExted,
629            matchExtEnd,
630            wordOffset);
631    // If lastByte is truly the last length byte, we need to advance the cursor by 1.
632    wordOffset = iBuilder->CreateSelect(
633            iBuilder->CreateAnd(matchLenExted, iBuilder->CreateNot(matchExtReachBoundary)),
634            iBuilder->CreateAdd(wordOffset, iBuilder->getInt32(1)),
635            wordOffset
636            );
637    iBuilder->CreateStore(matchLen, sTempLength);
638    iBuilder->CreateStore(iBuilder->CreateAdd(blockOffset, wordOffset), sOffset);
639
640    Value * unfinished = iBuilder->CreateAnd(matchExtReachBoundary, matchLenExted);
641    BasicBlock * output_then = iBuilder->CreateBasicBlock("output_then");
642    BasicBlock * output_cont = iBuilder->CreateBasicBlock("output_cont");
643    iBuilder->CreateLikelyCondBr(
644            iBuilder->CreateNot(unfinished),
645            output_then, output_cont
646            );
647    iBuilder->SetInsertPoint(output_then);
648    iBuilder->CreateStore(iBuilder->getInt8(State::AT_TOKEN), sState);
649    matchLen = iBuilder->CreateAdd(matchLen, iBuilder->getInt32(4));    // Add the constant at the end.
650    iBuilder->setScalarField("MatchLength", matchLen);
651    generateProduceOutput(iBuilder);
652    iBuilder->CreateBr(output_cont);
653
654    iBuilder->SetInsertPoint(output_cont);
655    generateBoundaryDetection(iBuilder, State::EXTENDING_MATCH_LENGTH, exit_block, true);
656    BasicBlock * cont_block = iBuilder->CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_MATCH_LENGTH));
657    // Insert point is still in wordBoundary block now.
658    // See if there are still more extenders.
659    iBuilder->CreateUnlikelyCondBr(unfinished, bb, cont_block);
660
661    iBuilder->SetInsertPoint(cont_block);
662}
663
664
665void LZ4IndexDecoderKernel::generateAtBlockChecksum(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * skippingBytes) {
666    // No branch here as we have made a conditional branch outside.
667    iBuilder->SetInsertPoint(bb);
668    printRTDebugMsg("processing block checksum");
669
670    // Produce the partial output (fill matchIndexes with 0).
671    iBuilder->setScalarField("MatchOffset", iBuilder->getInt32(0));
672    iBuilder->setScalarField("MatchLength", iBuilder->getInt32(0));
673    generateProduceOutput(iBuilder);
674
675    BasicBlock * hasChecksum_then = iBuilder->CreateBasicBlock("has_checksum_then");
676    BasicBlock * hasChecksum_cont = iBuilder->CreateBasicBlock("has_checksum_cont");
677
678    iBuilder->CreateStore(iBuilder->getInt8(State::AT_BLOCK_SIZE), sState);
679    iBuilder->CreateCondBr(iBuilder->getScalarField("hasBlockChecksum"), hasChecksum_then, hasChecksum_cont);
680
681    iBuilder->SetInsertPoint(hasChecksum_then);
682    iBuilder->setScalarField("BytesToSkip", iBuilder->getInt32(4));
683    iBuilder->CreateBr(skippingBytes);
684    // Boundary detection will be done in skipping_bytes.
685
686    iBuilder->SetInsertPoint(hasChecksum_cont);
687    // No checksum, offset not advanced.  Falls through to the next block (block_size).
688}
689
690LZ4IndexDecoderKernel::LZ4IndexDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
691: BlockOrientedKernel("lz4IndexDecoder",
692    // Inputs
693    {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"},
694     Binding{iBuilder->getStreamSetTy(1, 1), "extenders"}},
695    // Outputs: literal start, literal length, match offset, match length
696    {Binding{iBuilder->getStreamSetTy(2, 32), "literalIndexes", UnknownRate()},
697     Binding{iBuilder->getStreamSetTy(2, 32), "matchIndexes", FixedRatio(1, 1, "literalIndexes")}},
698    // Arguments
699    {Binding{iBuilder->getInt1Ty(), "hasBlockChecksum"}},
700    {},
701    // Internal states:
702    {Binding{iBuilder->getInt32Ty(), "BlockNo"},
703     Binding{iBuilder->getInt8Ty(), "State"},
704     Binding{iBuilder->getInt32Ty(), "LZ4BlockStart"},
705     Binding{iBuilder->getInt32Ty(), "LZ4BlockEnd"},
706     Binding{iBuilder->getInt32Ty(), "BytesToSkip"},
707     Binding{iBuilder->getInt32Ty(), "TempLength"},
708     Binding{iBuilder->getInt32Ty(), "TempCount"},
709     Binding{iBuilder->getInt32Ty(), "LiteralStart"},
710     Binding{iBuilder->getInt32Ty(), "LiteralLength"},
711     Binding{iBuilder->getInt32Ty(), "MatchOffset"},
712     Binding{iBuilder->getInt32Ty(), "MatchLength"}})
713, wordWidth{iBuilder->getSizeTy()->getBitWidth()} {
714    setNoTerminateAttribute(true);
715}
Note: See TracBrowser for help on using the repository browser.