source: icGREP/icgrep-devel/icgrep/kernels/lz4_index_decoder.cpp @ 5446

Last change on this file since 5446 was 5440, checked in by nmedfort, 2 years ago

Large refactoring step. Removed IR generation code from Kernel (formally KernelBuilder?) and moved it into the new KernelBuilder? class.

File size: 33.1 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "lz4_index_decoder.h"
8#include <kernels/kernel_builder.h>
9 
10using namespace llvm;
11using namespace kernel;
12
13#ifndef NDEBUG
14#define DEBUG_RT_PRINT 1
15#else
16#define DEBUG_RT_PRINT 0
17#endif
18
19#define printRTDebugMsg(MSG) \
20    do { if (DEBUG_RT_PRINT) iBuilder->CallPrintMsgToStderr(MSG); } while (0)
21
22#define printRTDebugInt(NAME, X) \
23    do { if (DEBUG_RT_PRINT) iBuilder->CallPrintIntToStderr(NAME, X); } while (0)
24
25#define printGlobalPos() \
26    printRTDebugInt("GlobalPos", iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset)))
27
28namespace {
29
30Value * generateBitswap(const std::unique_ptr<KernelBuilder> & iBuilder, Value * v) {
31    Value * bswapFunc = Intrinsic::getDeclaration(iBuilder->getModule(),
32            Intrinsic::bswap, v->getType());
33    return iBuilder->CreateCall(bswapFunc, {v});
34}
35
36Value * selectMin(const std::unique_ptr<KernelBuilder> & iBuilder, Value * a, Value * b) {
37    return iBuilder->CreateSelect(iBuilder->CreateICmpULT(a, b), a, b);
38}
39
40Value * createStackVar(const std::unique_ptr<KernelBuilder> & iBuilder, Type * type, StringRef name, Value * initializer = nullptr) {
41    Value * var = iBuilder->CreateAlloca(type, nullptr, name);
42    if (initializer) {
43        iBuilder->CreateStore(initializer, var);
44    } else {
45        iBuilder->CreateStore(ConstantInt::get(type, 0), var);
46    }
47    return var;
48}
49
50void incStackVar(const std::unique_ptr<KernelBuilder> & iBuilder, Value * svar, Value * increment = nullptr) {
51    Value * value = iBuilder->CreateLoad(svar);
52    if (increment) {
53        value = iBuilder->CreateAdd(value, increment);
54    } else {
55        value = iBuilder->CreateAdd(value, ConstantInt::get(value->getType(), 1));
56    }
57    iBuilder->CreateStore(value, svar);
58}
59
60Value * getOutputPtr(const std::unique_ptr<KernelBuilder> & iBuilder, Value * blockStartPtr, Value * offset) {
61    return iBuilder->CreateGEP(
62            iBuilder->CreatePointerCast(blockStartPtr, iBuilder->getInt32Ty()->getPointerTo()),
63            offset
64            );
65}
66
67}       // anonymouse namespace
68
69/**
70 * Get the offset within the current word.
71 */
72Value * LZ4IndexDecoderKernel::getWordOffset(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
73    Value * wordWidthMask = iBuilder->getInt32(wordWidth - 1);
74    return iBuilder->CreateAnd(
75            iBuilder->CreateLoad(sOffset),
76            wordWidthMask
77            );
78}
79
80
81/**
82 * Get the offset of the start of the current word.
83 */
84Value * LZ4IndexDecoderKernel::getWordStartOffset(const std::unique_ptr<KernelBuilder> & iBuilder) {
85    Value * wordWidthMask = iBuilder->getInt32(wordWidth - 1);
86    return iBuilder->CreateAnd(
87            iBuilder->CreateLoad(sOffset),
88            iBuilder->CreateNot(wordWidthMask)
89            );
90}
91
92
93/**
94 * Load a raw byte from byteStream.
95 * If offset is not provided, load the current byte by default.
96 */
97Value * LZ4IndexDecoderKernel::loadRawByte(const std::unique_ptr<KernelBuilder> & iBuilder, Value * offset) {
98    Value * blockStartPtr = iBuilder->CreatePointerCast(
99            iBuilder->getInputStreamBlockPtr("byteStream", iBuilder->getInt32(0)),
100            iBuilder->getInt8PtrTy()
101            );
102    if (offset == nullptr) {
103        offset = iBuilder->CreateLoad(sOffset);
104    }
105    Value * ptr = iBuilder->CreateGEP(blockStartPtr, offset);
106    return iBuilder->CreateLoad(ptr);
107}
108
109
110/**
111 * Set the current extender word up until before the offset position.
112 * extender = .......  (little-endian, LSB on the right)
113 * offset   =    ^
114 * cleared  = ....111
115 */
116void LZ4IndexDecoderKernel::setExtenderUntilOffset(const std::unique_ptr<KernelBuilder> & iBuilder) {
117    // Little-endian, offset counts from LSB
118    // extender = extender ^ ~((1 << offset) -1)
119    Value * extender = iBuilder->CreateLoad(sExtender);
120    Value * wordOffset = iBuilder->CreateZExt(
121            getWordOffset(iBuilder),
122            iBuilder->getSizeTy()
123            );
124    Value * one = iBuilder->getSize(1);
125    Value * mask = iBuilder->CreateSub(
126            iBuilder->CreateShl(one, wordOffset),
127            one);
128    extender = iBuilder->CreateOr(extender, mask);
129    iBuilder->CreateStore(extender, sExtender);
130}
131
132
133/**
134 * Load the extender word at the current offset.
135 * Called when we potentially reach a new word.  Usually followed by setExtenderUntilOffset.
136 */
137void LZ4IndexDecoderKernel::loadCurrentExtender(const std::unique_ptr<KernelBuilder> & iBuilder) {
138    iBuilder->CreateStore(
139            iBuilder->CreateExtractElement(extenders,
140                iBuilder->CreateLShr(
141                    iBuilder->CreateLoad(sOffset),
142                    iBuilder->getInt32(std::log2(wordWidth))
143                    )
144                ),
145            sExtender);
146}
147
148
149void LZ4IndexDecoderKernel::generateProduceOutput(const std::unique_ptr<KernelBuilder> &iBuilder) {
150    Value * producedItem = iBuilder->getProducedItemCount("literalIndexes");
151
152#ifndef NDEBUG
153    iBuilder->CallPrintInt("ProducedItem", producedItem);
154    // LiteralStart is adjusted to be relative to the block start, so that
155    // the output can be compared against that of the reference implementation.
156    Value * literalStart = iBuilder->CreateSub(iBuilder->getScalarField("LiteralStart"), iBuilder->getScalarField("LZ4BlockStart"));
157    iBuilder->CallPrintInt("LiteralStart", literalStart);
158    iBuilder->CallPrintInt("LiteralLength", iBuilder->getScalarField("LiteralLength"));
159    iBuilder->CallPrintInt("MatchOffset", iBuilder->getScalarField("MatchOffset"));
160    iBuilder->CallPrintInt("MatchLength", iBuilder->getScalarField("MatchLength"));
161#endif
162    printRTDebugMsg("--------------");
163
164    Value * outputOffset = iBuilder->CreateAnd(
165            iBuilder->CreateTrunc(producedItem, iBuilder->getInt32Ty()),
166            iBuilder->getInt32(iBuilder->getBitBlockWidth() - 1)
167            );  // producedItem % blockWidth (as blockWidth is always a power of 2)
168    Value * literalStartPtr = getOutputPtr(iBuilder,
169            iBuilder->getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(0)), outputOffset);
170    Value * literalLengthPtr = getOutputPtr(iBuilder,
171            iBuilder->getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(1)), outputOffset);
172    Value * matchOffsetPtr = getOutputPtr(iBuilder,
173            iBuilder->getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(0)), outputOffset);
174    Value * matchLengthPtr = getOutputPtr(iBuilder,
175            iBuilder->getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(1)), outputOffset);
176    iBuilder->CreateStore(iBuilder->getScalarField("LiteralStart"), literalStartPtr);
177    iBuilder->CreateStore(iBuilder->getScalarField("LiteralLength"), literalLengthPtr);
178    iBuilder->CreateStore(iBuilder->getScalarField("MatchOffset"), matchOffsetPtr);
179    iBuilder->CreateStore(iBuilder->getScalarField("MatchLength"), matchLengthPtr);
180    iBuilder->setProducedItemCount("literalIndexes", iBuilder->CreateAdd(producedItem, iBuilder->getSize(1)));
181    // matchIndexes has a fixed ratio of 1:1 w.r.t. literalIndexes.
182}
183
184
185void LZ4IndexDecoderKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
186    BasicBlock * entry_block = iBuilder->GetInsertBlock();
187    BasicBlock * exit_block = iBuilder->CreateBasicBlock("exit");
188
189    // %entry
190    iBuilder->SetInsertPoint(entry_block);
191    printRTDebugMsg("entry");
192    // Global positions in the byte stream.
193    Value * blockNo = iBuilder->getScalarField("BlockNo");
194    blockStartPos = iBuilder->CreateMul(blockNo, iBuilder->getInt32(iBuilder->getBitBlockWidth()), "blockStartPos");
195    extenders = iBuilder->CreateBitCast(
196            iBuilder->loadInputStreamBlock("extenders", iBuilder->getInt32(0)),
197            VectorType::get(iBuilder->getSizeTy(), iBuilder->getBitBlockWidth() / wordWidth),
198            "extenders");
199    // Create a series of stack variables which will be promoted by mem2reg.
200    sOffset = createStackVar(iBuilder, iBuilder->getInt32Ty(), "offset");
201    // tempLength has different meanings in different states.
202    sTempLength = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempLength", iBuilder->getScalarField("TempLength"));
203    sTempCount = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempCount", iBuilder->getScalarField("TempCount"));
204    sState = createStackVar(iBuilder, iBuilder->getInt8Ty(), "state", iBuilder->getScalarField("State"));
205    sExtender = createStackVar(iBuilder, iBuilder->getSizeTy(), "extender",
206            iBuilder->CreateExtractElement(extenders, iBuilder->getInt32(0)));
207
208    BasicBlock * skippingBytes = iBuilder->CreateBasicBlock("skipping_bytes");
209    BasicBlock * dispatch = iBuilder->CreateBasicBlock("dispatch");
210
211    iBuilder->CreateCondBr(
212            iBuilder->CreateICmpUGT(iBuilder->getScalarField("BytesToSkip"), iBuilder->getInt32(0)),
213            skippingBytes, dispatch
214            );
215
216    // %skipping_bytes
217    generateSkippingBytes(iBuilder, skippingBytes, exit_block);
218    // Insert point is at the end of skippingBytes.
219    iBuilder->CreateBr(dispatch);
220
221    // %dispatch
222    // Indirect branching will be added to %dispatch at last.
223
224    // %at_block_checksum
225    BasicBlock * atBlockChecksum = iBuilder->CreateBasicBlock("at_block_checksum");
226    generateAtBlockChecksum(iBuilder, atBlockChecksum, skippingBytes);
227 
228    // %at_block_size
229    BasicBlock * atBlockSize = iBuilder->CreateBasicBlock("at_block_size");
230    generateAtBlockSize(iBuilder, atBlockSize, skippingBytes, exit_block);
231
232    // %at_token
233    BasicBlock * atToken = iBuilder->CreateBasicBlock("at_token");
234    generateAtToken(iBuilder, atToken, exit_block);
235
236    // %extending_literal_length
237    BasicBlock * extendingLiteralLen = iBuilder->CreateBasicBlock("extending_literal_length");
238    generateExtendingLiteralLen(iBuilder, extendingLiteralLen, exit_block);
239
240    // %at_literals
241    BasicBlock * atLiterals = iBuilder->CreateBasicBlock("at_literals");
242    generateAtLiterals(iBuilder, atLiterals);
243    iBuilder->CreateBr(skippingBytes);
244
245    // %at_first_offset
246    // Note that the last sequence is incomplete and ends with literals.
247    // If the whole LZ4 block is done, process the (optional) checksum.
248    // Otherwise, go around to process the next sequence.
249    BasicBlock * atOffset1 = iBuilder->CreateBasicBlock("at_first_offset");
250    iBuilder->SetInsertPoint(atOffset1);
251    Value * nowGlobalPos = iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset));
252    BasicBlock * blockEnd_else = iBuilder->CreateBasicBlock("block_end_else");
253    // Conditional branch inserted at the end of the last block.
254    iBuilder->CreateUnlikelyCondBr(
255            iBuilder->CreateICmpEQ(nowGlobalPos, iBuilder->getScalarField("LZ4BlockEnd")),
256            atBlockChecksum, blockEnd_else
257            );
258    generateAtFirstOffset(iBuilder, blockEnd_else, exit_block);
259
260    // %at_second_offset
261    BasicBlock * atOffset2 = iBuilder->CreateBasicBlock("at_second_offset");
262    generateAtSecondOffset(iBuilder, atOffset2, exit_block);
263
264    // %extending_match_length
265    BasicBlock * extendingMatchLen = iBuilder->CreateBasicBlock("extending_match_length");
266    generateExtendingMatchLen(iBuilder, extendingMatchLen, exit_block);
267    iBuilder->CreateBr(atToken);
268
269    // Indirect branching.
270    iBuilder->SetInsertPoint(dispatch);
271    printRTDebugMsg("dispatch");
272    // The order must comply with enum State.
273    Constant * labels = ConstantVector::get(
274            {BlockAddress::get(atBlockSize), BlockAddress::get(atToken), BlockAddress::get(extendingLiteralLen), BlockAddress::get(atLiterals),
275             BlockAddress::get(atOffset1), BlockAddress::get(atOffset2), BlockAddress::get(extendingMatchLen), BlockAddress::get(atBlockChecksum)}
276            );
277    Value * target = iBuilder->CreateExtractElement(labels, iBuilder->CreateLoad(sState));
278    IndirectBrInst * indirectBr = iBuilder->CreateIndirectBr(target);
279    indirectBr->addDestination(atBlockSize);
280    indirectBr->addDestination(atToken);
281    indirectBr->addDestination(extendingLiteralLen);
282    indirectBr->addDestination(atLiterals);
283    indirectBr->addDestination(atOffset1);
284    indirectBr->addDestination(atOffset2);
285    indirectBr->addDestination(extendingMatchLen);
286    indirectBr->addDestination(atBlockChecksum);
287
288    // %exit
289    iBuilder->SetInsertPoint(exit_block);
290    printRTDebugMsg("exit");
291    iBuilder->setScalarField("State", iBuilder->CreateLoad(sState));
292    iBuilder->setScalarField("TempLength", iBuilder->CreateLoad(sTempLength));
293    iBuilder->setScalarField("TempCount", iBuilder->CreateLoad(sTempCount));
294    iBuilder->setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getInt32(1)));
295    // When the kernel builder uses indirectbr, doBlock is not a separate function.
296    // Hence, we branch to a new basic block and fall through instead of returning.
297    BasicBlock * end_block = iBuilder->CreateBasicBlock("end_of_block");
298    iBuilder->CreateBr(end_block);
299    iBuilder->SetInsertPoint(end_block);
300}
301
302
303void LZ4IndexDecoderKernel::generateBoundaryDetection(const std::unique_ptr<KernelBuilder> & iBuilder, State state, BasicBlock * exit_block, bool updateExtenderWord) {
304    if (updateExtenderWord) {
305        BasicBlock * wordBoundary_then = iBuilder->CreateBasicBlock("word_boundary_then-" + StateLabels.at(state));
306        BasicBlock * blockBoundary_else = iBuilder->CreateBasicBlock("block_boundary_else-" + StateLabels.at(state));
307        BasicBlock * wordBoundary_cont = iBuilder->CreateBasicBlock("word_boundary_cont-" + StateLabels.at(state));
308        iBuilder->CreateUnlikelyCondBr(
309                iBuilder->CreateICmpEQ(getWordOffset(iBuilder), iBuilder->getInt32(0)),
310                wordBoundary_then, wordBoundary_cont
311                );
312
313        iBuilder->SetInsertPoint(wordBoundary_then);
314        iBuilder->CreateUnlikelyCondBr(
315                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth())),
316                exit_block, blockBoundary_else
317                );
318
319        // Reaching word boundary but not block boundary.  Update the extender word as requested.
320        iBuilder->SetInsertPoint(blockBoundary_else);
321        loadCurrentExtender(iBuilder);
322        iBuilder->CreateBr(wordBoundary_cont);
323
324        // Leave the insert point at the end and return.
325        iBuilder->SetInsertPoint(wordBoundary_cont);
326    } else {
327        BasicBlock * blockBoundary_cont = iBuilder->CreateBasicBlock("block_boundary_cont-" + StateLabels.at(state));
328        iBuilder->CreateUnlikelyCondBr(
329                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth())),
330                exit_block, blockBoundary_cont
331                );
332        // Leave the insert point at the end and return.
333        iBuilder->SetInsertPoint(blockBoundary_cont);
334    }
335}
336
337
338void LZ4IndexDecoderKernel::generateSkippingBytes(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
339    iBuilder->SetInsertPoint(bb);
340    printRTDebugMsg("skipping bytes");
341
342    Value * remainingBytesInBlock = iBuilder->CreateSub(
343            iBuilder->getInt32(iBuilder->getBitBlockWidth()), iBuilder->CreateLoad(sOffset)
344            );
345    Value * remainingBytesToSkip = iBuilder->getScalarField("BytesToSkip");
346    Value * advanceDist = selectMin(iBuilder, remainingBytesInBlock, remainingBytesToSkip);
347    remainingBytesToSkip = iBuilder->CreateSub(remainingBytesToSkip, advanceDist);
348    incStackVar(iBuilder, sOffset, advanceDist);
349    iBuilder->setScalarField("BytesToSkip", remainingBytesToSkip);
350
351    generateBoundaryDetection(iBuilder, State::SKIPPING_BYTES, exit_block);
352    // Falls through.
353}
354
355
356void LZ4IndexDecoderKernel::generateAtBlockSize(const std::unique_ptr<KernelBuilder> &iBuilder, BasicBlock * bb, BasicBlock * skippingBytes, BasicBlock * exit_block) {
357    iBuilder->CreateBr(bb);
358    iBuilder->SetInsertPoint(bb);
359    printRTDebugMsg("scanning block size");
360    printGlobalPos();
361
362    // Use tempLength to hold the block size temporarily.
363    // Note that it is initially stored as big-endian (for the ease of reading) and will be "swapped" later.
364    // Use tempCount as the loop counter (0..3).
365    // Both variables are initialized from kernel states at %entry.
366
367    // A do-while loop.
368    BasicBlock * loopBody = iBuilder->CreateBasicBlock("blocksize_loop_body");
369    BasicBlock * loopExit = iBuilder->CreateBasicBlock("blocksize_loop_exit");
370    iBuilder->CreateBr(loopBody);
371
372    iBuilder->SetInsertPoint(loopBody);
373    Value * byte = loadRawByte(iBuilder);
374    Value * newTempLength = iBuilder->CreateAdd(
375            iBuilder->CreateShl(iBuilder->CreateLoad(sTempLength), iBuilder->getInt32(8)),
376            iBuilder->CreateZExt(byte, iBuilder->getInt32Ty())
377            );
378    iBuilder->CreateStore(newTempLength, sTempLength);
379    incStackVar(iBuilder, sTempCount);
380    incStackVar(iBuilder, sOffset);
381    // Stop when we read all four bytes or reach the end of the block.
382    iBuilder->CreateCondBr(
383            iBuilder->CreateOr(
384                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sTempCount), iBuilder->getInt32(4)),
385                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth()))
386                ),
387            loopExit, loopBody
388            );
389
390    iBuilder->SetInsertPoint(loopExit);
391    BasicBlock * blockSizeCompleted_then = iBuilder->CreateBasicBlock("blocksize_completed_then");
392    BasicBlock * blockSizeCompleted_cont = iBuilder->CreateBasicBlock("blocksize_completed_cont");
393    iBuilder->CreateLikelyCondBr(
394            iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sTempCount), iBuilder->getInt32(4)),
395            blockSizeCompleted_then, blockSizeCompleted_cont
396            );
397
398    // All four bytes of the block size are read in.
399    iBuilder->SetInsertPoint(blockSizeCompleted_then);
400    // Remember to swap the block size back to little-endian.
401    Value * blockSize = generateBitswap(iBuilder, iBuilder->CreateLoad(sTempLength));
402    Value * currentPos = iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset));
403    iBuilder->setScalarField("LZ4BlockStart", currentPos);
404    iBuilder->setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, blockSize));
405    printRTDebugInt("blockSize", blockSize);
406
407    BasicBlock * uncompressedBlock_then = iBuilder->CreateBasicBlock("uncompressed_block_then");
408    BasicBlock * uncompressedBlock_else = iBuilder->CreateBasicBlock("uncompressed_block_cont");
409    iBuilder->CreateUnlikelyCondBr(
410            iBuilder->CreateTrunc(
411                iBuilder->CreateLShr(blockSize, iBuilder->getInt32(31)),
412                iBuilder->getInt1Ty()
413                ),
414            uncompressedBlock_then,
415            uncompressedBlock_else
416            );
417
418    iBuilder->SetInsertPoint(uncompressedBlock_then);
419    Value * realBlockSize = iBuilder->CreateXor(blockSize, iBuilder->getInt32(1L << 31));
420    iBuilder->setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, realBlockSize));
421    iBuilder->setScalarField("BytesToSkip", realBlockSize);
422    iBuilder->setScalarField("LiteralStart", currentPos);
423    iBuilder->setScalarField("LiteralLength", realBlockSize);
424    // No need to set MatchLength/MatchOffset to 0, nor to produce output,
425    // because %atBlockChecksum will do so as the last sequence.
426    iBuilder->CreateStore(iBuilder->getInt8(State::AT_BLOCK_CHECKSUM), sState);
427    iBuilder->CreateBr(skippingBytes);
428
429    iBuilder->SetInsertPoint(uncompressedBlock_else);
430    // Reset these temporary values for later use.
431    iBuilder->CreateStore(iBuilder->getInt32(0), sTempLength);
432    iBuilder->CreateStore(iBuilder->getInt32(0), sTempCount);
433    iBuilder->CreateStore(iBuilder->getInt8(State::AT_TOKEN), sState);
434    // A block size of 0 is the end mark of the frame. Exit.
435    iBuilder->CreateUnlikelyCondBr(
436            iBuilder->CreateICmpEQ(blockSize, ConstantInt::getNullValue(blockSize->getType())),
437            exit_block,
438            blockSizeCompleted_cont
439            );
440
441    // We could be at the boundary no matter the block size is completed or not.
442    iBuilder->SetInsertPoint(blockSizeCompleted_cont);
443    generateBoundaryDetection(iBuilder, State::AT_BLOCK_SIZE, exit_block);
444    // Falls through to %at_token.
445}
446
447
448void LZ4IndexDecoderKernel::generateAtToken(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
449    iBuilder->CreateBr(bb);
450    iBuilder->SetInsertPoint(bb);
451    printRTDebugMsg("reading token");
452
453    Value * token = loadRawByte(iBuilder);
454    Value * literalLen = iBuilder->CreateZExt(
455        iBuilder->CreateLShr(token, iBuilder->getInt8(4)),
456        iBuilder->getInt32Ty()
457        );
458    Value * matchLen = iBuilder->CreateZExt(
459        iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)),
460        iBuilder->getInt32Ty()
461        );
462    incStackVar(iBuilder, sOffset);
463    // Prepare extender word for scanning.
464    loadCurrentExtender(iBuilder);
465    setExtenderUntilOffset(iBuilder);
466    // Store the (partial) match length to be extended later.
467    iBuilder->setScalarField("MatchLength", matchLen);
468    // Use tempLength to accumulate extended lengths (until at_literals).
469    iBuilder->CreateStore(literalLen, sTempLength);
470    iBuilder->CreateStore(iBuilder->getInt8(State::EXTENDING_LITERAL_LENGTH), sState);
471
472    generateBoundaryDetection(iBuilder, State::AT_TOKEN, exit_block);
473    // Falls through to %extending_literal_length.
474}
475
476
477void LZ4IndexDecoderKernel::generateExtendingLiteralLen(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
478    iBuilder->CreateBr(bb);
479    iBuilder->SetInsertPoint(bb);
480    printRTDebugMsg("extending literal len");
481
482    Value * wordOffset = getWordOffset(iBuilder);
483    Value * blockOffset = getWordStartOffset(iBuilder);
484    Value * literalLen = iBuilder->CreateLoad(sTempLength);
485    Value * literalExtEnd = iBuilder->CreateTrunc(
486                iBuilder->CreateCountForwardZeroes(iBuilder->CreateNot(iBuilder->CreateLoad(sExtender))),
487                iBuilder->getInt32Ty());
488    printRTDebugInt("wordOffset", wordOffset);
489    printRTDebugInt("literalExtEnd", literalExtEnd);
490    // number of extender = literalExtEnd - wordOffset
491    Value * numExtenders = iBuilder->CreateSub(literalExtEnd, wordOffset);
492    Value * literalExtReachBoundary =
493            iBuilder->CreateICmpEQ(literalExtEnd, iBuilder->getInt32(wordWidth));
494    // There are literalExtEnd forward zeroes, we load bytes[literalExtEnd]
495    // which is the first non-extender.  If literalExtEnd == 64, we force the
496    // load index to be 0 to avoid out-of-bound access, and lastByte will be 0.
497    Value * loadOffset = iBuilder->CreateSelect(literalExtReachBoundary,
498            ConstantInt::getNullValue(literalExtEnd->getType()),
499            literalExtEnd);
500    Value * lastByte = iBuilder->CreateSelect(literalExtReachBoundary,
501            iBuilder->getInt8(0),
502            loadRawByte(iBuilder, iBuilder->CreateAdd(blockOffset, loadOffset)));
503    Value * literalLenExted = iBuilder->CreateICmpUGE(literalLen, iBuilder->getInt32(0xf));
504    literalLen = iBuilder->CreateSelect(literalLenExted,
505            iBuilder->CreateAdd(
506                literalLen,
507                iBuilder->CreateAdd(
508                    iBuilder->CreateMul(numExtenders, iBuilder->getInt32(0xff)),
509                    iBuilder->CreateZExt(lastByte, iBuilder->getInt32Ty())
510                    )
511                ),      // literalLen + numExtenders * 255
512            literalLen);
513    wordOffset = iBuilder->CreateSelect(literalLenExted,
514            literalExtEnd,
515            wordOffset);
516    // If lastByte is truly the last length byte, we need to advance the cursor by 1.
517    wordOffset = iBuilder->CreateSelect(
518            iBuilder->CreateAnd(literalLenExted, iBuilder->CreateNot(literalExtReachBoundary)),
519            iBuilder->CreateAdd(wordOffset, iBuilder->getInt32(1)),
520            wordOffset
521            );
522    iBuilder->CreateStore(literalLen, sTempLength);
523    iBuilder->CreateStore(iBuilder->CreateAdd(blockOffset, wordOffset), sOffset);
524    Value * unfinished = iBuilder->CreateAnd(literalExtReachBoundary, literalLenExted);
525    Value * newState = iBuilder->CreateSelect(unfinished,
526            iBuilder->getInt8(State::EXTENDING_LITERAL_LENGTH),
527            iBuilder->getInt8(State::AT_LITERALS));
528    iBuilder->CreateStore(newState, sState);
529
530    generateBoundaryDetection(iBuilder, State::EXTENDING_LITERAL_LENGTH, exit_block, true);
531    BasicBlock * cont_block = iBuilder->CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_LITERAL_LENGTH));
532    // Insert point is still in wordBoundary block now.
533    // See if there are still more extenders.
534    iBuilder->CreateUnlikelyCondBr(unfinished, bb, cont_block);
535
536    iBuilder->SetInsertPoint(cont_block);
537    // Falls through to %at_literals.
538}
539
540
541void LZ4IndexDecoderKernel::generateAtLiterals(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb) {
542    iBuilder->CreateBr(bb);
543    iBuilder->SetInsertPoint(bb);
544
545    iBuilder->setScalarField("LiteralStart", iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset)));
546    iBuilder->setScalarField("LiteralLength", iBuilder->CreateLoad(sTempLength));
547    iBuilder->setScalarField("BytesToSkip", iBuilder->CreateLoad(sTempLength));
548    iBuilder->CreateStore(iBuilder->getInt8(State::AT_FIRST_OFFSET), sState);
549
550    // No boundary detection here as we do not advance the cursor.
551    // Control flow will be redirected to %skipping_bytes later.
552}
553
554
555void LZ4IndexDecoderKernel::generateAtFirstOffset(const std::unique_ptr<KernelBuilder> &iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
556    iBuilder->SetInsertPoint(bb);
557    printRTDebugMsg("reading first offset");
558
559    Value * byte = iBuilder->CreateZExt(loadRawByte(iBuilder), iBuilder->getInt32Ty());
560    // Use tempLength to store partial offset.
561    iBuilder->CreateStore(byte, sTempLength);
562    incStackVar(iBuilder, sOffset);
563    iBuilder->CreateStore(iBuilder->getInt8(State::AT_SECOND_OFFSET), sState);
564
565    generateBoundaryDetection(iBuilder, State::AT_FIRST_OFFSET, exit_block);
566    // Falls through to %at_second_offset.
567}
568
569
570void LZ4IndexDecoderKernel::generateAtSecondOffset(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
571    iBuilder->CreateBr(bb);
572    iBuilder->SetInsertPoint(bb);
573    printRTDebugMsg("reading second offset");
574
575    Value * byte1 = iBuilder->CreateLoad(sTempLength);
576    Value * byte2 = iBuilder->CreateZExt(loadRawByte(iBuilder), iBuilder->getInt32Ty());
577    Value * offset = iBuilder->CreateAdd(
578            iBuilder->CreateShl(byte2, iBuilder->getInt32(8)),
579            byte1
580            );
581    iBuilder->setScalarField("MatchOffset", offset);
582    incStackVar(iBuilder, sOffset);
583    // Prepare extender word and tempLength for extending.
584    loadCurrentExtender(iBuilder);
585    setExtenderUntilOffset(iBuilder);
586    iBuilder->CreateStore(iBuilder->getScalarField("MatchLength"), sTempLength);
587    iBuilder->CreateStore(iBuilder->getInt8(State::EXTENDING_MATCH_LENGTH), sState);
588
589    generateBoundaryDetection(iBuilder, State::AT_SECOND_OFFSET, exit_block);
590    // Falls through to %extending_match_length.
591}
592
593
594void LZ4IndexDecoderKernel::generateExtendingMatchLen(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
595    iBuilder->CreateBr(bb);
596    iBuilder->SetInsertPoint(bb);
597    printRTDebugMsg("extending match length");
598    printGlobalPos();
599    printRTDebugInt("rawbyte", loadRawByte(iBuilder));
600    printRTDebugInt("extword", iBuilder->CreateLoad(sExtender));
601
602    Value * wordOffset = getWordOffset(iBuilder);
603    Value * blockOffset = getWordStartOffset(iBuilder);
604    Value * matchLen = iBuilder->CreateLoad(sTempLength);
605    Value * matchExtEnd = iBuilder->CreateTrunc(
606        iBuilder->CreateCountForwardZeroes(iBuilder->CreateNot(iBuilder->CreateLoad(sExtender))),
607        iBuilder->getInt32Ty()
608        );
609    printRTDebugInt("wordoffset", wordOffset);
610    printRTDebugInt("matchExtEnd", matchExtEnd);
611    // number of extender = matchExtEnd - wordOffset
612    Value * numExtenders = iBuilder->CreateSub(matchExtEnd, wordOffset);
613    Value * matchExtReachBoundary = 
614            iBuilder->CreateICmpEQ(matchExtEnd, iBuilder->getInt32(wordWidth));
615    // There are matchExtEnd forward zeroes, we load bytes[matchExtEnd]
616    // which is the first non-extender.  If matchExtEnd == 64, we force the
617    // load index to be 0 to avoid out-of-bound access, and lastByte will be 0.
618    Value * loadOffset = iBuilder->CreateSelect(matchExtReachBoundary,
619            ConstantInt::getNullValue(matchExtEnd->getType()),
620            matchExtEnd);
621    Value * lastByte = iBuilder->CreateSelect(matchExtReachBoundary,
622            iBuilder->getInt8(0),
623            loadRawByte(iBuilder, iBuilder->CreateAdd(blockOffset, loadOffset)));
624    Value * matchLenExted = iBuilder->CreateICmpUGE(matchLen, iBuilder->getInt32(0xf));
625    matchLen = iBuilder->CreateSelect(matchLenExted,
626            iBuilder->CreateAdd(
627                matchLen,
628                iBuilder->CreateAdd(
629                    iBuilder->CreateMul(numExtenders, iBuilder->getInt32(0xff)),
630                    iBuilder->CreateZExt(lastByte, iBuilder->getInt32Ty())
631                    )
632                ),      // matchLen + numExtenders * 255
633            matchLen);
634    wordOffset = iBuilder->CreateSelect(matchLenExted,
635            matchExtEnd,
636            wordOffset);
637    // If lastByte is truly the last length byte, we need to advance the cursor by 1.
638    wordOffset = iBuilder->CreateSelect(
639            iBuilder->CreateAnd(matchLenExted, iBuilder->CreateNot(matchExtReachBoundary)),
640            iBuilder->CreateAdd(wordOffset, iBuilder->getInt32(1)),
641            wordOffset
642            );
643    iBuilder->CreateStore(matchLen, sTempLength);
644    iBuilder->CreateStore(iBuilder->CreateAdd(blockOffset, wordOffset), sOffset);
645
646    Value * unfinished = iBuilder->CreateAnd(matchExtReachBoundary, matchLenExted);
647    BasicBlock * output_then = iBuilder->CreateBasicBlock("output_then");
648    BasicBlock * output_cont = iBuilder->CreateBasicBlock("output_cont");
649    iBuilder->CreateLikelyCondBr(
650            iBuilder->CreateNot(unfinished),
651            output_then, output_cont
652            );
653    iBuilder->SetInsertPoint(output_then);
654    iBuilder->CreateStore(iBuilder->getInt8(State::AT_TOKEN), sState);
655    matchLen = iBuilder->CreateAdd(matchLen, iBuilder->getInt32(4));    // Add the constant at the end.
656    iBuilder->setScalarField("MatchLength", matchLen);
657    generateProduceOutput(iBuilder);
658    iBuilder->CreateBr(output_cont);
659
660    iBuilder->SetInsertPoint(output_cont);
661    generateBoundaryDetection(iBuilder, State::EXTENDING_MATCH_LENGTH, exit_block, true);
662    BasicBlock * cont_block = iBuilder->CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_MATCH_LENGTH));
663    // Insert point is still in wordBoundary block now.
664    // See if there are still more extenders.
665    iBuilder->CreateUnlikelyCondBr(unfinished, bb, cont_block);
666
667    iBuilder->SetInsertPoint(cont_block);
668}
669
670
671void LZ4IndexDecoderKernel::generateAtBlockChecksum(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * skippingBytes) {
672    // No branch here as we have made a conditional branch outside.
673    iBuilder->SetInsertPoint(bb);
674    printRTDebugMsg("processing block checksum");
675
676    // Produce the partial output (fill matchIndexes with 0).
677    iBuilder->setScalarField("MatchOffset", iBuilder->getInt32(0));
678    iBuilder->setScalarField("MatchLength", iBuilder->getInt32(0));
679    generateProduceOutput(iBuilder);
680
681    BasicBlock * hasChecksum_then = iBuilder->CreateBasicBlock("has_checksum_then");
682    BasicBlock * hasChecksum_cont = iBuilder->CreateBasicBlock("has_checksum_cont");
683
684    iBuilder->CreateStore(iBuilder->getInt8(State::AT_BLOCK_SIZE), sState);
685    iBuilder->CreateCondBr(iBuilder->getScalarField("hasBlockChecksum"), hasChecksum_then, hasChecksum_cont);
686
687    iBuilder->SetInsertPoint(hasChecksum_then);
688    iBuilder->setScalarField("BytesToSkip", iBuilder->getInt32(4));
689    iBuilder->CreateBr(skippingBytes);
690    // Boundary detection will be done in skipping_bytes.
691
692    iBuilder->SetInsertPoint(hasChecksum_cont);
693    // No checksum, offset not advanced.  Falls through to the next block (block_size).
694}
695
696LZ4IndexDecoderKernel::LZ4IndexDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
697: BlockOrientedKernel("lz4IndexDecoder",
698    // Inputs
699    {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"},
700     Binding{iBuilder->getStreamSetTy(1, 1), "extenders"}},
701    // Outputs: literal start, literal length, match offset, match length
702    {Binding{iBuilder->getStreamSetTy(2, 32), "literalIndexes", UnknownRate()},
703     Binding{iBuilder->getStreamSetTy(2, 32), "matchIndexes", FixedRatio(1, 1, "literalIndexes")}},
704    // Arguments
705    {Binding{iBuilder->getInt1Ty(), "hasBlockChecksum"}},
706    {},
707    // Internal states:
708    {Binding{iBuilder->getInt32Ty(), "BlockNo"},
709     Binding{iBuilder->getInt8Ty(), "State"},
710     Binding{iBuilder->getInt32Ty(), "LZ4BlockStart"},
711     Binding{iBuilder->getInt32Ty(), "LZ4BlockEnd"},
712     Binding{iBuilder->getInt32Ty(), "BytesToSkip"},
713     Binding{iBuilder->getInt32Ty(), "TempLength"},
714     Binding{iBuilder->getInt32Ty(), "TempCount"},
715     Binding{iBuilder->getInt32Ty(), "LiteralStart"},
716     Binding{iBuilder->getInt32Ty(), "LiteralLength"},
717     Binding{iBuilder->getInt32Ty(), "MatchOffset"},
718     Binding{iBuilder->getInt32Ty(), "MatchLength"}})
719, wordWidth{iBuilder->getSizeTy()->getBitWidth()} {
720    setNoTerminateAttribute(true);
721}
Note: See TracBrowser for help on using the repository browser.