source: icGREP/icgrep-devel/icgrep/kernels/lz4_index_decoder.cpp @ 5422

Last change on this file since 5422 was 5422, checked in by cameron, 2 years ago

lz4d - LZ4 decompressor - initial check-in

File size: 32.6 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <llvm/IR/Constants.h>
8#include <llvm/IR/Function.h>
9#include <llvm/IR/Intrinsics.h>
10#include <llvm/IR/Instructions.h>
11#include <llvm/IR/Module.h>
12#include <IR_Gen/idisa_builder.h>
13
14#include <string>
15
16#include "lz4_index_decoder.h"
17 
18using namespace llvm;
19using namespace kernel;
20
21#ifndef NDEBUG
22#define DEBUG_RT_PRINT 1
23#else
24#define DEBUG_RT_PRINT 0
25#endif
26
27#define printRTDebugMsg(MSG) \
28    do { if (DEBUG_RT_PRINT) iBuilder->CallPrintMsgToStderr(MSG); } while (0)
29
30#define printRTDebugInt(NAME, X) \
31    do { if (DEBUG_RT_PRINT) iBuilder->CallPrintIntToStderr(NAME, X); } while (0)
32
33#define printGlobalPos() \
34    printRTDebugInt("GlobalPos", iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset)))
35
36namespace {
37
38Value * countForwardZeroes(IDISA::IDISA_Builder * iBuilder, Value * bits) {
39    Value * cttzFunc = Intrinsic::getDeclaration(iBuilder->getModule(),
40            Intrinsic::cttz, iBuilder->getSizeTy());
41    // The last argument is is_zero_undef.  We'd like it to be false (defined)
42    // so that cttz(0) == width.
43    return iBuilder->CreateCall(cttzFunc, {bits, iBuilder->getFalse()});
44}
45
46#if 0
47Value * countReverseZeroes(IDISA::IDISA_Builder * iBuilder, Value * bits) {
48    Value * ctlzFunc = Intrinsic::getDeclaration(iBuilder->getModule(),
49            Intrinsic::ctlz, iBuilder->getSizeTy());
50    // The last argument is is_zero_undef.  We'd like it to be false (defined)
51    // so that ctlz(0) == width.
52    return iBuilder->CreateCall(ctlzFunc, {bits, iBuilder->getFalse()});
53}
54#endif
55
56Value * generateBitswap(IDISA::IDISA_Builder * iBuilder, Value * v) {
57    Value * bswapFunc = Intrinsic::getDeclaration(iBuilder->getModule(),
58            Intrinsic::bswap, v->getType());
59    return iBuilder->CreateCall(bswapFunc, {v});
60}
61
62Value * selectMin(IDISA::IDISA_Builder * iBuilder, Value * a, Value * b) {
63    return iBuilder->CreateSelect(iBuilder->CreateICmpULT(a, b), a, b);
64}
65
66#if 0
67Value * selectMax(IDISA::IDISA_Builder * iBuilder, Value * a, Value * b) {
68    return iBuilder->CreateSelect(iBuilder->CreateICmpUGT(a, b), a, b);
69}
70#endif
71
72Value * createStackVar(IDISA::IDISA_Builder * iBuilder, Type * type, StringRef name, Value * initializer = nullptr) {
73    Value * var = iBuilder->CreateAlloca(type, nullptr, name);
74    if (initializer) {
75        iBuilder->CreateStore(initializer, var);
76    } else {
77        iBuilder->CreateStore(ConstantInt::get(type, 0), var);
78    }
79    return var;
80}
81
82void incStackVar(IDISA::IDISA_Builder * iBuilder, Value * svar, Value * increment = nullptr) {
83    Value * value = iBuilder->CreateLoad(svar);
84    if (increment) {
85        value = iBuilder->CreateAdd(value, increment);
86    } else {
87        value = iBuilder->CreateAdd(value, ConstantInt::get(value->getType(), 1));
88    }
89    iBuilder->CreateStore(value, svar);
90}
91
92Value * getOutputPtr(IDISA::IDISA_Builder * iBuilder, Value * blockStartPtr, Value * offset) {
93    return iBuilder->CreateGEP(
94            iBuilder->CreatePointerCast(blockStartPtr, iBuilder->getInt32Ty()->getPointerTo()),
95            offset
96            );
97}
98
99}       // anonymouse namespace
100
101
102/**
103 * In order to allow mem2reg to promote the stack variables, alloca's have
104 * to be in the entry block of a function. Thus, we need to disable indirect
105 * branching on this kernel to have a standalone DoMethod function.
106 */
107bool LZ4IndexDecoderKernel::useIndirectBr() const {
108    return false;
109}
110
111
112/**
113 * Get the offset within the current word.
114 */
115Value * LZ4IndexDecoderKernel::getWordOffset() {
116    Value * wordWidthMask = iBuilder->getInt32(wordWidth - 1);
117    return iBuilder->CreateAnd(
118            iBuilder->CreateLoad(sOffset),
119            wordWidthMask
120            );
121}
122
123
124/**
125 * Get the offset of the start of the current word.
126 */
127Value * LZ4IndexDecoderKernel::getWordStartOffset() {
128    Value * wordWidthMask = iBuilder->getInt32(wordWidth - 1);
129    return iBuilder->CreateAnd(
130            iBuilder->CreateLoad(sOffset),
131            iBuilder->CreateNot(wordWidthMask)
132            );
133}
134
135
136/**
137 * Load a raw byte from byteStream.
138 * If offset is not provided, load the current byte by default.
139 */
140Value * LZ4IndexDecoderKernel::loadRawByte(Value * offset = nullptr) {
141    Value * blockStartPtr = iBuilder->CreatePointerCast(
142            getInputStreamBlockPtr("byteStream", iBuilder->getInt32(0)),
143            iBuilder->getInt8PtrTy()
144            );
145    if (offset == nullptr) {
146        offset = iBuilder->CreateLoad(sOffset);
147    }
148    Value * ptr = iBuilder->CreateGEP(blockStartPtr, offset);
149    return iBuilder->CreateLoad(ptr);
150}
151
152
153/**
154 * Set the current extender word up until before the offset position.
155 * extender = .......  (little-endian, LSB on the right)
156 * offset   =    ^
157 * cleared  = ....111
158 */
159void LZ4IndexDecoderKernel::setExtenderUntilOffset() {
160    // Little-endian, offset counts from LSB
161    // extender = extender ^ ~((1 << offset) -1)
162    Value * extender = iBuilder->CreateLoad(sExtender);
163    Value * wordOffset = iBuilder->CreateZExt(
164            getWordOffset(),
165            iBuilder->getSizeTy()
166            );
167    Value * one = iBuilder->getSize(1);
168    Value * mask = iBuilder->CreateSub(
169            iBuilder->CreateShl(one, wordOffset),
170            one);
171    extender = iBuilder->CreateOr(extender, mask);
172    iBuilder->CreateStore(extender, sExtender);
173}
174
175
176/**
177 * Load the extender word at the current offset.
178 * Called when we potentially reach a new word.  Usually followed by setExtenderUntilOffset.
179 */
180void LZ4IndexDecoderKernel::loadCurrentExtender() {
181    iBuilder->CreateStore(
182            iBuilder->CreateExtractElement(extenders,
183                iBuilder->CreateLShr(
184                    iBuilder->CreateLoad(sOffset),
185                    iBuilder->getInt32(std::log2(wordWidth))
186                    )
187                ),
188            sExtender);
189}
190
191
192void LZ4IndexDecoderKernel::generateProduceOutput() {
193    Value * producedItem = getProducedItemCount("literalIndexes");
194
195#ifndef NDEBUG
196    iBuilder->CallPrintInt("ProducedItem", producedItem);
197    // LiteralStart is adjusted to be relative to the block start, so that
198    // the output can be compared against that of the reference implementation.
199    //iBuilder->CallPrintInt("LiteralStart", getScalarField("LiteralStart"));
200    iBuilder->CallPrintInt("LiteralStart", iBuilder->CreateSub(
201                getScalarField("LiteralStart"), getScalarField("LZ4BlockStart")));
202    iBuilder->CallPrintInt("LiteralLength", getScalarField("LiteralLength"));
203    iBuilder->CallPrintInt("MatchOffset", getScalarField("MatchOffset"));
204    iBuilder->CallPrintInt("MatchLength", getScalarField("MatchLength"));
205#endif
206    printRTDebugMsg("--------------");
207
208    Value * outputOffset = iBuilder->CreateAnd(
209            iBuilder->CreateTrunc(producedItem, iBuilder->getInt32Ty()),
210            iBuilder->getInt32(iBuilder->getBitBlockWidth() - 1)
211            );  // producedItem % blockWidth (as blockWidth is always a power of 2)
212    Value * literalStartPtr = getOutputPtr(iBuilder,
213            getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(0)), outputOffset);
214    Value * literalLengthPtr = getOutputPtr(iBuilder,
215            getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(1)), outputOffset);
216    Value * matchOffsetPtr = getOutputPtr(iBuilder,
217            getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(0)), outputOffset);
218    Value * matchLengthPtr = getOutputPtr(iBuilder,
219            getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(1)), outputOffset);
220    iBuilder->CreateStore(getScalarField("LiteralStart"), literalStartPtr);
221    iBuilder->CreateStore(getScalarField("LiteralLength"), literalLengthPtr);
222    iBuilder->CreateStore(getScalarField("MatchOffset"), matchOffsetPtr);
223    iBuilder->CreateStore(getScalarField("MatchLength"), matchLengthPtr);
224    setProducedItemCount("literalIndexes", iBuilder->CreateAdd(producedItem, iBuilder->getSize(1)));
225    // matchIndexes has a fixed ratio of 1:1 w.r.t. literalIndexes.
226}
227
228
229void LZ4IndexDecoderKernel::generateDoBlockMethod() {
230    BasicBlock * entry_block = iBuilder->GetInsertBlock();
231    BasicBlock * exit_block = CreateBasicBlock("exit");
232
233    // %entry
234    iBuilder->SetInsertPoint(entry_block);
235    printRTDebugMsg("entry");
236    // Global positions in the byte stream.
237    Value * blockNo = getScalarField("BlockNo");
238    blockStartPos = iBuilder->CreateMul(blockNo, iBuilder->getInt32(iBuilder->getBitBlockWidth()), "blockStartPos");
239    extenders = iBuilder->CreateBitCast(
240            loadInputStreamBlock("extenders", iBuilder->getInt32(0)),
241            VectorType::get(iBuilder->getSizeTy(), iBuilder->getBitBlockWidth() / wordWidth),
242            "extenders");
243    // Create a series of stack variables which will be promoted by mem2reg.
244    sOffset = createStackVar(iBuilder, iBuilder->getInt32Ty(), "offset");
245    // tempLength has different meanings in different states.
246    sTempLength = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempLength", getScalarField("TempLength"));
247    sTempCount = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempCount", getScalarField("TempCount"));
248    sState = createStackVar(iBuilder, iBuilder->getInt8Ty(), "state", getScalarField("State"));
249    sExtender = createStackVar(iBuilder, iBuilder->getSizeTy(), "extender",
250            iBuilder->CreateExtractElement(extenders, iBuilder->getInt32(0)));
251
252    BasicBlock * skippingBytes = CreateBasicBlock("skipping_bytes");
253    BasicBlock * dispatch = CreateBasicBlock("dispatch");
254
255    iBuilder->CreateCondBr(
256            iBuilder->CreateICmpUGT(getScalarField("BytesToSkip"), iBuilder->getInt32(0)),
257            skippingBytes, dispatch
258            );
259
260    // %skipping_bytes
261    generateSkippingBytes(skippingBytes, exit_block);
262    // Insert point is at the end of skippingBytes.
263    iBuilder->CreateBr(dispatch);
264
265    // %dispatch
266    // Indirect branching will be added to %dispatch at last.
267
268    // %at_block_checksum
269    BasicBlock * atBlockChecksum = CreateBasicBlock("at_block_checksum");
270    generateAtBlockChecksum(atBlockChecksum, skippingBytes);
271 
272    // %at_block_size
273    BasicBlock * atBlockSize = CreateBasicBlock("at_block_size");
274    generateAtBlockSize(atBlockSize, skippingBytes, exit_block);
275
276    // %at_token
277    BasicBlock * atToken = CreateBasicBlock("at_token");
278    generateAtToken(atToken, exit_block);
279
280    // %extending_literal_length
281    BasicBlock * extendingLiteralLen = CreateBasicBlock("extending_literal_length");
282    generateExtendingLiteralLen(extendingLiteralLen, exit_block);
283
284    // %at_literals
285    BasicBlock * atLiterals = CreateBasicBlock("at_literals");
286    generateAtLiterals(atLiterals);
287    iBuilder->CreateBr(skippingBytes);
288
289    // %at_first_offset
290    // Note that the last sequence is incomplete and ends with literals.
291    // If the whole LZ4 block is done, process the (optional) checksum.
292    // Otherwise, go around to process the next sequence.
293    BasicBlock * atOffset1 = CreateBasicBlock("at_first_offset");
294    iBuilder->SetInsertPoint(atOffset1);
295    Value * nowGlobalPos = iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset));
296    BasicBlock * blockEnd_else = CreateBasicBlock("block_end_else");
297    // Conditional branch inserted at the end of the last block.
298    iBuilder->CreateUnlikelyCondBr(
299            iBuilder->CreateICmpEQ(nowGlobalPos, getScalarField("LZ4BlockEnd")),
300            atBlockChecksum, blockEnd_else
301            );
302    generateAtFirstOffset(blockEnd_else, exit_block);
303
304    // %at_second_offset
305    BasicBlock * atOffset2 = CreateBasicBlock("at_second_offset");
306    generateAtSecondOffset(atOffset2, exit_block);
307
308    // %extending_match_length
309    BasicBlock * extendingMatchLen = CreateBasicBlock("extending_match_length");
310    generateExtendingMatchLen(extendingMatchLen, exit_block);
311    iBuilder->CreateBr(atToken);
312
313    // Indirect branching.
314    iBuilder->SetInsertPoint(dispatch);
315    printRTDebugMsg("dispatch");
316    // The order must comply with enum State.
317    Constant * labels = ConstantVector::get(
318            {BlockAddress::get(atBlockSize), BlockAddress::get(atToken), BlockAddress::get(extendingLiteralLen), BlockAddress::get(atLiterals),
319             BlockAddress::get(atOffset1), BlockAddress::get(atOffset2), BlockAddress::get(extendingMatchLen), BlockAddress::get(atBlockChecksum)}
320            );
321    Value * target = iBuilder->CreateExtractElement(labels, iBuilder->CreateLoad(sState));
322    IndirectBrInst * indirectBr = iBuilder->CreateIndirectBr(target);
323    indirectBr->addDestination(atBlockSize);
324    indirectBr->addDestination(atToken);
325    indirectBr->addDestination(extendingLiteralLen);
326    indirectBr->addDestination(atLiterals);
327    indirectBr->addDestination(atOffset1);
328    indirectBr->addDestination(atOffset2);
329    indirectBr->addDestination(extendingMatchLen);
330    indirectBr->addDestination(atBlockChecksum);
331
332    // %exit
333    iBuilder->SetInsertPoint(exit_block);
334    printRTDebugMsg("exit");
335    setScalarField("State", iBuilder->CreateLoad(sState));
336    setScalarField("TempLength", iBuilder->CreateLoad(sTempLength));
337    setScalarField("TempCount", iBuilder->CreateLoad(sTempCount));
338    setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getInt32(1)));
339    // When the kernel builder uses indirectbr, doBlock is not a separate function.
340    // Hence, we branch to a new basic block and fall through instead of returning.
341    BasicBlock * end_block = CreateBasicBlock("end_of_block");
342    iBuilder->CreateBr(end_block);
343    iBuilder->SetInsertPoint(end_block);
344}
345
346
347void LZ4IndexDecoderKernel::generateBoundaryDetection(State state, BasicBlock * exit_block, bool updateExtenderWord=false) {
348    if (updateExtenderWord) {
349        BasicBlock * wordBoundary_then = CreateBasicBlock("word_boundary_then-" + StateLabels.at(state));
350        BasicBlock * blockBoundary_else = CreateBasicBlock("block_boundary_else-" + StateLabels.at(state));
351        BasicBlock * wordBoundary_cont = CreateBasicBlock("word_boundary_cont-" + StateLabels.at(state));
352        iBuilder->CreateUnlikelyCondBr(
353                iBuilder->CreateICmpEQ(getWordOffset(), iBuilder->getInt32(0)),
354                wordBoundary_then, wordBoundary_cont
355                );
356
357        iBuilder->SetInsertPoint(wordBoundary_then);
358        iBuilder->CreateUnlikelyCondBr(
359                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth())),
360                exit_block, blockBoundary_else
361                );
362
363        // Reaching word boundary but not block boundary.  Update the extender word as requested.
364        iBuilder->SetInsertPoint(blockBoundary_else);
365        loadCurrentExtender();
366        iBuilder->CreateBr(wordBoundary_cont);
367
368        // Leave the insert point at the end and return.
369        iBuilder->SetInsertPoint(wordBoundary_cont);
370    } else {
371        BasicBlock * blockBoundary_cont = CreateBasicBlock("block_boundary_cont-" + StateLabels.at(state));
372        iBuilder->CreateUnlikelyCondBr(
373                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth())),
374                exit_block, blockBoundary_cont
375                );
376        // Leave the insert point at the end and return.
377        iBuilder->SetInsertPoint(blockBoundary_cont);
378    }
379}
380
381
382void LZ4IndexDecoderKernel::generateSkippingBytes(BasicBlock * bb, BasicBlock * exit_block) {
383    iBuilder->SetInsertPoint(bb);
384    printRTDebugMsg("skipping bytes");
385
386    Value * remainingBytesInBlock = iBuilder->CreateSub(
387            iBuilder->getInt32(iBuilder->getBitBlockWidth()), iBuilder->CreateLoad(sOffset)
388            );
389    Value * remainingBytesToSkip = getScalarField("BytesToSkip");
390    Value * advanceDist = selectMin(iBuilder, remainingBytesInBlock, remainingBytesToSkip);
391    remainingBytesToSkip = iBuilder->CreateSub(remainingBytesToSkip, advanceDist);
392    incStackVar(iBuilder, sOffset, advanceDist);
393    setScalarField("BytesToSkip", remainingBytesToSkip);
394
395    generateBoundaryDetection(State::SKIPPING_BYTES, exit_block);
396    // Falls through.
397}
398
399
400void LZ4IndexDecoderKernel::generateAtBlockSize(BasicBlock * bb, BasicBlock * skippingBytes, BasicBlock * exit_block) {
401    iBuilder->CreateBr(bb);
402    iBuilder->SetInsertPoint(bb);
403    printRTDebugMsg("scanning block size");
404    printGlobalPos();
405
406    // Use tempLength to hold the block size temporarily.
407    // Note that it is initially stored as big-endian (for the ease of reading) and will be "swapped" later.
408    // Use tempCount as the loop counter (0..3).
409    // Both variables are initialized from kernel states at %entry.
410
411    // A do-while loop.
412    BasicBlock * loopBody = CreateBasicBlock("blocksize_loop_body");
413    BasicBlock * loopExit = CreateBasicBlock("blocksize_loop_exit");
414    iBuilder->CreateBr(loopBody);
415
416    iBuilder->SetInsertPoint(loopBody);
417    Value * byte = loadRawByte();
418    Value * newTempLength = iBuilder->CreateAdd(
419            iBuilder->CreateShl(iBuilder->CreateLoad(sTempLength), iBuilder->getInt32(8)),
420            iBuilder->CreateZExt(byte, iBuilder->getInt32Ty())
421            );
422    iBuilder->CreateStore(newTempLength, sTempLength);
423    incStackVar(iBuilder, sTempCount);
424    incStackVar(iBuilder, sOffset);
425    // Stop when we read all four bytes or reach the end of the block.
426    iBuilder->CreateCondBr(
427            iBuilder->CreateOr(
428                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sTempCount), iBuilder->getInt32(4)),
429                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth()))
430                ),
431            loopExit, loopBody
432            );
433
434    iBuilder->SetInsertPoint(loopExit);
435    BasicBlock * blockSizeCompleted_then = CreateBasicBlock("blocksize_completed_then");
436    BasicBlock * blockSizeCompleted_cont = CreateBasicBlock("blocksize_completed_cont");
437    iBuilder->CreateLikelyCondBr(
438            iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sTempCount), iBuilder->getInt32(4)),
439            blockSizeCompleted_then, blockSizeCompleted_cont
440            );
441
442    // All four bytes of the block size are read in.
443    iBuilder->SetInsertPoint(blockSizeCompleted_then);
444    // Remember to swap the block size back to little-endian.
445    Value * blockSize = generateBitswap(iBuilder, iBuilder->CreateLoad(sTempLength));
446    Value * currentPos = iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset));
447    setScalarField("LZ4BlockStart", currentPos);
448    setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, blockSize));
449    printRTDebugInt("blockSize", blockSize);
450
451    BasicBlock * uncompressedBlock_then = CreateBasicBlock("uncompressed_block_then");
452    BasicBlock * uncompressedBlock_else = CreateBasicBlock("uncompressed_block_cont");
453    iBuilder->CreateUnlikelyCondBr(
454            iBuilder->CreateTrunc(
455                iBuilder->CreateLShr(blockSize, iBuilder->getInt32(31)),
456                iBuilder->getInt1Ty()
457                ),
458            uncompressedBlock_then,
459            uncompressedBlock_else
460            );
461
462    iBuilder->SetInsertPoint(uncompressedBlock_then);
463    Value * realBlockSize = iBuilder->CreateXor(blockSize, iBuilder->getInt32(1L << 31));
464    setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, realBlockSize));
465    setScalarField("BytesToSkip", realBlockSize);
466    setScalarField("LiteralStart", currentPos);
467    setScalarField("LiteralLength", realBlockSize);
468    // No need to set MatchLength/MatchOffset to 0, nor to produce output,
469    // because %atBlockChecksum will do so as the last sequence.
470    iBuilder->CreateStore(iBuilder->getInt8(State::AT_BLOCK_CHECKSUM), sState);
471    iBuilder->CreateBr(skippingBytes);
472
473    iBuilder->SetInsertPoint(uncompressedBlock_else);
474    // Reset these temporary values for later use.
475    iBuilder->CreateStore(iBuilder->getInt32(0), sTempLength);
476    iBuilder->CreateStore(iBuilder->getInt32(0), sTempCount);
477    iBuilder->CreateStore(iBuilder->getInt8(State::AT_TOKEN), sState);
478    // A block size of 0 is the end mark of the frame. Exit.
479    iBuilder->CreateUnlikelyCondBr(
480            iBuilder->CreateICmpEQ(blockSize, ConstantInt::getNullValue(blockSize->getType())),
481            exit_block,
482            blockSizeCompleted_cont
483            );
484
485    // We could be at the boundary no matter the block size is completed or not.
486    iBuilder->SetInsertPoint(blockSizeCompleted_cont);
487    generateBoundaryDetection(State::AT_BLOCK_SIZE, exit_block);
488    // Falls through to %at_token.
489}
490
491
492void LZ4IndexDecoderKernel::generateAtToken(BasicBlock * bb, BasicBlock * exit_block) {
493    iBuilder->CreateBr(bb);
494    iBuilder->SetInsertPoint(bb);
495    printRTDebugMsg("reading token");
496
497    Value * token = loadRawByte();
498    Value * literalLen = iBuilder->CreateZExt(
499        iBuilder->CreateLShr(token, iBuilder->getInt8(4)),
500        iBuilder->getInt32Ty()
501        );
502    Value * matchLen = iBuilder->CreateZExt(
503        iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)),
504        iBuilder->getInt32Ty()
505        );
506    incStackVar(iBuilder, sOffset);
507    // Prepare extender word for scanning.
508    loadCurrentExtender();
509    setExtenderUntilOffset();
510    // Store the (partial) match length to be extended later.
511    setScalarField("MatchLength", matchLen);
512    // Use tempLength to accumulate extended lengths (until at_literals).
513    iBuilder->CreateStore(literalLen, sTempLength);
514    iBuilder->CreateStore(iBuilder->getInt8(State::EXTENDING_LITERAL_LENGTH), sState);
515
516    generateBoundaryDetection(State::AT_TOKEN, exit_block);
517    // Falls through to %extending_literal_length.
518}
519
520
521void LZ4IndexDecoderKernel::generateExtendingLiteralLen(BasicBlock * bb, BasicBlock * exit_block) {
522    iBuilder->CreateBr(bb);
523    iBuilder->SetInsertPoint(bb);
524    printRTDebugMsg("extending literal len");
525
526    Value * wordOffset = getWordOffset();
527    Value * blockOffset = getWordStartOffset();
528    Value * literalLen = iBuilder->CreateLoad(sTempLength);
529    Value * literalExtEnd = iBuilder->CreateTrunc(
530        countForwardZeroes(iBuilder, iBuilder->CreateNot(iBuilder->CreateLoad(sExtender))),
531        iBuilder->getInt32Ty()
532        );
533    printRTDebugInt("wordOffset", wordOffset);
534    printRTDebugInt("literalExtEnd", literalExtEnd);
535    // number of extender = literalExtEnd - wordOffset
536    Value * numExtenders = iBuilder->CreateSub(literalExtEnd, wordOffset);
537    Value * literalExtReachBoundary =
538            iBuilder->CreateICmpEQ(literalExtEnd, iBuilder->getInt32(wordWidth));
539    // There are literalExtEnd forward zeroes, we load bytes[literalExtEnd]
540    // which is the first non-extender.  If literalExtEnd == 64, we force the
541    // load index to be 0 to avoid out-of-bound access, and lastByte will be 0.
542    Value * loadOffset = iBuilder->CreateSelect(literalExtReachBoundary,
543            ConstantInt::getNullValue(literalExtEnd->getType()),
544            literalExtEnd);
545    Value * lastByte = iBuilder->CreateSelect(literalExtReachBoundary,
546            iBuilder->getInt8(0),
547            loadRawByte(iBuilder->CreateAdd(blockOffset, loadOffset)));
548    Value * literalLenExted = iBuilder->CreateICmpUGE(literalLen, iBuilder->getInt32(0xf));
549    literalLen = iBuilder->CreateSelect(literalLenExted,
550            iBuilder->CreateAdd(
551                literalLen,
552                iBuilder->CreateAdd(
553                    iBuilder->CreateMul(numExtenders, iBuilder->getInt32(0xff)),
554                    iBuilder->CreateZExt(lastByte, iBuilder->getInt32Ty())
555                    )
556                ),      // literalLen + numExtenders * 255
557            literalLen);
558    wordOffset = iBuilder->CreateSelect(literalLenExted,
559            literalExtEnd,
560            wordOffset);
561    // If lastByte is truly the last length byte, we need to advance the cursor by 1.
562    wordOffset = iBuilder->CreateSelect(
563            iBuilder->CreateAnd(literalLenExted, iBuilder->CreateNot(literalExtReachBoundary)),
564            iBuilder->CreateAdd(wordOffset, iBuilder->getInt32(1)),
565            wordOffset
566            );
567    iBuilder->CreateStore(literalLen, sTempLength);
568    iBuilder->CreateStore(iBuilder->CreateAdd(blockOffset, wordOffset), sOffset);
569    Value * unfinished = iBuilder->CreateAnd(literalExtReachBoundary, literalLenExted);
570    Value * newState = iBuilder->CreateSelect(unfinished,
571            iBuilder->getInt8(State::EXTENDING_LITERAL_LENGTH),
572            iBuilder->getInt8(State::AT_LITERALS));
573    iBuilder->CreateStore(newState, sState);
574
575    generateBoundaryDetection(State::EXTENDING_LITERAL_LENGTH, exit_block, true);
576    BasicBlock * cont_block = CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_LITERAL_LENGTH));
577    // Insert point is still in wordBoundary block now.
578    // See if there are still more extenders.
579    iBuilder->CreateUnlikelyCondBr(unfinished, bb, cont_block);
580
581    iBuilder->SetInsertPoint(cont_block);
582    // Falls through to %at_literals.
583}
584
585
586void LZ4IndexDecoderKernel::generateAtLiterals(BasicBlock * bb) {
587    iBuilder->CreateBr(bb);
588    iBuilder->SetInsertPoint(bb);
589
590    setScalarField("LiteralStart", iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset)));
591    setScalarField("LiteralLength", iBuilder->CreateLoad(sTempLength));
592    setScalarField("BytesToSkip", iBuilder->CreateLoad(sTempLength));
593    iBuilder->CreateStore(iBuilder->getInt8(State::AT_FIRST_OFFSET), sState);
594
595    // No boundary detection here as we do not advance the cursor.
596    // Control flow will be redirected to %skipping_bytes later.
597}
598
599
600void LZ4IndexDecoderKernel::generateAtFirstOffset(BasicBlock * bb, BasicBlock * exit_block) {
601    iBuilder->SetInsertPoint(bb);
602    printRTDebugMsg("reading first offset");
603
604    Value * byte = iBuilder->CreateZExt(loadRawByte(), iBuilder->getInt32Ty());
605    // Use tempLength to store partial offset.
606    iBuilder->CreateStore(byte, sTempLength);
607    incStackVar(iBuilder, sOffset);
608    iBuilder->CreateStore(iBuilder->getInt8(State::AT_SECOND_OFFSET), sState);
609
610    generateBoundaryDetection(State::AT_FIRST_OFFSET, exit_block);
611    // Falls through to %at_second_offset.
612}
613
614
615void LZ4IndexDecoderKernel::generateAtSecondOffset(BasicBlock * bb, BasicBlock * exit_block) {
616    iBuilder->CreateBr(bb);
617    iBuilder->SetInsertPoint(bb);
618    printRTDebugMsg("reading second offset");
619
620    Value * byte1 = iBuilder->CreateLoad(sTempLength);
621    Value * byte2 = iBuilder->CreateZExt(loadRawByte(), iBuilder->getInt32Ty());
622    Value * offset = iBuilder->CreateAdd(
623            iBuilder->CreateShl(byte2, iBuilder->getInt32(8)),
624            byte1
625            );
626    setScalarField("MatchOffset", offset);
627    incStackVar(iBuilder, sOffset);
628    // Prepare extender word and tempLength for extending.
629    loadCurrentExtender();
630    setExtenderUntilOffset();
631    iBuilder->CreateStore(getScalarField("MatchLength"), sTempLength);
632    iBuilder->CreateStore(iBuilder->getInt8(State::EXTENDING_MATCH_LENGTH), sState);
633
634    generateBoundaryDetection(State::AT_SECOND_OFFSET, exit_block);
635    // Falls through to %extending_match_length.
636}
637
638
639void LZ4IndexDecoderKernel::generateExtendingMatchLen(BasicBlock * bb, BasicBlock * exit_block) {
640    iBuilder->CreateBr(bb);
641    iBuilder->SetInsertPoint(bb);
642    printRTDebugMsg("extending match length");
643    printGlobalPos();
644    printRTDebugInt("rawbyte", loadRawByte());
645    printRTDebugInt("extword", iBuilder->CreateLoad(sExtender));
646
647    Value * wordOffset = getWordOffset();
648    Value * blockOffset = getWordStartOffset();
649    Value * matchLen = iBuilder->CreateLoad(sTempLength);
650    Value * matchExtEnd = iBuilder->CreateTrunc(
651        countForwardZeroes(iBuilder, iBuilder->CreateNot(iBuilder->CreateLoad(sExtender))),
652        iBuilder->getInt32Ty()
653        );
654    printRTDebugInt("wordoffset", wordOffset);
655    printRTDebugInt("matchExtEnd", matchExtEnd);
656    // number of extender = matchExtEnd - wordOffset
657    Value * numExtenders = iBuilder->CreateSub(matchExtEnd, wordOffset);
658    Value * matchExtReachBoundary = 
659            iBuilder->CreateICmpEQ(matchExtEnd, iBuilder->getInt32(wordWidth));
660    // There are matchExtEnd forward zeroes, we load bytes[matchExtEnd]
661    // which is the first non-extender.  If matchExtEnd == 64, we force the
662    // load index to be 0 to avoid out-of-bound access, and lastByte will be 0.
663    Value * loadOffset = iBuilder->CreateSelect(matchExtReachBoundary,
664            ConstantInt::getNullValue(matchExtEnd->getType()),
665            matchExtEnd);
666    Value * lastByte = iBuilder->CreateSelect(matchExtReachBoundary,
667            iBuilder->getInt8(0),
668            loadRawByte(iBuilder->CreateAdd(blockOffset, loadOffset)));
669    Value * matchLenExted = iBuilder->CreateICmpUGE(matchLen, iBuilder->getInt32(0xf));
670    matchLen = iBuilder->CreateSelect(matchLenExted,
671            iBuilder->CreateAdd(
672                matchLen,
673                iBuilder->CreateAdd(
674                    iBuilder->CreateMul(numExtenders, iBuilder->getInt32(0xff)),
675                    iBuilder->CreateZExt(lastByte, iBuilder->getInt32Ty())
676                    )
677                ),      // matchLen + numExtenders * 255
678            matchLen);
679    wordOffset = iBuilder->CreateSelect(matchLenExted,
680            matchExtEnd,
681            wordOffset);
682    // If lastByte is truly the last length byte, we need to advance the cursor by 1.
683    wordOffset = iBuilder->CreateSelect(
684            iBuilder->CreateAnd(matchLenExted, iBuilder->CreateNot(matchExtReachBoundary)),
685            iBuilder->CreateAdd(wordOffset, iBuilder->getInt32(1)),
686            wordOffset
687            );
688    iBuilder->CreateStore(matchLen, sTempLength);
689    iBuilder->CreateStore(iBuilder->CreateAdd(blockOffset, wordOffset), sOffset);
690
691    Value * unfinished = iBuilder->CreateAnd(matchExtReachBoundary, matchLenExted);
692    BasicBlock * output_then = CreateBasicBlock("output_then");
693    BasicBlock * output_cont = CreateBasicBlock("output_cont");
694    iBuilder->CreateLikelyCondBr(
695            iBuilder->CreateNot(unfinished),
696            output_then, output_cont
697            );
698    iBuilder->SetInsertPoint(output_then);
699    iBuilder->CreateStore(iBuilder->getInt8(State::AT_TOKEN), sState);
700    matchLen = iBuilder->CreateAdd(matchLen, iBuilder->getInt32(4));    // Add the constant at the end.
701    setScalarField("MatchLength", matchLen);
702    generateProduceOutput();
703    iBuilder->CreateBr(output_cont);
704
705    iBuilder->SetInsertPoint(output_cont);
706    generateBoundaryDetection(State::EXTENDING_MATCH_LENGTH, exit_block, true);
707    BasicBlock * cont_block = CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_MATCH_LENGTH));
708    // Insert point is still in wordBoundary block now.
709    // See if there are still more extenders.
710    iBuilder->CreateUnlikelyCondBr(unfinished, bb, cont_block);
711
712    iBuilder->SetInsertPoint(cont_block);
713}
714
715
716void LZ4IndexDecoderKernel::generateAtBlockChecksum(BasicBlock * bb, BasicBlock * skippingBytes) {
717    // No branch here as we have made a conditional branch outside.
718    iBuilder->SetInsertPoint(bb);
719    printRTDebugMsg("processing block checksum");
720
721    // Produce the partial output (fill matchIndexes with 0).
722    setScalarField("MatchOffset", iBuilder->getInt32(0));
723    setScalarField("MatchLength", iBuilder->getInt32(0));
724    generateProduceOutput();
725
726    BasicBlock * hasChecksum_then = CreateBasicBlock("has_checksum_then");
727    BasicBlock * hasChecksum_cont = CreateBasicBlock("has_checksum_cont");
728
729    iBuilder->CreateStore(iBuilder->getInt8(State::AT_BLOCK_SIZE), sState);
730    iBuilder->CreateCondBr(getScalarField("hasBlockChecksum"), hasChecksum_then, hasChecksum_cont);
731
732    iBuilder->SetInsertPoint(hasChecksum_then);
733    setScalarField("BytesToSkip", iBuilder->getInt32(4));
734    iBuilder->CreateBr(skippingBytes);
735    // Boundary detection will be done in skipping_bytes.
736
737    iBuilder->SetInsertPoint(hasChecksum_cont);
738    // No checksum, offset not advanced.  Falls through to the next block (block_size).
739}
740
741LZ4IndexDecoderKernel::LZ4IndexDecoderKernel(IDISA::IDISA_Builder * iBuilder)
742: BlockOrientedKernel(iBuilder, "lz4IndexDecoder",
743    // Inputs
744    {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"},
745     Binding{iBuilder->getStreamSetTy(1, 1), "extenders"}},
746    // Outputs: literal start, literal length, match offset, match length
747    {Binding{iBuilder->getStreamSetTy(2, 32), "literalIndexes", UnknownRate()},
748     Binding{iBuilder->getStreamSetTy(2, 32), "matchIndexes", FixedRatio(1, 1, "literalIndexes")}},
749    // Arguments
750    {Binding{iBuilder->getInt1Ty(), "hasBlockChecksum"}},
751    {},
752    // Internal states:
753    {Binding{iBuilder->getInt32Ty(), "BlockNo"},
754     Binding{iBuilder->getInt8Ty(), "State"},
755     Binding{iBuilder->getInt32Ty(), "LZ4BlockStart"},
756     Binding{iBuilder->getInt32Ty(), "LZ4BlockEnd"},
757     Binding{iBuilder->getInt32Ty(), "BytesToSkip"},
758     Binding{iBuilder->getInt32Ty(), "TempLength"},
759     Binding{iBuilder->getInt32Ty(), "TempCount"},
760     Binding{iBuilder->getInt32Ty(), "LiteralStart"},
761     Binding{iBuilder->getInt32Ty(), "LiteralLength"},
762     Binding{iBuilder->getInt32Ty(), "MatchOffset"},
763     Binding{iBuilder->getInt32Ty(), "MatchLength"}})
764, wordWidth{iBuilder->getSizeTy()->getBitWidth()} {
765    setNoTerminateAttribute(true);
766}
Note: See TracBrowser for help on using the repository browser.