source: icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

Last change on this file was 6261, checked in by nmedfort, 7 months ago

Work on OptimizationBranch?; revisited pipeline termination

File size: 12.1 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "scanmatchgen.h"
7#include <llvm/IR/Intrinsics.h>
8#include <llvm/IR/Module.h>
9#include <kernels/kernel_builder.h>
10#include <IR_Gen/FunctionTypeBuilder.h>
11#include <llvm/Support/raw_ostream.h>
12#include <grep/grep_engine.h>
13
14using namespace llvm;
15
16inline static unsigned floor_log2(const unsigned v) {
17    assert ("log2(0) is undefined!" && v != 0);
18    return 31 - __builtin_clz(v);
19}
20
21namespace kernel {
22
23void ScanMatchKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, Value * const numOfStrides) {
24
25    Module * const m = b->getModule();
26
27    BasicBlock * const entryBlock = b->GetInsertBlock();
28    BasicBlock * const initialBlock = b->CreateBasicBlock("initialBlock");
29    BasicBlock * const scanWordIteration = b->CreateBasicBlock("ScanWordIteration");
30    BasicBlock * const matches_test_block = b->CreateBasicBlock("matches_test_block");
31    BasicBlock * const processMatchesEntry = b->CreateBasicBlock("process_matches_loop");
32    BasicBlock * const prior_breaks_block = b->CreateBasicBlock("prior_breaks_block");
33    BasicBlock * const loop_final_block = b->CreateBasicBlock("loop_final_block");
34    BasicBlock * const processMatchesExit = b->CreateBasicBlock("matches_done_block");
35    BasicBlock * const remaining_breaks_block = b->CreateBasicBlock("remaining_breaks_block");
36    BasicBlock * const return_block = b->CreateBasicBlock("return_block");
37    BasicBlock * const scanWordExit = b->CreateBasicBlock("ScanWordExit");
38    BasicBlock * const blocksExit = b->CreateBasicBlock("blocksExit");
39    BasicBlock * const callFinalizeScan = b->CreateBasicBlock("callFinalizeScan");
40    BasicBlock * const scanReturn = b->CreateBasicBlock("scanReturn");
41    IntegerType * const sizeTy = b->getSizeTy();
42    const unsigned fieldCount = b->getBitBlockWidth() / sizeTy->getBitWidth();
43    VectorType * const scanwordVectorType =  VectorType::get(sizeTy, fieldCount);
44    Constant * const ZERO = b->getSize(0);
45    Constant * const ONE = b->getSize(1);
46    Value * const blocksToDo = b->CreateMul(numOfStrides, b->getSize(mStride / b->getBitBlockWidth()));
47    Value * accumulator = b->getScalarField("accumulator_address");
48
49    b->CreateBr(initialBlock);
50
51    b->SetInsertPoint(initialBlock);
52    PHINode * const blockOffset = b->CreatePHI(sizeTy, 2);
53    blockOffset->addIncoming(ZERO, entryBlock);
54
55    Value * matches = b->loadInputStreamBlock("matchResult", ZERO, blockOffset);
56    matches = b->CreateBitCast(matches, scanwordVectorType);
57    Value * linebreaks = b->loadInputStreamBlock("lineBreak", ZERO, blockOffset);
58    linebreaks = b->CreateBitCast(linebreaks, scanwordVectorType);
59
60    Value * const blockNo = b->getScalarField("BlockNo");
61    Value * const scanwordPos = b->CreateShl(blockNo, floor_log2(b->getBitBlockWidth()));
62    Value * const consumed = b->getProcessedItemCount("InputStream");
63    Value * const consumedLines = b->getScalarField("LineNum");
64    Value * const avail = b->getAvailableItemCount("InputStream");
65    b->CreateBr(scanWordIteration);
66
67    b->SetInsertPoint(scanWordIteration);
68
69        // while (phiIndex < words per stride)
70        PHINode * const phiIndex = b->CreatePHI(sizeTy, 2, "index");
71        phiIndex->addIncoming(ZERO, initialBlock);
72        PHINode * const phiScanwordPos = b->CreatePHI(scanwordPos->getType(), 2, "pos");
73        phiScanwordPos->addIncoming(scanwordPos, initialBlock);
74        PHINode * const phiLineStart = b->CreatePHI(consumed->getType(), 2, "recordstart");
75        phiLineStart->addIncoming(consumed, initialBlock);
76        PHINode * const phiLineNum = b->CreatePHI(consumedLines->getType(), 2, "recordnum");
77        phiLineNum->addIncoming(consumedLines, initialBlock);
78
79        Value * const matchWord = b->CreateExtractElement(matches, phiIndex);
80        Value * const recordBreaks = b->CreateExtractElement(linebreaks, phiIndex);
81
82        // The match scanner works with a loop involving four variables:
83        // (a) the bit stream scanword of matches marking the ends of selected records,
84        // (b) the bit stream scanword of record_breaks marking the ends of all records,
85        // (c) the integer lastRecordNum indicating the number of records processed so far,
86        // (d) the index lastRecordStart indicating the file position of the last record.
87        // We set up a loop structure, in which a set of 4 phi nodes initialize these
88        // variables from either the input to the scanner or the computed values within
89        // the loop body.
90
91        b->CreateBr(matches_test_block);
92
93        // LOOP Test Block
94        b->SetInsertPoint(matches_test_block);
95        PHINode * const phiMatchWord = b->CreatePHI(sizeTy, 2, "matches");
96        PHINode * const phiRecordBreaks = b->CreatePHI(sizeTy, 2, "recordbreaks");
97        PHINode * const phiRecordStart = b->CreatePHI(sizeTy, 2, "recordstart");
98        PHINode * const phiRecordNum = b->CreatePHI(sizeTy, 2, "recordnum");
99        phiMatchWord->addIncoming(matchWord, scanWordIteration);
100        phiRecordBreaks->addIncoming(recordBreaks, scanWordIteration);
101        phiRecordStart->addIncoming(phiLineStart, scanWordIteration);
102        phiRecordNum->addIncoming(phiLineNum, scanWordIteration);
103        Value * const anyMatches = b->CreateICmpNE(phiMatchWord, ZERO);
104        b->CreateCondBr(anyMatches, processMatchesEntry, processMatchesExit);
105
106            // LOOP BODY
107            // The loop body is entered if we have more matches to process.
108            b->SetInsertPoint(processMatchesEntry);
109            Value * prior_breaks = b->CreateAnd(b->CreateMaskToLowestBitExclusive(phiMatchWord), phiRecordBreaks);
110            // Within the loop we have a conditional block that is executed if there are any prior record breaks.
111            Value * prior_breaks_cond = b->CreateICmpNE(prior_breaks, ZERO);
112            b->CreateCondBr(prior_breaks_cond, prior_breaks_block, loop_final_block);
113
114                // PRIOR_BREAKS_BLOCK
115                // If there are prior breaks, we count them and compute the record start position.
116                b->SetInsertPoint(prior_breaks_block);
117                Value * matchedRecordNum = b->CreateAdd(b->CreatePopcount(prior_breaks), phiRecordNum);
118                Value * reverseDistance = b->CreateCountReverseZeroes(prior_breaks, true);
119                Value * width = ConstantInt::get(sizeTy, sizeTy->getBitWidth());
120                Value * priorRecordStart = b->CreateAdd(phiScanwordPos, b->CreateSub(width, reverseDistance));
121                b->CreateBr(loop_final_block);
122
123            // LOOP FINAL BLOCK
124            // The prior breaks, if any have been counted.  Set up phi nodes for the recordNum
125            // and recortStart depending on whether the conditional execution of prior_breaks_block.
126            b->SetInsertPoint(loop_final_block);
127            PHINode * matchRecordNum = b->CreatePHI(sizeTy, 2, "matchRecordNum");
128            matchRecordNum->addIncoming(phiRecordNum, processMatchesEntry);
129            matchRecordNum->addIncoming(matchedRecordNum, prior_breaks_block);
130            phiRecordNum->addIncoming(matchRecordNum, loop_final_block);
131
132            PHINode * const matchRecordStart = b->CreatePHI(sizeTy, 2, "matchRecordStart");
133            matchRecordStart->addIncoming(phiRecordStart, processMatchesEntry);
134            matchRecordStart->addIncoming(priorRecordStart, prior_breaks_block);
135            phiRecordStart->addIncoming(matchRecordStart, loop_final_block);
136            Value * matchRecordEnd = b->CreateAdd(phiScanwordPos, b->CreateCountForwardZeroes(phiMatchWord, true));
137            // It is possible that the matchRecordEnd position is one past EOF.  Make sure not
138            // to access past EOF.
139            Value * const bufLimit = b->CreateSub(b->CreateAdd(scanwordPos, avail), ONE);
140            matchRecordEnd = b->CreateUMin(matchRecordEnd, bufLimit);
141            Function * const dispatcher = m->getFunction("accumulate_match_wrapper"); assert (dispatcher);
142            Value * const startPtr = b->getRawInputPointer("InputStream", matchRecordStart);
143            Value * const endPtr = b->getRawInputPointer("InputStream", matchRecordEnd);
144            auto argi = dispatcher->arg_begin();
145            const auto matchRecNumArg = &*(argi++);
146            Value * const matchRecNum = b->CreateZExtOrTrunc(matchRecordNum, matchRecNumArg->getType());
147            b->CreateCall(dispatcher, {accumulator, matchRecNum, startPtr, endPtr});
148            Value * remaining_matches = b->CreateResetLowestBit(phiMatchWord);
149            phiMatchWord->addIncoming(remaining_matches, loop_final_block);
150
151            Value * remaining_breaks = b->CreateXor(phiRecordBreaks, prior_breaks);
152            phiRecordBreaks->addIncoming(remaining_breaks, loop_final_block);
153
154            b->CreateBr(matches_test_block);
155
156        // LOOP EXIT/MATCHES_DONE
157        b->SetInsertPoint(processMatchesExit);
158        // When the matches are done, there may be additional record breaks remaining
159        Value * more_breaks_cond = b->CreateICmpNE(phiRecordBreaks, ZERO);
160        b->CreateCondBr(more_breaks_cond, remaining_breaks_block, return_block);
161
162            // REMAINING_BREAKS_BLOCK: process remaining record breaks after all matches are processed
163            b->SetInsertPoint(remaining_breaks_block);
164            Value * break_count = b->CreatePopcount(phiRecordBreaks);
165            Value * final_record_num = b->CreateAdd(phiRecordNum, break_count);
166            Value * reverseZeroes = b->CreateCountReverseZeroes(phiRecordBreaks);
167            Value * pendingLineStart = b->CreateAdd(phiScanwordPos, b->CreateSub(width, reverseZeroes));
168            b->CreateBr(return_block);
169
170        // RETURN block
171        b->SetInsertPoint(return_block);
172        PHINode * phiFinalRecordNum = b->CreatePHI(sizeTy, 2, "finalRecordCount");
173        PHINode * phiFinalRecordStart = b->CreatePHI(sizeTy, 2, "finalRecordStart");
174
175        phiFinalRecordNum->addIncoming(phiRecordNum, processMatchesExit);
176        phiFinalRecordNum->addIncoming(final_record_num, remaining_breaks_block);
177        phiLineNum->addIncoming(phiFinalRecordNum, return_block);
178
179        phiFinalRecordStart->addIncoming(phiRecordStart, processMatchesExit);
180        phiFinalRecordStart->addIncoming(pendingLineStart, remaining_breaks_block);
181        phiLineStart->addIncoming(phiFinalRecordStart, return_block);
182
183        Value * nextScanwordPos = b->CreateAdd(phiScanwordPos, ConstantInt::get(sizeTy, sizeTy->getBitWidth()));
184        phiScanwordPos->addIncoming(nextScanwordPos, return_block);
185        Value * nextIndex = b->CreateAdd(phiIndex, ONE);
186        phiIndex->addIncoming(nextIndex, return_block);
187        b->CreateLikelyCondBr(b->CreateICmpNE(nextIndex, b->getSize(fieldCount)), scanWordIteration, scanWordExit);
188
189    b->SetInsertPoint(scanWordExit);
190    b->setScalarField("BlockNo", b->CreateAdd(blockNo, ConstantInt::get(blockNo->getType(), 1)));
191    b->setScalarField("LineNum", phiFinalRecordNum);
192    b->setProcessedItemCount("InputStream", phiFinalRecordStart);
193    Value * const nextBlockOffset = b->CreateAdd(blockOffset, ONE);
194    blockOffset->addIncoming(nextBlockOffset, scanWordExit);
195    b->CreateLikelyCondBr(b->CreateICmpNE(nextBlockOffset, blocksToDo), initialBlock, blocksExit);
196
197    b->SetInsertPoint(blocksExit);
198    b->CreateCondBr(mIsFinal, callFinalizeScan, scanReturn);
199
200    b->SetInsertPoint(callFinalizeScan);
201    Function * finalizer = m->getFunction("finalize_match_wrapper"); assert (finalizer);
202    Value * const bufferEnd = b->getRawInputPointer("InputStream", avail);
203    b->CreateCall(finalizer, {accumulator, bufferEnd});
204    b->CreateBr(scanReturn);
205
206    b->SetInsertPoint(scanReturn);
207}
208
209ScanMatchKernel::ScanMatchKernel(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * const Matches, StreamSet * const LineBreakStream, StreamSet * const ByteStream, Scalar * const callbackObject)
210: MultiBlockKernel(b, "scanMatch",
211// inputs
212{Binding{"matchResult", Matches, FixedRate(), Principal()}
213,Binding{"lineBreak", LineBreakStream}
214,Binding{"InputStream", ByteStream, FixedRate(), Deferred()}},
215// outputs
216{},
217// input scalars
218{Binding{"accumulator_address", callbackObject}},
219// output scalars
220{},
221// kernel state
222{Binding{b->getSizeTy(), "BlockNo"}
223,Binding{b->getSizeTy(), "LineNum"}}) {
224    addAttribute(SideEffecting());
225}
226
227}
Note: See TracBrowser for help on using the repository browser.