source: icGREP/icgrep-devel/icgrep/kernels/symboltablepipeline.cpp @ 5217

Last change on this file since 5217 was 5217, checked in by nmedfort, 3 years ago

Merged PabloFunction and PabloKernel classes. Updated projects where necessary.

File size: 40.5 KB
RevLine 
[4959]1#include "symboltablepipeline.h"
2
3/*
4 *  Copyright (c) 2016 International Characters.
5 *  This software is licensed to the public under the Open Software License 3.0.
6 */
7
8#include "pipeline.h"
9#include "toolchain.h"
10#include "utf_encoding.h"
11
12#include <kernels/s2p_kernel.h>
[4974]13#include <kernels/instance.h>
[4959]14
[5217]15#include <pablo/prototype.h>
[4959]16#include <pablo/pablo_compiler.h>
[4991]17#include <pablo/analysis/pabloverifier.hpp>
[4959]18
19#include <re/re_cc.h>
20#include <re/re_rep.h>
21#include <re/re_name.h>
22#include <re/re_compiler.h>
23#include <re/printer_re.h>
24
25#include <cc/cc_compiler.h>
26
27#include <pablo/printer_pablos.h>
28#include <iostream>
29
[4991]30#include <llvm/IR/Intrinsics.h>
31
[4959]32using namespace re;
33using namespace pablo;
34
[4974]35namespace kernel {
36
[4959]37SymbolTableBuilder::SymbolTableBuilder(Module * m, IDISA::IDISA_Builder * b)
38: mMod(m)
39, iBuilder(b)
[4968]40, mLongestLookahead(0)
[4959]41, mBitBlockType(b->getBitBlockType())
[4974]42, mBlockSize(b->getBitBlockWidth()) {
[4959]43
44}
45
46/** ------------------------------------------------------------------------------------------------------------- *
47 * @brief generateLeadingFunction
48 ** ------------------------------------------------------------------------------------------------------------- */
49PabloFunction * SymbolTableBuilder::generateLeadingFunction(const std::vector<unsigned> & endpoints) {
50    PabloFunction * const function = PabloFunction::Create("leading", 8, endpoints.size() + 2);
51    Encoding enc(Encoding::Type::ASCII, 8);
52    cc::CC_Compiler ccCompiler(*function, enc);
53    re::RE_Compiler reCompiler(*function, ccCompiler);
[4995]54    RE * cc = makeName(makeCC(makeCC(makeCC('a', 'z'), makeCC('A', 'Z')), makeCC('0', '9')));
[4959]55    reCompiler.compileUnicodeNames(cc);
56    PabloAST * const matches = reCompiler.compile(cc).stream;
57    PabloBlock * const entry = function->getEntryBlock();
58    PabloAST * const adv = entry->createAdvance(matches, 1);
59    PabloAST * const starts = entry->createAnd(matches, entry->createNot(adv));
60    PabloAST * const ends = entry->createAnd(adv, entry->createNot(matches));
61
[4995]62    function->setResult(0, entry->createAssign("l.S", starts));
63    function->setResult(1, entry->createAssign("l.E", ends));
[4959]64
65    PabloAST * M = ends;
66    unsigned step = 1;
67    unsigned i = 0;
68    for (unsigned endpoint : endpoints) {
69        assert (endpoint >= step);
70        unsigned span = endpoint - step;
71        while (span > step) {
72            M = entry->createOr(entry->createAdvance(M, step), M);
73            span = span - step;
74            step *= 2;
75        }
76        M = entry->createOr(entry->createAdvance(M, span), M);
[4995]77        function->setResult(i + 2, entry->createAssign("l.M" + std::to_string(i), M));
[4959]78        ++i;
79        step += span;
80    }
81
82    return function;
83}
84
85/** ------------------------------------------------------------------------------------------------------------- *
[4968]86 * @brief generateSortingFunction
[4959]87 ** ------------------------------------------------------------------------------------------------------------- */
[4968]88PabloFunction * SymbolTableBuilder::generateSortingFunction(const PabloFunction * const leading, const std::vector<unsigned> & endpoints) {
[4995]89    PabloFunction * const function = PabloFunction::Create("sorting", leading->getNumOfResults(), (leading->getNumOfResults() - 1) * 2);
[4991]90    PabloBlock * entry = function->getEntryBlock();
[4995]91    function->setParameter(0, entry->createVar("l.S"));
92    function->setParameter(1, entry->createVar("l.E"));
[4959]93    for (unsigned i = 2; i < leading->getNumOfResults(); ++i) {
[4995]94        function->setParameter(i, entry->createVar("l.M" + std::to_string(i - 2)));
[4959]95    }
[4968]96    PabloAST * R = function->getParameter(0);
97    PabloAST * const E = entry->createNot(function->getParameter(1));
[4995]98    unsigned i = 0;
[4959]99    unsigned lowerbound = 0;
100    for (unsigned endpoint : endpoints) {
[4995]101        PabloAST * const M = function->getParameter(i + 2);
[4974]102        PabloAST * const L = entry->createLookahead(M, endpoint, "lookahead" + std::to_string(endpoint));
[4968]103        PabloAST * S = entry->createAnd(L, R);
[4995]104        Assign * Si = entry->createAssign("s.S_" + std::to_string(i + 1), S);
105        PabloAST * F = entry->createScanThru(S, E);
106        Assign * Ei = entry->createAssign("s.E_" + std::to_string(i + 1), F);
[4968]107        function->setResult(i * 2, Si);
108        function->setResult(i * 2 + 1, Ei);
[4991]109        R = entry->createXor(R, S);
[4959]110        ++i;
111        lowerbound = endpoint;
112    }
[4995]113    Assign * Si = entry->createAssign("s.S_n", R);
[4968]114    PabloAST * F = entry->createScanThru(R, E);
[4995]115    Assign * Ei = entry->createAssign("s.E_n", F);
[4968]116    function->setResult(i * 2, Si);
117    function->setResult(i * 2 + 1, Ei);
118    mLongestLookahead = lowerbound;
[4991]119
[4959]120    return function;
121}
122
123/** ------------------------------------------------------------------------------------------------------------- *
[4991]124 * @brief generateCountForwardZeroes
125 ** ------------------------------------------------------------------------------------------------------------- */
126inline Value * generateCountForwardZeroes(IDISA::IDISA_Builder * iBuilder, Value * bits) {
127    Value * cttzFunc = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::cttz, bits->getType());
128    return iBuilder->CreateCall(cttzFunc, std::vector<Value *>({bits, ConstantInt::get(iBuilder->getInt1Ty(), 0)}));
129}
130
131/** ------------------------------------------------------------------------------------------------------------- *
132 * @brief generateMaskedGather
133 ** ------------------------------------------------------------------------------------------------------------- */
134inline Value * SymbolTableBuilder::generateMaskedGather(Value * const base, Value * const vindex, Value * const mask) {
135
136    /*
137        From Intel:
138
139        extern __m256i _mm256_mask_i32gather_epi32(__m256i def_vals, int const * base, __m256i vindex, __m256i vmask, const int scale);
140
141        From Clang avx2intrin.h:
142
143        #define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
144           (__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \
145                                                (int const *)(m), \
146                                                (__v8si)(__m256i)(i), \
147                                                (__v8si)(__m256i)(mask), (s)); })
148        From llvm IntrinsicsX86.td:
149
150        def llvm_ptr_ty        : LLVMPointerType<llvm_i8_ty>;             // i8*
151
152        def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
153           Intrinsic<[llvm_v8i32_ty],
154           [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
155           [IntrReadArgMem]>;
156
157     */
158
159    VectorType * const vecType = VectorType::get(iBuilder->getInt32Ty(), 8);
[4992]160    Function * const vgather = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::x86_avx2_gather_d_d_256);
[4991]161    return iBuilder->CreateCall(vgather, {Constant::getNullValue(vecType), base, iBuilder->CreateBitCast(vindex, vecType), iBuilder->CreateBitCast(mask, vecType), iBuilder->getInt8(1)});
162}
163
164/** ------------------------------------------------------------------------------------------------------------- *
165 * @brief generateResetLowestBit
166 ** ------------------------------------------------------------------------------------------------------------- */
167inline Value * generateResetLowestBit(IDISA::IDISA_Builder * iBuilder, Value * bits) {
168    Value * bits_minus1 = iBuilder->CreateSub(bits, ConstantInt::get(bits->getType(), 1));
169    return iBuilder->CreateAnd(bits_minus1, bits);
170}
171
172/** ------------------------------------------------------------------------------------------------------------- *
[4992]173 * @brief generateGatherKernel
 *
 * Populates kBuilder with a kernel that, for each key-length group (one per entry of `endpoints`), scans that
 * group's start and end marker streams one scan word at a time, appends the bit positions it finds to per-group
 * position buffers held in the kernel's internal state, and calls the group's gather function every time
 * gatherCount end positions have been buffered.
 *
 * Internal state added here:
 *   - "Base":      an i8* that is loaded once and passed to every gather call;
 *   - "Positions": one struct per group, { i32 startIndex, [bitBlockWidth + gatherCount x i32] startPositions,
 *                  i32 endIndex, [gatherCount x i32] endPositions }.
 *
 * @param kBuilder         kernel builder receiving the state, stream declarations and function body
 * @param endpoints        key-length boundaries; group g handles keys generated by
 *                         generateGatherFunction(previous endpoint or 0, endpoints[g])
 * @param scanWordBitWidth width in bits of the integer fields the marker streams are scanned in
 [4991]174 ** ------------------------------------------------------------------------------------------------------------- */
[4992]175void SymbolTableBuilder::generateGatherKernel(KernelBuilder * kBuilder, const std::vector<unsigned> & endpoints, const unsigned scanWordBitWidth) {
[4991]176
    // A bit block is viewed as fieldCount integers of scanWordBitWidth bits each.
[4992]177    Type * const intScanWordTy = iBuilder->getIntNTy(scanWordBitWidth);
[4991]178    const unsigned fieldCount = iBuilder->getBitBlockWidth() / scanWordBitWidth;
[4992]179    Type * const scanWordVectorType = VectorType::get(intScanWordTy, fieldCount);
[4991]180    const unsigned vectorWidth = iBuilder->getBitBlockWidth() / 32;
    // Number of end positions that are buffered before a gather call is issued: four vectors of 32-bit lanes.
[4992]181    const unsigned gatherCount = vectorWidth * 4;
[4991]182
    // The start buffer is bitBlockWidth entries larger than the end buffer.
[4995]183    Type * startArrayType = ArrayType::get(iBuilder->getInt32Ty(), iBuilder->getBitBlockWidth() + gatherCount);
184    Type * endArrayType = ArrayType::get(iBuilder->getInt32Ty(), gatherCount);
185    Type * groupType = StructType::get(iBuilder->getInt32Ty(), startArrayType, iBuilder->getInt32Ty(), endArrayType, nullptr);
186    const unsigned baseIdx = kBuilder->addInternalState(iBuilder->getInt8PtrTy(), "Base");
[5000]187    const unsigned gatherPositionArrayIdx = kBuilder->addInternalState(ArrayType::get(groupType, endpoints.size()), "Positions");
[4995]188
    // Per group: a start stream, an end stream, and a 4-stream output set.
[4992]189    for (unsigned maxKeyLength : endpoints) {
190        kBuilder->addInputStream(1, "startStream" + std::to_string(maxKeyLength));
191        kBuilder->addInputStream(1, "endStream" + std::to_string(maxKeyLength));
[5000]192        kBuilder->addOutputStream(4); // ((maxKeyLength + 3) / 4) * 4
[4992]193    }
    // NOTE(review): these two inputs are declared, but the group loop below only runs endpoints.size() times and
    // reads input streams 2*g and 2*g+1, so they do not appear to be read in this function -- confirm.
[4995]194    kBuilder->addInputStream(1, "startStreamN");
195    kBuilder->addInputStream(1, "endStreamN");
[4992]196
[4995]197    Function * const function = kBuilder->prepareFunction();
[4991]198
199    BasicBlock * const entry = iBuilder->GetInsertBlock();
200
    // All basic blocks are created up front; they are filled in below in the same order.
[4992]201    BasicBlock * groupCond = BasicBlock::Create(mMod->getContext(), "groupCond", function, 0);
202    BasicBlock * groupBody = BasicBlock::Create(mMod->getContext(), "groupBody", function, 0);
203
[4991]204    BasicBlock * startOuterCond = BasicBlock::Create(mMod->getContext(), "startOuterCond", function, 0);
205    BasicBlock * startOuterBody = BasicBlock::Create(mMod->getContext(), "startOuterBody", function, 0);
206    BasicBlock * startInnerCond = BasicBlock::Create(mMod->getContext(), "startInnerCond", function, 0);
207    BasicBlock * startInnerBody = BasicBlock::Create(mMod->getContext(), "startInnerBody", function, 0);
208
209    BasicBlock * endOuterCond = BasicBlock::Create(mMod->getContext(), "endOuterCond", function, 0);
210    BasicBlock * endOuterBody = BasicBlock::Create(mMod->getContext(), "endOuterBody", function, 0);
211    BasicBlock * endInnerCond = BasicBlock::Create(mMod->getContext(), "endInnerCond", function, 0);
212    BasicBlock * endInnerBody = BasicBlock::Create(mMod->getContext(), "endInnerBody", function, 0);
213
[4992]214    BasicBlock * gather = BasicBlock::Create(mMod->getContext(), "gather", function, 0);
[4991]215
[4992]216    BasicBlock * nextGroup = BasicBlock::Create(mMod->getContext(), "nextGroup", function, 0);
[4991]217
218    BasicBlock * exit = BasicBlock::Create(mMod->getContext(), "exit", function, 0);
219
[5000]220
221    // ENTRY BLOCK
222    iBuilder->SetInsertPoint(entry);
[4995]223    Type * const int32PtrTy = PointerType::get(iBuilder->getInt32Ty(), 0);
[5000]224    FunctionType * const gatherFunctionType = FunctionType::get(iBuilder->getVoidTy(), {iBuilder->getInt8PtrTy(), int32PtrTy, int32PtrTy, iBuilder->getInt32Ty(), iBuilder->getInt8PtrTy()}, false);
    // Stack-allocated table of gather function pointers, indexed by group, so the group loop can dispatch on groupIV.
[5003]225    Value * const gatherFunctionPtrArray = iBuilder->CreateAlloca(PointerType::get(gatherFunctionType, 0), iBuilder->getInt32(endpoints.size()), "gatherFunctionPtrArray");
[5000]226
[4992]227    unsigned i = 0;
[5000]228    unsigned minKeyLength = 0;
    // Each group's gather function covers the key lengths from the previous endpoint (0 for the first) to its own.
[4992]229    for (unsigned maxKeyLength : endpoints) {
[5001]230        Function * f = generateGatherFunction(minKeyLength, maxKeyLength);
[5000]231        mGatherFunction.push_back(f);
232        iBuilder->CreateStore(f, iBuilder->CreateGEP(gatherFunctionPtrArray, iBuilder->getInt32(i++)));
[4992]233        minKeyLength = maxKeyLength;
234    }
[4995]235
236    //TODO: this won't work on files > 2^32 bytes yet; needs an intermediate flush then a recalculation of the base pointer.
237    Value * const base = iBuilder->CreateLoad(kBuilder->getInternalState(baseIdx), "base");
[5000]238    Value * const positionArray = kBuilder->getInternalState(gatherPositionArrayIdx);
[4995]239
    // Bit position of the first bit of the current block: block number * bit block width.
240    Value * blockPos = iBuilder->CreateLoad(kBuilder->getBlockNo());
[5106]241    blockPos = iBuilder->CreateMul(blockPos, ConstantInt::get(iBuilder->getSizeTy(), iBuilder->getBitBlockWidth()));
[4995]242
[4992]243    iBuilder->CreateBr(groupCond);
244
245    // GROUP COND
246    iBuilder->SetInsertPoint(groupCond);
    // groupIV counts groups from 0 to endpoints.size(); its back-edge value is added in NEXT GROUP.
247    PHINode * groupIV = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
248    groupIV->addIncoming(iBuilder->getInt32(0), entry);
249    Value * groupTest = iBuilder->CreateICmpNE(groupIV, iBuilder->getInt32(endpoints.size()));
250    iBuilder->CreateCondBr(groupTest, groupBody, exit);
251
252    // GROUP BODY
253    iBuilder->SetInsertPoint(groupBody);
[4991]254    // if two positions cannot be in the same vector element, we could possibly do some work in parallel here.
255
    // Input streams are declared pairwise, so group g reads stream 2*g (starts) and stream 2*g+1 (ends).
[4995]256    Value * index = iBuilder->CreateMul(groupIV, iBuilder->getInt32(2));
257    Value * startStreamPtr = kBuilder->getInputStream(index);
258    Value * startStream = iBuilder->CreateBlockAlignedLoad(startStreamPtr);
259    startStream = iBuilder->CreateBitCast(startStream, scanWordVectorType, "startStream");
260
261    index = iBuilder->CreateAdd(index, iBuilder->getInt32(1));
262    Value * endStreamPtr = kBuilder->getInputStream(index);
263    Value * endStream = iBuilder->CreateBlockAlignedLoad(endStreamPtr);
264    endStream = iBuilder->CreateBitCast(endStream, scanWordVectorType, "endStream");
265
    // Addresses of the four fields of this group's entry in "Positions"; the two indices are loaded here and
    // written back in NEXT GROUP.
[5008]266    Value * startIndexPtr = iBuilder->CreateGEP(positionArray, {iBuilder->getInt32(0), groupIV, iBuilder->getInt32(0)});
[4995]267    Value * startIndex = iBuilder->CreateLoad(startIndexPtr, "startIndex");
[5008]268    Value * startPosArray = iBuilder->CreateGEP(positionArray, {iBuilder->getInt32(0), groupIV, iBuilder->getInt32(1)}, "startPosArray");
[4995]269    Value * endIndexPtr = iBuilder->CreateGEP(positionArray, {iBuilder->getInt32(0), groupIV, iBuilder->getInt32(2)}, "endIndexPtr");
270    Value * endIndex = iBuilder->CreateLoad(endIndexPtr, "endIndex");
[5008]271    Value * endPosArray = iBuilder->CreateGEP(positionArray, {iBuilder->getInt32(0), groupIV, iBuilder->getInt32(3)}, "endPosArray");
[4995]272
[4991]273    iBuilder->CreateBr(startOuterCond);
[4992]274
275    // START OUTER COND
[4991]276    iBuilder->SetInsertPoint(startOuterCond);
    // Loop over the fieldCount scan words of the start stream, tracking the bit offset of the current word and
    // the running write index into the start position buffer.
[5106]277    PHINode * startBlockOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
[4995]278    startBlockOffset->addIncoming(blockPos, groupBody);
[5001]279    PHINode * startIndexPhi1 = iBuilder->CreatePHI(startIndex->getType(), 2, "startIndexPhi1");
[4992]280    startIndexPhi1->addIncoming(startIndex, groupBody);
[5106]281    PHINode * startIV = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
282    startIV->addIncoming(ConstantInt::get(iBuilder->getSizeTy(), 0), groupBody);
283    Value * startOuterTest = iBuilder->CreateICmpNE(startIV, ConstantInt::get(iBuilder->getSizeTy(), fieldCount));
[4991]284    iBuilder->CreateCondBr(startOuterTest, startOuterBody, endOuterCond);
285
[4992]286    // START OUTER BODY
[4991]287    iBuilder->SetInsertPoint(startOuterBody);
288    Value * startField = iBuilder->CreateExtractElement(startStream, startIV);
    // The incremented values are computed in this block but registered as arriving from startInnerCond, which is
    // the block that branches back to startOuterCond.
[5106]289    startIV->addIncoming(iBuilder->CreateAdd(startIV, ConstantInt::get(iBuilder->getSizeTy(), 1)), startInnerCond);
290    startBlockOffset->addIncoming(iBuilder->CreateAdd(startBlockOffset, ConstantInt::get(iBuilder->getSizeTy(), scanWordBitWidth)), startInnerCond);
[4991]291    iBuilder->CreateBr(startInnerCond);
292
[4992]293    // START INNER COND
[4991]294    iBuilder->SetInsertPoint(startInnerCond);
    // Loop while the current scan word still has set bits.
[5001]295    PHINode * startIndexPhi2 = iBuilder->CreatePHI(startIndex->getType(), 2, "startIndexPhi2");
296    startIndexPhi2->addIncoming(startIndexPhi1, startOuterBody);
297    startIndexPhi1->addIncoming(startIndexPhi2, startInnerCond);
[4991]298    PHINode * startFieldPhi = iBuilder->CreatePHI(intScanWordTy, 2);
299    startFieldPhi->addIncoming(startField, startOuterBody);
300    Value * test = iBuilder->CreateICmpNE(startFieldPhi, ConstantInt::getNullValue(intScanWordTy));
301    iBuilder->CreateCondBr(test, startInnerBody, startOuterCond);
302
[4992]303    // START INNER BODY
[4991]304    iBuilder->SetInsertPoint(startInnerBody);
    // Take the position of the lowest set bit, clear that bit for the next iteration, and append the position
    // (truncated to i32) to the start buffer.
305    Value * startPos = generateCountForwardZeroes(iBuilder, startFieldPhi);
306    startFieldPhi->addIncoming(generateResetLowestBit(iBuilder, startFieldPhi), startInnerBody);
    // NOTE(review): the in-word bit index is combined with the word offset by OR; that equals addition only if
    // the offset's low bits are zero (offset a multiple of a power-of-two scanWordBitWidth) -- confirm. The OR
    // also mixes an intScanWordTy value with a size_t value, which presumably requires equal widths -- confirm.
[4995]307    startPos = iBuilder->CreateTruncOrBitCast(iBuilder->CreateOr(startPos, startBlockOffset), iBuilder->getInt32Ty());
[5008]308    iBuilder->CreateStore(startPos, iBuilder->CreateGEP(startPosArray, {iBuilder->getInt32(0), startIndexPhi2}));
[5001]309    startIndexPhi2->addIncoming(iBuilder->CreateAdd(startIndexPhi2, ConstantInt::get(startIndexPhi2->getType(), 1)), startInnerBody);
[4991]310    iBuilder->CreateBr(startInnerCond);
[4992]311
[4991]312    // END POINT OUTER COND
313    iBuilder->SetInsertPoint(endOuterCond);
    // Same scan-word loop for the end stream. The start index is threaded through as well (startIndexPhi3/4)
    // because the GATHER block changes it.
[5106]314    PHINode * endBlockOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
[4995]315    endBlockOffset->addIncoming(blockPos, startOuterCond);
[4992]316    PHINode * endIndexPhi1 = iBuilder->CreatePHI(endIndex->getType(), 2);
317    endIndexPhi1->addIncoming(endIndex, startOuterCond);
[5001]318    PHINode * startIndexPhi3 = iBuilder->CreatePHI(startIndex->getType(), 2, "startIndexPhi3");
319    startIndexPhi3->addIncoming(startIndexPhi1, startOuterCond);
[5106]320    PHINode * endIV = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
321    endIV->addIncoming(ConstantInt::get(iBuilder->getSizeTy(), 0), startOuterCond);
322    Value * endOuterTest = iBuilder->CreateICmpNE(endIV, ConstantInt::get(iBuilder->getSizeTy(), fieldCount));
[4992]323    iBuilder->CreateCondBr(endOuterTest, endOuterBody, nextGroup);
324
[4991]325    // END POINT OUTER BODY
326    iBuilder->SetInsertPoint(endOuterBody);
327    Value * endField = iBuilder->CreateExtractElement(endStream, endIV);
    // As in the start loop, the back-edge values are registered as arriving from the inner condition block.
[5106]328    endIV->addIncoming(iBuilder->CreateAdd(endIV, ConstantInt::get(iBuilder->getSizeTy(), 1)), endInnerCond);
329    endBlockOffset->addIncoming(iBuilder->CreateAdd(endBlockOffset, ConstantInt::get(iBuilder->getSizeTy(), scanWordBitWidth)), endInnerCond);
[4991]330    iBuilder->CreateBr(endInnerCond);
[4992]331
[4991]332    // END POINT INNER COND
333    iBuilder->SetInsertPoint(endInnerCond);
    // This block has three predecessors (endOuterBody, endInnerBody and gather), hence three incoming values
    // per PHI; the remaining incoming values are added in the blocks below.
[5001]334    PHINode * startIndexPhi4 = iBuilder->CreatePHI(startIndexPhi3->getType(), 3, "startIndexPhi4");
335    startIndexPhi4->addIncoming(startIndexPhi3, endOuterBody);
336    startIndexPhi4->addIncoming(startIndexPhi4, endInnerBody);
337    startIndexPhi3->addIncoming(startIndexPhi4, endInnerCond);
[4992]338    PHINode * endIndexPhi2 = iBuilder->CreatePHI(endIndex->getType(), 3);
339    endIndexPhi2->addIncoming(endIndexPhi1, endOuterBody);
340    endIndexPhi1->addIncoming(endIndexPhi2, endInnerCond);
    // Arriving from gather, the end index restarts at zero.
341    endIndexPhi2->addIncoming(ConstantInt::getNullValue(endIndex->getType()), gather);
[4991]342    PHINode * endFieldPhi = iBuilder->CreatePHI(intScanWordTy, 3);
343    endFieldPhi->addIncoming(endField, endOuterBody);
344    Value * endInnerTest = iBuilder->CreateICmpNE(endFieldPhi, ConstantInt::getNullValue(intScanWordTy));
345    iBuilder->CreateCondBr(endInnerTest, endInnerBody, endOuterCond);
[4992]346
[4991]347    // END POINT INNER BODY
348    iBuilder->SetInsertPoint(endInnerBody);
349    Value * endPos = generateCountForwardZeroes(iBuilder, endFieldPhi);
350    Value * updatedEndFieldPhi = generateResetLowestBit(iBuilder, endFieldPhi);
    // The remaining bits of the scan word are the same whether control returns directly or via gather.
351    endFieldPhi->addIncoming(updatedEndFieldPhi, endInnerBody);
[4992]352    endFieldPhi->addIncoming(updatedEndFieldPhi, gather);
[4995]353    endPos = iBuilder->CreateTruncOrBitCast(iBuilder->CreateOr(endPos, endBlockOffset), iBuilder->getInt32Ty());
[5008]354    iBuilder->CreateStore(endPos, iBuilder->CreateGEP(endPosArray, {iBuilder->getInt32(0), endIndexPhi2}));
[4992]355    Value * updatedEndIndexPhi = iBuilder->CreateAdd(endIndexPhi2, ConstantInt::get(endIndexPhi2->getType(), 1));
356    endIndexPhi2->addIncoming(updatedEndIndexPhi, endInnerBody);
    // Branch to GATHER as soon as the end buffer holds gatherCount positions.
357    Value * filledEndPosBufferTest = iBuilder->CreateICmpEQ(updatedEndIndexPhi, ConstantInt::get(updatedEndIndexPhi->getType(), gatherCount));
358    iBuilder->CreateCondBr(filledEndPosBufferTest, gather, endInnerCond);
[4991]359
[4992]360    // GATHER
361    iBuilder->SetInsertPoint(gather);
[4995]362
[5008]363    Value * startArrayPtr = iBuilder->CreatePointerCast(startPosArray, PointerType::get(iBuilder->getInt32Ty(), 0));
364    Value * endArrayPtr = iBuilder->CreatePointerCast(endPosArray, PointerType::get(iBuilder->getInt32Ty(), 0));
[4992]365    Value * gatherFunctionPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(gatherFunctionPtrArray, groupIV));
[5000]366    Value * outputBuffer = iBuilder->CreatePointerCast(kBuilder->getOutputStream(groupIV), iBuilder->getInt8PtrTy());
    // NOTE(review): the key count passed here is the literal 32, while the buffer-full test above uses
    // gatherCount (= 4 * bitBlockWidth / 32); the two agree only for a 256-bit block width -- confirm.
[5014]367    iBuilder->CreateCall(gatherFunctionPtr, {base, startArrayPtr, endArrayPtr, iBuilder->getInt32(32), outputBuffer});
[5008]368    // Copy the unused start positions to the front of the start position array and adjust the start index
[4995]369    Value * remainingArrayPtr = iBuilder->CreateGEP(startArrayPtr, iBuilder->getInt32(gatherCount));
[5001]370    Value * remainingCount = iBuilder->CreateSub(startIndexPhi4, iBuilder->getInt32(gatherCount));
    // Each position is an i32, hence 4 bytes per remaining entry.
371    Value * remainingBytes = iBuilder->CreateMul(remainingCount, iBuilder->getInt32(4));
372    iBuilder->CreateMemMove(startArrayPtr, remainingArrayPtr, remainingBytes, 4);
373    startIndexPhi4->addIncoming(remainingCount, gather);
[4992]374    iBuilder->CreateBr(endInnerCond);
[4991]375
[4992]376    // NEXT GROUP
377    iBuilder->SetInsertPoint(nextGroup);
    // Write both indices back to the kernel's internal state, from which GROUP BODY reloads them.
[5001]378    iBuilder->CreateStore(startIndexPhi3, startIndexPtr);
[4995]379    iBuilder->CreateStore(endIndexPhi1, endIndexPtr);
[4992]380    groupIV->addIncoming(iBuilder->CreateAdd(groupIV, ConstantInt::get(groupIV->getType(), 1)), nextGroup);
381    iBuilder->CreateBr(groupCond);
[4991]382
383    iBuilder->SetInsertPoint(exit);
384    kBuilder->finalize();
385}
386
387/** ------------------------------------------------------------------------------------------------------------- *
[4992]388 * @brief generateGatherFunction
389 ** ------------------------------------------------------------------------------------------------------------- */
[5001]390Function * SymbolTableBuilder::generateGatherFunction(const unsigned minKeyLength, const unsigned maxKeyLength) {
[4992]391
[5000]392    assert (minKeyLength < maxKeyLength);
[4992]393
[5000]394    const std::string functionName = "gather_" + std::to_string(minKeyLength) + "_to_" + std::to_string(maxKeyLength);
[4992]395    Function * function = mMod->getFunction(functionName);
396    if (function == nullptr) {
397
398        const auto ip = iBuilder->saveIP();
399
[5000]400        const unsigned minCount = (minKeyLength / 4);
401        const unsigned maxCount = ((maxKeyLength + 3) / 4);
402
[4992]403        const unsigned vectorWidth = iBuilder->getBitBlockWidth() / 32;
404        Type * const gatherVectorType =  VectorType::get(iBuilder->getInt32Ty(), vectorWidth);
[5001]405        const unsigned gatherByteWidth = gatherVectorType->getPrimitiveSizeInBits() / 8;
406        Type * const transposedVectorType = VectorType::get(iBuilder->getInt8Ty(), iBuilder->getBitBlockWidth() / 8);
407        const unsigned transposedByteWidth = transposedVectorType->getPrimitiveSizeInBits() / 8;
[4992]408
[5001]409
[4995]410        Type * const int32PtrTy = PointerType::get(iBuilder->getInt32Ty(), 0);
[5000]411        FunctionType * const functionType = FunctionType::get(iBuilder->getVoidTy(), {iBuilder->getInt8PtrTy(), int32PtrTy, int32PtrTy, iBuilder->getInt32Ty(), iBuilder->getInt8PtrTy()}, false);
[4992]412        function = Function::Create(functionType, GlobalValue::ExternalLinkage, functionName, mMod);
413        function->setCallingConv(CallingConv::C);
414        function->setDoesNotCapture(1);
415        function->setDoesNotCapture(2);
416        function->setDoesNotCapture(3);
417        function->setDoesNotThrow();
418
419        Function::arg_iterator args = function->arg_begin();
[5014]420        Value * const base = &*(args++);
[4992]421        base->setName("base");
[5014]422        Value * startArray = &*(args++);
[4992]423        startArray->setName("startArray");
[5014]424        Value * endArray = &*(args++);
[4992]425        endArray->setName("endArray");
[5014]426        Value * const numOfKeys = &*(args++);
[4995]427        numOfKeys->setName("numOfKeys");
[5014]428        Value * result = &*(args++);
[5000]429        result->setName("result");
[4992]430
431        BasicBlock * entry = BasicBlock::Create(mMod->getContext(), "entry", function, 0);
432        BasicBlock * gatherCond = BasicBlock::Create(mMod->getContext(), "gatherCond", function, 0);
433        BasicBlock * partialGatherCond = BasicBlock::Create(mMod->getContext(), "partialGatherCond", function, 0);
434        BasicBlock * partialGatherBody = BasicBlock::Create(mMod->getContext(), "partialGatherBody", function, 0);
435        BasicBlock * gatherBody = BasicBlock::Create(mMod->getContext(), "gatherBody", function, 0);
436        BasicBlock * transposeCond = BasicBlock::Create(mMod->getContext(), "transposeCond", function, 0);
437        BasicBlock * transposeBody = BasicBlock::Create(mMod->getContext(), "transposeBody", function, 0);
438        BasicBlock * exit = BasicBlock::Create(mMod->getContext(), "exit", function, 0);
439
440        Value * const four = iBuilder->CreateVectorSplat(vectorWidth, iBuilder->getInt32(4));
441
442        // ENTRY
443        iBuilder->SetInsertPoint(entry);
[5001]444
445        AllocaInst * const buffer = iBuilder->CreateAlloca(gatherVectorType, iBuilder->getInt32(maxCount * 4), "buffer");
[5037]446        Value * end = iBuilder->CreateGEP(buffer, iBuilder->getInt32(maxCount * 4));
[5106]447        Value * size = iBuilder->CreateSub(iBuilder->CreatePtrToInt(end, iBuilder->getSizeTy()), iBuilder->CreatePtrToInt(buffer, iBuilder->getSizeTy()));
[5001]448        iBuilder->CreateMemSet(buffer, iBuilder->getInt8(0), size, 4);
449        Value * const transposed = iBuilder->CreateBitCast(buffer, transposedVectorType->getPointerTo(), "transposed");
450
451        startArray = iBuilder->CreateBitCast(startArray, gatherVectorType->getPointerTo());
452        endArray = iBuilder->CreateBitCast(endArray, gatherVectorType->getPointerTo());
453
454        iBuilder->CallPrintInt(functionName + ".numOfKeys", numOfKeys);
455
[4992]456        iBuilder->CreateBr(gatherCond);
457
458        // FULL GATHER COND
459        iBuilder->SetInsertPoint(gatherCond);
460        PHINode * remainingLanes = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
[4995]461        remainingLanes->addIncoming(numOfKeys, entry);
[5001]462
[4992]463        PHINode * gatherIV = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
464        gatherIV->addIncoming(iBuilder->getInt32(0), entry);
465
[5001]466        Value * gatherLoopTest = iBuilder->CreateICmpSGE(remainingLanes, iBuilder->getInt32(vectorWidth));
467        iBuilder->CreateCondBr(gatherLoopTest, gatherBody, partialGatherCond);
468
[4992]469        // PARTIAL GATHER COND
470        iBuilder->SetInsertPoint(partialGatherCond);
[5001]471        Value * partialGatherLoopTest = iBuilder->CreateICmpSLE(remainingLanes, iBuilder->getInt32(0));
472        iBuilder->CreateCondBr(partialGatherLoopTest, transposeCond, partialGatherBody);
[4992]473
474        // PARTIAL GATHER BODY
475        iBuilder->SetInsertPoint(partialGatherBody);
476        Type * registerType = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
[5001]477        Value * maskedLanes = iBuilder->CreateSub(iBuilder->getInt32(vectorWidth), remainingLanes);       
[4992]478        maskedLanes = iBuilder->CreateMul(maskedLanes, iBuilder->getInt32(32));
479        maskedLanes = iBuilder->CreateZExt(maskedLanes, registerType);
[5000]480        maskedLanes = iBuilder->CreateLShr(Constant::getAllOnesValue(registerType), maskedLanes);
[4992]481        maskedLanes = iBuilder->CreateBitCast(maskedLanes, gatherVectorType);
482        iBuilder->CreateBr(gatherBody);
483
484        // FULL GATHER BODY
485        iBuilder->SetInsertPoint(gatherBody);
486        PHINode * activeLanes = iBuilder->CreatePHI(gatherVectorType, 2, "activeLanes");
[5001]487        activeLanes->addIncoming(Constant::getAllOnesValue(gatherVectorType), gatherCond);
[4992]488        activeLanes->addIncoming(maskedLanes, partialGatherBody);
489
[5000]490
[4992]491        Value * startPos = iBuilder->CreateAlignedLoad(iBuilder->CreateGEP(startArray, gatherIV), 4);
[5001]492        Value * const endPos = iBuilder->CreateAlignedLoad(iBuilder->CreateGEP(endArray, gatherIV), 4);
493
[4992]494        for (unsigned blockCount = 0; blockCount < minCount; ++blockCount) {
495            Value * tokenData = generateMaskedGather(base, startPos, activeLanes);
[5003]496            Value * ptr = iBuilder->CreateGEP(buffer, iBuilder->CreateOr(gatherIV, iBuilder->getInt32(blockCount * 4)));
[5001]497            iBuilder->CreateAlignedStore(tokenData, ptr, transposedByteWidth);
[4992]498            startPos = iBuilder->CreateAdd(startPos, four);
499        }
500
501        for (unsigned blockCount = minCount; blockCount < maxCount; ++blockCount) {
[4995]502
[4992]503            // if we have not fully gathered the data for this key
[5001]504            Value * atLeastOneByte = iBuilder->CreateSExt(iBuilder->CreateICmpSLT(startPos, endPos), startPos->getType());
505            atLeastOneByte = iBuilder->CreateAnd(atLeastOneByte, activeLanes, "atLeastOneByte");
[4995]506
[4992]507            // gather it ...
508            Value * tokenData = generateMaskedGather(base, startPos, atLeastOneByte);
[5001]509
[4992]510            // and compute how much data is remaining.
511            Value * remaining = iBuilder->CreateSub(endPos, startPos);
[4995]512
[5008]513            // if this token has at least 4 bytes remaining ...
[5001]514            Value * atLeastFourBytes = iBuilder->CreateSExt(iBuilder->CreateICmpUGE(remaining, four), remaining->getType(), "atLeastFourBytes");
[4995]515
516            // determine how many bits do *not* belong to the token
517            remaining = iBuilder->CreateSub(four, remaining);
518            remaining = iBuilder->CreateShl(remaining, ConstantInt::get(remaining->getType(), 3));
519
[4992]520            // then mask them out prior to storing the value
521            Value * partialTokenMask = iBuilder->CreateLShr(ConstantInt::getAllOnesValue(remaining->getType()), remaining);
[4995]522            partialTokenMask = iBuilder->CreateOr(partialTokenMask, atLeastFourBytes);
[5001]523            tokenData = iBuilder->CreateAnd(partialTokenMask, tokenData);
524            Value * ptr = iBuilder->CreateGEP(buffer, iBuilder->CreateOr(gatherIV, iBuilder->getInt32(blockCount * 4)));
525            iBuilder->CreateAlignedStore(tokenData, ptr, transposedByteWidth);
[4995]526
[5001]527            startPos = iBuilder->CreateAdd(startPos, four);
[4992]528        }
[5001]529
[4992]530        gatherIV->addIncoming(iBuilder->CreateAdd(gatherIV, iBuilder->getInt32(1)), gatherBody);
531        remainingLanes->addIncoming(iBuilder->CreateSub(remainingLanes, iBuilder->getInt32(vectorWidth)), gatherBody);
532        iBuilder->CreateBr(gatherCond);
533
534        // TRANSPOSE COND
535        iBuilder->SetInsertPoint(transposeCond);
536        PHINode * transposeIV = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
[5001]537        transposeIV->addIncoming(iBuilder->getInt32(0), partialGatherCond);
[4992]538        Value * transposeLoopTest = iBuilder->CreateICmpNE(transposeIV, iBuilder->getInt32(maxCount));
539        iBuilder->CreateCondBr(transposeLoopTest, transposeBody, exit);
540
541        // TRANSPOSE BODY
542        iBuilder->SetInsertPoint(transposeBody);
543
[5001]544        Value * offset = iBuilder->CreateMul(transposeIV, iBuilder->getInt32(4));
545
[4992]546        Value * value[4];
547        for (unsigned i = 0; i < 4; ++i) {
[5001]548            Value * const ptr = iBuilder->CreateGEP(buffer, iBuilder->CreateAdd(offset, iBuilder->getInt32(i)));
549            value[i] = iBuilder->CreateLoad(ptr);
[4992]550        }
[5001]551
552        for (unsigned byteWidth = 2; byteWidth; --byteWidth) {
553            const unsigned fieldWidth = (byteWidth * 8);
[4992]554            const unsigned fieldCount = iBuilder->getBitBlockWidth() / fieldWidth;
[5001]555            VectorType * const type = VectorType::get(Type::getIntNTy(iBuilder->getContext(), fieldWidth), fieldCount);
556            std::vector<Constant *> even(fieldCount);
557            std::vector<Constant *> odd(fieldCount);
[4992]558            for (unsigned j = 0; j < fieldCount; ++j) {
[5001]559                even[j] = iBuilder->getInt32(j * 2);
560                odd[j] = iBuilder->getInt32(j * 2 + 1);
[4992]561            }
[5001]562            Constant * const evenVector = ConstantVector::get(even);
563            Constant * const oddVector = ConstantVector::get(odd);
564            Value * result[4];
[4992]565            for (unsigned i = 0; i < 4; i += 2) {
[5001]566                value[i] = iBuilder->CreateBitCast(value[i], type);
567                value[i + 1] = iBuilder->CreateBitCast(value[i + 1], type);
568                result[(i / byteWidth)] = iBuilder->CreateShuffleVector(value[i], value[i + 1], evenVector);
569                result[(i / byteWidth) + byteWidth] = iBuilder->CreateShuffleVector(value[i], value[i + 1], oddVector);
[4992]570            }
[5001]571            for (unsigned i = 0; i < 4; ++i) {
572                value[i] = result[i];
573            }
[4992]574        }
[5000]575
[4992]576        for (unsigned i = 0; i < 4; ++i) {
[5001]577            Value * ptr = iBuilder->CreateGEP(transposed, iBuilder->CreateAdd(offset, iBuilder->getInt32(i)));
578            iBuilder->CreateAlignedStore(value[i], ptr, gatherByteWidth);
[4992]579        }
580
[5001]581        transposeIV->addIncoming(iBuilder->CreateAdd(transposeIV, iBuilder->getInt32(1)), transposeBody);
582        iBuilder->CreateBr(transposeCond);
[5000]583
[4992]584        // EXIT
585        iBuilder->SetInsertPoint(exit);
[5000]586
587        // ... call hashing function ...
588
[5001]589        for (unsigned i = 0; i < maxKeyLength; ++i) {
590            Value * ptr = iBuilder->CreateGEP(transposed, iBuilder->getInt32(i));
591            Value * value = iBuilder->CreateAlignedLoad(ptr, gatherByteWidth);
592            iBuilder->CallPrintRegister(functionName + ".output" + std::to_string(i), value);
593        }
[5000]594
[4995]595        iBuilder->CreateRetVoid();
[4992]596
597        iBuilder->restoreIP(ip);
598    }
599
600    return function;
601}
602
603
604/** ------------------------------------------------------------------------------------------------------------- *
[4959]605 * @brief createKernels
606 ** ------------------------------------------------------------------------------------------------------------- */
607void SymbolTableBuilder::createKernels() {
608
609    std::vector<unsigned> endpoints;
610    endpoints.push_back(8);
[5001]611    endpoints.push_back(17);
612    endpoints.push_back(27);
[5003]613    endpoints.push_back(39);
614    endpoints.push_back(77);
615    endpoints.push_back(124);
616    endpoints.push_back(178);
617    endpoints.push_back(278);
[4959]618
619    PabloCompiler pablo_compiler(mMod, iBuilder);
620    PabloFunction * const leading = generateLeadingFunction(endpoints);
[4968]621    PabloFunction * const sorting = generateSortingFunction(leading, endpoints);
[4959]622
[4991]623    const auto bufferSize = ((mLongestLookahead + iBuilder->getBitBlockWidth() - 1) / iBuilder->getBitBlockWidth()) + 1;
[4968]624
[5000]625    mS2PKernel = new KernelBuilder(iBuilder, "s2p", 1);
626    mLeadingKernel = new KernelBuilder(iBuilder, "leading", bufferSize);
627    mSortingKernel = new KernelBuilder(iBuilder, "sorting", bufferSize);
628    mGatherKernel = new KernelBuilder(iBuilder, "gathering", 1);
[4968]629
630    generateS2PKernel(mMod, iBuilder, mS2PKernel);
631
[4959]632    pablo_compiler.setKernel(mLeadingKernel);
633    pablo_compiler.compile(leading);
[4968]634    pablo_compiler.setKernel(mSortingKernel);
635    pablo_compiler.compile(sorting);
[4959]636
637    delete leading;
[4968]638    delete sorting;
639
[4959]640    releaseSlabAllocatorMemory();
[4991]641
[4992]642    generateGatherKernel(mGatherKernel, endpoints, 64);
[4959]643}
644
[4974]645Function * SymbolTableBuilder::ExecuteKernels(){
[4959]646
[5106]647    Type * intType = iBuilder->getSizeTy();
[4974]648
[4968]649    Type * inputType = PointerType::get(ArrayType::get(StructType::get(mMod->getContext(), std::vector<Type *>({ArrayType::get(mBitBlockType, 8)})), 1), 0);
[4974]650    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", Type::getVoidTy(mMod->getContext()), inputType, intType, nullptr));
[4968]651    main->setCallingConv(CallingConv::C);
652    Function::arg_iterator args = main->arg_begin();
[4959]653
[5014]654    Value * const inputStream = &*(args++);
[4991]655    inputStream->setName("inputStream");
[4959]656
[5014]657    Value * const bufferSize = &*(args++);
[4991]658    bufferSize->setName("bufferSize");
[4959]659
[4968]660    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
661
662    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
663
[5003]664    BasicBlock * leadingBlock = BasicBlock::Create(mMod->getContext(), "leadingBody", main, 0);
[4968]665
[5003]666    BasicBlock * partialLeadingCond = BasicBlock::Create(mMod->getContext(), "partialLeadingCond", main, 0);
667    BasicBlock * partialLeadingBody = BasicBlock::Create(mMod->getContext(), "partialLeadingBody", main, 0);
[4968]668
[5003]669    BasicBlock * regularCondBlock = BasicBlock::Create(mMod->getContext(), "regularCond", main, 0);
670    BasicBlock * regularBodyBlock = BasicBlock::Create(mMod->getContext(), "regularBody", main, 0);
[4959]671
[5003]672    BasicBlock * partialCondBlock = BasicBlock::Create(mMod->getContext(), "partialCond", main, 0);
673    BasicBlock * partialBodyBlock = BasicBlock::Create(mMod->getContext(),  "partialBody", main, 0);
[4959]674
[5003]675    BasicBlock * flushLengthGroupsBlock = BasicBlock::Create(mMod->getContext(), "flushLengthGroups", main, 0);
[4968]676
[4986]677    Instance * s2pInstance = mS2PKernel->instantiate(inputStream);
[5010]678    Instance * leadingInstance = mLeadingKernel->instantiate(s2pInstance->getOutputStreamBuffer());
679    Instance * sortingInstance = mSortingKernel->instantiate(leadingInstance->getOutputStreamBuffer());
680    Instance * gatheringInstance = mGatherKernel->instantiate(sortingInstance->getOutputStreamBuffer());
[4968]681
[4995]682    gatheringInstance->setInternalState("Base", iBuilder->CreateBitCast(inputStream, iBuilder->getInt8PtrTy()));
683
[4968]684    const unsigned leadingBlocks = (mLongestLookahead + iBuilder->getBitBlockWidth() - 1) / iBuilder->getBitBlockWidth();
685
[5106]686    Value * const requiredBytes = ConstantInt::get(iBuilder->getSizeTy(), mBlockSize * leadingBlocks);
687    Value * const blockSize = ConstantInt::get(iBuilder->getSizeTy(), mBlockSize);
[4974]688
689    // First compute any necessary leading blocks to allow the sorting kernel access to the "future" data produced by
690    // the leading kernel ...
[4991]691
[5003]692    Value * enoughDataForLookaheadCond = iBuilder->CreateICmpUGE(bufferSize, requiredBytes);
693    iBuilder->CreateCondBr(enoughDataForLookaheadCond, leadingBlock, partialLeadingCond);
[4991]694
[5003]695    iBuilder->SetInsertPoint(leadingBlock);
696    for (unsigned i = 0; i < leadingBlocks; ++i) {
697        s2pInstance->CreateDoBlockCall();
698        leadingInstance->CreateDoBlockCall();
699    }
700    iBuilder->CreateBr(regularCondBlock);
[5000]701
[5003]702    iBuilder->SetInsertPoint(partialLeadingCond);
703    PHINode * remainingBytes1 = iBuilder->CreatePHI(intType, 2);
704    remainingBytes1->addIncoming(bufferSize, entryBlock);
705    Value * remainingCond = iBuilder->CreateICmpUGT(remainingBytes1, blockSize);
706    iBuilder->CreateCondBr(remainingCond, partialLeadingBody, partialCondBlock);
707
708    iBuilder->SetInsertPoint(partialLeadingBody);
[4986]709    s2pInstance->CreateDoBlockCall();
710    leadingInstance->CreateDoBlockCall();
[5003]711    remainingBytes1->addIncoming(iBuilder->CreateSub(remainingBytes1, blockSize), partialLeadingBody);
712    iBuilder->CreateBr(partialLeadingCond);
[4968]713
[4974]714    // Now all the data for which we can produce and consume a full leading block...
[5003]715    iBuilder->SetInsertPoint(regularCondBlock);
[4974]716    PHINode * remainingBytes2 = iBuilder->CreatePHI(intType, 2);
[5003]717    remainingBytes2->addIncoming(bufferSize, leadingBlock);
718    Value * remainingBytesCond = iBuilder->CreateICmpUGT(remainingBytes2, requiredBytes);
719    iBuilder->CreateCondBr(remainingBytesCond, regularBodyBlock, partialCondBlock);
[4995]720
[4968]721    iBuilder->SetInsertPoint(regularBodyBlock);
[4986]722    s2pInstance->CreateDoBlockCall();
723    leadingInstance->CreateDoBlockCall();
724    sortingInstance->CreateDoBlockCall();
[4995]725    gatheringInstance->CreateDoBlockCall();
[4974]726    remainingBytes2->addIncoming(iBuilder->CreateSub(remainingBytes2, blockSize), regularBodyBlock);
[5003]727    iBuilder->CreateBr(regularCondBlock);
[4968]728
[4974]729    // Check if we have a partial blocks worth of leading data remaining
[5003]730    iBuilder->SetInsertPoint(partialCondBlock);
731    PHINode * remainingBytes3 = iBuilder->CreatePHI(intType, 3);
732    remainingBytes3->addIncoming(bufferSize, partialLeadingCond);
733    remainingBytes3->addIncoming(remainingBytes2, regularCondBlock);
[5106]734    Value * partialBlockCond = iBuilder->CreateICmpSGT(remainingBytes3, ConstantInt::get(iBuilder->getSizeTy(), 0));
[5003]735    iBuilder->CreateCondBr(partialBlockCond, partialBodyBlock, flushLengthGroupsBlock);
[4968]736
[4974]737    // If we do, process it and mask out the data
[5003]738    iBuilder->SetInsertPoint(partialBodyBlock);
739    s2pInstance->clearOutputStreamSet();
740    leadingInstance->CreateDoBlockCall();   
[4986]741    sortingInstance->CreateDoBlockCall();
[4995]742    gatheringInstance->CreateDoBlockCall();
[5003]743    remainingBytes3->addIncoming(iBuilder->CreateSub(remainingBytes3, blockSize), partialBodyBlock);
744    iBuilder->CreateBr(partialCondBlock);
[5001]745
[5000]746    // perform a final partial gather on all length groups ...
[5003]747    iBuilder->SetInsertPoint(flushLengthGroupsBlock);
[5000]748
749    Value * const base = iBuilder->CreateLoad(gatheringInstance->getInternalState("Base"));
750    Value * positionArray = gatheringInstance->getInternalState("Positions");
751
752    for (unsigned i = 0; i < mGatherFunction.size(); ++i) {
[5003]753        BasicBlock * nonEmptyGroup = BasicBlock::Create(mMod->getContext(), "flushLengthGroup" + std::to_string(i), main, 0);
[5000]754
755        BasicBlock * nextNonEmptyGroup = BasicBlock::Create(mMod->getContext(), "", main, 0);
756
757        ConstantInt * groupIV = iBuilder->getInt32(i);
758        Value * startIndexPtr = iBuilder->CreateGEP(positionArray, {iBuilder->getInt32(0), groupIV, iBuilder->getInt32(0)}, "startIndexPtr");
759        Value * startIndex = iBuilder->CreateLoad(startIndexPtr, "remaining");
760        Value * cond = iBuilder->CreateICmpNE(startIndex, ConstantInt::getNullValue(startIndex->getType()));
761        iBuilder->CreateCondBr(cond, nonEmptyGroup, nextNonEmptyGroup);
762
763        iBuilder->SetInsertPoint(nonEmptyGroup);
764        Value * startArray = iBuilder->CreateGEP(positionArray, {iBuilder->getInt32(0), groupIV, iBuilder->getInt32(1)}, "startArray");
765        Value * startArrayPtr = iBuilder->CreatePointerCast(startArray, PointerType::get(iBuilder->getInt32Ty(), 0));
766        Value * endArray = iBuilder->CreateGEP(positionArray, {iBuilder->getInt32(0), groupIV, iBuilder->getInt32(3)}, "endArray");
767        Value * endArrayPtr = iBuilder->CreatePointerCast(endArray, PointerType::get(iBuilder->getInt32Ty(), 0));
768        Value * outputBuffer = iBuilder->CreatePointerCast(gatheringInstance->getOutputStream(groupIV), iBuilder->getInt8PtrTy());
[5014]769        iBuilder->CreateCall(mGatherFunction.at(i), {base, startArrayPtr, endArrayPtr, startIndex, outputBuffer});
[5000]770        iBuilder->CreateBr(nextNonEmptyGroup);
771
772        iBuilder->SetInsertPoint(nextNonEmptyGroup);
773    }
[4968]774    iBuilder->CreateRetVoid();
[4959]775
[4974]776    return main;
[4959]777}
[4968]778
779SymbolTableBuilder::~SymbolTableBuilder() {
780    delete mS2PKernel;
781    delete mLeadingKernel;
782    delete mSortingKernel;
[4992]783    delete mGatherKernel;
[4968]784}
[4974]785
[4991]786
[4974]787}
Note: See TracBrowser for help on using the repository browser.