source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4682

Last change on this file since 4682 was 4682, checked in by cameron, 4 years ago

Carry Manager progress and bug fix

File size: 35.4 KB
Line 
1/*
2 *  Copyright (c) 2014-15 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <pablo/pablo_compiler.h>
8#include <pablo/codegenstate.h>
9#include <pablo/carry_data.h>
10#include <pablo/carry_manager.h>
11#include <pablo/printer_pablos.h>
12#include <pablo/function.h>
13#include <cc/cc_namemap.hpp>
14#include <re/re_name.h>
15#include <stdexcept>
16#include <include/simd-lib/bitblock.hpp>
17#include <sstream>
18#include <IDISA/idisa_builder.h>
19#include <llvm/IR/Verifier.h>
20#include <llvm/Pass.h>
21#include <llvm/PassManager.h>
22#include <llvm/ADT/SmallVector.h>
23#include <llvm/Analysis/Passes.h>
24#include <llvm/IR/BasicBlock.h>
25#include <llvm/IR/CallingConv.h>
26#include <llvm/IR/Constants.h>
27#include <llvm/IR/DataLayout.h>
28#include <llvm/IR/DerivedTypes.h>
29#include <llvm/IR/Function.h>
30#include <llvm/IR/GlobalVariable.h>
31#include <llvm/IR/InlineAsm.h>
32#include <llvm/IR/Instructions.h>
33#include <llvm/IR/LLVMContext.h>
34#include <llvm/IR/Module.h>
35#include <llvm/Support/FormattedStream.h>
36#include <llvm/Support/MathExtras.h>
37#include <llvm/Support/Casting.h>
38#include <llvm/Support/Compiler.h>
39#include <llvm/Support/Debug.h>
40#include <llvm/Support/TargetSelect.h>
41#include <llvm/Support/Host.h>
42#include <llvm/Transforms/Scalar.h>
43#include <llvm/ExecutionEngine/ExecutionEngine.h>
44#include <llvm/ExecutionEngine/MCJIT.h>
45#include <llvm/IRReader/IRReader.h>
46#include <llvm/Bitcode/ReaderWriter.h>
47#include <llvm/Support/MemoryBuffer.h>
48#include <llvm/IR/IRBuilder.h>
49#include <llvm/Support/CommandLine.h>
50#include <llvm/ADT/Twine.h>
51#include <iostream>
52
53static cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
54static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
55
56static cl::OptionCategory fTracingOptions("Run-time Tracing Options", "These options control execution traces.");
57static cl::opt<bool> TraceNext("trace-next-nodes", cl::init(false), cl::desc("Generate dynamic traces of executed Next nodes (while control variables)."), cl::cat(fTracingOptions));
58static cl::opt<bool> DumpTrace("dump-trace", cl::init(false), cl::desc("Generate dynamic traces of executed assignments."), cl::cat(fTracingOptions));
59
60extern "C" {
61  void wrapped_print_register(char * regName, BitBlock bit_block) {
62      print_register<BitBlock>(regName, bit_block);
63  }
64}
65
66namespace pablo {
67
68PabloCompiler::PabloCompiler()
69: mMod(nullptr)
70, mBuilder(nullptr)
71, mCarryManager(nullptr)
72, mBitBlockType(VectorType::get(IntegerType::get(getGlobalContext(), 64), BLOCK_SIZE / 64))
73, iBuilder(mBitBlockType)
74, mInputType(nullptr)
75, mCarryDataPtr(nullptr)
76, mWhileDepth(0)
77, mIfDepth(0)
78, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
79, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
80, mFunction(nullptr)
81, mInputAddressPtr(nullptr)
82, mOutputAddressPtr(nullptr)
83, mMaxWhileDepth(0)
84, mPrintRegisterFunction(nullptr) {
85
86}
87
88PabloCompiler::~PabloCompiler() {
89}
90   
91void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr, const size_t carrySize) {
92    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
93}
94
95void PabloCompiler::genPrintRegister(std::string regName, Value * bitblockValue) {
96    Constant * regNameData = ConstantDataArray::getString(mMod->getContext(), regName);
97    GlobalVariable *regStrVar = new GlobalVariable(*mMod,
98                                                   ArrayType::get(IntegerType::get(mMod->getContext(), 8), regName.length()+1),
99                                                   /*isConstant=*/ true,
100                                                   /*Linkage=*/ GlobalValue::PrivateLinkage,
101                                                   /*Initializer=*/ regNameData);
102    Value * regStrPtr = mBuilder->CreateGEP(regStrVar, {mBuilder->getInt64(0), mBuilder->getInt32(0)});
103    mBuilder->CreateCall(mPrintRegisterFunction, {regStrPtr, bitblockValue});
104}
105
106CompiledPabloFunction PabloCompiler::compile(PabloFunction & function) {
107
108    Examine(function);
109
110    InitializeNativeTarget();
111    InitializeNativeTargetAsmPrinter();
112    InitializeNativeTargetAsmParser();
113
114    Module * module = new Module("", getGlobalContext());
115
116    mMod = module;
117
118    std::string errMessage;
119    #ifdef USE_LLVM_3_5
120    EngineBuilder builder(mMod);
121    #else
122    EngineBuilder builder(std::move(std::unique_ptr<Module>(mMod)));
123    #endif
124    builder.setErrorStr(&errMessage);
125    builder.setMCPU(sys::getHostCPUName());
126    #ifdef USE_LLVM_3_5
127    builder.setUseMCJIT(true);
128    #endif
129    builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
130    ExecutionEngine * engine = builder.create();
131    if (engine == nullptr) {
132        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
133    }
134    DeclareFunctions(engine);
135    DeclareCallFunctions(engine);
136
137    auto func = compile(function, mMod);
138
139    //Display the IR that has been generated by this module.
140    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
141        module->dump();
142    }
143    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
144    verifyModule(*module, &dbgs());
145
146    engine->finalizeObject();
147
148    return CompiledPabloFunction(func.second, func.first, engine);
149}
150
151std::pair<llvm::Function *, size_t> PabloCompiler::compile(PabloFunction & function, Module * module) {
152
153    Examine(function);
154
155    mMod = module;
156
157    mBuilder = new IRBuilder<>(mMod->getContext());
158
159    iBuilder.initialize(mMod, mBuilder);
160
161    mCarryManager = new CarryManager(mBuilder, mBitBlockType, mZeroInitializer, mOneInitializer, &iBuilder);
162
163    GenerateFunction(function);
164
165    mBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mFunction,0));
166
167    //The basis bits structure
168    for (unsigned i = 0; i != function.getNumOfParameters(); ++i) {
169        Value* indices[] = {mBuilder->getInt64(0), mBuilder->getInt32(i)};
170        Value * gep = mBuilder->CreateGEP(mInputAddressPtr, indices);
171        LoadInst * basisBit = mBuilder->CreateAlignedLoad(gep, BLOCK_SIZE/8, false, function.getParameter(i)->getName()->to_string());
172        mMarkerMap.insert(std::make_pair(function.getParameter(i), basisBit));
173        if (DumpTrace) {
174            genPrintRegister(function.getParameter(i)->getName()->to_string(), basisBit);
175        }
176    }
177       
178    unsigned totalCarryDataSize = mCarryManager->initialize(&(function.getEntryBlock()), mCarryDataPtr);
179   
180    //Generate the IR instructions for the function.
181    compileBlock(function.getEntryBlock());
182   
183    mCarryManager->generateBlockNoIncrement();
184
185    if (DumpTrace || TraceNext) {
186        genPrintRegister("mBlockNo", mBuilder->CreateAlignedLoad(mBuilder->CreateBitCast(mCarryManager->getBlockNoPtr(), PointerType::get(mBitBlockType, 0)), BLOCK_SIZE/8, false));
187    }
188   
189    // Write the output values out
190    for (unsigned i = 0; i != function.getNumOfResults(); ++i) {
191        assert (function.getResult(i));
192        SetOutputValue(mMarkerMap[function.getResult(i)], i);
193    }
194
195    //Terminate the block
196    ReturnInst::Create(mMod->getContext(), mBuilder->GetInsertBlock());
197
198    // Clean up
199    delete mCarryManager; mCarryManager = nullptr;
200    delete mBuilder; mBuilder = nullptr;
201    mMod = nullptr; // don't delete this. It's either owned by the ExecutionEngine or the calling function.
202
203    //Return the required size of the carry data area to the process_block function.
204    return std::make_pair(mFunction, totalCarryDataSize * sizeof(BitBlock));
205}
206
207inline void PabloCompiler::GenerateFunction(PabloFunction & function) {
208    mInputType = PointerType::get(StructType::get(mMod->getContext(), std::vector<Type *>(function.getNumOfParameters(), mBitBlockType)), 0);
209    Type * carryType = PointerType::get(mBitBlockType, 0);
210    Type * outputType = PointerType::get(StructType::get(mMod->getContext(), std::vector<Type *>(function.getNumOfResults(), mBitBlockType)), 0);
211    FunctionType * functionType = FunctionType::get(Type::getVoidTy(mMod->getContext()), {{mInputType, carryType, outputType}}, false);
212
213#ifdef USE_UADD_OVERFLOW
214#ifdef USE_TWO_UADD_OVERFLOW
215    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
216    std::vector<Type*>StructTy_0_fields;
217    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
218    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
219    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
220
221    std::vector<Type*>FuncTy_1_args;
222    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
223    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
224    FunctionType* FuncTy_1 = FunctionType::get(
225                                              /*Result=*/StructTy_0,
226                                              /*Params=*/FuncTy_1_args,
227                                              /*isVarArg=*/false);
228
229    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
230                                              std::to_string(BLOCK_SIZE));
231    if (!mFunctionUaddOverflow) {
232        mFunctionUaddOverflow= Function::Create(
233          /*Type=*/ FuncTy_1,
234          /*Linkage=*/ GlobalValue::ExternalLinkage,
235          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
236        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
237    }
238    AttributeSet mFunctionUaddOverflowPAL;
239    {
240        SmallVector<AttributeSet, 4> Attrs;
241        AttributeSet PAS;
242        {
243          AttrBuilder B;
244          B.addAttribute(Attribute::NoUnwind);
245          B.addAttribute(Attribute::ReadNone);
246          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
247        }
248
249        Attrs.push_back(PAS);
250        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
251    }
252    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
253#else
254    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
255    std::vector<Type*>StructTy_0_fields;
256    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
257    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
258    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
259
260    std::vector<Type*>FuncTy_1_args;
261    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
262    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
263    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
264    FunctionType* FuncTy_1 = FunctionType::get(
265                                              /*Result=*/StructTy_0,
266                                              /*Params=*/FuncTy_1_args,
267                                              /*isVarArg=*/false);
268
269    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
270                                              std::to_string(BLOCK_SIZE));
271    if (!mFunctionUaddOverflowCarryin) {
272        mFunctionUaddOverflowCarryin = Function::Create(
273          /*Type=*/ FuncTy_1,
274          /*Linkage=*/ GlobalValue::ExternalLinkage,
275          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
276        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
277    }
278    AttributeSet mFunctionUaddOverflowCarryinPAL;
279    {
280        SmallVector<AttributeSet, 4> Attrs;
281        AttributeSet PAS;
282        {
283          AttrBuilder B;
284          B.addAttribute(Attribute::NoUnwind);
285          B.addAttribute(Attribute::ReadNone);
286          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
287        }
288
289        Attrs.push_back(PAS);
290        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
291    }
292    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
293#endif
294#endif
295
296    //Starts on process_block
297    SmallVector<AttributeSet, 4> Attrs;
298    Attrs.push_back(AttributeSet::get(mMod->getContext(), ~0U, { Attribute::NoUnwind, Attribute::UWTable }));
299    Attrs.push_back(AttributeSet::get(mMod->getContext(), 1U, { Attribute::ReadOnly, Attribute::NoCapture }));
300    Attrs.push_back(AttributeSet::get(mMod->getContext(), 2U, { Attribute::NoCapture }));
301    Attrs.push_back(AttributeSet::get(mMod->getContext(), 3U, { Attribute::ReadNone, Attribute::NoCapture }));
302    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
303
304    // Create the function that will be generated.
305    mFunction = Function::Create(functionType, GlobalValue::ExternalLinkage, function.getName()->value(), mMod);
306    mFunction->setCallingConv(CallingConv::C);
307    mFunction->setAttributes(AttrSet);
308
309    Function::arg_iterator args = mFunction->arg_begin();
310    mInputAddressPtr = args++;
311    mInputAddressPtr->setName("input");
312    mCarryDataPtr = args++;
313    mCarryDataPtr->setName("carry");
314    mOutputAddressPtr = args++;
315    mOutputAddressPtr->setName("output");
316}
317
318inline void PabloCompiler::Examine(PabloFunction & function) {
319    if (mMod == nullptr) {
320
321        mWhileDepth = 0;
322        mIfDepth = 0;
323        mMaxWhileDepth = 0;
324
325        Examine(function.getEntryBlock());
326
327        if (LLVM_UNLIKELY(mWhileDepth != 0 || mIfDepth != 0)) {
328            throw std::runtime_error("Malformed Pablo AST: Unbalanced If or While nesting depth!");
329        }
330    }
331}
332
333
334void PabloCompiler::Examine(PabloBlock & block) {
335    for (Statement * stmt : block) {
336        if (Call * call = dyn_cast<Call>(stmt)) {
337            mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
338        }
339        else if (If * ifStatement = dyn_cast<If>(stmt)) {
340            Examine(ifStatement->getBody());
341        }
342        else if (While * whileStatement = dyn_cast<While>(stmt)) {
343            mMaxWhileDepth = std::max(mMaxWhileDepth, ++mWhileDepth);
344            Examine(whileStatement->getBody());
345            --mWhileDepth;
346        }
347    }
348}
349
350inline void PabloCompiler::DeclareFunctions(ExecutionEngine * const engine) {
351    if (DumpTrace || TraceNext) {
352        //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
353        mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(mMod->getContext()), Type::getInt8PtrTy(mMod->getContext()), mBitBlockType, NULL);
354        if (engine) engine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
355    }
356}
357   
358void PabloCompiler::DeclareCallFunctions(ExecutionEngine * const engine) {
359    for (auto mapping : mCalleeMap) {
360        const String * callee = mapping.first;
361        auto ei = mExternalMap.find(callee->value());
362        if (ei != mExternalMap.end()) {
363
364            Type * inputType = PointerType::get(StructType::get(mMod->getContext(), std::vector<Type *>{8, mBitBlockType}), 0);
365            Type * carryType = PointerType::get(mBitBlockType, 0);
366            Type * outputType = PointerType::get(StructType::get(mMod->getContext(), std::vector<Type *>{1, mBitBlockType}), 0);
367            FunctionType * functionType = FunctionType::get(Type::getVoidTy(mMod->getContext()), std::vector<Type *>{inputType, carryType, outputType}, false);
368
369            //Starts on process_block
370            SmallVector<AttributeSet, 3> Attrs;
371            Attrs.push_back(AttributeSet::get(mMod->getContext(), 1U, { Attribute::ReadOnly, Attribute::NoCapture }));
372            Attrs.push_back(AttributeSet::get(mMod->getContext(), 2U, { Attribute::NoCapture }));
373            Attrs.push_back(AttributeSet::get(mMod->getContext(), 3U, { Attribute::ReadNone, Attribute::NoCapture }));
374            AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
375
376            Function * externalFunction = cast<Function>(mMod->getOrInsertFunction(callee->value(), functionType, AttrSet));
377            if (LLVM_UNLIKELY(externalFunction == nullptr)) {
378                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
379            }
380            externalFunction->setCallingConv(llvm::CallingConv::C);
381
382            if (engine) engine->addGlobalMapping(externalFunction, ei->second);
383            mCalleeMap[callee] = externalFunction;
384        }
385        else {
386            throw std::runtime_error("External function \"" + callee->to_string() + "\" not installed");
387        }
388    }
389}
390
391void PabloCompiler::compileBlock(PabloBlock & block) {
392    mPabloBlock = & block;
393    mCarryManager->ensureCarriesLoadedLocal();
394    for (const Statement * statement : block) {
395        compileStatement(statement);
396    }
397    mCarryManager->ensureCarriesStoredLocal();
398    mPabloBlock = block.getParent();
399}
400
401Value * PabloCompiler::genBitTest2(Value * e1, Value * e2) {
402    Type * t1 = e1->getType();
403    Type * t2 = e2->getType();
404    if (t1 == mBitBlockType) {
405        if (t2 == mBitBlockType) {
406            return iBuilder.bitblock_any(mBuilder->CreateOr(e1, e2));
407        }
408        else {
409            Value * m1 = mBuilder->CreateZExt(iBuilder.hsimd_signmask(16, e1), t2);
410            return mBuilder->CreateICmpNE(mBuilder->CreateOr(m1, e2), ConstantInt::get(t2, 0));
411        }
412    }
413    else if (t2 == mBitBlockType) {
414        Value * m2 = mBuilder->CreateZExt(iBuilder.hsimd_signmask(16, e2), t1);
415        return mBuilder->CreateICmpNE(mBuilder->CreateOr(e1, m2), ConstantInt::get(t1, 0));
416    }
417    else {
418        return mBuilder->CreateICmpNE(mBuilder->CreateOr(e1, e2), ConstantInt::get(t1, 0));
419    }
420}
421
422void PabloCompiler::compileIf(const If * ifStatement) {       
423    //
424    //  The If-ElseZero stmt:
425    //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
426    //  If the value of the predicate is nonzero, then determine the values of variables
427    //  <var>* by executing the given statements.  Otherwise, the value of the
428    //  variables are all zero.  Requirements: (a) no variable that is defined within
429    //  the body of the if may be accessed outside unless it is explicitly
430    //  listed in the variable list, (b) every variable in the defined list receives
431    //  a value within the body, and (c) the logical consequence of executing
432    //  the statements in the event that the predicate is zero is that the
433    //  values of all defined variables indeed work out to be 0.
434    //
435    //  Simple Implementation with Phi nodes:  a phi node in the if exit block
436    //  is inserted for each variable in the defined variable list.  It receives
437    //  a zero value from the ifentry block and the defined value from the if
438    //  body.
439    //
440
441    BasicBlock * ifEntryBlock = mBuilder->GetInsertBlock();
442    BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
443    BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
444   
445    PabloBlock & ifBody = ifStatement -> getBody();
446   
447    Value * if_test_value = compileExpression(ifStatement->getCondition());
448   
449    mCarryManager->enterScope(&ifBody);
450    if (mCarryManager->blockHasCarries()) {
451        // load the summary variable
452        Value* last_if_pending_data = mCarryManager->getCarrySummaryExpr();
453        mBuilder->CreateCondBr(genBitTest2(if_test_value, last_if_pending_data), ifBodyBlock, ifEndBlock);
454
455    }
456    else {
457        mBuilder->CreateCondBr(iBuilder.bitblock_any(if_test_value), ifBodyBlock, ifEndBlock);
458    }
459    // Entry processing is complete, now handle the body of the if.
460    mBuilder->SetInsertPoint(ifBodyBlock);
461   
462   
463    ++mIfDepth;
464    compileBlock(ifBody);
465    --mIfDepth;
466    if (mCarryManager->blockHasCarries()) {
467        mCarryManager->generateCarryOutSummaryCodeIfNeeded();
468    }
469    BasicBlock * ifBodyFinalBlock = mBuilder->GetInsertBlock();
470    mBuilder->CreateBr(ifEndBlock);
471    //End Block
472    mBuilder->SetInsertPoint(ifEndBlock);
473    for (const PabloAST * node : ifStatement->getDefined()) {
474        const Assign * assign = cast<Assign>(node);
475        PHINode * phi = mBuilder->CreatePHI(mBitBlockType, 2, assign->getName()->value());
476        auto f = mMarkerMap.find(assign);
477        assert (f != mMarkerMap.end());
478        phi->addIncoming(mZeroInitializer, ifEntryBlock);
479        phi->addIncoming(f->second, ifBodyFinalBlock);
480        mMarkerMap[assign] = phi;
481    }
482    // Create the phi Node for the summary variable, if needed.
483    mCarryManager->addSummaryPhiIfNeeded(ifEntryBlock, ifBodyFinalBlock);
484    mCarryManager->leaveScope();
485}
486
487void PabloCompiler::compileWhile(const While * whileStatement) {
488
489    PabloBlock & whileBody = whileStatement -> getBody();
490   
491    BasicBlock * whileEntryBlock = mBuilder->GetInsertBlock();
492    BasicBlock * whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
493    BasicBlock * whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
494
495    mCarryManager->enterScope(&whileBody);
496    mCarryManager->ensureCarriesLoadedRecursive();
497
498    const auto & nextNodes = whileStatement->getVariants();
499    std::vector<PHINode *> nextPhis;
500    nextPhis.reserve(nextNodes.size());
501
502    // On entry to the while structure, proceed to execute the first iteration
503    // of the loop body unconditionally.   The while condition is tested at the end of
504    // the loop.
505
506    mBuilder->CreateBr(whileBodyBlock);
507    mBuilder->SetInsertPoint(whileBodyBlock);
508
509    //
510    // There are 3 sets of Phi nodes for the while loop.
511    // (1) Carry-ins: (a) incoming carry data first iterations, (b) zero thereafter
512    // (2) Carry-out accumulators: (a) zero first iteration, (b) |= carry-out of each iteration
513    // (3) Next nodes: (a) values set up before loop, (b) modified values calculated in loop.
514
515    mCarryManager->initializeCarryDataPhisAtWhileEntry(whileEntryBlock);
516
517    // for any Next nodes in the loop body, initialize to (a) pre-loop value.
518    for (const Next * n : nextNodes) {
519        PHINode * phi = mBuilder->CreatePHI(mBitBlockType, 2, n->getName()->value());
520        auto f = mMarkerMap.find(n->getInitial());
521        assert (f != mMarkerMap.end());
522        phi->addIncoming(f->second, whileEntryBlock);
523        mMarkerMap[n->getInitial()] = phi;
524        nextPhis.push_back(phi);
525    }
526
527    //
528    // Now compile the loop body proper.  Carry-out accumulated values
529    // and iterated values of Next nodes will be computed.
530    ++mWhileDepth;
531    compileBlock(whileBody);
532
533    BasicBlock * whileBodyFinalBlock = mBuilder->GetInsertBlock();
534
535    mCarryManager->extendCarryDataPhisAtWhileBodyFinalBlock(whileBodyFinalBlock);
536
537    // Terminate the while loop body with a conditional branch back.
538    mBuilder->CreateCondBr(iBuilder.bitblock_any(compileExpression(whileStatement->getCondition())), whileBodyBlock, whileEndBlock);
539
540    // and for any Next nodes in the loop body
541    for (unsigned i = 0; i < nextNodes.size(); i++) {
542        const Next * n = nextNodes[i];
543        auto f = mMarkerMap.find(n->getExpr());
544        if (LLVM_UNLIKELY(f == mMarkerMap.end())) {
545            throw std::runtime_error("Next node expression was not compiled!");
546        }
547        nextPhis[i]->addIncoming(f->second, whileBodyFinalBlock);
548    }
549
550    mBuilder->SetInsertPoint(whileEndBlock);
551    if (mCarryManager->blockHasCarries()) {
552        mCarryManager->generateCarryOutSummaryCodeIfNeeded();
553    }
554    --mWhileDepth;
555
556    mCarryManager->ensureCarriesStoredRecursive();
557    mCarryManager->leaveScope();
558}
559
560
561void PabloCompiler::compileStatement(const Statement * stmt) {
562    Value * expr = nullptr;
563    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
564        expr = compileExpression(assign->getExpression());
565    }
566    else if (const Next * next = dyn_cast<const Next>(stmt)) {
567        expr = compileExpression(next->getExpr());
568        if (TraceNext) {
569            genPrintRegister(next->getName()->to_string(), expr);
570        }
571    }
572    else if (const If * ifStatement = dyn_cast<const If>(stmt)) {
573        compileIf(ifStatement);
574        return;
575    }
576    else if (const While * whileStatement = dyn_cast<const While>(stmt)) {
577        compileWhile(whileStatement);
578        return;
579    }
580    else if (const Call* call = dyn_cast<Call>(stmt)) {
581        //Call the callee once and store the result in the marker map.
582        if (mMarkerMap.count(call) != 0) {
583            return;
584        }
585        auto ci = mCalleeMap.find(call->getCallee());
586        if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
587            throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->to_string() + "\"");
588        }       
589
590        Function * function = ci->second;
591        auto arg = function->getArgumentList().begin();
592        Value * carryFramePtr = ConstantPointerNull::get(cast<PointerType>((++arg)->getType()));
593        AllocaInst * outputStruct = mBuilder->CreateAlloca(cast<PointerType>((++arg)->getType())->getElementType());
594        mBuilder->CreateCall3(function, mInputAddressPtr, carryFramePtr, outputStruct);
595        Value * outputPtr = mBuilder->CreateGEP(outputStruct, { mBuilder->getInt32(0), mBuilder->getInt32(0) });
596        expr = mBuilder->CreateAlignedLoad(outputPtr, BLOCK_SIZE / 8, false);
597    }
598    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
599        expr = mBuilder->CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
600    }
601    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
602        expr = mBuilder->CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
603    }
604    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
605        expr = mBuilder->CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
606    }
607    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
608        Value* ifMask = compileExpression(sel->getCondition());
609        Value* ifTrue = mBuilder->CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
610        Value* ifFalse = mBuilder->CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
611        expr = mBuilder->CreateOr(ifTrue, ifFalse);
612    }
613    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
614        expr = genNot(compileExpression(pablo_not->getExpr()));
615    }
616    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
617        Value* strm_value = compileExpression(adv->getExpr());
618        int shift = adv->getAdvanceAmount();
619        unsigned advance_index = adv->getLocalAdvanceIndex();
620        expr = mCarryManager->advanceCarryInCarryOut(advance_index, shift, strm_value);
621    }
622    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt)) {
623        Value * marker = compileExpression(mstar->getMarker());
624        Value * cc = compileExpression(mstar->getCharClass());
625        Value * marker_and_cc = mBuilder->CreateAnd(marker, cc);
626        unsigned carry_index = mstar->getLocalCarryIndex();
627        expr = mBuilder->CreateOr(mBuilder->CreateXor(genAddWithCarry(marker_and_cc, cc, carry_index), cc), marker, "matchstar");
628    }
629    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt)) {
630        Value * marker_expr = compileExpression(sthru->getScanFrom());
631        Value * cc_expr = compileExpression(sthru->getScanThru());
632        unsigned carry_index = sthru->getLocalCarryIndex();
633        expr = mBuilder->CreateAnd(genAddWithCarry(marker_expr, cc_expr, carry_index), genNot(cc_expr), "scanthru");
634    }
635    else {
636        llvm::raw_os_ostream cerr(std::cerr);
637        PabloPrinter::print(stmt, cerr);
638        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
639    }
640    mMarkerMap[stmt] = expr;
641    if (DumpTrace) {
642        genPrintRegister(stmt->getName()->to_string(), expr);
643    }
644   
645}
646
647Value * PabloCompiler::compileExpression(const PabloAST * expr) {
648    if (isa<Ones>(expr)) {
649        return mOneInitializer;
650    }
651    else if (isa<Zeroes>(expr)) {
652        return mZeroInitializer;
653    }
654    auto f = mMarkerMap.find(expr);
655    if (LLVM_UNLIKELY(f == mMarkerMap.end())) {
656        std::string o;
657        llvm::raw_string_ostream str(o);
658        str << "\"";
659        PabloPrinter::print(expr, str);
660        str << "\" was used before definition!";
661        throw std::runtime_error(str.str());
662    }
663    return f->second;
664}
665
666
#ifdef USE_UADD_OVERFLOW
#ifdef USE_TWO_UADD_OVERFLOW
// Appends to mBasicBlock a call to the two-operand add-with-overflow function
// (mFunctionUaddOverflow) and extracts both fields of its result: field 0 as the sum and
// field 1 as the overflow bit.
PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
    std::vector<Value*> operands{int128_e1, int128_e2};
    CallInst * const call = CallInst::Create(mFunctionUaddOverflow, operands, "uadd_overflow_res", mBasicBlock);
    call->setCallingConv(CallingConv::C);
    call->setTailCall(false);
    call->setAttributes(AttributeSet());   // no call-site attributes

    const unsigned sumField[] = {0};
    const unsigned overflowField[] = {1};

    SumWithOverflowPack result;
    result.sum = ExtractValueInst::Create(call, sumField, "sum", mBasicBlock);
    result.obit = ExtractValueInst::Create(call, overflowField, "obit", mBasicBlock);
    return result;
}
#else
// Appends to mBasicBlock a call to the add-with-overflow function that takes a carry-in bit
// (mFunctionUaddOverflowCarryin) and extracts both fields of its result: field 0 as the sum
// and field 1 as the overflow bit.
PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
    std::vector<Value*> operands{int128_e1, int128_e2, int1_cin};
    CallInst * const call = CallInst::Create(mFunctionUaddOverflowCarryin, operands, "uadd_overflow_res", mBasicBlock);
    call->setCallingConv(CallingConv::C);
    call->setTailCall(false);
    call->setAttributes(AttributeSet());   // no call-site attributes

    const unsigned sumField[] = {0};
    const unsigned overflowField[] = {1};

    SumWithOverflowPack result;
    result.sum = ExtractValueInst::Create(call, sumField, "sum", mBasicBlock);
    result.obit = ExtractValueInst::Create(call, overflowField, "obit", mBasicBlock);
    return result;
}
#endif
#endif
717
718
719Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2, unsigned localIndex) {
720    Value * carryq_value = mCarryManager->getCarryOpCarryIn(localIndex);
721#ifdef USE_TWO_UADD_OVERFLOW
722    //This is the ideal implementation, which uses two uadd.with.overflow
723    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
724    CastInst* int128_e1 = new BitCastInst(e1, mBuilder->getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
725    CastInst* int128_e2 = new BitCastInst(e2, mBuilder->getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
726    CastInst* int128_carryq_value = new BitCastInst(carryq_value, mBuilder->getIntNTy(BLOCK_SIZE), "carryq_128", mBasicBlock);
727
728    SumWithOverflowPack sumpack0, sumpack1;
729
730    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
731    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
732
733    Value* obit = mBuilder->CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
734    Value* sum = mBuilder->CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
735
736    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
737    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
738    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
739    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
740    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
741
742#elif defined USE_UADD_OVERFLOW
743    //use llvm.uadd.with.overflow.i128 or i256
744    CastInst* int128_e1 = new BitCastInst(e1, mBuilder->getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
745    CastInst* int128_e2 = new BitCastInst(e2, mBuilder->getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
746
747    //get i1 carryin from iBLOCK_SIZE
748    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
749    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
750    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
751
752    SumWithOverflowPack sumpack0;
753    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
754    Value* obit = sumpack0.obit;
755    Value* sum = mBuilder->CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
756
757    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
758    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
759    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
760    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
761#elif (BLOCK_SIZE == 128)
762    //calculate carry through logical ops
763    Value* carrygen = mBuilder->CreateAnd(e1, e2, "carrygen");
764    Value* carryprop = mBuilder->CreateOr(e1, e2, "carryprop");
765    Value* digitsum = mBuilder->CreateAdd(e1, e2, "digitsum");
766    Value* partial = mBuilder->CreateAdd(digitsum, carryq_value, "partial");
767    Value* digitcarry = mBuilder->CreateOr(carrygen, mBuilder->CreateAnd(carryprop, genNot(partial)));
768    Value* mid_carry_in = genShiftLeft64(mBuilder->CreateLShr(digitcarry, 63), "mid_carry_in");
769
770    Value* sum = mBuilder->CreateAdd(partial, mid_carry_in, "sum");
771    Value* carry_out = genShiftHighbitToLow(BLOCK_SIZE, mBuilder->CreateOr(carrygen, mBuilder->CreateAnd(carryprop, genNot(sum))));
772#else
773    //BLOCK_SIZE == 256, there is no other implementation
774    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
775#endif //USE_TWO_UADD_OVERFLOW
776
777    mCarryManager->setCarryOpCarryOut(localIndex, carry_out);
778    return sum;
779}
780
781Value * PabloCompiler::genShiftHighbitToLow(unsigned FieldWidth, Value * op) {
782    unsigned FieldCount = BLOCK_SIZE/FieldWidth;
783    VectorType * vType = VectorType::get(IntegerType::get(mMod->getContext(), FieldWidth), FieldCount);
784    Value * v = mBuilder->CreateBitCast(op, vType);
785    return mBuilder->CreateBitCast(mBuilder->CreateLShr(v, FieldWidth - 1), mBitBlockType);
786}
787
788Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
789    Value* i128_val = mBuilder->CreateBitCast(e, mBuilder->getIntNTy(BLOCK_SIZE));
790    return mBuilder->CreateBitCast(mBuilder->CreateShl(i128_val, 64, namehint), mBitBlockType);
791}
792
793inline Value* PabloCompiler::genNot(Value* expr) {
794    return mBuilder->CreateXor(expr, mOneInitializer, "not");
795}
796   
797void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
798    assert (marker);
799    if (marker->getType()->isPointerTy()) {
800        marker = mBuilder->CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
801    }
802    Value* indices[] = {mBuilder->getInt64(0), mBuilder->getInt32(index)};
803    Value* gep = mBuilder->CreateGEP(mOutputAddressPtr, indices);
804    mBuilder->CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
805}
806
807CompiledPabloFunction::CompiledPabloFunction(size_t carryDataSize, Function * function, ExecutionEngine * executionEngine)
808: CarryDataSize(carryDataSize)
809, FunctionPointer(executionEngine->getPointerToFunction(function))
810, mFunction(function)
811, mExecutionEngine(executionEngine)
812{
813
814}
815
816// Clean up the memory for the compiled function once we're finished using it.
817CompiledPabloFunction::~CompiledPabloFunction() {
818    if (mExecutionEngine) {
819        assert (mFunction);
820        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
821        delete mExecutionEngine;
822    }
823}
824
825}
Note: See TracBrowser for help on using the repository browser.