source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4647

Last change on this file since 4647 was 4647, checked in by cameron, 4 years ago

Carry Manager system integrated into Pablo compiler

File size: 36.7 KB
RevLine 
[3850]1/*
[4533]2 *  Copyright (c) 2014-15 International Characters.
[3850]3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
[4237]7#include <pablo/pablo_compiler.h>
8#include <pablo/codegenstate.h>
[4566]9#include <pablo/carry_data.h>
[4647]10#include <pablo/carry_manager.h>
[4237]11#include <pablo/printer_pablos.h>
[4249]12#include <cc/cc_namemap.hpp>
13#include <re/re_name.h>
[4237]14#include <stdexcept>
[4240]15#include <include/simd-lib/bitblock.hpp>
[4416]16#include <sstream>
[4274]17#include <llvm/IR/Verifier.h>
18#include <llvm/Pass.h>
19#include <llvm/PassManager.h>
20#include <llvm/ADT/SmallVector.h>
21#include <llvm/Analysis/Passes.h>
22#include <llvm/IR/BasicBlock.h>
23#include <llvm/IR/CallingConv.h>
24#include <llvm/IR/Constants.h>
25#include <llvm/IR/DataLayout.h>
26#include <llvm/IR/DerivedTypes.h>
27#include <llvm/IR/Function.h>
28#include <llvm/IR/GlobalVariable.h>
29#include <llvm/IR/InlineAsm.h>
30#include <llvm/IR/Instructions.h>
31#include <llvm/IR/LLVMContext.h>
32#include <llvm/IR/Module.h>
33#include <llvm/Support/FormattedStream.h>
34#include <llvm/Support/MathExtras.h>
35#include <llvm/Support/Casting.h>
[4280]36#include <llvm/Support/Compiler.h>
[4274]37#include <llvm/Support/Debug.h>
38#include <llvm/Support/TargetSelect.h>
39#include <llvm/Support/Host.h>
40#include <llvm/Transforms/Scalar.h>
41#include <llvm/ExecutionEngine/ExecutionEngine.h>
42#include <llvm/ExecutionEngine/MCJIT.h>
43#include <llvm/IRReader/IRReader.h>
44#include <llvm/Bitcode/ReaderWriter.h>
45#include <llvm/Support/MemoryBuffer.h>
46#include <llvm/IR/IRBuilder.h>
[4510]47#include <llvm/Support/CommandLine.h>
48#include <llvm/ADT/Twine.h>
[4438]49#include <iostream>
[4237]50
[4544]51static cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
[4378]52static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
53
[4544]54static cl::OptionCategory fTracingOptions("Run-time Tracing Options", "These options control execution traces.");
55static cl::opt<bool> TraceNext("trace-next-nodes", cl::init(false), cl::desc("Generate dynamic traces of executed Next nodes (while control variables)."), cl::cat(fTracingOptions));
56static cl::opt<bool> DumpTrace("dump-trace", cl::init(false), cl::desc("Generate dynamic traces of executed assignments."), cl::cat(fTracingOptions));
57
[4237]58extern "C" {
[4542]59  void wrapped_print_register(char * regName, BitBlock bit_block) {
60      print_register<BitBlock>(regName, bit_block);
[4237]61  }
62}
63
64namespace pablo {
65
[4270]66PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
67: mBasisBits(basisBits)
[4572]68#ifdef USE_LLVM_3_5
[4237]69, mMod(new Module("icgrep", getGlobalContext()))
[4572]70#else
71, mModOwner(make_unique<Module>("icgrep", getGlobalContext()))
72, mMod(mModOwner.get())
73#endif
[4628]74, mBuilder(&LLVM_Builder)
[4647]75, mCarryManager(nullptr)
[4237]76, mExecutionEngine(nullptr)
[4270]77, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
[4237]78, mBasisBitsInputPtr(nullptr)
[4538]79, mCarryDataPtr(nullptr)
[4545]80, mWhileDepth(0)
81, mIfDepth(0)
[4270]82, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
83, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
[4253]84, mFunctionType(nullptr)
[4274]85, mFunction(nullptr)
[4237]86, mBasisBitsAddr(nullptr)
[4253]87, mOutputAddrPtr(nullptr)
[4545]88, mMaxWhileDepth(0)
[4510]89, mPrintRegisterFunction(nullptr)
[4237]90{
91    //Create the jit execution engine.up
92    InitializeNativeTarget();
93    InitializeNativeTargetAsmPrinter();
94    InitializeNativeTargetAsmParser();
95    DefineTypes();
96}
97
98PabloCompiler::~PabloCompiler()
99{
[4516]100
[4237]101}
[4382]102   
103void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
104    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
105}
[4237]106
[4542]107void PabloCompiler::genPrintRegister(std::string regName, Value * bitblockValue) {
108    Constant * regNameData = ConstantDataArray::getString(mMod->getContext(), regName);
109    GlobalVariable *regStrVar = new GlobalVariable(*mMod, 
110                                                   ArrayType::get(IntegerType::get(mMod->getContext(), 8), regName.length()+1),
111                                                   /*isConstant=*/ true,
112                                                   /*Linkage=*/ GlobalValue::PrivateLinkage,
113                                                   /*Initializer=*/ regNameData);
[4628]114    Value * regStrPtr = mBuilder->CreateGEP(regStrVar, {mBuilder->getInt64(0), mBuilder->getInt32(0)});
115    mBuilder->CreateCall(mPrintRegisterFunction, {regStrPtr, bitblockValue});
[4542]116}
[4382]117
[4516]118CompiledPabloFunction PabloCompiler::compile(PabloBlock & pb)
[4237]119{
[4545]120    mWhileDepth = 0;
121    mIfDepth = 0;
122    mMaxWhileDepth = 0;
[4647]123    mCarryManager = new CarryManager(mBuilder, mBitBlockType, mZeroInitializer, mOneInitializer);
124   
[4276]125    std::string errMessage;
[4572]126#ifdef USE_LLVM_3_5
[4276]127    EngineBuilder builder(mMod);
[4572]128#else
129    EngineBuilder builder(std::move(mModOwner));
130#endif
[4276]131    builder.setErrorStr(&errMessage);
132    builder.setMCPU(sys::getHostCPUName());
[4572]133#ifdef USE_LLVM_3_5
[4276]134    builder.setUseMCJIT(true);
[4572]135#endif
[4545]136    builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
[4276]137    mExecutionEngine = builder.create();
138    if (mExecutionEngine == nullptr) {
139        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
140    }
[4421]141    DeclareFunctions();
[4276]142
[4647]143    Examine(pb);
[4417]144    DeclareCallFunctions();
[4276]145
[4274]146    Function::arg_iterator args = mFunction->arg_begin();
[4250]147    mBasisBitsAddr = args++;
148    mBasisBitsAddr->setName("basis_bits");
[4538]149    mCarryDataPtr = args++;
150    mCarryDataPtr->setName("carry_data");
[4253]151    mOutputAddrPtr = args++;
152    mOutputAddrPtr->setName("output");
[4237]153
[4545]154    mWhileDepth = 0;
155    mIfDepth = 0;
156    mMaxWhileDepth = 0;
[4628]157    BasicBlock * b = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
158    mBuilder->SetInsertPoint(b);
[4237]159
160    //The basis bits structure
[4270]161    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
[4628]162        Value* indices[] = {mBuilder->getInt64(0), mBuilder->getInt32(i)};
163        Value * gep = mBuilder->CreateGEP(mBasisBitsAddr, indices);
164        LoadInst * basisBit = mBuilder->CreateAlignedLoad(gep, BLOCK_SIZE/8, false, mBasisBits[i]->getName()->to_string());
[4410]165        mMarkerMap.insert(std::make_pair(mBasisBits[i], basisBit));
[4237]166    }
[4647]167       
168    unsigned totalCarryDataSize = mCarryManager->initialize(&pb, mCarryDataPtr);
[4548]169   
[4237]170    //Generate the IR instructions for the function.
[4539]171    compileBlock(pb);
[4628]172   
[4647]173    mCarryManager->generateBlockNoIncrement();
[4257]174
[4548]175    if (DumpTrace || TraceNext) {
[4647]176        genPrintRegister("mBlockNo", mBuilder->CreateAlignedLoad(mBuilder->CreateBitCast(mCarryManager->getBlockNoPtr(), PointerType::get(mBitBlockType, 0)), BLOCK_SIZE/8, false));
[4548]177    }
[4647]178   
[4545]179    if (LLVM_UNLIKELY(mWhileDepth != 0)) {
180        throw std::runtime_error("Non-zero nesting depth error (" + std::to_string(mWhileDepth) + ")");
[4433]181    }
182
[4237]183    //Terminate the block
[4628]184    ReturnInst::Create(mMod->getContext(), mBuilder->GetInsertBlock());
[4237]185
[4348]186    //Display the IR that has been generated by this module.
[4433]187    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
188        mMod->dump();
[4348]189    }
[4237]190    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
191    verifyModule(*mMod, &dbgs());
192
193    mExecutionEngine->finalizeObject();
194
[4538]195    //Return the required size of the carry data area to the process_block function.
[4566]196    return CompiledPabloFunction(totalCarryDataSize * sizeof(BitBlock), mFunction, mExecutionEngine);
[4237]197}
198
199void PabloCompiler::DefineTypes()
200{
[4254]201    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
202    if (structBasisBits == nullptr) {
203        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
[4237]204    }
205    std::vector<Type*>StructTy_struct_Basis_bits_fields;
[4270]206    for (int i = 0; i != mBasisBits.size(); i++)
[4237]207    {
[4270]208        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
[4237]209    }
[4254]210    if (structBasisBits->isOpaque()) {
211        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
[4237]212    }
[4254]213    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
[4237]214
[4254]215    std::vector<Type*>functionTypeArgs;
216    functionTypeArgs.push_back(mBasisBitsInputPtr);
[4237]217
[4538]218    //The carry data array.
[4237]219    //A pointer to the BitBlock vector.
[4270]220    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
[4237]221
222    //The output structure.
[4253]223    StructType * outputStruct = mMod->getTypeByName("struct.Output");
224    if (!outputStruct) {
225        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
[4237]226    }
[4253]227    if (outputStruct->isOpaque()) {
[4254]228        std::vector<Type*>fields;
[4270]229        fields.push_back(mBitBlockType);
230        fields.push_back(mBitBlockType);
[4254]231        outputStruct->setBody(fields, /*isPacked=*/false);
[4237]232    }
[4253]233    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
[4237]234
235    //The &output parameter.
[4254]236    functionTypeArgs.push_back(outputStructPtr);
[4237]237
[4253]238    mFunctionType = FunctionType::get(
[4237]239     /*Result=*/Type::getVoidTy(mMod->getContext()),
[4254]240     /*Params=*/functionTypeArgs,
[4237]241     /*isVarArg=*/false);
242}
243
244void PabloCompiler::DeclareFunctions()
245{
246    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
[4542]247    mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), Type::getInt8PtrTy(getGlobalContext()), mBitBlockType, NULL);
[4510]248    mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
[4628]249    // to call->  mBuilder->CreateCall(mFunc_print_register, unicode_category);
[4237]250
251#ifdef USE_UADD_OVERFLOW
[4301]252#ifdef USE_TWO_UADD_OVERFLOW
[4240]253    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
[4237]254    std::vector<Type*>StructTy_0_fields;
255    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
256    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
257    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
258
259    std::vector<Type*>FuncTy_1_args;
260    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
261    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
262    FunctionType* FuncTy_1 = FunctionType::get(
263                                              /*Result=*/StructTy_0,
264                                              /*Params=*/FuncTy_1_args,
265                                              /*isVarArg=*/false);
266
[4301]267    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
[4275]268                                              std::to_string(BLOCK_SIZE));
269    if (!mFunctionUaddOverflow) {
[4301]270        mFunctionUaddOverflow= Function::Create(
[4240]271          /*Type=*/ FuncTy_1,
272          /*Linkage=*/ GlobalValue::ExternalLinkage,
[4301]273          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
[4275]274        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
[4237]275    }
[4275]276    AttributeSet mFunctionUaddOverflowPAL;
[4237]277    {
278        SmallVector<AttributeSet, 4> Attrs;
279        AttributeSet PAS;
280        {
281          AttrBuilder B;
282          B.addAttribute(Attribute::NoUnwind);
283          B.addAttribute(Attribute::ReadNone);
284          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
285        }
286
287        Attrs.push_back(PAS);
[4275]288        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
[4237]289    }
[4275]290    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
[4301]291#else
292    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
293    std::vector<Type*>StructTy_0_fields;
294    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
295    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
296    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
297
298    std::vector<Type*>FuncTy_1_args;
299    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
300    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
301    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
302    FunctionType* FuncTy_1 = FunctionType::get(
303                                              /*Result=*/StructTy_0,
304                                              /*Params=*/FuncTy_1_args,
305                                              /*isVarArg=*/false);
306
307    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
308                                              std::to_string(BLOCK_SIZE));
309    if (!mFunctionUaddOverflowCarryin) {
310        mFunctionUaddOverflowCarryin = Function::Create(
311          /*Type=*/ FuncTy_1,
312          /*Linkage=*/ GlobalValue::ExternalLinkage,
313          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
314        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
315    }
316    AttributeSet mFunctionUaddOverflowCarryinPAL;
317    {
318        SmallVector<AttributeSet, 4> Attrs;
319        AttributeSet PAS;
320        {
321          AttrBuilder B;
322          B.addAttribute(Attribute::NoUnwind);
323          B.addAttribute(Attribute::ReadNone);
324          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
325        }
326
327        Attrs.push_back(PAS);
328        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
329    }
330    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
[4237]331#endif
[4301]332#endif
[4237]333
334    //Starts on process_block
[4538]335    SmallVector<AttributeSet, 4> Attrs;
[4237]336    AttributeSet PAS;
337    {
338        AttrBuilder B;
339        B.addAttribute(Attribute::ReadOnly);
340        B.addAttribute(Attribute::NoCapture);
341        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
342    }
343    Attrs.push_back(PAS);
344    {
345        AttrBuilder B;
346        B.addAttribute(Attribute::NoCapture);
347        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
348    }
349    Attrs.push_back(PAS);
350    {
351        AttrBuilder B;
352        B.addAttribute(Attribute::NoCapture);
353        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
354    }
355    Attrs.push_back(PAS);
356    {
357        AttrBuilder B;
358        B.addAttribute(Attribute::NoUnwind);
359        B.addAttribute(Attribute::UWTable);
360        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
361    }
362    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
363
364    //Create the function that will be generated.
[4274]365    mFunction = mMod->getFunction("process_block");
366    if (!mFunction) {
367        mFunction = Function::Create(
[4253]368            /*Type=*/mFunctionType,
[4237]369            /*Linkage=*/GlobalValue::ExternalLinkage,
370            /*Name=*/"process_block", mMod);
[4274]371        mFunction->setCallingConv(CallingConv::C);
[4237]372    }
[4274]373    mFunction->setAttributes(AttrSet);
[4237]374}
[4541]375   
[4566]376void PabloCompiler::Examine(PabloBlock & blk) {
[4539]377    for (Statement * stmt : blk) {
[4433]378        if (Call * call = dyn_cast<Call>(stmt)) {
379            mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
380        }
[4257]381        else if (If * ifStatement = dyn_cast<If>(stmt)) {
[4545]382            ++mIfDepth;
[4566]383            Examine(ifStatement->getBody());
[4545]384            --mIfDepth;
[4237]385        }
[4257]386        else if (While * whileStatement = dyn_cast<While>(stmt)) {
[4545]387            mMaxWhileDepth = std::max(mMaxWhileDepth, ++mWhileDepth);
[4566]388            Examine(whileStatement->getBody());
[4545]389            --mWhileDepth;
[4237]390        }
391    }
392}
393
[4276]394void PabloCompiler::DeclareCallFunctions() {
395    for (auto mapping : mCalleeMap) {
396        const String * callee = mapping.first;
[4382]397        //std::cerr << callee->str() << " to be declared\n";
[4510]398        auto ei = mExternalMap.find(callee->value());
[4382]399        if (ei != mExternalMap.end()) {
400            void * fn_ptr = ei->second;
401            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
[4510]402            Value * externalValue = mMod->getOrInsertFunction(callee->value(), mBitBlockType, mBasisBitsInputPtr, NULL);
[4382]403            if (LLVM_UNLIKELY(externalValue == nullptr)) {
[4510]404                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
[4382]405            }
406            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
407            mCalleeMap[callee] = externalValue;
[4276]408        }
[4382]409        else {
[4510]410            throw std::runtime_error("External function \"" + callee->to_string() + "\" not installed");
[4382]411        }
[4276]412    }
413}
414
[4643]415void PabloCompiler::compileBlock(PabloBlock & block) {
[4647]416    mCarryManager->ensureCarriesLoadedLocal(block);
417    mPabloBlock = & block;
[4643]418    for (const Statement * statement : block) {
[4343]419        compileStatement(statement);
[4237]420    }
[4643]421    mPabloBlock = block.getParent();
[4647]422    mCarryManager->ensureCarriesStoredLocal(block);
[4237]423}
424
[4541]425
426void PabloCompiler::compileIf(const If * ifStatement) {       
[4647]427    //
428    //  The If-ElseZero stmt:
429    //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
430    //  If the value of the predicate is nonzero, then determine the values of variables
431    //  <var>* by executing the given statements.  Otherwise, the value of the
432    //  variables are all zero.  Requirements: (a) no variable that is defined within
433    //  the body of the if may be accessed outside unless it is explicitly
434    //  listed in the variable list, (b) every variable in the defined list receives
435    //  a value within the body, and (c) the logical consequence of executing
436    //  the statements in the event that the predicate is zero is that the
437    //  values of all defined variables indeed work out to be 0.
438    //
439    //  Simple Implementation with Phi nodes:  a phi node in the if exit block
440    //  is inserted for each variable in the defined variable list.  It receives
441    //  a zero value from the ifentry block and the defined value from the if
442    //  body.
443    //
444    BasicBlock * ifEntryBlock = mBuilder->GetInsertBlock();
445    BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
446    BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
[4541]447   
[4647]448    PabloBlock & ifBody = ifStatement -> getBody();
[4628]449   
[4647]450    Value* if_test_value = compileExpression(ifStatement->getCondition());
451    if (mCarryManager->blockHasCarries(ifBody)) {
452        // load the summary variable
453        Value* last_if_pending_data = mCarryManager->getCarrySummaryExpr(ifBody);
454        if_test_value = mBuilder->CreateOr(if_test_value, last_if_pending_data);
455    }
456    mBuilder->CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
457    // Entry processing is complete, now handle the body of the if.
458    mBuilder->SetInsertPoint(ifBodyBlock);
459   
460    ++mIfDepth;
461    compileBlock(ifBody);
462    --mIfDepth;
463    if (mCarryManager->blockHasCarries(ifBody)) {
464        mCarryManager->generateCarryOutSummaryCode(ifBody);
465    }
466    BasicBlock * ifBodyFinalBlock = mBuilder->GetInsertBlock();
467    mBuilder->CreateBr(ifEndBlock);
468    //End Block
469    mBuilder->SetInsertPoint(ifEndBlock);
470    for (const PabloAST * node : ifStatement->getDefined()) {
471        const Assign * assign = cast<Assign>(node);
472        PHINode * phi = mBuilder->CreatePHI(mBitBlockType, 2, assign->getName()->value());
473        auto f = mMarkerMap.find(assign);
474        assert (f != mMarkerMap.end());
475        phi->addIncoming(mZeroInitializer, ifEntryBlock);
476        phi->addIncoming(f->second, ifBodyFinalBlock);
477        mMarkerMap[assign] = phi;
478    }
479    // Create the phi Node for the summary variable, if needed.
480    if (mCarryManager->summaryNeededInParentBlock(ifBody)) {
481        mCarryManager->addSummaryPhi(ifBody, ifEntryBlock, ifBodyFinalBlock);
482    }
[4535]483}
484
[4647]485void PabloCompiler::compileWhile(const While * whileStatement) {
[4595]486
[4647]487    PabloBlock & whileBody = whileStatement -> getBody();
488   
489    BasicBlock * whileEntryBlock = mBuilder->GetInsertBlock();
490    BasicBlock * whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
491    BasicBlock * whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
[4640]492
[4647]493    mCarryManager->ensureCarriesLoadedRecursive(whileBody);
[4640]494
[4647]495    const auto & nextNodes = whileStatement->getVariants();
496    std::vector<PHINode *> nextPhis;
497    nextPhis.reserve(nextNodes.size());
[4640]498
[4647]499    // On entry to the while structure, proceed to execute the first iteration
500    // of the loop body unconditionally.   The while condition is tested at the end of
501    // the loop.
[4640]502
[4647]503    mBuilder->CreateBr(whileBodyBlock);
504    mBuilder->SetInsertPoint(whileBodyBlock);
[4640]505
[4647]506    //
507    // There are 3 sets of Phi nodes for the while loop.
508    // (1) Carry-ins: (a) incoming carry data first iterations, (b) zero thereafter
509    // (2) Carry-out accumulators: (a) zero first iteration, (b) |= carry-out of each iteration
510    // (3) Next nodes: (a) values set up before loop, (b) modified values calculated in loop.
[4640]511
[4647]512    mCarryManager->initializeCarryDataPhisAtWhileEntry(whileBody, whileEntryBlock);
[4640]513
[4647]514    // for any Next nodes in the loop body, initialize to (a) pre-loop value.
515    for (const Next * n : nextNodes) {
516        PHINode * phi = mBuilder->CreatePHI(mBitBlockType, 2, n->getName()->value());
517        auto f = mMarkerMap.find(n->getInitial());
518        assert (f != mMarkerMap.end());
519        phi->addIncoming(f->second, whileEntryBlock);
520        mMarkerMap[n->getInitial()] = phi;
521        nextPhis.push_back(phi);
522    }
[4595]523
[4647]524    //
525    // Now compile the loop body proper.  Carry-out accumulated values
526    // and iterated values of Next nodes will be computed.
527    ++mWhileDepth;
528    compileBlock(whileBody);
[4640]529
[4647]530    BasicBlock * whileBodyFinalBlock = mBuilder->GetInsertBlock();
[4640]531
[4647]532    mCarryManager->extendCarryDataPhisAtWhileBodyFinalBlock(whileBody, whileBodyFinalBlock);
[4264]533
[4647]534    // Terminate the while loop body with a conditional branch back.
535    mBuilder->CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
[4576]536
[4647]537    // and for any Next nodes in the loop body
538    for (unsigned i = 0; i < nextNodes.size(); i++) {
539        const Next * n = nextNodes[i];
540        auto f = mMarkerMap.find(n->getExpr());
541        if (LLVM_UNLIKELY(f == mMarkerMap.end())) {
542            throw std::runtime_error("Next node expression was not compiled!");
[4264]543        }
[4647]544        nextPhis[i]->addIncoming(f->second, whileBodyFinalBlock);
545    }
[4237]546
[4647]547    mBuilder->SetInsertPoint(whileEndBlock);
548    --mWhileDepth;
[4640]549
[4647]550    mCarryManager->ensureCarriesStoredRecursive(whileBody);
[4535]551}
552
[4640]553
[4643]554void PabloCompiler::compileStatement(const Statement * stmt) {
555    Value * expr = nullptr;
[4535]556    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
[4643]557        expr = compileExpression(assign->getExpr());
[4535]558        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
559            SetOutputValue(expr, assign->getOutputIndex());
560        }
[4237]561    }
[4535]562    else if (const Next * next = dyn_cast<const Next>(stmt)) {
[4643]563        expr = compileExpression(next->getExpr());
[4544]564        if (TraceNext) {
[4643]565            genPrintRegister(next->getName()->to_string(), expr);
[4544]566        }
[4535]567    }
[4643]568    else if (const If * ifStatement = dyn_cast<const If>(stmt)) {
[4535]569        compileIf(ifStatement);
[4643]570        return;
[4535]571    }
[4643]572    else if (const While * whileStatement = dyn_cast<const While>(stmt)) {
[4535]573        compileWhile(whileStatement);
[4643]574        return;
[4535]575    }
[4410]576    else if (const Call* call = dyn_cast<Call>(stmt)) {
[4237]577        //Call the callee once and store the result in the marker map.
[4643]578        if (mMarkerMap.count(call) != 0) {
579            return;
[4237]580        }
[4643]581        auto ci = mCalleeMap.find(call->getCallee());
582        if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
583            throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->to_string() + "\"");
584        }
585        expr = mBuilder->CreateCall(ci->second, mBasisBitsAddr);
[4237]586    }
[4410]587    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
[4643]588        expr = mBuilder->CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
[4237]589    }
[4410]590    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
[4643]591        expr = mBuilder->CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
[4237]592    }
[4410]593    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
[4643]594        expr = mBuilder->CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
[4237]595    }
[4410]596    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
[4264]597        Value* ifMask = compileExpression(sel->getCondition());
[4628]598        Value* ifTrue = mBuilder->CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
599        Value* ifFalse = mBuilder->CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
[4643]600        expr = mBuilder->CreateOr(ifTrue, ifFalse);
[4237]601    }
[4410]602    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
[4643]603        expr = genNot(compileExpression(pablo_not->getExpr()));
[4237]604    }
[4410]605    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
[4647]606        Value* strm_value = compileExpression(adv->getExpr());
[4270]607        int shift = adv->getAdvanceAmount();
[4541]608        unsigned advance_index = adv->getLocalAdvanceIndex();
[4647]609        if (shift == 1) {
610            expr = genUnitAdvanceWithCarry(strm_value, advance_index);
[4546]611        }
[4647]612        else if (shift < LongAdvanceBase) {
613            expr = genShortAdvanceWithCarry(strm_value, advance_index, shift);
614        }
615        else {
616            expr = genLongAdvanceWithCarry(strm_value, advance_index, shift);
617        }
[4237]618    }
[4643]619    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt)) {
[4410]620        Value * marker = compileExpression(mstar->getMarker());
621        Value * cc = compileExpression(mstar->getCharClass());
[4628]622        Value * marker_and_cc = mBuilder->CreateAnd(marker, cc);
[4541]623        unsigned carry_index = mstar->getLocalCarryIndex();
[4643]624        expr = mBuilder->CreateOr(mBuilder->CreateXor(genAddWithCarry(marker_and_cc, cc, carry_index), cc), marker, "matchstar");
[4237]625    }
[4643]626    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt)) {
[4410]627        Value * marker_expr = compileExpression(sthru->getScanFrom());
628        Value * cc_expr = compileExpression(sthru->getScanThru());
[4541]629        unsigned carry_index = sthru->getLocalCarryIndex();
[4643]630        expr = mBuilder->CreateAnd(genAddWithCarry(marker_expr, cc_expr, carry_index), genNot(cc_expr), "scanthru");
[4237]631    }
[4359]632    else {
[4567]633        llvm::raw_os_ostream cerr(std::cerr);
634        PabloPrinter::print(stmt, cerr);
[4410]635        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
636    }
[4643]637    mMarkerMap[stmt] = expr;
[4647]638    if (DumpTrace) {
639        genPrintRegister(stmt->getName()->to_string(), expr);
640    }
641   
[4410]642}
643
644Value * PabloCompiler::compileExpression(const PabloAST * expr) {
645    if (isa<Ones>(expr)) {
646        return mOneInitializer;
647    }
648    else if (isa<Zeroes>(expr)) {
649        return mZeroInitializer;
650    }
651    auto f = mMarkerMap.find(expr);
[4643]652    if (LLVM_UNLIKELY(f == mMarkerMap.end())) {
[4567]653        std::string o;
654        llvm::raw_string_ostream str(o);
[4416]655        str << "\"";
656        PabloPrinter::print(expr, str);
657        str << "\" was used before definition!";
658        throw std::runtime_error(str.str());
[4359]659    }
[4410]660    return f->second;
[4237]661}
662
[4537]663
#ifdef USE_UADD_OVERFLOW
#ifdef USE_TWO_UADD_OVERFLOW
// Emits a call to mFunctionUaddOverflow with the two operands and returns the two
// fields of its struct result: element 0 as `sum`, element 1 as `obit`.
PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
    const std::vector<Value*> callArgs{int128_e1, int128_e2};
    CallInst* const call = CallInst::Create(mFunctionUaddOverflow, callArgs, "uadd_overflow_res", mBasicBlock);
    call->setCallingConv(CallingConv::C);
    call->setTailCall(false);
    // An empty attribute set is attached to the call itself.
    const AttributeSet noAttributes;
    call->setAttributes(noAttributes);

    SumWithOverflowPack ret;

    const std::vector<unsigned> sumIndex{0u};
    ret.sum = ExtractValueInst::Create(call, sumIndex, "sum", mBasicBlock);

    const std::vector<unsigned> overflowIndex{1u};
    ret.obit = ExtractValueInst::Create(call, overflowIndex, "obit", mBasicBlock);

    return ret;
}
#else
// Emits a call to mFunctionUaddOverflowCarryin with the two operands and the carry-in
// bit and returns the two fields of its struct result: element 0 as `sum`, element 1
// as `obit`.
PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
    const std::vector<Value*> callArgs{int128_e1, int128_e2, int1_cin};
    CallInst* const call = CallInst::Create(mFunctionUaddOverflowCarryin, callArgs, "uadd_overflow_res", mBasicBlock);
    call->setCallingConv(CallingConv::C);
    call->setTailCall(false);
    // An empty attribute set is attached to the call itself.
    const AttributeSet noAttributes;
    call->setAttributes(noAttributes);

    SumWithOverflowPack ret;

    const std::vector<unsigned> sumIndex{0u};
    ret.sum = ExtractValueInst::Create(call, sumIndex, "sum", mBasicBlock);

    const std::vector<unsigned> overflowIndex{1u};
    ret.obit = ExtractValueInst::Create(call, overflowIndex, "obit", mBasicBlock);

    return ret;
}
#endif
#endif
[4237]714
[4537]715
[4640]716Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2, unsigned localIndex) {
[4647]717    Value * carryq_value = mCarryManager->getCarryOpCarryIn(mPabloBlock, localIndex);
[4301]718#ifdef USE_TWO_UADD_OVERFLOW
719    //This is the ideal implementation, which uses two uadd.with.overflow
720    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
[4628]721    CastInst* int128_e1 = new BitCastInst(e1, mBuilder->getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
722    CastInst* int128_e2 = new BitCastInst(e2, mBuilder->getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
723    CastInst* int128_carryq_value = new BitCastInst(carryq_value, mBuilder->getIntNTy(BLOCK_SIZE), "carryq_128", mBasicBlock);
[4237]724
[4301]725    SumWithOverflowPack sumpack0, sumpack1;
726
727    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
728    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
729
[4628]730    Value* obit = mBuilder->CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
731    Value* sum = mBuilder->CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
[4301]732
733    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
734    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
735    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
736    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
737    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
738
739#elif defined USE_UADD_OVERFLOW
[4237]740    //use llvm.uadd.with.overflow.i128 or i256
[4628]741    CastInst* int128_e1 = new BitCastInst(e1, mBuilder->getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
742    CastInst* int128_e2 = new BitCastInst(e2, mBuilder->getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
[4301]743
744    //get i1 carryin from iBLOCK_SIZE
745    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
[4240]746    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
747    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
[4301]748
[4240]749    SumWithOverflowPack sumpack0;
750    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
751    Value* obit = sumpack0.obit;
[4628]752    Value* sum = mBuilder->CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
[4301]753
[4237]754    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
[4275]755    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
[4237]756    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
757    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
[4301]758#elif (BLOCK_SIZE == 128)
[4237]759    //calculate carry through logical ops
[4628]760    Value* carrygen = mBuilder->CreateAnd(e1, e2, "carrygen");
761    Value* carryprop = mBuilder->CreateOr(e1, e2, "carryprop");
762    Value* digitsum = mBuilder->CreateAdd(e1, e2, "digitsum");
763    Value* partial = mBuilder->CreateAdd(digitsum, carryq_value, "partial");
764    Value* digitcarry = mBuilder->CreateOr(carrygen, mBuilder->CreateAnd(carryprop, genNot(partial)));
765    Value* mid_carry_in = genShiftLeft64(mBuilder->CreateLShr(digitcarry, 63), "mid_carry_in");
[4237]766
[4628]767    Value* sum = mBuilder->CreateAdd(partial, mid_carry_in, "sum");
768    Value* carry_out = genShiftHighbitToLow(BLOCK_SIZE, mBuilder->CreateOr(carrygen, mBuilder->CreateAnd(carryprop, genNot(sum))));
[4290]769#else
770    //BLOCK_SIZE == 256, there is no other implementation
771    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
[4301]772#endif //USE_TWO_UADD_OVERFLOW
[4290]773
[4647]774    mCarryManager->setCarryOpCarryOut(mPabloBlock, localIndex, carry_out);
[4237]775    return sum;
776}
777
[4258]778inline Value* PabloCompiler::genBitBlockAny(Value* test) {
[4628]779    Value* cast_marker_value_1 = mBuilder->CreateBitCast(test, mBuilder->getIntNTy(BLOCK_SIZE));
780    return mBuilder->CreateICmpEQ(cast_marker_value_1, ConstantInt::get(mBuilder->getIntNTy(BLOCK_SIZE), 0));
[4237]781}
782
[4542]783Value * PabloCompiler::genShiftHighbitToLow(unsigned FieldWidth, Value * op) {
784    unsigned FieldCount = BLOCK_SIZE/FieldWidth;
785    VectorType * vType = VectorType::get(IntegerType::get(mMod->getContext(), FieldWidth), FieldCount);
[4628]786    Value * v = mBuilder->CreateBitCast(op, vType);
787    return mBuilder->CreateBitCast(mBuilder->CreateLShr(v, FieldWidth - 1), mBitBlockType);
[4237]788}
789
790Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
[4628]791    Value* i128_val = mBuilder->CreateBitCast(e, mBuilder->getIntNTy(BLOCK_SIZE));
792    return mBuilder->CreateBitCast(mBuilder->CreateShl(i128_val, 64, namehint), mBitBlockType);
[4237]793}
794
[4253]795inline Value* PabloCompiler::genNot(Value* expr) {
[4628]796    return mBuilder->CreateXor(expr, mOneInitializer, "not");
[4237]797}
[4547]798
[4640]799Value* PabloCompiler::genUnitAdvanceWithCarry(Value* strm_value, unsigned localIndex) {
[4647]800    Value * carry_in = mCarryManager->getUnitAdvanceCarryIn(mPabloBlock, localIndex);
[4566]801    Value* result_value;
802   
803#if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
[4647]804    Value* advanceq_value = genShiftHighbitToLow(BLOCK_SIZE, carry_in);
[4628]805    Value* srli_1_value = mBuilder->CreateLShr(strm_value, 63);
[4566]806    Value* packed_shuffle;
[4628]807    Constant* const_packed_1_elems [] = {mBuilder->getInt32(0), mBuilder->getInt32(2)};
[4566]808    Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
[4628]809    packed_shuffle = mBuilder->CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
[4566]810   
[4628]811    Constant* const_packed_2_elems[] = {mBuilder->getInt64(1), mBuilder->getInt64(1)};
[4566]812    Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
813   
[4628]814    Value* shl_value = mBuilder->CreateShl(strm_value, const_packed_2);
815    result_value = mBuilder->CreateOr(shl_value, packed_shuffle, "advance");
[4547]816#else
[4647]817    Value* advanceq_longint = mBuilder->CreateBitCast(carry_in, mBuilder->getIntNTy(BLOCK_SIZE));
[4628]818    Value* strm_longint = mBuilder->CreateBitCast(strm_value, mBuilder->getIntNTy(BLOCK_SIZE));
819    Value* adv_longint = mBuilder->CreateOr(mBuilder->CreateShl(strm_longint, 1), mBuilder->CreateLShr(advanceq_longint, BLOCK_SIZE - 1), "advance");
820    result_value = mBuilder->CreateBitCast(adv_longint, mBitBlockType);
[4547]821   
822#endif
[4647]823    mCarryManager->setUnitAdvanceCarryOut(mPabloBlock, localIndex, strm_value);
[4547]824    return result_value;
825}
[4566]826                   
[4647]827Value* PabloCompiler::genShortAdvanceWithCarry(Value* strm_value, unsigned localIndex, int shift_amount) {
828    Value * carry_in = mCarryManager->getShortAdvanceCarryIn(mPabloBlock, localIndex, shift_amount);
829    Value* advanceq_longint = mBuilder->CreateBitCast(carry_in, mBuilder->getIntNTy(BLOCK_SIZE));
830    Value* strm_longint = mBuilder->CreateBitCast(strm_value, mBuilder->getIntNTy(BLOCK_SIZE));
831    Value* adv_longint = mBuilder->CreateOr(mBuilder->CreateShl(strm_longint, shift_amount), mBuilder->CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
832    Value* result_value = mBuilder->CreateBitCast(adv_longint, mBitBlockType);
833    mCarryManager->setShortAdvanceCarryOut(mPabloBlock, localIndex, shift_amount, strm_value);
[4421]834    return result_value;
[4237]835}
[4647]836                   
837Value* PabloCompiler::genLongAdvanceWithCarry(Value* strm_value, unsigned localIndex, int shift_amount) {
838    return mCarryManager->longAdvanceCarryInCarryOut(mPabloBlock, localIndex, shift_amount, strm_value);
839}
[4547]840   
[4268]841void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
842    if (marker->getType()->isPointerTy()) {
[4628]843        marker = mBuilder->CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
[4268]844    }
[4628]845    Value* indices[] = {mBuilder->getInt64(0), mBuilder->getInt32(index)};
846    Value* gep = mBuilder->CreateGEP(mOutputAddrPtr, indices);
847    mBuilder->CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
[4237]848}
[4268]849
[4538]850CompiledPabloFunction::CompiledPabloFunction(size_t carryDataSize, Function * function, ExecutionEngine * executionEngine)
851: CarryDataSize(carryDataSize)
[4516]852, FunctionPointer(executionEngine->getPointerToFunction(function))
853, mFunction(function)
854, mExecutionEngine(executionEngine)
855{
856
[4268]857}
[4516]858
859// Clean up the memory for the compiled function once we're finished using it.
860CompiledPabloFunction::~CompiledPabloFunction() {
861    if (mExecutionEngine) {
862        assert (mFunction);
863        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
864        delete mExecutionEngine;
865    }
866}
867
868}
Note: See TracBrowser for help on using the repository browser.