source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4723

Last change on this file since 4723 was 4720, checked in by cameron, 4 years ago

Implementation of the Pablo Count operation

File size: 36.0 KB
RevLine 
[3850]1/*
[4533]2 *  Copyright (c) 2014-15 International Characters.
[3850]3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
[4237]7#include <pablo/pablo_compiler.h>
8#include <pablo/codegenstate.h>
[4566]9#include <pablo/carry_data.h>
[4647]10#include <pablo/carry_manager.h>
[4237]11#include <pablo/printer_pablos.h>
[4657]12#include <pablo/function.h>
[4249]13#include <cc/cc_namemap.hpp>
14#include <re/re_name.h>
[4237]15#include <stdexcept>
[4240]16#include <include/simd-lib/bitblock.hpp>
[4416]17#include <sstream>
[4663]18#include <IDISA/idisa_builder.h>
[4274]19#include <llvm/IR/Verifier.h>
20#include <llvm/Pass.h>
21#include <llvm/PassManager.h>
22#include <llvm/ADT/SmallVector.h>
23#include <llvm/Analysis/Passes.h>
24#include <llvm/IR/BasicBlock.h>
25#include <llvm/IR/CallingConv.h>
26#include <llvm/IR/Constants.h>
27#include <llvm/IR/DataLayout.h>
28#include <llvm/IR/DerivedTypes.h>
29#include <llvm/IR/Function.h>
30#include <llvm/IR/GlobalVariable.h>
31#include <llvm/IR/InlineAsm.h>
32#include <llvm/IR/Instructions.h>
33#include <llvm/IR/LLVMContext.h>
34#include <llvm/IR/Module.h>
35#include <llvm/Support/FormattedStream.h>
36#include <llvm/Support/MathExtras.h>
37#include <llvm/Support/Casting.h>
[4280]38#include <llvm/Support/Compiler.h>
[4274]39#include <llvm/Support/Debug.h>
40#include <llvm/Support/TargetSelect.h>
41#include <llvm/Support/Host.h>
42#include <llvm/Transforms/Scalar.h>
43#include <llvm/ExecutionEngine/ExecutionEngine.h>
44#include <llvm/ExecutionEngine/MCJIT.h>
45#include <llvm/IRReader/IRReader.h>
46#include <llvm/Bitcode/ReaderWriter.h>
47#include <llvm/Support/MemoryBuffer.h>
48#include <llvm/IR/IRBuilder.h>
[4510]49#include <llvm/Support/CommandLine.h>
50#include <llvm/ADT/Twine.h>
[4438]51#include <iostream>
[4237]52
[4544]53static cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
[4686]54static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("Print LLVM IR generated by Pablo Compiler."), cl::cat(eIRDumpOptions));
[4378]55
[4544]56static cl::OptionCategory fTracingOptions("Run-time Tracing Options", "These options control execution traces.");
57static cl::opt<bool> DumpTrace("dump-trace", cl::init(false), cl::desc("Generate dynamic traces of executed assignments."), cl::cat(fTracingOptions));
58
[4237]59extern "C" {
[4542]60  void wrapped_print_register(char * regName, BitBlock bit_block) {
61      print_register<BitBlock>(regName, bit_block);
[4237]62  }
63}
64
65namespace pablo {
66
[4657]67PabloCompiler::PabloCompiler()
[4665]68: mMod(nullptr)
[4684]69, mExecutionEngine(nullptr)
[4665]70, mBuilder(nullptr)
[4647]71, mCarryManager(nullptr)
[4684]72, mCarryOffset(0)
[4665]73, mBitBlockType(VectorType::get(IntegerType::get(getGlobalContext(), 64), BLOCK_SIZE / 64))
74, iBuilder(mBitBlockType)
75, mInputType(nullptr)
[4538]76, mCarryDataPtr(nullptr)
[4545]77, mWhileDepth(0)
78, mIfDepth(0)
[4270]79, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
80, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
[4274]81, mFunction(nullptr)
[4659]82, mInputAddressPtr(nullptr)
83, mOutputAddressPtr(nullptr)
[4545]84, mMaxWhileDepth(0)
[4661]85, mPrintRegisterFunction(nullptr) {
86
[4237]87}
88
[4665]89PabloCompiler::~PabloCompiler() {
[4237]90}
[4382]91   
[4237]92
[4542]93void PabloCompiler::genPrintRegister(std::string regName, Value * bitblockValue) {
94    Constant * regNameData = ConstantDataArray::getString(mMod->getContext(), regName);
[4665]95    GlobalVariable *regStrVar = new GlobalVariable(*mMod,
[4542]96                                                   ArrayType::get(IntegerType::get(mMod->getContext(), 8), regName.length()+1),
97                                                   /*isConstant=*/ true,
98                                                   /*Linkage=*/ GlobalValue::PrivateLinkage,
99                                                   /*Initializer=*/ regNameData);
[4628]100    Value * regStrPtr = mBuilder->CreateGEP(regStrVar, {mBuilder->getInt64(0), mBuilder->getInt32(0)});
101    mBuilder->CreateCall(mPrintRegisterFunction, {regStrPtr, bitblockValue});
[4542]102}
[4382]103
[4661]104CompiledPabloFunction PabloCompiler::compile(PabloFunction & function) {
105
[4665]106    Examine(function);
107
[4661]108    InitializeNativeTarget();
109    InitializeNativeTargetAsmPrinter();
110    InitializeNativeTargetAsmParser();
111
[4665]112    Module * module = new Module("", getGlobalContext());
113
114    mMod = module;
115
[4276]116    std::string errMessage;
[4665]117    #ifdef USE_LLVM_3_5
[4276]118    EngineBuilder builder(mMod);
[4665]119    #else
120    EngineBuilder builder(std::move(std::unique_ptr<Module>(mMod)));
121    #endif
[4276]122    builder.setErrorStr(&errMessage);
123    builder.setMCPU(sys::getHostCPUName());
[4665]124    #ifdef USE_LLVM_3_5
[4276]125    builder.setUseMCJIT(true);
[4665]126    #endif
[4545]127    builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
[4684]128    mExecutionEngine = builder.create();
129    if (mExecutionEngine == nullptr) {
[4276]130        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
131    }
[4686]132    DeclareDebugFunctions();
[4657]133
[4665]134    auto func = compile(function, mMod);
135
136    //Display the IR that has been generated by this module.
137    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
138        module->dump();
139    }
140    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
141    verifyModule(*module, &dbgs());
142
[4684]143    mExecutionEngine->finalizeObject();
144    ExecutionEngine * engine = mExecutionEngine;
145    mExecutionEngine = nullptr; // <-- pass ownership of the execution engine to the caller
[4665]146
147    return CompiledPabloFunction(func.second, func.first, engine);
148}
149
150std::pair<llvm::Function *, size_t> PabloCompiler::compile(PabloFunction & function, Module * module) {
151
[4687]152 
153    function.getEntryBlock().enumerateScopes(0);
154   
[4665]155    Examine(function);
156
157    mMod = module;
158
159    mBuilder = new IRBuilder<>(mMod->getContext());
160
161    iBuilder.initialize(mMod, mBuilder);
162
163    mCarryManager = new CarryManager(mBuilder, mBitBlockType, mZeroInitializer, mOneInitializer, &iBuilder);
164
[4659]165    GenerateFunction(function);
[4276]166
[4665]167    mBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mFunction,0));
[4237]168
169    //The basis bits structure
[4680]170    for (unsigned i = 0; i != function.getNumOfParameters(); ++i) {
[4628]171        Value* indices[] = {mBuilder->getInt64(0), mBuilder->getInt32(i)};
[4659]172        Value * gep = mBuilder->CreateGEP(mInputAddressPtr, indices);
[4657]173        LoadInst * basisBit = mBuilder->CreateAlignedLoad(gep, BLOCK_SIZE/8, false, function.getParameter(i)->getName()->to_string());
[4692]174        mMarkerMap[function.getParameter(i)] = basisBit;
[4659]175        if (DumpTrace) {
176            genPrintRegister(function.getParameter(i)->getName()->to_string(), basisBit);
177        }
[4237]178    }
[4690]179     
180    PabloBlock & mainScope = function.getEntryBlock();
181
182    mCarryOffset = mCarryManager->initialize(&mainScope, mCarryDataPtr);
[4548]183   
[4237]184    //Generate the IR instructions for the function.
[4628]185   
[4690]186    compileBlock(mainScope);
187   
[4710]188    mCarryManager->ensureCarriesStoredLocal();
[4703]189    mCarryManager->leaveScope();
190   
191   
[4647]192    mCarryManager->generateBlockNoIncrement();
[4257]193
[4717]194    if (DumpTrace) {
[4647]195        genPrintRegister("mBlockNo", mBuilder->CreateAlignedLoad(mBuilder->CreateBitCast(mCarryManager->getBlockNoPtr(), PointerType::get(mBitBlockType, 0)), BLOCK_SIZE/8, false));
[4548]196    }
[4647]197   
[4657]198    // Write the output values out
[4680]199    for (unsigned i = 0; i != function.getNumOfResults(); ++i) {
200        assert (function.getResult(i));
[4657]201        SetOutputValue(mMarkerMap[function.getResult(i)], i);
202    }
203
[4237]204    //Terminate the block
[4628]205    ReturnInst::Create(mMod->getContext(), mBuilder->GetInsertBlock());
[4237]206
[4665]207    // Clean up
208    delete mCarryManager; mCarryManager = nullptr;
209    delete mBuilder; mBuilder = nullptr;
210    mMod = nullptr; // don't delete this. It's either owned by the ExecutionEngine or the calling function.
[4237]211
[4538]212    //Return the required size of the carry data area to the process_block function.
[4684]213    return std::make_pair(mFunction, mCarryOffset * sizeof(BitBlock));
[4237]214}
215
[4659]216inline void PabloCompiler::GenerateFunction(PabloFunction & function) {
[4680]217    mInputType = PointerType::get(StructType::get(mMod->getContext(), std::vector<Type *>(function.getNumOfParameters(), mBitBlockType)), 0);
[4665]218    Type * carryType = PointerType::get(mBitBlockType, 0);
[4680]219    Type * outputType = PointerType::get(StructType::get(mMod->getContext(), std::vector<Type *>(function.getNumOfResults(), mBitBlockType)), 0);
[4665]220    FunctionType * functionType = FunctionType::get(Type::getVoidTy(mMod->getContext()), {{mInputType, carryType, outputType}}, false);
[4657]221
[4237]222#ifdef USE_UADD_OVERFLOW
[4301]223#ifdef USE_TWO_UADD_OVERFLOW
[4240]224    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
[4237]225    std::vector<Type*>StructTy_0_fields;
226    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
227    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
228    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
229
230    std::vector<Type*>FuncTy_1_args;
231    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
232    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
233    FunctionType* FuncTy_1 = FunctionType::get(
234                                              /*Result=*/StructTy_0,
235                                              /*Params=*/FuncTy_1_args,
236                                              /*isVarArg=*/false);
237
[4301]238    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
[4275]239                                              std::to_string(BLOCK_SIZE));
240    if (!mFunctionUaddOverflow) {
[4301]241        mFunctionUaddOverflow= Function::Create(
[4240]242          /*Type=*/ FuncTy_1,
243          /*Linkage=*/ GlobalValue::ExternalLinkage,
[4301]244          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
[4275]245        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
[4237]246    }
[4275]247    AttributeSet mFunctionUaddOverflowPAL;
[4237]248    {
249        SmallVector<AttributeSet, 4> Attrs;
250        AttributeSet PAS;
251        {
252          AttrBuilder B;
253          B.addAttribute(Attribute::NoUnwind);
254          B.addAttribute(Attribute::ReadNone);
255          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
256        }
257
258        Attrs.push_back(PAS);
[4275]259        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
[4237]260    }
[4275]261    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
[4301]262#else
263    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
264    std::vector<Type*>StructTy_0_fields;
265    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
266    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
267    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
268
269    std::vector<Type*>FuncTy_1_args;
270    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
271    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
272    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
273    FunctionType* FuncTy_1 = FunctionType::get(
274                                              /*Result=*/StructTy_0,
275                                              /*Params=*/FuncTy_1_args,
276                                              /*isVarArg=*/false);
277
278    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
279                                              std::to_string(BLOCK_SIZE));
280    if (!mFunctionUaddOverflowCarryin) {
281        mFunctionUaddOverflowCarryin = Function::Create(
282          /*Type=*/ FuncTy_1,
283          /*Linkage=*/ GlobalValue::ExternalLinkage,
284          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
285        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
286    }
287    AttributeSet mFunctionUaddOverflowCarryinPAL;
288    {
289        SmallVector<AttributeSet, 4> Attrs;
290        AttributeSet PAS;
291        {
292          AttrBuilder B;
293          B.addAttribute(Attribute::NoUnwind);
294          B.addAttribute(Attribute::ReadNone);
295          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
296        }
297
298        Attrs.push_back(PAS);
299        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
300    }
301    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
[4237]302#endif
[4301]303#endif
[4237]304
305    //Starts on process_block
[4538]306    SmallVector<AttributeSet, 4> Attrs;
[4659]307    Attrs.push_back(AttributeSet::get(mMod->getContext(), ~0U, { Attribute::NoUnwind, Attribute::UWTable }));
308    Attrs.push_back(AttributeSet::get(mMod->getContext(), 1U, { Attribute::ReadOnly, Attribute::NoCapture }));
309    Attrs.push_back(AttributeSet::get(mMod->getContext(), 2U, { Attribute::NoCapture }));
310    Attrs.push_back(AttributeSet::get(mMod->getContext(), 3U, { Attribute::ReadNone, Attribute::NoCapture }));
[4237]311    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
312
[4659]313    // Create the function that will be generated.
314    mFunction = Function::Create(functionType, GlobalValue::ExternalLinkage, function.getName()->value(), mMod);
315    mFunction->setCallingConv(CallingConv::C);
[4274]316    mFunction->setAttributes(AttrSet);
[4659]317
318    Function::arg_iterator args = mFunction->arg_begin();
319    mInputAddressPtr = args++;
320    mInputAddressPtr->setName("input");
321    mCarryDataPtr = args++;
322    mCarryDataPtr->setName("carry");
323    mOutputAddressPtr = args++;
324    mOutputAddressPtr->setName("output");
[4237]325}
[4659]326
[4665]327inline void PabloCompiler::Examine(PabloFunction & function) {
[4692]328    mWhileDepth = 0;
329    mIfDepth = 0;
330    mMaxWhileDepth = 0;
331    Examine(function.getEntryBlock());
332    if (LLVM_UNLIKELY(mWhileDepth != 0 || mIfDepth != 0)) {
333        throw std::runtime_error("Malformed Pablo AST: Unbalanced If or While nesting depth!");
[4659]334    }
335}
[4665]336
337
338void PabloCompiler::Examine(PabloBlock & block) {
339    for (Statement * stmt : block) {
[4686]340        if (If * ifStatement = dyn_cast<If>(stmt)) {
[4566]341            Examine(ifStatement->getBody());
[4237]342        }
[4257]343        else if (While * whileStatement = dyn_cast<While>(stmt)) {
[4545]344            mMaxWhileDepth = std::max(mMaxWhileDepth, ++mWhileDepth);
[4566]345            Examine(whileStatement->getBody());
[4545]346            --mWhileDepth;
[4237]347        }
348    }
349}
350
[4686]351inline void PabloCompiler::DeclareDebugFunctions() {
[4717]352    if (DumpTrace) {
[4665]353        //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
354        mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(mMod->getContext()), Type::getInt8PtrTy(mMod->getContext()), mBitBlockType, NULL);
[4684]355        if (mExecutionEngine) mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
[4665]356    }
357}
358
[4643]359void PabloCompiler::compileBlock(PabloBlock & block) {
[4647]360    mPabloBlock = & block;
[4643]361    for (const Statement * statement : block) {
[4343]362        compileStatement(statement);
[4237]363    }
[4643]364    mPabloBlock = block.getParent();
[4237]365}
366
[4708]367    Value * PabloCompiler::genBitTest2(Value * e1, Value * e2) {
368        Type * t1 = e1->getType();
369        Type * t2 = e2->getType();
370        if (t1 == mBitBlockType) {
371            if (t2 == mBitBlockType) {
372                return iBuilder.bitblock_any(mBuilder->CreateOr(e1, e2));
373            }
374            else {
375                Value * m1 = mBuilder->CreateZExt(iBuilder.hsimd_signmask(16, e1), t2);
376                return mBuilder->CreateICmpNE(mBuilder->CreateOr(m1, e2), ConstantInt::get(t2, 0));
377            }
[4663]378        }
[4708]379        else if (t2 == mBitBlockType) {
380            Value * m2 = mBuilder->CreateZExt(iBuilder.hsimd_signmask(16, e2), t1);
381            return mBuilder->CreateICmpNE(mBuilder->CreateOr(e1, m2), ConstantInt::get(t1, 0));
382        }
[4663]383        else {
[4708]384            return mBuilder->CreateICmpNE(mBuilder->CreateOr(e1, e2), ConstantInt::get(t1, 0));
[4663]385        }
386    }
[4708]387   
388    void PabloCompiler::compileIf(const If * ifStatement) {       
[4647]389    //
390    //  The If-ElseZero stmt:
391    //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
392    //  If the value of the predicate is nonzero, then determine the values of variables
393    //  <var>* by executing the given statements.  Otherwise, the value of the
394    //  variables are all zero.  Requirements: (a) no variable that is defined within
395    //  the body of the if may be accessed outside unless it is explicitly
396    //  listed in the variable list, (b) every variable in the defined list receives
397    //  a value within the body, and (c) the logical consequence of executing
398    //  the statements in the event that the predicate is zero is that the
399    //  values of all defined variables indeed work out to be 0.
400    //
401    //  Simple Implementation with Phi nodes:  a phi node in the if exit block
402    //  is inserted for each variable in the defined variable list.  It receives
403    //  a zero value from the ifentry block and the defined value from the if
404    //  body.
405    //
[4665]406
[4647]407    BasicBlock * ifEntryBlock = mBuilder->GetInsertBlock();
408    BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
409    BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
[4541]410   
[4647]411    PabloBlock & ifBody = ifStatement -> getBody();
[4628]412   
[4663]413    Value * if_test_value = compileExpression(ifStatement->getCondition());
[4670]414   
415    mCarryManager->enterScope(&ifBody);
416    if (mCarryManager->blockHasCarries()) {
[4647]417        // load the summary variable
[4670]418        Value* last_if_pending_data = mCarryManager->getCarrySummaryExpr();
[4663]419        mBuilder->CreateCondBr(genBitTest2(if_test_value, last_if_pending_data), ifBodyBlock, ifEndBlock);
420
[4647]421    }
[4663]422    else {
423        mBuilder->CreateCondBr(iBuilder.bitblock_any(if_test_value), ifBodyBlock, ifEndBlock);
424    }
[4647]425    // Entry processing is complete, now handle the body of the if.
426    mBuilder->SetInsertPoint(ifBodyBlock);
427   
[4703]428    mCarryManager->initializeCarryDataAtIfEntry();
[4647]429    compileBlock(ifBody);
[4670]430    if (mCarryManager->blockHasCarries()) {
[4676]431        mCarryManager->generateCarryOutSummaryCodeIfNeeded();
[4647]432    }
433    BasicBlock * ifBodyFinalBlock = mBuilder->GetInsertBlock();
[4710]434    mCarryManager->ensureCarriesStoredLocal();
[4647]435    mBuilder->CreateBr(ifEndBlock);
436    //End Block
437    mBuilder->SetInsertPoint(ifEndBlock);
438    for (const PabloAST * node : ifStatement->getDefined()) {
439        const Assign * assign = cast<Assign>(node);
440        PHINode * phi = mBuilder->CreatePHI(mBitBlockType, 2, assign->getName()->value());
441        auto f = mMarkerMap.find(assign);
442        assert (f != mMarkerMap.end());
443        phi->addIncoming(mZeroInitializer, ifEntryBlock);
444        phi->addIncoming(f->second, ifBodyFinalBlock);
445        mMarkerMap[assign] = phi;
446    }
447    // Create the phi Node for the summary variable, if needed.
[4703]448    mCarryManager->buildCarryDataPhisAfterIfBody(ifEntryBlock, ifBodyFinalBlock);
[4670]449    mCarryManager->leaveScope();
[4535]450}
451
[4647]452void PabloCompiler::compileWhile(const While * whileStatement) {
[4595]453
[4647]454    PabloBlock & whileBody = whileStatement -> getBody();
455   
456    BasicBlock * whileEntryBlock = mBuilder->GetInsertBlock();
457    BasicBlock * whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
458    BasicBlock * whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
[4640]459
[4670]460    mCarryManager->enterScope(&whileBody);
461    mCarryManager->ensureCarriesLoadedRecursive();
[4640]462
[4647]463    const auto & nextNodes = whileStatement->getVariants();
464    std::vector<PHINode *> nextPhis;
465    nextPhis.reserve(nextNodes.size());
[4640]466
[4647]467    // On entry to the while structure, proceed to execute the first iteration
468    // of the loop body unconditionally.   The while condition is tested at the end of
469    // the loop.
[4640]470
[4647]471    mBuilder->CreateBr(whileBodyBlock);
472    mBuilder->SetInsertPoint(whileBodyBlock);
[4640]473
[4647]474    //
475    // There are 3 sets of Phi nodes for the while loop.
476    // (1) Carry-ins: (a) incoming carry data first iterations, (b) zero thereafter
477    // (2) Carry-out accumulators: (a) zero first iteration, (b) |= carry-out of each iteration
478    // (3) Next nodes: (a) values set up before loop, (b) modified values calculated in loop.
[4640]479
[4670]480    mCarryManager->initializeCarryDataPhisAtWhileEntry(whileEntryBlock);
[4640]481
[4647]482    // for any Next nodes in the loop body, initialize to (a) pre-loop value.
483    for (const Next * n : nextNodes) {
484        PHINode * phi = mBuilder->CreatePHI(mBitBlockType, 2, n->getName()->value());
485        auto f = mMarkerMap.find(n->getInitial());
486        assert (f != mMarkerMap.end());
487        phi->addIncoming(f->second, whileEntryBlock);
488        mMarkerMap[n->getInitial()] = phi;
489        nextPhis.push_back(phi);
490    }
[4595]491
[4647]492    //
493    // Now compile the loop body proper.  Carry-out accumulated values
494    // and iterated values of Next nodes will be computed.
495    ++mWhileDepth;
496    compileBlock(whileBody);
[4640]497
[4647]498    BasicBlock * whileBodyFinalBlock = mBuilder->GetInsertBlock();
[4640]499
[4688]500    if (mCarryManager->blockHasCarries()) {
501        mCarryManager->generateCarryOutSummaryCodeIfNeeded();
502    }
[4670]503    mCarryManager->extendCarryDataPhisAtWhileBodyFinalBlock(whileBodyFinalBlock);
[4264]504
[4647]505    // Terminate the while loop body with a conditional branch back.
[4663]506    mBuilder->CreateCondBr(iBuilder.bitblock_any(compileExpression(whileStatement->getCondition())), whileBodyBlock, whileEndBlock);
[4576]507
[4647]508    // and for any Next nodes in the loop body
509    for (unsigned i = 0; i < nextNodes.size(); i++) {
510        const Next * n = nextNodes[i];
511        auto f = mMarkerMap.find(n->getExpr());
512        if (LLVM_UNLIKELY(f == mMarkerMap.end())) {
513            throw std::runtime_error("Next node expression was not compiled!");
[4264]514        }
[4647]515        nextPhis[i]->addIncoming(f->second, whileBodyFinalBlock);
516    }
[4237]517
[4647]518    mBuilder->SetInsertPoint(whileEndBlock);
519    --mWhileDepth;
[4640]520
[4670]521    mCarryManager->ensureCarriesStoredRecursive();
522    mCarryManager->leaveScope();
[4535]523}
524
[4640]525
[4643]526void PabloCompiler::compileStatement(const Statement * stmt) {
527    Value * expr = nullptr;
[4535]528    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
[4657]529        expr = compileExpression(assign->getExpression());
[4237]530    }
[4535]531    else if (const Next * next = dyn_cast<const Next>(stmt)) {
[4643]532        expr = compileExpression(next->getExpr());
[4535]533    }
[4643]534    else if (const If * ifStatement = dyn_cast<const If>(stmt)) {
[4535]535        compileIf(ifStatement);
[4643]536        return;
[4535]537    }
[4643]538    else if (const While * whileStatement = dyn_cast<const While>(stmt)) {
[4535]539        compileWhile(whileStatement);
[4643]540        return;
[4535]541    }
[4410]542    else if (const Call* call = dyn_cast<Call>(stmt)) {
[4237]543        //Call the callee once and store the result in the marker map.
[4643]544        if (mMarkerMap.count(call) != 0) {
545            return;
[4237]546        }
[4674]547
[4684]548        const Prototype * proto = call->getPrototype();
549        const String * callee = proto->getName();
550
551        Type * inputType = StructType::get(mMod->getContext(), std::vector<Type *>{proto->getNumOfParameters(), mBitBlockType});
552        Type * carryType = mBitBlockType;
553        Type * outputType = StructType::get(mMod->getContext(), std::vector<Type *>{proto->getNumOfResults(), mBitBlockType});
554        FunctionType * functionType = FunctionType::get(Type::getVoidTy(mMod->getContext()), std::vector<Type *>{PointerType::get(inputType, 0), PointerType::get(carryType, 0), PointerType::get(outputType, 0)}, false);
555
556        //Starts on process_block
557        SmallVector<AttributeSet, 3> Attrs;
558        Attrs.push_back(AttributeSet::get(mMod->getContext(), 1U, { Attribute::ReadOnly, Attribute::NoCapture }));
559        Attrs.push_back(AttributeSet::get(mMod->getContext(), 2U, { Attribute::NoCapture }));
560        Attrs.push_back(AttributeSet::get(mMod->getContext(), 3U, { Attribute::ReadNone, Attribute::NoCapture }));
561        AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
562
563        Function * externalFunction = cast<Function>(mMod->getOrInsertFunction(callee->value(), functionType, AttrSet));
564        if (LLVM_UNLIKELY(externalFunction == nullptr)) {
565            throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
566        }
567        externalFunction->setCallingConv(llvm::CallingConv::C);
568
569        if (mExecutionEngine) mExecutionEngine->addGlobalMapping(externalFunction, proto->getFunctionPtr());
570
571        // add mCarryOffset to mCarryDataPtr
572        Value * carryFramePtr = mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(mCarryOffset));
573        AllocaInst * outputStruct = mBuilder->CreateAlloca(outputType);
574        mBuilder->CreateCall3(externalFunction, mInputAddressPtr, carryFramePtr, outputStruct);
[4674]575        Value * outputPtr = mBuilder->CreateGEP(outputStruct, { mBuilder->getInt32(0), mBuilder->getInt32(0) });
576        expr = mBuilder->CreateAlignedLoad(outputPtr, BLOCK_SIZE / 8, false);
[4684]577
578        mCarryOffset += (proto->getRequiredStateSpace() + (BLOCK_SIZE / 8) - 1) / (BLOCK_SIZE / 8);
[4237]579    }
[4410]580    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
[4643]581        expr = mBuilder->CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
[4237]582    }
[4410]583    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
[4643]584        expr = mBuilder->CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
[4237]585    }
[4410]586    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
[4643]587        expr = mBuilder->CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
[4237]588    }
[4410]589    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
[4264]590        Value* ifMask = compileExpression(sel->getCondition());
[4628]591        Value* ifTrue = mBuilder->CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
592        Value* ifFalse = mBuilder->CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
[4643]593        expr = mBuilder->CreateOr(ifTrue, ifFalse);
[4237]594    }
[4410]595    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
[4643]596        expr = genNot(compileExpression(pablo_not->getExpr()));
[4237]597    }
[4410]598    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
[4647]599        Value* strm_value = compileExpression(adv->getExpr());
[4270]600        int shift = adv->getAdvanceAmount();
[4717]601        unsigned advance_index = adv->getLocalAdvanceIndex();
602        expr = mCarryManager->advanceCarryInCarryOut(advance_index, shift, strm_value);
[4237]603    }
[4717]604    else if (const Mod64Advance * adv = dyn_cast<Mod64Advance>(stmt)) {
605        Value* strm_value = compileExpression(adv->getExpr());
606        int shift = adv->getAdvanceAmount();
607        expr = iBuilder.simd_slli(64, strm_value, shift);
608    }
[4643]609    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt)) {
[4410]610        Value * marker = compileExpression(mstar->getMarker());
611        Value * cc = compileExpression(mstar->getCharClass());
[4628]612        Value * marker_and_cc = mBuilder->CreateAnd(marker, cc);
[4717]613        unsigned carry_index = mstar->getLocalCarryIndex();
614        Value * sum = mCarryManager->addCarryInCarryOut(carry_index, marker_and_cc, cc);
[4708]615        expr = mBuilder->CreateOr(mBuilder->CreateXor(sum, cc), marker, "matchstar");
[4237]616    }
[4717]617    else if (const Mod64MatchStar * mstar = dyn_cast<Mod64MatchStar>(stmt)) {
618        Value * marker = compileExpression(mstar->getMarker());
619        Value * cc = compileExpression(mstar->getCharClass());
620        Value * marker_and_cc = mBuilder->CreateAnd(marker, cc);
621        Value * sum = iBuilder.simd_add(64, marker_and_cc, cc);
622        expr = mBuilder->CreateOr(mBuilder->CreateXor(sum, cc), marker, "matchstar64");
623    }
[4643]624    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt)) {
[4410]625        Value * marker_expr = compileExpression(sthru->getScanFrom());
626        Value * cc_expr = compileExpression(sthru->getScanThru());
[4717]627        unsigned carry_index = sthru->getLocalCarryIndex();
628        Value * sum = mCarryManager->addCarryInCarryOut(carry_index, marker_expr, cc_expr);
[4708]629        expr = mBuilder->CreateAnd(sum, genNot(cc_expr), "scanthru");
[4237]630    }
[4717]631    else if (const Mod64ScanThru * sthru = dyn_cast<Mod64ScanThru>(stmt)) {
632        Value * marker_expr = compileExpression(sthru->getScanFrom());
633        Value * cc_expr = compileExpression(sthru->getScanThru());
634        Value * sum = iBuilder.simd_add(64, marker_expr, cc_expr);
635        expr = mBuilder->CreateAnd(sum, genNot(cc_expr), "scanthru64");
636    }
[4720]637    else if (const Count * c = dyn_cast<Count>(stmt)) {
638        unsigned count_index = c->getGlobalCountIndex();
639        Value * to_count = compileExpression(c->getExpr());
640        expr = mCarryManager->popCount(to_count, count_index);
641    }
[4359]642    else {
[4567]643        llvm::raw_os_ostream cerr(std::cerr);
644        PabloPrinter::print(stmt, cerr);
[4410]645        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
646    }
[4643]647    mMarkerMap[stmt] = expr;
[4647]648    if (DumpTrace) {
649        genPrintRegister(stmt->getName()->to_string(), expr);
650    }
651   
[4410]652}
653
654Value * PabloCompiler::compileExpression(const PabloAST * expr) {
655    if (isa<Ones>(expr)) {
656        return mOneInitializer;
657    }
658    else if (isa<Zeroes>(expr)) {
659        return mZeroInitializer;
660    }
661    auto f = mMarkerMap.find(expr);
[4643]662    if (LLVM_UNLIKELY(f == mMarkerMap.end())) {
[4567]663        std::string o;
664        llvm::raw_string_ostream str(o);
[4416]665        str << "\"";
666        PabloPrinter::print(expr, str);
667        str << "\" was used before definition!";
668        throw std::runtime_error(str.str());
[4359]669    }
[4410]670    return f->second;
[4237]671}
672
[4537]673
[4237]674#ifdef USE_UADD_OVERFLOW
[4301]675#ifdef USE_TWO_UADD_OVERFLOW
676PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
677    std::vector<Value*> struct_res_params;
678    struct_res_params.push_back(int128_e1);
679    struct_res_params.push_back(int128_e2);
680    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflow, struct_res_params, "uadd_overflow_res", mBasicBlock);
681    struct_res->setCallingConv(CallingConv::C);
682    struct_res->setTailCall(false);
683    AttributeSet struct_res_PAL;
684    struct_res->setAttributes(struct_res_PAL);
685
686    SumWithOverflowPack ret;
687
688    std::vector<unsigned> int128_sum_indices;
689    int128_sum_indices.push_back(0);
690    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
691
692    std::vector<unsigned> int1_obit_indices;
693    int1_obit_indices.push_back(1);
694    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
695
696    return ret;
697}
698#else
[4275]699PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
[4237]700    std::vector<Value*> struct_res_params;
701    struct_res_params.push_back(int128_e1);
702    struct_res_params.push_back(int128_e2);
[4240]703    struct_res_params.push_back(int1_cin);
[4301]704    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflowCarryin, struct_res_params, "uadd_overflow_res", mBasicBlock);
[4237]705    struct_res->setCallingConv(CallingConv::C);
706    struct_res->setTailCall(false);
707    AttributeSet struct_res_PAL;
708    struct_res->setAttributes(struct_res_PAL);
709
710    SumWithOverflowPack ret;
711
712    std::vector<unsigned> int128_sum_indices;
713    int128_sum_indices.push_back(0);
714    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
715
716    std::vector<unsigned> int1_obit_indices;
717    int1_obit_indices.push_back(1);
718    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
719
720    return ret;
721}
722#endif
[4301]723#endif
[4237]724
[4537]725
[4640]726Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2, unsigned localIndex) {
[4670]727    Value * carryq_value = mCarryManager->getCarryOpCarryIn(localIndex);
[4301]728#ifdef USE_TWO_UADD_OVERFLOW
729    //This is the ideal implementation, which uses two uadd.with.overflow
730    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
[4628]731    CastInst* int128_e1 = new BitCastInst(e1, mBuilder->getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
732    CastInst* int128_e2 = new BitCastInst(e2, mBuilder->getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
733    CastInst* int128_carryq_value = new BitCastInst(carryq_value, mBuilder->getIntNTy(BLOCK_SIZE), "carryq_128", mBasicBlock);
[4237]734
[4301]735    SumWithOverflowPack sumpack0, sumpack1;
736
737    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
738    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
739
[4628]740    Value* obit = mBuilder->CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
741    Value* sum = mBuilder->CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
[4301]742
743    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
744    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
745    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
746    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
747    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
748
749#elif defined USE_UADD_OVERFLOW
[4237]750    //use llvm.uadd.with.overflow.i128 or i256
[4628]751    CastInst* int128_e1 = new BitCastInst(e1, mBuilder->getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
752    CastInst* int128_e2 = new BitCastInst(e2, mBuilder->getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
[4301]753
754    //get i1 carryin from iBLOCK_SIZE
755    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
[4240]756    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
757    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
[4301]758
[4240]759    SumWithOverflowPack sumpack0;
760    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
761    Value* obit = sumpack0.obit;
[4628]762    Value* sum = mBuilder->CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
[4301]763
[4237]764    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
[4275]765    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
[4237]766    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
767    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
[4301]768#elif (BLOCK_SIZE == 128)
[4237]769    //calculate carry through logical ops
[4628]770    Value* carrygen = mBuilder->CreateAnd(e1, e2, "carrygen");
771    Value* carryprop = mBuilder->CreateOr(e1, e2, "carryprop");
772    Value* digitsum = mBuilder->CreateAdd(e1, e2, "digitsum");
773    Value* partial = mBuilder->CreateAdd(digitsum, carryq_value, "partial");
774    Value* digitcarry = mBuilder->CreateOr(carrygen, mBuilder->CreateAnd(carryprop, genNot(partial)));
775    Value* mid_carry_in = genShiftLeft64(mBuilder->CreateLShr(digitcarry, 63), "mid_carry_in");
[4237]776
[4628]777    Value* sum = mBuilder->CreateAdd(partial, mid_carry_in, "sum");
778    Value* carry_out = genShiftHighbitToLow(BLOCK_SIZE, mBuilder->CreateOr(carrygen, mBuilder->CreateAnd(carryprop, genNot(sum))));
[4290]779#else
780    //BLOCK_SIZE == 256, there is no other implementation
781    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
[4301]782#endif //USE_TWO_UADD_OVERFLOW
[4290]783
[4670]784    mCarryManager->setCarryOpCarryOut(localIndex, carry_out);
[4237]785    return sum;
786}
787
[4542]788Value * PabloCompiler::genShiftHighbitToLow(unsigned FieldWidth, Value * op) {
789    unsigned FieldCount = BLOCK_SIZE/FieldWidth;
790    VectorType * vType = VectorType::get(IntegerType::get(mMod->getContext(), FieldWidth), FieldCount);
[4628]791    Value * v = mBuilder->CreateBitCast(op, vType);
792    return mBuilder->CreateBitCast(mBuilder->CreateLShr(v, FieldWidth - 1), mBitBlockType);
[4237]793}
794
795Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
[4628]796    Value* i128_val = mBuilder->CreateBitCast(e, mBuilder->getIntNTy(BLOCK_SIZE));
797    return mBuilder->CreateBitCast(mBuilder->CreateShl(i128_val, 64, namehint), mBitBlockType);
[4237]798}
799
[4253]800inline Value* PabloCompiler::genNot(Value* expr) {
[4628]801    return mBuilder->CreateXor(expr, mOneInitializer, "not");
[4237]802}
[4566]803   
[4268]804void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
[4684]805    if (LLVM_UNLIKELY(marker == nullptr)) {
806        throw std::runtime_error("Cannot set result " + std::to_string(index) + " to Null");
807    }
808    if (LLVM_UNLIKELY(marker->getType()->isPointerTy())) {
[4628]809        marker = mBuilder->CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
[4268]810    }
[4628]811    Value* indices[] = {mBuilder->getInt64(0), mBuilder->getInt32(index)};
[4659]812    Value* gep = mBuilder->CreateGEP(mOutputAddressPtr, indices);
[4628]813    mBuilder->CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
[4237]814}
[4268]815
[4538]816CompiledPabloFunction::CompiledPabloFunction(size_t carryDataSize, Function * function, ExecutionEngine * executionEngine)
817: CarryDataSize(carryDataSize)
[4516]818, FunctionPointer(executionEngine->getPointerToFunction(function))
819, mFunction(function)
820, mExecutionEngine(executionEngine)
821{
822
[4268]823}
[4516]824
825// Clean up the memory for the compiled function once we're finished using it.
826CompiledPabloFunction::~CompiledPabloFunction() {
827    if (mExecutionEngine) {
828        assert (mFunction);
829        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
830        delete mExecutionEngine;
831    }
832}
833
834}
Note: See TracBrowser for help on using the repository browser.