source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4542

Last change on this file since 4542 was 4542, checked in by cameron, 4 years ago

Named print register function, field size in genShiftHighbitToLow

File size: 43.7 KB
RevLine 
[3850]1/*
[4533]2 *  Copyright (c) 2014-15 International Characters.
[3850]3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
[4237]7#include <pablo/pablo_compiler.h>
8#include <pablo/codegenstate.h>
9#include <pablo/printer_pablos.h>
[4249]10#include <cc/cc_namemap.hpp>
11#include <re/re_name.h>
[4237]12#include <stdexcept>
[4240]13#include <include/simd-lib/bitblock.hpp>
[4416]14#include <sstream>
[4274]15#include <llvm/IR/Verifier.h>
16#include <llvm/Pass.h>
17#include <llvm/PassManager.h>
18#include <llvm/ADT/SmallVector.h>
19#include <llvm/Analysis/Passes.h>
20#include <llvm/IR/BasicBlock.h>
21#include <llvm/IR/CallingConv.h>
22#include <llvm/IR/Constants.h>
23#include <llvm/IR/DataLayout.h>
24#include <llvm/IR/DerivedTypes.h>
25#include <llvm/IR/Function.h>
26#include <llvm/IR/GlobalVariable.h>
27#include <llvm/IR/InlineAsm.h>
28#include <llvm/IR/Instructions.h>
29#include <llvm/IR/LLVMContext.h>
30#include <llvm/IR/Module.h>
31#include <llvm/Support/FormattedStream.h>
32#include <llvm/Support/MathExtras.h>
33#include <llvm/Support/Casting.h>
[4280]34#include <llvm/Support/Compiler.h>
[4274]35#include <llvm/Support/Debug.h>
36#include <llvm/Support/TargetSelect.h>
37#include <llvm/Support/Host.h>
38#include <llvm/Transforms/Scalar.h>
39#include <llvm/ExecutionEngine/ExecutionEngine.h>
40#include <llvm/ExecutionEngine/MCJIT.h>
41#include <llvm/IRReader/IRReader.h>
42#include <llvm/Bitcode/ReaderWriter.h>
43#include <llvm/Support/MemoryBuffer.h>
44#include <llvm/IR/IRBuilder.h>
[4510]45#include <llvm/Support/CommandLine.h>
46#include <llvm/ADT/Twine.h>
[4438]47#include <iostream>
[4237]48
[4378]49cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
50static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
51
[4237]52extern "C" {
[4542]53  void wrapped_print_register(char * regName, BitBlock bit_block) {
54      print_register<BitBlock>(regName, bit_block);
[4237]55  }
56}
57
58namespace pablo {
59
[4270]60PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
61: mBasisBits(basisBits)
[4237]62, mMod(new Module("icgrep", getGlobalContext()))
63, mBasicBlock(nullptr)
64, mExecutionEngine(nullptr)
[4270]65, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
[4237]66, mBasisBitsInputPtr(nullptr)
[4538]67, mCarryDataPtr(nullptr)
[4257]68, mNestingDepth(0)
[4270]69, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
70, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
[4253]71, mFunctionType(nullptr)
[4274]72, mFunction(nullptr)
[4237]73, mBasisBitsAddr(nullptr)
[4253]74, mOutputAddrPtr(nullptr)
[4276]75, mMaxNestingDepth(0)
[4510]76, mPrintRegisterFunction(nullptr)
[4237]77{
78    //Create the jit execution engine.up
79    InitializeNativeTarget();
80    InitializeNativeTargetAsmPrinter();
81    InitializeNativeTargetAsmParser();
82    DefineTypes();
83}
84
85PabloCompiler::~PabloCompiler()
86{
[4516]87
[4237]88}
[4382]89   
90void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
91    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
92}
[4237]93
[4542]94void PabloCompiler::genPrintRegister(std::string regName, Value * bitblockValue) {
95    IRBuilder <> b(mBasicBlock);
96    Constant * regNameData = ConstantDataArray::getString(mMod->getContext(), regName);
97    GlobalVariable *regStrVar = new GlobalVariable(*mMod, 
98                                                   ArrayType::get(IntegerType::get(mMod->getContext(), 8), regName.length()+1),
99                                                   /*isConstant=*/ true,
100                                                   /*Linkage=*/ GlobalValue::PrivateLinkage,
101                                                   /*Initializer=*/ regNameData);
102    Value * regStrPtr = b.CreateGEP(regStrVar, {b.getInt64(0), b.getInt32(0)});
103    b.CreateCall(mPrintRegisterFunction, {regStrPtr, bitblockValue});
104}
[4382]105
[4516]106CompiledPabloFunction PabloCompiler::compile(PabloBlock & pb)
[4237]107{
[4276]108    mNestingDepth = 0;
109    mMaxNestingDepth = 0;
[4541]110    unsigned totalCarryDataSize = Examine(pb, 0); 
111    mCarryDataVector.resize(totalCarryDataSize);
112    mCarryDataSummaryIdx.resize(totalCarryDataSize);
[4276]113    std::string errMessage;
114    EngineBuilder builder(mMod);
115    builder.setErrorStr(&errMessage);
116    builder.setMCPU(sys::getHostCPUName());
117    builder.setUseMCJIT(true);
[4281]118    builder.setOptLevel(mMaxNestingDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
[4276]119    mExecutionEngine = builder.create();
120    if (mExecutionEngine == nullptr) {
121        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
122    }
[4421]123    DeclareFunctions();
[4276]124
[4417]125    DeclareCallFunctions();
[4276]126
[4274]127    Function::arg_iterator args = mFunction->arg_begin();
[4250]128    mBasisBitsAddr = args++;
129    mBasisBitsAddr->setName("basis_bits");
[4538]130    mCarryDataPtr = args++;
131    mCarryDataPtr->setName("carry_data");
[4253]132    mOutputAddrPtr = args++;
133    mOutputAddrPtr->setName("output");
[4237]134
[4257]135    mNestingDepth = 0;
[4276]136    mMaxNestingDepth = 0;
[4274]137    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
[4237]138
139    //The basis bits structure
[4270]140    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
[4237]141        IRBuilder<> b(mBasicBlock);
142        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
[4264]143        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
[4510]144        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, mBasisBits[i]->getName()->to_string());
[4410]145        mMarkerMap.insert(std::make_pair(mBasisBits[i], basisBit));
[4237]146    }
[4289]147
[4237]148    //Generate the IR instructions for the function.
[4539]149    compileBlock(pb);
[4257]150
[4433]151    if (LLVM_UNLIKELY(mNestingDepth != 0)) {
152        throw std::runtime_error("Non-zero nesting depth error (" + std::to_string(mNestingDepth) + ")");
153    }
154
[4237]155    //Terminate the block
156    ReturnInst::Create(mMod->getContext(), mBasicBlock);
157
[4348]158    //Display the IR that has been generated by this module.
[4433]159    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
160        mMod->dump();
[4348]161    }
[4237]162    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
163    verifyModule(*mMod, &dbgs());
164
165    mExecutionEngine->finalizeObject();
166
[4538]167    //Return the required size of the carry data area to the process_block function.
[4541]168    // Reserve 1 element in the carry data area for current block number (future). TODO
169    return CompiledPabloFunction((totalCarryDataSize + 1) * sizeof(BitBlock), mFunction, mExecutionEngine);
[4237]170}
171
172void PabloCompiler::DefineTypes()
173{
[4254]174    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
175    if (structBasisBits == nullptr) {
176        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
[4237]177    }
178    std::vector<Type*>StructTy_struct_Basis_bits_fields;
[4270]179    for (int i = 0; i != mBasisBits.size(); i++)
[4237]180    {
[4270]181        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
[4237]182    }
[4254]183    if (structBasisBits->isOpaque()) {
184        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
[4237]185    }
[4254]186    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
[4237]187
[4254]188    std::vector<Type*>functionTypeArgs;
189    functionTypeArgs.push_back(mBasisBitsInputPtr);
[4237]190
[4538]191    //The carry data array.
[4237]192    //A pointer to the BitBlock vector.
[4270]193    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
[4237]194
195    //The output structure.
[4253]196    StructType * outputStruct = mMod->getTypeByName("struct.Output");
197    if (!outputStruct) {
198        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
[4237]199    }
[4253]200    if (outputStruct->isOpaque()) {
[4254]201        std::vector<Type*>fields;
[4270]202        fields.push_back(mBitBlockType);
203        fields.push_back(mBitBlockType);
[4254]204        outputStruct->setBody(fields, /*isPacked=*/false);
[4237]205    }
[4253]206    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
[4237]207
208    //The &output parameter.
[4254]209    functionTypeArgs.push_back(outputStructPtr);
[4237]210
[4253]211    mFunctionType = FunctionType::get(
[4237]212     /*Result=*/Type::getVoidTy(mMod->getContext()),
[4254]213     /*Params=*/functionTypeArgs,
[4237]214     /*isVarArg=*/false);
215}
216
217void PabloCompiler::DeclareFunctions()
218{
219    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
[4542]220    mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), Type::getInt8PtrTy(getGlobalContext()), mBitBlockType, NULL);
[4510]221    mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
[4237]222    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
223
224#ifdef USE_UADD_OVERFLOW
[4301]225#ifdef USE_TWO_UADD_OVERFLOW
[4240]226    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
[4237]227    std::vector<Type*>StructTy_0_fields;
228    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
229    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
230    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
231
232    std::vector<Type*>FuncTy_1_args;
233    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
234    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
235    FunctionType* FuncTy_1 = FunctionType::get(
236                                              /*Result=*/StructTy_0,
237                                              /*Params=*/FuncTy_1_args,
238                                              /*isVarArg=*/false);
239
[4301]240    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
[4275]241                                              std::to_string(BLOCK_SIZE));
242    if (!mFunctionUaddOverflow) {
[4301]243        mFunctionUaddOverflow= Function::Create(
[4240]244          /*Type=*/ FuncTy_1,
245          /*Linkage=*/ GlobalValue::ExternalLinkage,
[4301]246          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
[4275]247        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
[4237]248    }
[4275]249    AttributeSet mFunctionUaddOverflowPAL;
[4237]250    {
251        SmallVector<AttributeSet, 4> Attrs;
252        AttributeSet PAS;
253        {
254          AttrBuilder B;
255          B.addAttribute(Attribute::NoUnwind);
256          B.addAttribute(Attribute::ReadNone);
257          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
258        }
259
260        Attrs.push_back(PAS);
[4275]261        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
[4237]262    }
[4275]263    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
[4301]264#else
265    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
266    std::vector<Type*>StructTy_0_fields;
267    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
268    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
269    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
270
271    std::vector<Type*>FuncTy_1_args;
272    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
273    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
274    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
275    FunctionType* FuncTy_1 = FunctionType::get(
276                                              /*Result=*/StructTy_0,
277                                              /*Params=*/FuncTy_1_args,
278                                              /*isVarArg=*/false);
279
280    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
281                                              std::to_string(BLOCK_SIZE));
282    if (!mFunctionUaddOverflowCarryin) {
283        mFunctionUaddOverflowCarryin = Function::Create(
284          /*Type=*/ FuncTy_1,
285          /*Linkage=*/ GlobalValue::ExternalLinkage,
286          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
287        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
288    }
289    AttributeSet mFunctionUaddOverflowCarryinPAL;
290    {
291        SmallVector<AttributeSet, 4> Attrs;
292        AttributeSet PAS;
293        {
294          AttrBuilder B;
295          B.addAttribute(Attribute::NoUnwind);
296          B.addAttribute(Attribute::ReadNone);
297          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
298        }
299
300        Attrs.push_back(PAS);
301        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
302    }
303    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
[4237]304#endif
[4301]305#endif
[4237]306
307    //Starts on process_block
[4538]308    SmallVector<AttributeSet, 4> Attrs;
[4237]309    AttributeSet PAS;
310    {
311        AttrBuilder B;
312        B.addAttribute(Attribute::ReadOnly);
313        B.addAttribute(Attribute::NoCapture);
314        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
315    }
316    Attrs.push_back(PAS);
317    {
318        AttrBuilder B;
319        B.addAttribute(Attribute::NoCapture);
320        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
321    }
322    Attrs.push_back(PAS);
323    {
324        AttrBuilder B;
325        B.addAttribute(Attribute::NoCapture);
326        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
327    }
328    Attrs.push_back(PAS);
329    {
330        AttrBuilder B;
331        B.addAttribute(Attribute::NoUnwind);
332        B.addAttribute(Attribute::UWTable);
333        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
334    }
335    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
336
337    //Create the function that will be generated.
[4274]338    mFunction = mMod->getFunction("process_block");
339    if (!mFunction) {
340        mFunction = Function::Create(
[4253]341            /*Type=*/mFunctionType,
[4237]342            /*Linkage=*/GlobalValue::ExternalLinkage,
343            /*Name=*/"process_block", mMod);
[4274]344        mFunction->setCallingConv(CallingConv::C);
[4237]345    }
[4274]346    mFunction->setAttributes(AttrSet);
[4237]347}
[4541]348   
349// CarryDataNumbering
[4421]350//
[4541]351// For each PabloBlock, a contiguous CarryData area holds carry,
352// and advance values that are generated in one block for use in the
353// next.  For a given block, the carry data area contains the
354// carries, the advances and the nested data for contained blocks,
355// if any.
356// Notes:
357//   (a) an additional data entry is created for each if-statement
[4421]358//       having more than one carry or advance opreation within it.  This
359//       additional entry is a summary entry which must be nonzero to
360//       indicate that there are carry or advance bits associated with
361//       any operation within the if-structure (at any nesting level).
362//   (b) advancing by a large amount may require multiple advance entries.
363//       the number of advance entries for an operation Adv(x, n) is
[4541]364//       (n + BLOCK_SIZE - 1) / BLOCK_SIZE
[4421]365//
366// Examine precomputes some CarryNumbering and AdvanceNumbering, as
367// well as mMaxNestingDepth of while loops.
368//
[4541]369unsigned PabloCompiler::Examine(PabloBlock & blk, unsigned carryDataIndexIn) {
[4539]370    // Count local carries and advances at this level.
[4541]371    unsigned carryDataIndex = carryDataIndexIn;
[4539]372    unsigned localCarries = 0;
373    unsigned localAdvances = 0;
[4541]374    unsigned nestedCarryDataSize = 0;
[4539]375    for (Statement * stmt : blk) {
[4433]376        if (Advance * adv = dyn_cast<Advance>(stmt)) {
[4541]377            adv->setLocalAdvanceIndex(localAdvances);
[4539]378            localAdvances += (adv->getAdvanceAmount() + BLOCK_SIZE - 1) / BLOCK_SIZE;
[4237]379        }
[4541]380        else if (MatchStar * m = dyn_cast<MatchStar>(stmt)) {
381            m->setLocalCarryIndex(localCarries);
[4539]382            ++localCarries;
[4253]383        }
[4541]384        else if (ScanThru * s = dyn_cast<ScanThru>(stmt)) {
385            s->setLocalCarryIndex(localCarries);
386            ++localCarries;
387        }
[4539]388    }
[4541]389    carryDataIndex += localCarries + localAdvances;
[4539]390    for (Statement * stmt : blk) {
[4433]391        if (Call * call = dyn_cast<Call>(stmt)) {
392            mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
393        }
[4257]394        else if (If * ifStatement = dyn_cast<If>(stmt)) {
[4541]395            const auto ifCarryDataSize = Examine(ifStatement->getBody(), carryDataIndex);
396            nestedCarryDataSize += ifCarryDataSize;
397            carryDataIndex += ifCarryDataSize;
[4237]398        }
[4257]399        else if (While * whileStatement = dyn_cast<While>(stmt)) {
[4276]400            mMaxNestingDepth = std::max(mMaxNestingDepth, ++mNestingDepth);
[4541]401            const auto whileCarryDataSize = Examine(whileStatement->getBody(), carryDataIndex);
[4276]402            --mNestingDepth;
[4541]403            nestedCarryDataSize += whileCarryDataSize;
404            carryDataIndex += whileCarryDataSize;
[4237]405        }
406    }
[4541]407    blk.setCarryIndexBase(carryDataIndexIn);
408    blk.setLocalCarryCount(localCarries);
409    blk.setLocalAdvanceCount(localAdvances);
410    unsigned totalCarryDataSize = localCarries + localAdvances + nestedCarryDataSize;
411    if (totalCarryDataSize > 1) {
412        // Need extra space for the summary variable, always the last
413        // entry within the block.
414        totalCarryDataSize += 1;
415    }
416    blk.setTotalCarryDataSize(totalCarryDataSize);
417    return totalCarryDataSize;
[4237]418}
419
[4276]420void PabloCompiler::DeclareCallFunctions() {
421    for (auto mapping : mCalleeMap) {
422        const String * callee = mapping.first;
[4382]423        //std::cerr << callee->str() << " to be declared\n";
[4510]424        auto ei = mExternalMap.find(callee->value());
[4382]425        if (ei != mExternalMap.end()) {
426            void * fn_ptr = ei->second;
427            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
[4510]428            Value * externalValue = mMod->getOrInsertFunction(callee->value(), mBitBlockType, mBasisBitsInputPtr, NULL);
[4382]429            if (LLVM_UNLIKELY(externalValue == nullptr)) {
[4510]430                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
[4382]431            }
432            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
433            mCalleeMap[callee] = externalValue;
[4276]434        }
[4382]435        else {
[4510]436            throw std::runtime_error("External function \"" + callee->to_string() + "\" not installed");
[4382]437        }
[4276]438    }
439}
440
[4539]441void PabloCompiler::compileBlock(const PabloBlock & blk) {
442    for (const Statement * statement : blk) {
[4343]443        compileStatement(statement);
[4237]444    }
445}
446
[4541]447
448
449
450void PabloCompiler::compileIf(const If * ifStatement) {       
[4410]451        //
452        //  The If-ElseZero stmt:
453        //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
454        //  If the value of the predicate is nonzero, then determine the values of variables
455        //  <var>* by executing the given statements.  Otherwise, the value of the
456        //  variables are all zero.  Requirements: (a) no variable that is defined within
457        //  the body of the if may be accessed outside unless it is explicitly
458        //  listed in the variable list, (b) every variable in the defined list receives
459        //  a value within the body, and (c) the logical consequence of executing
460        //  the statements in the event that the predicate is zero is that the
461        //  values of all defined variables indeed work out to be 0.
462        //
463        //  Simple Implementation with Phi nodes:  a phi node in the if exit block
464        //  is inserted for each variable in the defined variable list.  It receives
465        //  a zero value from the ifentry block and the defined value from the if
466        //  body.
467        //
[4344]468        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
[4274]469        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
470        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
[4344]471       
472        IRBuilder<> b_entry(ifEntryBlock);
473        mBasicBlock = ifEntryBlock;
[4541]474   
475        const unsigned baseCarryDataIdx = ifStatement->getBody().getCarryIndexBase();
476        const unsigned carryDataSize = ifStatement->getBody().getTotalCarryDataSize();
477        const unsigned carrySummaryIndex = baseCarryDataIdx + carryDataSize - 1;
478       
[4344]479        Value* if_test_value = compileExpression(ifStatement->getCondition());
[4541]480        if (carryDataSize > 0) {
481            // load the summary variable
482            Value* last_if_pending_data = genCarryDataLoad(carrySummaryIndex);
483            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_data);
[4344]484        }
485        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
[4237]486
[4344]487        // Entry processing is complete, now handle the body of the if.
[4237]488        mBasicBlock = ifBodyBlock;
[4541]489        compileBlock(ifStatement -> getBody());
[4376]490
491        // If we compiled an If or a While statement, we won't be in the same basic block as before.
492        // Create the branch from the current basic block to the end block.
493        IRBuilder<> bIfBody(mBasicBlock);
[4344]494        // After the recursive compile, now insert the code to compute the summary
495        // carry over variable.
496       
[4541]497        if (carryDataSize > 1) {
498            // If there was only one carry entry, then it also serves as the summary variable.
499            // Otherwise, we need to combine entries to compute the summary.
[4344]500            Value * carry_summary = mZeroInitializer;
[4541]501            for (int c = baseCarryDataIdx; c < carrySummaryIndex; c++) {
502                int s = mCarryDataSummaryIdx[c];
[4401]503                if (s == -1) {
[4541]504                    Value* carryq_value = mCarryDataVector[c];
[4410]505                    if (carry_summary == mZeroInitializer) {
506                        carry_summary = carryq_value;
507                    }
508                    else {
509                        carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
510                    }
[4541]511                    mCarryDataSummaryIdx[c] = carrySummaryIndex;
[4401]512                }
[4237]513            }
[4541]514            genCarryDataStore(carry_summary, carrySummaryIndex);
[4288]515        }
[4376]516        bIfBody.CreateBr(ifEndBlock);
[4344]517        //End Block
518        IRBuilder<> bEnd(ifEndBlock);
[4510]519        for (const PabloAST * node : ifStatement->getDefined()) {
520            const Assign * assign = cast<Assign>(node);
521            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, assign->getName()->value());
522            auto f = mMarkerMap.find(assign);
[4344]523            assert (f != mMarkerMap.end());
524            phi->addIncoming(mZeroInitializer, ifEntryBlock);
[4371]525            phi->addIncoming(f->second, mBasicBlock);
[4510]526            mMarkerMap[assign] = phi;
[4237]527        }
[4403]528        // Create the phi Node for the summary variable.
[4541]529        if (carryDataSize > 0) {
[4410]530            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
531            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
[4541]532            summary_phi->addIncoming(mCarryDataVector[carrySummaryIndex], mBasicBlock);
533            mCarryDataVector[carrySummaryIndex] = summary_phi;
[4403]534        }
535       
[4371]536        // Set the basic block to the new end block
537        mBasicBlock = ifEndBlock;
[4535]538}
539
540void PabloCompiler::compileWhile(const While * whileStatement) {
[4541]541        const unsigned baseCarryDataIdx = whileStatement->getBody().getCarryIndexBase();
542        const unsigned carryDataSize = whileStatement->getBody().getTotalCarryDataSize();
543   
[4257]544        if (mNestingDepth == 0) {
[4541]545            for (auto i = 0; i < carryDataSize; ++i) {
546                genCarryDataLoad(baseCarryDataIdx + i);
[4257]547            }
[4289]548        }
[4257]549
[4276]550        SmallVector<const Next*, 4> nextNodes;
551        for (const PabloAST * node : whileStatement->getBody()) {
[4264]552            if (isa<Next>(node)) {
553                nextNodes.push_back(cast<Next>(node));
554            }
555        }
556
[4541]557        // Compile the initial iteration statements; the calls to genCarryDataStore will update the
558        // mCarryDataVector with the appropriate values. Although we're not actually entering a new basic
559        // block yet, increment the nesting depth so that any calls to genCarryDataLoad or genCarryDataStore
[4289]560        // will refer to the previous value.
[4276]561
[4257]562        ++mNestingDepth;
[4276]563
[4539]564        compileBlock(whileStatement->getBody());
[4289]565
[4258]566        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
567        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
568        // but works for now.
[4237]569
[4274]570        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
571        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
572        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
[4257]573
[4539]574        // Note: compileBlock may update the mBasicBlock pointer if the body contains nested loops. It
[4268]575        // may not be same one that we entered the function with.
[4258]576        IRBuilder<> bEntry(mBasicBlock);
577        bEntry.CreateBr(whileCondBlock);
[4237]578
[4257]579        // CONDITION BLOCK
580        IRBuilder<> bCond(whileCondBlock);
581        // generate phi nodes for any carry propogating instruction
[4541]582        std::vector<PHINode*> phiNodes(carryDataSize + nextNodes.size());
[4264]583        unsigned index = 0;
[4541]584        for (index = 0; index < carryDataSize; ++index) {
[4270]585            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
[4541]586            phi->addIncoming(mCarryDataVector[baseCarryDataIdx + index], mBasicBlock);
587            mCarryDataVector[baseCarryDataIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
[4264]588            phiNodes[index] = phi;
[4257]589        }
[4264]590        // and for any Next nodes in the loop body
[4276]591        for (const Next * n : nextNodes) {
[4510]592            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->value());
[4410]593            auto f = mMarkerMap.find(n->getInitial());
[4264]594            assert (f != mMarkerMap.end());
595            phi->addIncoming(f->second, mBasicBlock);
[4410]596            mMarkerMap[n->getInitial()] = phi;
[4264]597            phiNodes[index++] = phi;
598        }
[4237]599
[4257]600        mBasicBlock = whileCondBlock;
[4264]601        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
[4237]602
[4257]603        // BODY BLOCK
[4237]604        mBasicBlock = whileBodyBlock;
[4539]605        compileBlock(whileStatement->getBody());
[4257]606        // update phi nodes for any carry propogating instruction
607        IRBuilder<> bWhileBody(mBasicBlock);
[4541]608        for (index = 0; index < carryDataSize; ++index) {
609            Value * carryOut = bWhileBody.CreateOr(phiNodes[index], mCarryDataVector[baseCarryDataIdx + index]);
[4264]610            PHINode * phi = phiNodes[index];
611            phi->addIncoming(carryOut, mBasicBlock);
[4541]612            mCarryDataVector[baseCarryDataIdx + index] = phi;
[4237]613        }
[4264]614        // and for any Next nodes in the loop body
[4276]615        for (const Next * n : nextNodes) {
[4410]616            auto f = mMarkerMap.find(n->getInitial());
[4264]617            assert (f != mMarkerMap.end());
618            PHINode * phi = phiNodes[index++];
619            phi->addIncoming(f->second, mBasicBlock);
[4410]620            mMarkerMap[n->getInitial()] = phi;
[4264]621        }
[4237]622
[4257]623        bWhileBody.CreateBr(whileCondBlock);
[4237]624
[4257]625        // EXIT BLOCK
[4289]626        mBasicBlock = whileEndBlock;
[4257]627        if (--mNestingDepth == 0) {
[4541]628            for (index = 0; index < carryDataSize; ++index) {
629                genCarryDataStore(phiNodes[index], baseCarryDataIdx + index);
[4289]630            }
[4237]631        }
[4535]632 
633}
634
635void PabloCompiler::compileStatement(const Statement * stmt)
636{
637    IRBuilder<> b(mBasicBlock);
638    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
639        Value * expr = compileExpression(assign->getExpr());
640        mMarkerMap[assign] = expr;
641        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
642            SetOutputValue(expr, assign->getOutputIndex());
643        }
[4237]644    }
[4535]645    else if (const Next * next = dyn_cast<const Next>(stmt)) {
646        Value * expr = compileExpression(next->getExpr());
647        mMarkerMap[next->getInitial()] = expr;
648    }
649    else if (const If * ifStatement = dyn_cast<const If>(stmt))
650    {
651        compileIf(ifStatement);
652    }
653    else if (const While * whileStatement = dyn_cast<const While>(stmt))
654    {
655        compileWhile(whileStatement);
656    }
[4410]657    else if (const Call* call = dyn_cast<Call>(stmt)) {
[4237]658        //Call the callee once and store the result in the marker map.
[4410]659        auto mi = mMarkerMap.find(call);
[4237]660        if (mi == mMarkerMap.end()) {
661            auto ci = mCalleeMap.find(call->getCallee());
[4280]662            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
[4510]663                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->to_string() + "\"");
[4237]664            }
[4410]665            mi = mMarkerMap.insert(std::make_pair(call, b.CreateCall(ci->second, mBasisBitsAddr))).first;
[4237]666        }
[4410]667        // return mi->second;
[4237]668    }
[4410]669    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
670        Value * expr = b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
671        mMarkerMap[pablo_and] = expr;
672        // return expr;
[4237]673    }
[4410]674    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
675        Value * expr = b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
676        mMarkerMap[pablo_or] = expr;
677        // return expr;
[4237]678    }
[4410]679    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
680        Value * expr = b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
681        mMarkerMap[pablo_xor] = expr;
682        // return expr;
[4237]683    }
[4410]684    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
[4264]685        Value* ifMask = compileExpression(sel->getCondition());
686        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
687        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
[4410]688        Value * expr = b.CreateOr(ifTrue, ifFalse);
689        mMarkerMap[sel] = expr;
690        // return expr;
[4237]691    }
[4410]692    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
693        Value * expr = genNot(compileExpression(pablo_not->getExpr()));
694        mMarkerMap[pablo_not] = expr;
695        // return expr;
[4237]696    }
[4410]697    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
[4237]698        Value* strm_value = compileExpression(adv->getExpr());
[4270]699        int shift = adv->getAdvanceAmount();
[4541]700        unsigned advance_index = adv->getLocalAdvanceIndex();
701        Value * expr = genAdvanceWithCarry(strm_value, shift, advance_index, stmt->getParent());
[4410]702        mMarkerMap[adv] = expr;
703        // return expr;
[4237]704    }
[4410]705    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt))
[4237]706    {
[4410]707        Value * marker = compileExpression(mstar->getMarker());
708        Value * cc = compileExpression(mstar->getCharClass());
709        Value * marker_and_cc = b.CreateAnd(marker, cc);
[4541]710        unsigned carry_index = mstar->getLocalCarryIndex();
711        Value * expr = b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc, carry_index, stmt->getParent()), cc), marker, "matchstar");
[4410]712        mMarkerMap[mstar] = expr;
713        // return expr;
[4237]714    }
[4410]715    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt))
[4237]716    {
[4410]717        Value * marker_expr = compileExpression(sthru->getScanFrom());
718        Value * cc_expr = compileExpression(sthru->getScanThru());
[4541]719        unsigned carry_index = sthru->getLocalCarryIndex();
720        Value * expr = b.CreateAnd(genAddWithCarry(marker_expr, cc_expr, carry_index, stmt->getParent()), genNot(cc_expr), "scanthru");
[4410]721        mMarkerMap[sthru] = expr;
722        // return expr;
[4237]723    }
[4359]724    else {
[4410]725        PabloPrinter::print(stmt, std::cerr);
726        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
727    }
728}
729
730Value * PabloCompiler::compileExpression(const PabloAST * expr) {
731    if (isa<Ones>(expr)) {
732        return mOneInitializer;
733    }
734    else if (isa<Zeroes>(expr)) {
735        return mZeroInitializer;
736    }
737    else if (const Next * next = dyn_cast<Next>(expr)) {
738        expr = next->getInitial();
739    }
740    auto f = mMarkerMap.find(expr);
741    if (f == mMarkerMap.end()) {
[4416]742        std::stringstream str;
743        str << "\"";
744        PabloPrinter::print(expr, str);
745        str << "\" was used before definition!";
746        throw std::runtime_error(str.str());
[4359]747    }
[4410]748    return f->second;
[4237]749}
750
[4537]751
#ifdef USE_UADD_OVERFLOW
#ifdef USE_TWO_UADD_OVERFLOW
// Appends a call to mFunctionUaddOverflow(int128_e1, int128_e2) to
// mBasicBlock and unpacks the returned aggregate: element 0 becomes the sum
// and element 1 the overflow bit.
PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
    std::vector<Value*> args{int128_e1, int128_e2};
    CallInst* const call = CallInst::Create(mFunctionUaddOverflow, args, "uadd_overflow_res", mBasicBlock);
    call->setCallingConv(CallingConv::C);
    call->setTailCall(false);
    call->setAttributes(AttributeSet());

    const std::vector<unsigned> sumIndex{0};
    const std::vector<unsigned> overflowIndex{1};

    SumWithOverflowPack pack;
    pack.sum = ExtractValueInst::Create(call, sumIndex, "sum", mBasicBlock);
    pack.obit = ExtractValueInst::Create(call, overflowIndex, "obit", mBasicBlock);
    return pack;
}
#else
// Appends a call to mFunctionUaddOverflowCarryin(int128_e1, int128_e2,
// int1_cin) to mBasicBlock and unpacks the returned aggregate: element 0
// becomes the sum and element 1 the overflow bit.
PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
    std::vector<Value*> args{int128_e1, int128_e2, int1_cin};
    CallInst* const call = CallInst::Create(mFunctionUaddOverflowCarryin, args, "uadd_overflow_res", mBasicBlock);
    call->setCallingConv(CallingConv::C);
    call->setTailCall(false);
    call->setAttributes(AttributeSet());

    const std::vector<unsigned> sumIndex{0};
    const std::vector<unsigned> overflowIndex{1};

    SumWithOverflowPack pack;
    pack.sum = ExtractValueInst::Create(call, sumIndex, "sum", mBasicBlock);
    pack.obit = ExtractValueInst::Create(call, overflowIndex, "obit", mBasicBlock);
    return pack;
}
#endif
#endif
[4237]802
[4537]803
[4541]804Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2, unsigned localIndex, const PabloBlock * blk) {
[4237]805    IRBuilder<> b(mBasicBlock);
806
807    //CarryQ - carry in.
[4541]808    const int carryIdx = blk->getCarryIndexBase() + localIndex;
809    Value* carryq_value = genCarryDataLoad(carryIdx);
[4301]810#ifdef USE_TWO_UADD_OVERFLOW
811    //This is the ideal implementation, which uses two uadd.with.overflow
812    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
813    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
814    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
815    CastInst* int128_carryq_value = new BitCastInst(carryq_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "carryq_128", mBasicBlock);
[4237]816
[4301]817    SumWithOverflowPack sumpack0, sumpack1;
818
819    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
820    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
821
822    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
823    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
824
825    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
826    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
827    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
828    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
829    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
830
831#elif defined USE_UADD_OVERFLOW
[4237]832    //use llvm.uadd.with.overflow.i128 or i256
833    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
834    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
[4301]835
836    //get i1 carryin from iBLOCK_SIZE
837    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
[4240]838    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
839    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
[4301]840
[4240]841    SumWithOverflowPack sumpack0;
842    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
843    Value* obit = sumpack0.obit;
[4275]844    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
[4301]845
[4237]846    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
[4275]847    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
[4237]848    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
849    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
[4301]850#elif (BLOCK_SIZE == 128)
[4237]851    //calculate carry through logical ops
852    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
853    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
854    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
855    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
856    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
857    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
858
859    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
[4542]860    Value* carry_out = genShiftHighbitToLow(BLOCK_SIZE, b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))));
[4290]861#else
862    //BLOCK_SIZE == 256, there is no other implementation
863    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
[4301]864#endif //USE_TWO_UADD_OVERFLOW
[4290]865
[4541]866    genCarryDataStore(carry_out, carryIdx);
[4237]867    return sum;
868}
[4542]869#define CARRY_DEBUG
[4541]870Value* PabloCompiler::genCarryDataLoad(const unsigned index) {
871    assert (index < mCarryDataVector.size());
[4257]872    if (mNestingDepth == 0) {
[4258]873        IRBuilder<> b(mBasicBlock);
[4541]874        mCarryDataVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
[4257]875    }
[4542]876#ifdef CARRY_DEBUG
877    genPrintRegister("carry_in_" + std::to_string(index), mCarryDataVector[index]);
878#endif
[4541]879    return mCarryDataVector[index];
[4237]880}
881
[4541]882void PabloCompiler::genCarryDataStore(Value* carryOut, const unsigned index ) {
[4257]883    assert (carryOut);
[4541]884    assert (index < mCarryDataVector.size());
[4289]885    if (mNestingDepth == 0) {
[4258]886        IRBuilder<> b(mBasicBlock);
[4538]887        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
[4257]888    }
[4541]889    mCarryDataSummaryIdx[index] = -1;
[4542]890#ifdef CARRY_DEBUG
891    genPrintRegister("carry_out_" + std::to_string(index), mCarryVector[index]);
892#endif
[4541]893    mCarryDataVector[index] = carryOut;
[4237]894}
895
[4258]896inline Value* PabloCompiler::genBitBlockAny(Value* test) {
[4237]897    IRBuilder<> b(mBasicBlock);
[4258]898    Value* cast_marker_value_1 = b.CreateBitCast(test, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
[4237]899    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(IntegerType::get(mMod->getContext(), BLOCK_SIZE), 0));
900}
901
[4542]902Value * PabloCompiler::genShiftHighbitToLow(unsigned FieldWidth, Value * op) {
903    unsigned FieldCount = BLOCK_SIZE/FieldWidth;
[4237]904    IRBuilder<> b(mBasicBlock);
[4542]905    VectorType * vType = VectorType::get(IntegerType::get(mMod->getContext(), FieldWidth), FieldCount);
906    Value * v = b.CreateBitCast(op, vType);
907    return b.CreateBitCast(b.CreateLShr(v, FieldWidth - 1), mBitBlockType);
[4237]908}
909
910Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
911    IRBuilder<> b(mBasicBlock);
912    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
[4270]913    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
[4237]914}
915
[4253]916inline Value* PabloCompiler::genNot(Value* expr) {
[4237]917    IRBuilder<> b(mBasicBlock);
[4253]918    return b.CreateXor(expr, mOneInitializer, "not");
[4237]919}
[4541]920Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
[4237]921    IRBuilder<> b(mBasicBlock);
[4421]922    int advEntries = (shift_amount - 1) / BLOCK_SIZE + 1;
923    int block_shift = shift_amount % BLOCK_SIZE;
[4541]924    const auto advanceIndex = blk->getCarryIndexBase() + blk->getLocalCarryCount() + localIndex;
925    const auto storeIdx = advanceIndex;
926    const auto loadIdx = advanceIndex + advEntries - 1;
[4421]927    Value* result_value;
928   
929    if (advEntries == 1) {
[4422]930        if (block_shift == 0) { 
[4541]931            result_value = genCarryDataLoad(loadIdx);
[4422]932            //b.CreateCall(mFunc_print_register, result_value);
933        }
[4537]934#if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
[4421]935        if (block_shift == 1) {
[4542]936            Value* advanceq_value = genShiftHighbitToLow(BLOCK_SIZE, genCarryDataLoad(loadIdx));
[4421]937            Value* srli_1_value = b.CreateLShr(strm_value, 63);
938            Value* packed_shuffle;
939            Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
940            Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
941            packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
[4237]942
[4421]943            Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
944            Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
[4237]945
[4421]946            Value* shl_value = b.CreateShl(strm_value, const_packed_2);
947            result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
948        }
949        else { //if (block_shift < BLOCK_SIZE) {
950            // This is the preferred logic, but is too slow for the general case.
951            // We need to speed up our custom LLVM for this code.
[4541]952            Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
[4421]953            Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
954            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
955            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
956        }
[4537]957#else
[4541]958        Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
[4537]959        Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
960        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
961        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
962
963#endif
[4267]964    }
[4421]965    else {
966        if (block_shift == 0) {
[4541]967            result_value = genCarryDataLoad(loadIdx);
[4421]968        }
969        else { 
970            // The advance is based on the two oldest bit blocks in the advance queue.
[4541]971            Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
972            Value* strm_longint = b.CreateBitCast(genCarryDataLoad(loadIdx-1), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
[4421]973            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
974            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
[4541]975            //b.CreateCall(mFunc_print_register, genCarryDataLoad(loadIdx));
976            //b.CreateCall(mFunc_print_register, genCarryDataLoad(loadIdx-1));
[4421]977            //b.CreateCall(mFunc_print_register, result_value);
978        }
979        // copy entries from previous blocks forward
[4422]980        for (int i = loadIdx; i > storeIdx; i--) {
[4541]981            genCarryDataStore(genCarryDataLoad(i-1), i);
[4421]982        }
[4267]983    }
[4541]984    genCarryDataStore(strm_value, storeIdx);
[4421]985    return result_value;
[4237]986}
987
[4268]988void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
989    IRBuilder<> b(mBasicBlock);
990    if (marker->getType()->isPointerTy()) {
991        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
992    }
993    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
994    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
995    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
[4237]996}
[4268]997
[4538]998CompiledPabloFunction::CompiledPabloFunction(size_t carryDataSize, Function * function, ExecutionEngine * executionEngine)
999: CarryDataSize(carryDataSize)
[4516]1000, FunctionPointer(executionEngine->getPointerToFunction(function))
1001, mFunction(function)
1002, mExecutionEngine(executionEngine)
1003{
1004
[4268]1005}
[4516]1006
1007// Clean up the memory for the compiled function once we're finished using it.
1008CompiledPabloFunction::~CompiledPabloFunction() {
1009    if (mExecutionEngine) {
1010        assert (mFunction);
1011        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
1012        delete mExecutionEngine;
1013    }
1014}
1015
1016}
Note: See TracBrowser for help on using the repository browser.