source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4543

Last change on this file since 4543 was 4543, checked in by cameron, 4 years ago

Minor clean-ups

File size: 43.4 KB
Line 
1/*
2 *  Copyright (c) 2014-15 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <pablo/pablo_compiler.h>
8#include <pablo/codegenstate.h>
9#include <pablo/printer_pablos.h>
10#include <cc/cc_namemap.hpp>
11#include <re/re_name.h>
12#include <stdexcept>
13#include <include/simd-lib/bitblock.hpp>
14#include <sstream>
15#include <llvm/IR/Verifier.h>
16#include <llvm/Pass.h>
17#include <llvm/PassManager.h>
18#include <llvm/ADT/SmallVector.h>
19#include <llvm/Analysis/Passes.h>
20#include <llvm/IR/BasicBlock.h>
21#include <llvm/IR/CallingConv.h>
22#include <llvm/IR/Constants.h>
23#include <llvm/IR/DataLayout.h>
24#include <llvm/IR/DerivedTypes.h>
25#include <llvm/IR/Function.h>
26#include <llvm/IR/GlobalVariable.h>
27#include <llvm/IR/InlineAsm.h>
28#include <llvm/IR/Instructions.h>
29#include <llvm/IR/LLVMContext.h>
30#include <llvm/IR/Module.h>
31#include <llvm/Support/FormattedStream.h>
32#include <llvm/Support/MathExtras.h>
33#include <llvm/Support/Casting.h>
34#include <llvm/Support/Compiler.h>
35#include <llvm/Support/Debug.h>
36#include <llvm/Support/TargetSelect.h>
37#include <llvm/Support/Host.h>
38#include <llvm/Transforms/Scalar.h>
39#include <llvm/ExecutionEngine/ExecutionEngine.h>
40#include <llvm/ExecutionEngine/MCJIT.h>
41#include <llvm/IRReader/IRReader.h>
42#include <llvm/Bitcode/ReaderWriter.h>
43#include <llvm/Support/MemoryBuffer.h>
44#include <llvm/IR/IRBuilder.h>
45#include <llvm/Support/CommandLine.h>
46#include <llvm/ADT/Twine.h>
47#include <iostream>
48
49cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
50static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
51
52extern "C" {
53  void wrapped_print_register(char * regName, BitBlock bit_block) {
54      print_register<BitBlock>(regName, bit_block);
55  }
56}
57
58namespace pablo {
59
60PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
61: mBasisBits(basisBits)
62, mMod(new Module("icgrep", getGlobalContext()))
63, mBasicBlock(nullptr)
64, mExecutionEngine(nullptr)
65, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
66, mBasisBitsInputPtr(nullptr)
67, mCarryDataPtr(nullptr)
68, mNestingDepth(0)
69, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
70, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
71, mFunctionType(nullptr)
72, mFunction(nullptr)
73, mBasisBitsAddr(nullptr)
74, mOutputAddrPtr(nullptr)
75, mMaxNestingDepth(0)
76, mPrintRegisterFunction(nullptr)
77{
78    //Create the jit execution engine.up
79    InitializeNativeTarget();
80    InitializeNativeTargetAsmPrinter();
81    InitializeNativeTargetAsmParser();
82    DefineTypes();
83}
84
85PabloCompiler::~PabloCompiler()
86{
87
88}
89   
90void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
91    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
92}
93
94void PabloCompiler::genPrintRegister(std::string regName, Value * bitblockValue) {
95    IRBuilder <> b(mBasicBlock);
96    Constant * regNameData = ConstantDataArray::getString(mMod->getContext(), regName);
97    GlobalVariable *regStrVar = new GlobalVariable(*mMod, 
98                                                   ArrayType::get(IntegerType::get(mMod->getContext(), 8), regName.length()+1),
99                                                   /*isConstant=*/ true,
100                                                   /*Linkage=*/ GlobalValue::PrivateLinkage,
101                                                   /*Initializer=*/ regNameData);
102    Value * regStrPtr = b.CreateGEP(regStrVar, {b.getInt64(0), b.getInt32(0)});
103    b.CreateCall(mPrintRegisterFunction, {regStrPtr, bitblockValue});
104}
105
106CompiledPabloFunction PabloCompiler::compile(PabloBlock & pb)
107{
108    mNestingDepth = 0;
109    mMaxNestingDepth = 0;
110    unsigned totalCarryDataSize = Examine(pb, 0); 
111    mCarryDataVector.resize(totalCarryDataSize);
112    mCarryDataSummaryIdx.resize(totalCarryDataSize);
113    std::string errMessage;
114    EngineBuilder builder(mMod);
115    builder.setErrorStr(&errMessage);
116    builder.setMCPU(sys::getHostCPUName());
117    builder.setUseMCJIT(true);
118    builder.setOptLevel(mMaxNestingDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
119    mExecutionEngine = builder.create();
120    if (mExecutionEngine == nullptr) {
121        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
122    }
123    DeclareFunctions();
124
125    DeclareCallFunctions();
126
127    Function::arg_iterator args = mFunction->arg_begin();
128    mBasisBitsAddr = args++;
129    mBasisBitsAddr->setName("basis_bits");
130    mCarryDataPtr = args++;
131    mCarryDataPtr->setName("carry_data");
132    mOutputAddrPtr = args++;
133    mOutputAddrPtr->setName("output");
134
135    mNestingDepth = 0;
136    mMaxNestingDepth = 0;
137    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
138
139    //The basis bits structure
140    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
141        IRBuilder<> b(mBasicBlock);
142        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
143        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
144        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, mBasisBits[i]->getName()->to_string());
145        mMarkerMap.insert(std::make_pair(mBasisBits[i], basisBit));
146    }
147
148    //Generate the IR instructions for the function.
149    compileBlock(pb);
150
151    if (LLVM_UNLIKELY(mNestingDepth != 0)) {
152        throw std::runtime_error("Non-zero nesting depth error (" + std::to_string(mNestingDepth) + ")");
153    }
154
155    //Terminate the block
156    ReturnInst::Create(mMod->getContext(), mBasicBlock);
157
158    //Display the IR that has been generated by this module.
159    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
160        mMod->dump();
161    }
162    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
163    verifyModule(*mMod, &dbgs());
164
165    mExecutionEngine->finalizeObject();
166
167    //Return the required size of the carry data area to the process_block function.
168    // Reserve 1 element in the carry data area for current block number (future). TODO
169    return CompiledPabloFunction((totalCarryDataSize + 1) * sizeof(BitBlock), mFunction, mExecutionEngine);
170}
171
172void PabloCompiler::DefineTypes()
173{
174    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
175    if (structBasisBits == nullptr) {
176        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
177    }
178    std::vector<Type*>StructTy_struct_Basis_bits_fields;
179    for (int i = 0; i != mBasisBits.size(); i++)
180    {
181        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
182    }
183    if (structBasisBits->isOpaque()) {
184        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
185    }
186    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
187
188    std::vector<Type*>functionTypeArgs;
189    functionTypeArgs.push_back(mBasisBitsInputPtr);
190
191    //The carry data array.
192    //A pointer to the BitBlock vector.
193    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
194
195    //The output structure.
196    StructType * outputStruct = mMod->getTypeByName("struct.Output");
197    if (!outputStruct) {
198        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
199    }
200    if (outputStruct->isOpaque()) {
201        std::vector<Type*>fields;
202        fields.push_back(mBitBlockType);
203        fields.push_back(mBitBlockType);
204        outputStruct->setBody(fields, /*isPacked=*/false);
205    }
206    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
207
208    //The &output parameter.
209    functionTypeArgs.push_back(outputStructPtr);
210
211    mFunctionType = FunctionType::get(
212     /*Result=*/Type::getVoidTy(mMod->getContext()),
213     /*Params=*/functionTypeArgs,
214     /*isVarArg=*/false);
215}
216
217void PabloCompiler::DeclareFunctions()
218{
219    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
220    mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), Type::getInt8PtrTy(getGlobalContext()), mBitBlockType, NULL);
221    mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
222    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
223
224#ifdef USE_UADD_OVERFLOW
225#ifdef USE_TWO_UADD_OVERFLOW
226    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
227    std::vector<Type*>StructTy_0_fields;
228    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
229    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
230    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
231
232    std::vector<Type*>FuncTy_1_args;
233    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
234    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
235    FunctionType* FuncTy_1 = FunctionType::get(
236                                              /*Result=*/StructTy_0,
237                                              /*Params=*/FuncTy_1_args,
238                                              /*isVarArg=*/false);
239
240    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
241                                              std::to_string(BLOCK_SIZE));
242    if (!mFunctionUaddOverflow) {
243        mFunctionUaddOverflow= Function::Create(
244          /*Type=*/ FuncTy_1,
245          /*Linkage=*/ GlobalValue::ExternalLinkage,
246          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
247        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
248    }
249    AttributeSet mFunctionUaddOverflowPAL;
250    {
251        SmallVector<AttributeSet, 4> Attrs;
252        AttributeSet PAS;
253        {
254          AttrBuilder B;
255          B.addAttribute(Attribute::NoUnwind);
256          B.addAttribute(Attribute::ReadNone);
257          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
258        }
259
260        Attrs.push_back(PAS);
261        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
262    }
263    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
264#else
265    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
266    std::vector<Type*>StructTy_0_fields;
267    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
268    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
269    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
270
271    std::vector<Type*>FuncTy_1_args;
272    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
273    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
274    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
275    FunctionType* FuncTy_1 = FunctionType::get(
276                                              /*Result=*/StructTy_0,
277                                              /*Params=*/FuncTy_1_args,
278                                              /*isVarArg=*/false);
279
280    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
281                                              std::to_string(BLOCK_SIZE));
282    if (!mFunctionUaddOverflowCarryin) {
283        mFunctionUaddOverflowCarryin = Function::Create(
284          /*Type=*/ FuncTy_1,
285          /*Linkage=*/ GlobalValue::ExternalLinkage,
286          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
287        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
288    }
289    AttributeSet mFunctionUaddOverflowCarryinPAL;
290    {
291        SmallVector<AttributeSet, 4> Attrs;
292        AttributeSet PAS;
293        {
294          AttrBuilder B;
295          B.addAttribute(Attribute::NoUnwind);
296          B.addAttribute(Attribute::ReadNone);
297          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
298        }
299
300        Attrs.push_back(PAS);
301        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
302    }
303    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
304#endif
305#endif
306
307    //Starts on process_block
308    SmallVector<AttributeSet, 4> Attrs;
309    AttributeSet PAS;
310    {
311        AttrBuilder B;
312        B.addAttribute(Attribute::ReadOnly);
313        B.addAttribute(Attribute::NoCapture);
314        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
315    }
316    Attrs.push_back(PAS);
317    {
318        AttrBuilder B;
319        B.addAttribute(Attribute::NoCapture);
320        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
321    }
322    Attrs.push_back(PAS);
323    {
324        AttrBuilder B;
325        B.addAttribute(Attribute::NoCapture);
326        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
327    }
328    Attrs.push_back(PAS);
329    {
330        AttrBuilder B;
331        B.addAttribute(Attribute::NoUnwind);
332        B.addAttribute(Attribute::UWTable);
333        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
334    }
335    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
336
337    //Create the function that will be generated.
338    mFunction = mMod->getFunction("process_block");
339    if (!mFunction) {
340        mFunction = Function::Create(
341            /*Type=*/mFunctionType,
342            /*Linkage=*/GlobalValue::ExternalLinkage,
343            /*Name=*/"process_block", mMod);
344        mFunction->setCallingConv(CallingConv::C);
345    }
346    mFunction->setAttributes(AttrSet);
347}
348   
349// CarryDataNumbering
350//
351// For each PabloBlock, a contiguous CarryData area holds carry,
352// and advance values that are generated in one block for use in the
353// next.  For a given block, the carry data area contains the
354// carries, the advances and the nested data for contained blocks,
355// if any.
356// Notes:
357//   (a) an additional data entry is created for each if-statement
358//       having more than one carry or advance opreation within it.  This
359//       additional entry is a summary entry which must be nonzero to
360//       indicate that there are carry or advance bits associated with
361//       any operation within the if-structure (at any nesting level).
362//   (b) advancing by a large amount may require multiple advance entries.
363//       the number of advance entries for an operation Adv(x, n) is
364//       (n + BLOCK_SIZE - 1) / BLOCK_SIZE
365//
366// Examine precomputes some CarryNumbering and AdvanceNumbering, as
367// well as mMaxNestingDepth of while loops.
368//
369unsigned PabloCompiler::Examine(PabloBlock & blk, unsigned carryDataIndexIn) {
370    // Count local carries and advances at this level.
371    unsigned carryDataIndex = carryDataIndexIn;
372    unsigned localCarries = 0;
373    unsigned localAdvances = 0;
374    unsigned nestedCarryDataSize = 0;
375    for (Statement * stmt : blk) {
376        if (Advance * adv = dyn_cast<Advance>(stmt)) {
377            adv->setLocalAdvanceIndex(localAdvances);
378            localAdvances += (adv->getAdvanceAmount() + BLOCK_SIZE - 1) / BLOCK_SIZE;
379        }
380        else if (MatchStar * m = dyn_cast<MatchStar>(stmt)) {
381            m->setLocalCarryIndex(localCarries);
382            ++localCarries;
383        }
384        else if (ScanThru * s = dyn_cast<ScanThru>(stmt)) {
385            s->setLocalCarryIndex(localCarries);
386            ++localCarries;
387        }
388    }
389    carryDataIndex += localCarries + localAdvances;
390    for (Statement * stmt : blk) {
391        if (Call * call = dyn_cast<Call>(stmt)) {
392            mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
393        }
394        else if (If * ifStatement = dyn_cast<If>(stmt)) {
395            const auto ifCarryDataSize = Examine(ifStatement->getBody(), carryDataIndex);
396            nestedCarryDataSize += ifCarryDataSize;
397            carryDataIndex += ifCarryDataSize;
398        }
399        else if (While * whileStatement = dyn_cast<While>(stmt)) {
400            mMaxNestingDepth = std::max(mMaxNestingDepth, ++mNestingDepth);
401            const auto whileCarryDataSize = Examine(whileStatement->getBody(), carryDataIndex);
402            --mNestingDepth;
403            nestedCarryDataSize += whileCarryDataSize;
404            carryDataIndex += whileCarryDataSize;
405        }
406    }
407    blk.setCarryIndexBase(carryDataIndexIn);
408    blk.setLocalCarryCount(localCarries);
409    blk.setLocalAdvanceCount(localAdvances);
410    unsigned totalCarryDataSize = localCarries + localAdvances + nestedCarryDataSize;
411    if (totalCarryDataSize > 1) {
412        // Need extra space for the summary variable, always the last
413        // entry within the block.
414        totalCarryDataSize += 1;
415    }
416    blk.setTotalCarryDataSize(totalCarryDataSize);
417    return totalCarryDataSize;
418}
419
420void PabloCompiler::DeclareCallFunctions() {
421    for (auto mapping : mCalleeMap) {
422        const String * callee = mapping.first;
423        //std::cerr << callee->str() << " to be declared\n";
424        auto ei = mExternalMap.find(callee->value());
425        if (ei != mExternalMap.end()) {
426            void * fn_ptr = ei->second;
427            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
428            Value * externalValue = mMod->getOrInsertFunction(callee->value(), mBitBlockType, mBasisBitsInputPtr, NULL);
429            if (LLVM_UNLIKELY(externalValue == nullptr)) {
430                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
431            }
432            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
433            mCalleeMap[callee] = externalValue;
434        }
435        else {
436            throw std::runtime_error("External function \"" + callee->to_string() + "\" not installed");
437        }
438    }
439}
440
441void PabloCompiler::compileBlock(const PabloBlock & blk) {
442    for (const Statement * statement : blk) {
443        compileStatement(statement);
444    }
445}
446
447
448
449
450void PabloCompiler::compileIf(const If * ifStatement) {       
451        //
452        //  The If-ElseZero stmt:
453        //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
454        //  If the value of the predicate is nonzero, then determine the values of variables
455        //  <var>* by executing the given statements.  Otherwise, the value of the
456        //  variables are all zero.  Requirements: (a) no variable that is defined within
457        //  the body of the if may be accessed outside unless it is explicitly
458        //  listed in the variable list, (b) every variable in the defined list receives
459        //  a value within the body, and (c) the logical consequence of executing
460        //  the statements in the event that the predicate is zero is that the
461        //  values of all defined variables indeed work out to be 0.
462        //
463        //  Simple Implementation with Phi nodes:  a phi node in the if exit block
464        //  is inserted for each variable in the defined variable list.  It receives
465        //  a zero value from the ifentry block and the defined value from the if
466        //  body.
467        //
468        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
469        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
470        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
471       
472        IRBuilder<> b_entry(ifEntryBlock);
473        mBasicBlock = ifEntryBlock;
474   
475        const unsigned baseCarryDataIdx = ifStatement->getBody().getCarryIndexBase();
476        const unsigned carryDataSize = ifStatement->getBody().getTotalCarryDataSize();
477        const unsigned carrySummaryIndex = baseCarryDataIdx + carryDataSize - 1;
478       
479        Value* if_test_value = compileExpression(ifStatement->getCondition());
480        if (carryDataSize > 0) {
481            // load the summary variable
482            Value* last_if_pending_data = genCarryDataLoad(carrySummaryIndex);
483            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_data);
484        }
485        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
486
487        // Entry processing is complete, now handle the body of the if.
488        mBasicBlock = ifBodyBlock;
489        compileBlock(ifStatement -> getBody());
490
491        // If we compiled an If or a While statement, we won't be in the same basic block as before.
492        // Create the branch from the current basic block to the end block.
493        IRBuilder<> bIfBody(mBasicBlock);
494        // After the recursive compile, now insert the code to compute the summary
495        // carry over variable.
496       
497        if (carryDataSize > 1) {
498            // If there was only one carry entry, then it also serves as the summary variable.
499            // Otherwise, we need to combine entries to compute the summary.
500            Value * carry_summary = mZeroInitializer;
501            for (int c = baseCarryDataIdx; c < carrySummaryIndex; c++) {
502                int s = mCarryDataSummaryIdx[c];
503                if (s == -1) {
504                    Value* carryq_value = mCarryDataVector[c];
505                    if (carry_summary == mZeroInitializer) {
506                        carry_summary = carryq_value;
507                    }
508                    else {
509                        carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
510                    }
511                    mCarryDataSummaryIdx[c] = carrySummaryIndex;
512                }
513            }
514            genCarryDataStore(carry_summary, carrySummaryIndex);
515        }
516        bIfBody.CreateBr(ifEndBlock);
517        //End Block
518        IRBuilder<> bEnd(ifEndBlock);
519        for (const PabloAST * node : ifStatement->getDefined()) {
520            const Assign * assign = cast<Assign>(node);
521            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, assign->getName()->value());
522            auto f = mMarkerMap.find(assign);
523            assert (f != mMarkerMap.end());
524            phi->addIncoming(mZeroInitializer, ifEntryBlock);
525            phi->addIncoming(f->second, mBasicBlock);
526            mMarkerMap[assign] = phi;
527        }
528        // Create the phi Node for the summary variable.
529        if (carryDataSize > 0) {
530            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
531            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
532            summary_phi->addIncoming(mCarryDataVector[carrySummaryIndex], mBasicBlock);
533            mCarryDataVector[carrySummaryIndex] = summary_phi;
534        }
535       
536        // Set the basic block to the new end block
537        mBasicBlock = ifEndBlock;
538}
539
540void PabloCompiler::compileWhile(const While * whileStatement) {
541        const unsigned baseCarryDataIdx = whileStatement->getBody().getCarryIndexBase();
542        const unsigned carryDataSize = whileStatement->getBody().getTotalCarryDataSize();
543   
544        if (mNestingDepth == 0) {
545            for (auto i = 0; i < carryDataSize; ++i) {
546                genCarryDataLoad(baseCarryDataIdx + i);
547            }
548        }
549
550        SmallVector<const Next*, 4> nextNodes;
551        for (const PabloAST * node : whileStatement->getBody()) {
552            if (isa<Next>(node)) {
553                nextNodes.push_back(cast<Next>(node));
554            }
555        }
556
557        // Compile the initial iteration statements; the calls to genCarryDataStore will update the
558        // mCarryDataVector with the appropriate values. Although we're not actually entering a new basic
559        // block yet, increment the nesting depth so that any calls to genCarryDataLoad or genCarryDataStore
560        // will refer to the previous value.
561
562        ++mNestingDepth;
563
564        compileBlock(whileStatement->getBody());
565
566        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
567        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
568        // but works for now.
569
570        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
571        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
572        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
573
574        // Note: compileBlock may update the mBasicBlock pointer if the body contains nested loops. It
575        // may not be same one that we entered the function with.
576        IRBuilder<> bEntry(mBasicBlock);
577        bEntry.CreateBr(whileCondBlock);
578
579        // CONDITION BLOCK
580        IRBuilder<> bCond(whileCondBlock);
581        // generate phi nodes for any carry propogating instruction
582        std::vector<PHINode*> phiNodes(carryDataSize + nextNodes.size());
583        unsigned index = 0;
584        for (index = 0; index < carryDataSize; ++index) {
585            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
586            phi->addIncoming(mCarryDataVector[baseCarryDataIdx + index], mBasicBlock);
587            mCarryDataVector[baseCarryDataIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
588            phiNodes[index] = phi;
589        }
590        // and for any Next nodes in the loop body
591        for (const Next * n : nextNodes) {
592            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->value());
593            auto f = mMarkerMap.find(n->getInitial());
594            assert (f != mMarkerMap.end());
595            phi->addIncoming(f->second, mBasicBlock);
596            mMarkerMap[n->getInitial()] = phi;
597            phiNodes[index++] = phi;
598        }
599
600        mBasicBlock = whileCondBlock;
601        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
602
603        // BODY BLOCK
604        mBasicBlock = whileBodyBlock;
605        compileBlock(whileStatement->getBody());
606        // update phi nodes for any carry propogating instruction
607        IRBuilder<> bWhileBody(mBasicBlock);
608        for (index = 0; index < carryDataSize; ++index) {
609            Value * carryOut = bWhileBody.CreateOr(phiNodes[index], mCarryDataVector[baseCarryDataIdx + index]);
610            PHINode * phi = phiNodes[index];
611            phi->addIncoming(carryOut, mBasicBlock);
612            mCarryDataVector[baseCarryDataIdx + index] = phi;
613        }
614        // and for any Next nodes in the loop body
615        for (const Next * n : nextNodes) {
616            auto f = mMarkerMap.find(n->getInitial());
617            assert (f != mMarkerMap.end());
618            PHINode * phi = phiNodes[index++];
619            phi->addIncoming(f->second, mBasicBlock);
620            mMarkerMap[n->getInitial()] = phi;
621        }
622
623        bWhileBody.CreateBr(whileCondBlock);
624
625        // EXIT BLOCK
626        mBasicBlock = whileEndBlock;
627        if (--mNestingDepth == 0) {
628            for (index = 0; index < carryDataSize; ++index) {
629                genCarryDataStore(phiNodes[index], baseCarryDataIdx + index);
630            }
631        }
632 
633}
634
635void PabloCompiler::compileStatement(const Statement * stmt)
636{
637    IRBuilder<> b(mBasicBlock);
638    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
639        Value * expr = compileExpression(assign->getExpr());
640        mMarkerMap[assign] = expr;
641        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
642            SetOutputValue(expr, assign->getOutputIndex());
643        }
644    }
645    else if (const Next * next = dyn_cast<const Next>(stmt)) {
646        Value * expr = compileExpression(next->getExpr());
647        mMarkerMap[next->getInitial()] = expr;
648    }
649    else if (const If * ifStatement = dyn_cast<const If>(stmt))
650    {
651        compileIf(ifStatement);
652    }
653    else if (const While * whileStatement = dyn_cast<const While>(stmt))
654    {
655        compileWhile(whileStatement);
656    }
657    else if (const Call* call = dyn_cast<Call>(stmt)) {
658        //Call the callee once and store the result in the marker map.
659        auto mi = mMarkerMap.find(call);
660        if (mi == mMarkerMap.end()) {
661            auto ci = mCalleeMap.find(call->getCallee());
662            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
663                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->to_string() + "\"");
664            }
665            mi = mMarkerMap.insert(std::make_pair(call, b.CreateCall(ci->second, mBasisBitsAddr))).first;
666        }
667        // return mi->second;
668    }
669    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
670        Value * expr = b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
671        mMarkerMap[pablo_and] = expr;
672        // return expr;
673    }
674    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
675        Value * expr = b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
676        mMarkerMap[pablo_or] = expr;
677        // return expr;
678    }
679    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
680        Value * expr = b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
681        mMarkerMap[pablo_xor] = expr;
682        // return expr;
683    }
684    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
685        Value* ifMask = compileExpression(sel->getCondition());
686        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
687        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
688        Value * expr = b.CreateOr(ifTrue, ifFalse);
689        mMarkerMap[sel] = expr;
690        // return expr;
691    }
692    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
693        Value * expr = genNot(compileExpression(pablo_not->getExpr()));
694        mMarkerMap[pablo_not] = expr;
695        // return expr;
696    }
697    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
698        Value* strm_value = compileExpression(adv->getExpr());
699        int shift = adv->getAdvanceAmount();
700        unsigned advance_index = adv->getLocalAdvanceIndex();
701        Value * expr = genAdvanceWithCarry(strm_value, shift, advance_index, stmt->getParent());
702        mMarkerMap[adv] = expr;
703        // return expr;
704    }
705    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt))
706    {
707        Value * marker = compileExpression(mstar->getMarker());
708        Value * cc = compileExpression(mstar->getCharClass());
709        Value * marker_and_cc = b.CreateAnd(marker, cc);
710        unsigned carry_index = mstar->getLocalCarryIndex();
711        Value * expr = b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc, carry_index, stmt->getParent()), cc), marker, "matchstar");
712        mMarkerMap[mstar] = expr;
713        // return expr;
714    }
715    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt))
716    {
717        Value * marker_expr = compileExpression(sthru->getScanFrom());
718        Value * cc_expr = compileExpression(sthru->getScanThru());
719        unsigned carry_index = sthru->getLocalCarryIndex();
720        Value * expr = b.CreateAnd(genAddWithCarry(marker_expr, cc_expr, carry_index, stmt->getParent()), genNot(cc_expr), "scanthru");
721        mMarkerMap[sthru] = expr;
722        // return expr;
723    }
724    else {
725        PabloPrinter::print(stmt, std::cerr);
726        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
727    }
728}
729
730Value * PabloCompiler::compileExpression(const PabloAST * expr) {
731    if (isa<Ones>(expr)) {
732        return mOneInitializer;
733    }
734    else if (isa<Zeroes>(expr)) {
735        return mZeroInitializer;
736    }
737    else if (const Next * next = dyn_cast<Next>(expr)) {
738        expr = next->getInitial();
739    }
740    auto f = mMarkerMap.find(expr);
741    if (f == mMarkerMap.end()) {
742        std::stringstream str;
743        str << "\"";
744        PabloPrinter::print(expr, str);
745        str << "\" was used before definition!";
746        throw std::runtime_error(str.str());
747    }
748    return f->second;
749}
750
751
752#ifdef USE_UADD_OVERFLOW
753#ifdef USE_TWO_UADD_OVERFLOW
754PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
755    std::vector<Value*> struct_res_params;
756    struct_res_params.push_back(int128_e1);
757    struct_res_params.push_back(int128_e2);
758    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflow, struct_res_params, "uadd_overflow_res", mBasicBlock);
759    struct_res->setCallingConv(CallingConv::C);
760    struct_res->setTailCall(false);
761    AttributeSet struct_res_PAL;
762    struct_res->setAttributes(struct_res_PAL);
763
764    SumWithOverflowPack ret;
765
766    std::vector<unsigned> int128_sum_indices;
767    int128_sum_indices.push_back(0);
768    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
769
770    std::vector<unsigned> int1_obit_indices;
771    int1_obit_indices.push_back(1);
772    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
773
774    return ret;
775}
776#else
777PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
778    std::vector<Value*> struct_res_params;
779    struct_res_params.push_back(int128_e1);
780    struct_res_params.push_back(int128_e2);
781    struct_res_params.push_back(int1_cin);
782    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflowCarryin, struct_res_params, "uadd_overflow_res", mBasicBlock);
783    struct_res->setCallingConv(CallingConv::C);
784    struct_res->setTailCall(false);
785    AttributeSet struct_res_PAL;
786    struct_res->setAttributes(struct_res_PAL);
787
788    SumWithOverflowPack ret;
789
790    std::vector<unsigned> int128_sum_indices;
791    int128_sum_indices.push_back(0);
792    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
793
794    std::vector<unsigned> int1_obit_indices;
795    int1_obit_indices.push_back(1);
796    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
797
798    return ret;
799}
800#endif
801#endif
802
803
804Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2, unsigned localIndex, const PabloBlock * blk) {
805    IRBuilder<> b(mBasicBlock);
806
807    //CarryQ - carry in.
808    const int carryIdx = blk->getCarryIndexBase() + localIndex;
809    Value* carryq_value = genCarryDataLoad(carryIdx);
810#ifdef USE_TWO_UADD_OVERFLOW
811    //This is the ideal implementation, which uses two uadd.with.overflow
812    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
813    CastInst* int128_e1 = new BitCastInst(e1, b.getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
814    CastInst* int128_e2 = new BitCastInst(e2, b.getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
815    CastInst* int128_carryq_value = new BitCastInst(carryq_value, b.getIntNTy(BLOCK_SIZE), "carryq_128", mBasicBlock);
816
817    SumWithOverflowPack sumpack0, sumpack1;
818
819    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
820    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
821
822    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
823    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
824
825    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
826    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
827    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
828    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
829    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
830
831#elif defined USE_UADD_OVERFLOW
832    //use llvm.uadd.with.overflow.i128 or i256
833    CastInst* int128_e1 = new BitCastInst(e1, b.getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
834    CastInst* int128_e2 = new BitCastInst(e2, b.getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
835
836    //get i1 carryin from iBLOCK_SIZE
837    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
838    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
839    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
840
841    SumWithOverflowPack sumpack0;
842    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
843    Value* obit = sumpack0.obit;
844    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
845
846    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
847    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
848    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
849    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
850#elif (BLOCK_SIZE == 128)
851    //calculate carry through logical ops
852    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
853    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
854    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
855    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
856    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
857    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
858
859    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
860    Value* carry_out = genShiftHighbitToLow(BLOCK_SIZE, b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))));
861#else
862    //BLOCK_SIZE == 256, there is no other implementation
863    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
864#endif //USE_TWO_UADD_OVERFLOW
865
866    genCarryDataStore(carry_out, carryIdx);
867    return sum;
868}
869//#define CARRY_DEBUG
870Value* PabloCompiler::genCarryDataLoad(const unsigned index) {
871    assert (index < mCarryDataVector.size());
872    if (mNestingDepth == 0) {
873        IRBuilder<> b(mBasicBlock);
874        mCarryDataVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
875    }
876#ifdef CARRY_DEBUG
877    genPrintRegister("carry_in_" + std::to_string(index), mCarryDataVector[index]);
878#endif
879    return mCarryDataVector[index];
880}
881
882void PabloCompiler::genCarryDataStore(Value* carryOut, const unsigned index ) {
883    assert (carryOut);
884    assert (index < mCarryDataVector.size());
885    if (mNestingDepth == 0) {
886        IRBuilder<> b(mBasicBlock);
887        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
888    }
889    mCarryDataSummaryIdx[index] = -1;
890#ifdef CARRY_DEBUG
891    genPrintRegister("carry_out_" + std::to_string(index), mCarryDataVector[index]);
892#endif
893    mCarryDataVector[index] = carryOut;
894}
895
896inline Value* PabloCompiler::genBitBlockAny(Value* test) {
897    IRBuilder<> b(mBasicBlock);
898    Value* cast_marker_value_1 = b.CreateBitCast(test, b.getIntNTy(BLOCK_SIZE));
899    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(b.getIntNTy(BLOCK_SIZE), 0));
900}
901
902Value * PabloCompiler::genShiftHighbitToLow(unsigned FieldWidth, Value * op) {
903    unsigned FieldCount = BLOCK_SIZE/FieldWidth;
904    IRBuilder<> b(mBasicBlock);
905    VectorType * vType = VectorType::get(IntegerType::get(mMod->getContext(), FieldWidth), FieldCount);
906    Value * v = b.CreateBitCast(op, vType);
907    return b.CreateBitCast(b.CreateLShr(v, FieldWidth - 1), mBitBlockType);
908}
909
910Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
911    IRBuilder<> b(mBasicBlock);
912    Value* i128_val = b.CreateBitCast(e, b.getIntNTy(BLOCK_SIZE));
913    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
914}
915
916inline Value* PabloCompiler::genNot(Value* expr) {
917    IRBuilder<> b(mBasicBlock);
918    return b.CreateXor(expr, mOneInitializer, "not");
919}
920Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
921    IRBuilder<> b(mBasicBlock);
922    int advEntries = (shift_amount - 1) / BLOCK_SIZE + 1;
923    int block_shift = shift_amount % BLOCK_SIZE;
924    const auto advanceIndex = blk->getCarryIndexBase() + blk->getLocalCarryCount() + localIndex;
925    const auto storeIdx = advanceIndex;
926    const auto loadIdx = advanceIndex + advEntries - 1;
927    Value* result_value;
928   
929    if (advEntries == 1) {
930        if (block_shift == 0) { 
931            result_value = genCarryDataLoad(loadIdx);
932            //b.CreateCall(mFunc_print_register, result_value);
933        }
934#if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
935        if (block_shift == 1) {
936            Value* advanceq_value = genShiftHighbitToLow(BLOCK_SIZE, genCarryDataLoad(loadIdx));
937            Value* srli_1_value = b.CreateLShr(strm_value, 63);
938            Value* packed_shuffle;
939            Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
940            Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
941            packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
942
943            Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
944            Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
945
946            Value* shl_value = b.CreateShl(strm_value, const_packed_2);
947            result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
948        }
949        else { //if (block_shift < BLOCK_SIZE) {
950            // This is the preferred logic, but is too slow for the general case.
951            // We need to speed up our custom LLVM for this code.
952            Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), b.getIntNTy(BLOCK_SIZE));
953            Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
954            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
955            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
956        }
957#else
958        Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), b.getIntNTy(BLOCK_SIZE));
959        Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
960        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
961        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
962
963#endif
964    }
965    else {
966        if (block_shift == 0) {
967            result_value = genCarryDataLoad(loadIdx);
968        }
969        else { 
970            // The advance is based on the two oldest bit blocks in the advance queue.
971            Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), b.getIntNTy(BLOCK_SIZE));
972            Value* strm_longint = b.CreateBitCast(genCarryDataLoad(loadIdx-1), b.getIntNTy(BLOCK_SIZE));
973            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
974            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
975            //b.CreateCall(mFunc_print_register, genCarryDataLoad(loadIdx));
976            //b.CreateCall(mFunc_print_register, genCarryDataLoad(loadIdx-1));
977            //b.CreateCall(mFunc_print_register, result_value);
978        }
979        // copy entries from previous blocks forward
980        for (int i = loadIdx; i > storeIdx; i--) {
981            genCarryDataStore(genCarryDataLoad(i-1), i);
982        }
983    }
984    genCarryDataStore(strm_value, storeIdx);
985    return result_value;
986}
987
988void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
989    IRBuilder<> b(mBasicBlock);
990    if (marker->getType()->isPointerTy()) {
991        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
992    }
993    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
994    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
995    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
996}
997
998CompiledPabloFunction::CompiledPabloFunction(size_t carryDataSize, Function * function, ExecutionEngine * executionEngine)
999: CarryDataSize(carryDataSize)
1000, FunctionPointer(executionEngine->getPointerToFunction(function))
1001, mFunction(function)
1002, mExecutionEngine(executionEngine)
1003{
1004
1005}
1006
1007// Clean up the memory for the compiled function once we're finished using it.
1008CompiledPabloFunction::~CompiledPabloFunction() {
1009    if (mExecutionEngine) {
1010        assert (mFunction);
1011        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
1012        delete mExecutionEngine;
1013    }
1014}
1015
1016}
Note: See TracBrowser for help on using the repository browser.