source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4544

Last change on this file since 4544 was 4544, checked in by cameron, 4 years ago

Tracing options; make all command line options static

File size: 44.1 KB
Line 
1/*
2 *  Copyright (c) 2014-15 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <pablo/pablo_compiler.h>
8#include <pablo/codegenstate.h>
9#include <pablo/printer_pablos.h>
10#include <cc/cc_namemap.hpp>
11#include <re/re_name.h>
12#include <stdexcept>
13#include <include/simd-lib/bitblock.hpp>
14#include <sstream>
15#include <llvm/IR/Verifier.h>
16#include <llvm/Pass.h>
17#include <llvm/PassManager.h>
18#include <llvm/ADT/SmallVector.h>
19#include <llvm/Analysis/Passes.h>
20#include <llvm/IR/BasicBlock.h>
21#include <llvm/IR/CallingConv.h>
22#include <llvm/IR/Constants.h>
23#include <llvm/IR/DataLayout.h>
24#include <llvm/IR/DerivedTypes.h>
25#include <llvm/IR/Function.h>
26#include <llvm/IR/GlobalVariable.h>
27#include <llvm/IR/InlineAsm.h>
28#include <llvm/IR/Instructions.h>
29#include <llvm/IR/LLVMContext.h>
30#include <llvm/IR/Module.h>
31#include <llvm/Support/FormattedStream.h>
32#include <llvm/Support/MathExtras.h>
33#include <llvm/Support/Casting.h>
34#include <llvm/Support/Compiler.h>
35#include <llvm/Support/Debug.h>
36#include <llvm/Support/TargetSelect.h>
37#include <llvm/Support/Host.h>
38#include <llvm/Transforms/Scalar.h>
39#include <llvm/ExecutionEngine/ExecutionEngine.h>
40#include <llvm/ExecutionEngine/MCJIT.h>
41#include <llvm/IRReader/IRReader.h>
42#include <llvm/Bitcode/ReaderWriter.h>
43#include <llvm/Support/MemoryBuffer.h>
44#include <llvm/IR/IRBuilder.h>
45#include <llvm/Support/CommandLine.h>
46#include <llvm/ADT/Twine.h>
47#include <iostream>
48
49static cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
50static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
51
52static cl::OptionCategory fTracingOptions("Run-time Tracing Options", "These options control execution traces.");
53static cl::opt<bool> TraceNext("trace-next-nodes", cl::init(false), cl::desc("Generate dynamic traces of executed Next nodes (while control variables)."), cl::cat(fTracingOptions));
54static cl::opt<bool> DumpTrace("dump-trace", cl::init(false), cl::desc("Generate dynamic traces of executed assignments."), cl::cat(fTracingOptions));
55
56extern "C" {
57  void wrapped_print_register(char * regName, BitBlock bit_block) {
58      print_register<BitBlock>(regName, bit_block);
59  }
60}
61
62namespace pablo {
63
64PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
65: mBasisBits(basisBits)
66, mMod(new Module("icgrep", getGlobalContext()))
67, mBasicBlock(nullptr)
68, mExecutionEngine(nullptr)
69, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
70, mBasisBitsInputPtr(nullptr)
71, mCarryDataPtr(nullptr)
72, mNestingDepth(0)
73, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
74, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
75, mFunctionType(nullptr)
76, mFunction(nullptr)
77, mBasisBitsAddr(nullptr)
78, mOutputAddrPtr(nullptr)
79, mMaxNestingDepth(0)
80, mPrintRegisterFunction(nullptr)
81{
82    //Create the jit execution engine.up
83    InitializeNativeTarget();
84    InitializeNativeTargetAsmPrinter();
85    InitializeNativeTargetAsmParser();
86    DefineTypes();
87}
88
89PabloCompiler::~PabloCompiler()
90{
91
92}
93   
94void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
95    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
96}
97
98void PabloCompiler::genPrintRegister(std::string regName, Value * bitblockValue) {
99    IRBuilder <> b(mBasicBlock);
100    Constant * regNameData = ConstantDataArray::getString(mMod->getContext(), regName);
101    GlobalVariable *regStrVar = new GlobalVariable(*mMod, 
102                                                   ArrayType::get(IntegerType::get(mMod->getContext(), 8), regName.length()+1),
103                                                   /*isConstant=*/ true,
104                                                   /*Linkage=*/ GlobalValue::PrivateLinkage,
105                                                   /*Initializer=*/ regNameData);
106    Value * regStrPtr = b.CreateGEP(regStrVar, {b.getInt64(0), b.getInt32(0)});
107    b.CreateCall(mPrintRegisterFunction, {regStrPtr, bitblockValue});
108}
109
110CompiledPabloFunction PabloCompiler::compile(PabloBlock & pb)
111{
112    mNestingDepth = 0;
113    mMaxNestingDepth = 0;
114    unsigned totalCarryDataSize = Examine(pb, 0); 
115    mCarryDataVector.resize(totalCarryDataSize);
116    mCarryDataSummaryIdx.resize(totalCarryDataSize);
117    std::string errMessage;
118    EngineBuilder builder(mMod);
119    builder.setErrorStr(&errMessage);
120    builder.setMCPU(sys::getHostCPUName());
121    builder.setUseMCJIT(true);
122    builder.setOptLevel(mMaxNestingDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
123    mExecutionEngine = builder.create();
124    if (mExecutionEngine == nullptr) {
125        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
126    }
127    DeclareFunctions();
128
129    DeclareCallFunctions();
130
131    Function::arg_iterator args = mFunction->arg_begin();
132    mBasisBitsAddr = args++;
133    mBasisBitsAddr->setName("basis_bits");
134    mCarryDataPtr = args++;
135    mCarryDataPtr->setName("carry_data");
136    mOutputAddrPtr = args++;
137    mOutputAddrPtr->setName("output");
138
139    mNestingDepth = 0;
140    mMaxNestingDepth = 0;
141    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
142
143    //The basis bits structure
144    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
145        IRBuilder<> b(mBasicBlock);
146        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
147        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
148        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, mBasisBits[i]->getName()->to_string());
149        mMarkerMap.insert(std::make_pair(mBasisBits[i], basisBit));
150    }
151
152    //Generate the IR instructions for the function.
153    compileBlock(pb);
154
155    if (LLVM_UNLIKELY(mNestingDepth != 0)) {
156        throw std::runtime_error("Non-zero nesting depth error (" + std::to_string(mNestingDepth) + ")");
157    }
158
159    //Terminate the block
160    ReturnInst::Create(mMod->getContext(), mBasicBlock);
161
162    //Display the IR that has been generated by this module.
163    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
164        mMod->dump();
165    }
166    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
167    verifyModule(*mMod, &dbgs());
168
169    mExecutionEngine->finalizeObject();
170
171    //Return the required size of the carry data area to the process_block function.
172    // Reserve 1 element in the carry data area for current block number (future). TODO
173    return CompiledPabloFunction((totalCarryDataSize + 1) * sizeof(BitBlock), mFunction, mExecutionEngine);
174}
175
176void PabloCompiler::DefineTypes()
177{
178    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
179    if (structBasisBits == nullptr) {
180        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
181    }
182    std::vector<Type*>StructTy_struct_Basis_bits_fields;
183    for (int i = 0; i != mBasisBits.size(); i++)
184    {
185        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
186    }
187    if (structBasisBits->isOpaque()) {
188        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
189    }
190    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
191
192    std::vector<Type*>functionTypeArgs;
193    functionTypeArgs.push_back(mBasisBitsInputPtr);
194
195    //The carry data array.
196    //A pointer to the BitBlock vector.
197    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
198
199    //The output structure.
200    StructType * outputStruct = mMod->getTypeByName("struct.Output");
201    if (!outputStruct) {
202        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
203    }
204    if (outputStruct->isOpaque()) {
205        std::vector<Type*>fields;
206        fields.push_back(mBitBlockType);
207        fields.push_back(mBitBlockType);
208        outputStruct->setBody(fields, /*isPacked=*/false);
209    }
210    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
211
212    //The &output parameter.
213    functionTypeArgs.push_back(outputStructPtr);
214
215    mFunctionType = FunctionType::get(
216     /*Result=*/Type::getVoidTy(mMod->getContext()),
217     /*Params=*/functionTypeArgs,
218     /*isVarArg=*/false);
219}
220
221void PabloCompiler::DeclareFunctions()
222{
223    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
224    mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), Type::getInt8PtrTy(getGlobalContext()), mBitBlockType, NULL);
225    mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
226    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
227
228#ifdef USE_UADD_OVERFLOW
229#ifdef USE_TWO_UADD_OVERFLOW
230    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
231    std::vector<Type*>StructTy_0_fields;
232    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
233    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
234    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
235
236    std::vector<Type*>FuncTy_1_args;
237    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
238    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
239    FunctionType* FuncTy_1 = FunctionType::get(
240                                              /*Result=*/StructTy_0,
241                                              /*Params=*/FuncTy_1_args,
242                                              /*isVarArg=*/false);
243
244    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
245                                              std::to_string(BLOCK_SIZE));
246    if (!mFunctionUaddOverflow) {
247        mFunctionUaddOverflow= Function::Create(
248          /*Type=*/ FuncTy_1,
249          /*Linkage=*/ GlobalValue::ExternalLinkage,
250          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
251        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
252    }
253    AttributeSet mFunctionUaddOverflowPAL;
254    {
255        SmallVector<AttributeSet, 4> Attrs;
256        AttributeSet PAS;
257        {
258          AttrBuilder B;
259          B.addAttribute(Attribute::NoUnwind);
260          B.addAttribute(Attribute::ReadNone);
261          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
262        }
263
264        Attrs.push_back(PAS);
265        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
266    }
267    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
268#else
269    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
270    std::vector<Type*>StructTy_0_fields;
271    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
272    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
273    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
274
275    std::vector<Type*>FuncTy_1_args;
276    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
277    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
278    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
279    FunctionType* FuncTy_1 = FunctionType::get(
280                                              /*Result=*/StructTy_0,
281                                              /*Params=*/FuncTy_1_args,
282                                              /*isVarArg=*/false);
283
284    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
285                                              std::to_string(BLOCK_SIZE));
286    if (!mFunctionUaddOverflowCarryin) {
287        mFunctionUaddOverflowCarryin = Function::Create(
288          /*Type=*/ FuncTy_1,
289          /*Linkage=*/ GlobalValue::ExternalLinkage,
290          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
291        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
292    }
293    AttributeSet mFunctionUaddOverflowCarryinPAL;
294    {
295        SmallVector<AttributeSet, 4> Attrs;
296        AttributeSet PAS;
297        {
298          AttrBuilder B;
299          B.addAttribute(Attribute::NoUnwind);
300          B.addAttribute(Attribute::ReadNone);
301          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
302        }
303
304        Attrs.push_back(PAS);
305        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
306    }
307    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
308#endif
309#endif
310
311    //Starts on process_block
312    SmallVector<AttributeSet, 4> Attrs;
313    AttributeSet PAS;
314    {
315        AttrBuilder B;
316        B.addAttribute(Attribute::ReadOnly);
317        B.addAttribute(Attribute::NoCapture);
318        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
319    }
320    Attrs.push_back(PAS);
321    {
322        AttrBuilder B;
323        B.addAttribute(Attribute::NoCapture);
324        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
325    }
326    Attrs.push_back(PAS);
327    {
328        AttrBuilder B;
329        B.addAttribute(Attribute::NoCapture);
330        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
331    }
332    Attrs.push_back(PAS);
333    {
334        AttrBuilder B;
335        B.addAttribute(Attribute::NoUnwind);
336        B.addAttribute(Attribute::UWTable);
337        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
338    }
339    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
340
341    //Create the function that will be generated.
342    mFunction = mMod->getFunction("process_block");
343    if (!mFunction) {
344        mFunction = Function::Create(
345            /*Type=*/mFunctionType,
346            /*Linkage=*/GlobalValue::ExternalLinkage,
347            /*Name=*/"process_block", mMod);
348        mFunction->setCallingConv(CallingConv::C);
349    }
350    mFunction->setAttributes(AttrSet);
351}
352   
353// CarryDataNumbering
354//
355// For each PabloBlock, a contiguous CarryData area holds carry,
356// and advance values that are generated in one block for use in the
357// next.  For a given block, the carry data area contains the
358// carries, the advances and the nested data for contained blocks,
359// if any.
360// Notes:
361//   (a) an additional data entry is created for each if-statement
362//       having more than one carry or advance opreation within it.  This
363//       additional entry is a summary entry which must be nonzero to
364//       indicate that there are carry or advance bits associated with
365//       any operation within the if-structure (at any nesting level).
366//   (b) advancing by a large amount may require multiple advance entries.
367//       the number of advance entries for an operation Adv(x, n) is
368//       (n + BLOCK_SIZE - 1) / BLOCK_SIZE
369//
370// Examine precomputes some CarryNumbering and AdvanceNumbering, as
371// well as mMaxNestingDepth of while loops.
372//
373unsigned PabloCompiler::Examine(PabloBlock & blk, unsigned carryDataIndexIn) {
374    // Count local carries and advances at this level.
375    unsigned carryDataIndex = carryDataIndexIn;
376    unsigned localCarries = 0;
377    unsigned localAdvances = 0;
378    unsigned nestedCarryDataSize = 0;
379    for (Statement * stmt : blk) {
380        if (Advance * adv = dyn_cast<Advance>(stmt)) {
381            adv->setLocalAdvanceIndex(localAdvances);
382            localAdvances += (adv->getAdvanceAmount() + BLOCK_SIZE - 1) / BLOCK_SIZE;
383        }
384        else if (MatchStar * m = dyn_cast<MatchStar>(stmt)) {
385            m->setLocalCarryIndex(localCarries);
386            ++localCarries;
387        }
388        else if (ScanThru * s = dyn_cast<ScanThru>(stmt)) {
389            s->setLocalCarryIndex(localCarries);
390            ++localCarries;
391        }
392    }
393    carryDataIndex += localCarries + localAdvances;
394    for (Statement * stmt : blk) {
395        if (Call * call = dyn_cast<Call>(stmt)) {
396            mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
397        }
398        else if (If * ifStatement = dyn_cast<If>(stmt)) {
399            const auto ifCarryDataSize = Examine(ifStatement->getBody(), carryDataIndex);
400            nestedCarryDataSize += ifCarryDataSize;
401            carryDataIndex += ifCarryDataSize;
402        }
403        else if (While * whileStatement = dyn_cast<While>(stmt)) {
404            mMaxNestingDepth = std::max(mMaxNestingDepth, ++mNestingDepth);
405            const auto whileCarryDataSize = Examine(whileStatement->getBody(), carryDataIndex);
406            --mNestingDepth;
407            nestedCarryDataSize += whileCarryDataSize;
408            carryDataIndex += whileCarryDataSize;
409        }
410    }
411    blk.setCarryIndexBase(carryDataIndexIn);
412    blk.setLocalCarryCount(localCarries);
413    blk.setLocalAdvanceCount(localAdvances);
414    unsigned totalCarryDataSize = localCarries + localAdvances + nestedCarryDataSize;
415    if (totalCarryDataSize > 1) {
416        // Need extra space for the summary variable, always the last
417        // entry within the block.
418        totalCarryDataSize += 1;
419    }
420    blk.setTotalCarryDataSize(totalCarryDataSize);
421    return totalCarryDataSize;
422}
423
424void PabloCompiler::DeclareCallFunctions() {
425    for (auto mapping : mCalleeMap) {
426        const String * callee = mapping.first;
427        //std::cerr << callee->str() << " to be declared\n";
428        auto ei = mExternalMap.find(callee->value());
429        if (ei != mExternalMap.end()) {
430            void * fn_ptr = ei->second;
431            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
432            Value * externalValue = mMod->getOrInsertFunction(callee->value(), mBitBlockType, mBasisBitsInputPtr, NULL);
433            if (LLVM_UNLIKELY(externalValue == nullptr)) {
434                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
435            }
436            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
437            mCalleeMap[callee] = externalValue;
438        }
439        else {
440            throw std::runtime_error("External function \"" + callee->to_string() + "\" not installed");
441        }
442    }
443}
444
445void PabloCompiler::compileBlock(const PabloBlock & blk) {
446    for (const Statement * statement : blk) {
447        compileStatement(statement);
448    }
449}
450
451
452
453
454void PabloCompiler::compileIf(const If * ifStatement) {       
455        //
456        //  The If-ElseZero stmt:
457        //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
458        //  If the value of the predicate is nonzero, then determine the values of variables
459        //  <var>* by executing the given statements.  Otherwise, the value of the
460        //  variables are all zero.  Requirements: (a) no variable that is defined within
461        //  the body of the if may be accessed outside unless it is explicitly
462        //  listed in the variable list, (b) every variable in the defined list receives
463        //  a value within the body, and (c) the logical consequence of executing
464        //  the statements in the event that the predicate is zero is that the
465        //  values of all defined variables indeed work out to be 0.
466        //
467        //  Simple Implementation with Phi nodes:  a phi node in the if exit block
468        //  is inserted for each variable in the defined variable list.  It receives
469        //  a zero value from the ifentry block and the defined value from the if
470        //  body.
471        //
472        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
473        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
474        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
475       
476        IRBuilder<> b_entry(ifEntryBlock);
477        mBasicBlock = ifEntryBlock;
478   
479        const unsigned baseCarryDataIdx = ifStatement->getBody().getCarryIndexBase();
480        const unsigned carryDataSize = ifStatement->getBody().getTotalCarryDataSize();
481        const unsigned carrySummaryIndex = baseCarryDataIdx + carryDataSize - 1;
482       
483        Value* if_test_value = compileExpression(ifStatement->getCondition());
484        if (carryDataSize > 0) {
485            // load the summary variable
486            Value* last_if_pending_data = genCarryDataLoad(carrySummaryIndex);
487            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_data);
488        }
489        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
490
491        // Entry processing is complete, now handle the body of the if.
492        mBasicBlock = ifBodyBlock;
493        compileBlock(ifStatement -> getBody());
494
495        // If we compiled an If or a While statement, we won't be in the same basic block as before.
496        // Create the branch from the current basic block to the end block.
497        IRBuilder<> bIfBody(mBasicBlock);
498        // After the recursive compile, now insert the code to compute the summary
499        // carry over variable.
500       
501        if (carryDataSize > 1) {
502            // If there was only one carry entry, then it also serves as the summary variable.
503            // Otherwise, we need to combine entries to compute the summary.
504            Value * carry_summary = mZeroInitializer;
505            for (int c = baseCarryDataIdx; c < carrySummaryIndex; c++) {
506                int s = mCarryDataSummaryIdx[c];
507                if (s == -1) {
508                    Value* carryq_value = mCarryDataVector[c];
509                    if (carry_summary == mZeroInitializer) {
510                        carry_summary = carryq_value;
511                    }
512                    else {
513                        carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
514                    }
515                    mCarryDataSummaryIdx[c] = carrySummaryIndex;
516                }
517            }
518            genCarryDataStore(carry_summary, carrySummaryIndex);
519        }
520        bIfBody.CreateBr(ifEndBlock);
521        //End Block
522        IRBuilder<> bEnd(ifEndBlock);
523        for (const PabloAST * node : ifStatement->getDefined()) {
524            const Assign * assign = cast<Assign>(node);
525            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, assign->getName()->value());
526            auto f = mMarkerMap.find(assign);
527            assert (f != mMarkerMap.end());
528            phi->addIncoming(mZeroInitializer, ifEntryBlock);
529            phi->addIncoming(f->second, mBasicBlock);
530            mMarkerMap[assign] = phi;
531        }
532        // Create the phi Node for the summary variable.
533        if (carryDataSize > 0) {
534            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
535            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
536            summary_phi->addIncoming(mCarryDataVector[carrySummaryIndex], mBasicBlock);
537            mCarryDataVector[carrySummaryIndex] = summary_phi;
538        }
539       
540        // Set the basic block to the new end block
541        mBasicBlock = ifEndBlock;
542}
543
544void PabloCompiler::compileWhile(const While * whileStatement) {
545        const unsigned baseCarryDataIdx = whileStatement->getBody().getCarryIndexBase();
546        const unsigned carryDataSize = whileStatement->getBody().getTotalCarryDataSize();
547   
548        if (mNestingDepth == 0) {
549            for (auto i = 0; i < carryDataSize; ++i) {
550                genCarryDataLoad(baseCarryDataIdx + i);
551            }
552        }
553
554        SmallVector<const Next*, 4> nextNodes;
555        for (const PabloAST * node : whileStatement->getBody()) {
556            if (isa<Next>(node)) {
557                nextNodes.push_back(cast<Next>(node));
558            }
559        }
560
561        // Compile the initial iteration statements; the calls to genCarryDataStore will update the
562        // mCarryDataVector with the appropriate values. Although we're not actually entering a new basic
563        // block yet, increment the nesting depth so that any calls to genCarryDataLoad or genCarryDataStore
564        // will refer to the previous value.
565
566        ++mNestingDepth;
567
568        compileBlock(whileStatement->getBody());
569
570        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
571        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
572        // but works for now.
573
574        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
575        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
576        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
577
578        // Note: compileBlock may update the mBasicBlock pointer if the body contains nested loops. It
579        // may not be same one that we entered the function with.
580        IRBuilder<> bEntry(mBasicBlock);
581        bEntry.CreateBr(whileCondBlock);
582
583        // CONDITION BLOCK
584        IRBuilder<> bCond(whileCondBlock);
585        // generate phi nodes for any carry propogating instruction
586        std::vector<PHINode*> phiNodes(carryDataSize + nextNodes.size());
587        unsigned index = 0;
588        for (index = 0; index < carryDataSize; ++index) {
589            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
590            phi->addIncoming(mCarryDataVector[baseCarryDataIdx + index], mBasicBlock);
591            mCarryDataVector[baseCarryDataIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
592            phiNodes[index] = phi;
593        }
594        // and for any Next nodes in the loop body
595        for (const Next * n : nextNodes) {
596            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->value());
597            auto f = mMarkerMap.find(n->getInitial());
598            assert (f != mMarkerMap.end());
599            phi->addIncoming(f->second, mBasicBlock);
600            mMarkerMap[n->getInitial()] = phi;
601            phiNodes[index++] = phi;
602        }
603
604        mBasicBlock = whileCondBlock;
605        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
606
607        // BODY BLOCK
608        mBasicBlock = whileBodyBlock;
609        compileBlock(whileStatement->getBody());
610        // update phi nodes for any carry propogating instruction
611        IRBuilder<> bWhileBody(mBasicBlock);
612        for (index = 0; index < carryDataSize; ++index) {
613            Value * carryOut = bWhileBody.CreateOr(phiNodes[index], mCarryDataVector[baseCarryDataIdx + index]);
614            PHINode * phi = phiNodes[index];
615            phi->addIncoming(carryOut, mBasicBlock);
616            mCarryDataVector[baseCarryDataIdx + index] = phi;
617        }
618        // and for any Next nodes in the loop body
619        for (const Next * n : nextNodes) {
620            auto f = mMarkerMap.find(n->getInitial());
621            assert (f != mMarkerMap.end());
622            PHINode * phi = phiNodes[index++];
623            phi->addIncoming(f->second, mBasicBlock);
624            mMarkerMap[n->getInitial()] = phi;
625        }
626
627        bWhileBody.CreateBr(whileCondBlock);
628
629        // EXIT BLOCK
630        mBasicBlock = whileEndBlock;
631        if (--mNestingDepth == 0) {
632            for (index = 0; index < carryDataSize; ++index) {
633                genCarryDataStore(phiNodes[index], baseCarryDataIdx + index);
634            }
635        }
636 
637}
638
639void PabloCompiler::compileStatement(const Statement * stmt)
640{
641    IRBuilder<> b(mBasicBlock);
642    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
643        Value * expr = compileExpression(assign->getExpr());
644        if (DumpTrace) {
645            genPrintRegister(assign->getName()->to_string(), expr);
646        }
647        mMarkerMap[assign] = expr;
648        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
649            SetOutputValue(expr, assign->getOutputIndex());
650        }
651    }
652    else if (const Next * next = dyn_cast<const Next>(stmt)) {
653        Value * expr = compileExpression(next->getExpr());
654        if (TraceNext) {
655            genPrintRegister(next->getInitial()->getName()->to_string(), expr);
656        }
657        mMarkerMap[next->getInitial()] = expr;
658    }
659    else if (const If * ifStatement = dyn_cast<const If>(stmt))
660    {
661        compileIf(ifStatement);
662    }
663    else if (const While * whileStatement = dyn_cast<const While>(stmt))
664    {
665        compileWhile(whileStatement);
666    }
667    else if (const Call* call = dyn_cast<Call>(stmt)) {
668        //Call the callee once and store the result in the marker map.
669        auto mi = mMarkerMap.find(call);
670        if (mi == mMarkerMap.end()) {
671            auto ci = mCalleeMap.find(call->getCallee());
672            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
673                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->to_string() + "\"");
674            }
675            mi = mMarkerMap.insert(std::make_pair(call, b.CreateCall(ci->second, mBasisBitsAddr))).first;
676        }
677        // return mi->second;
678    }
679    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
680        Value * expr = b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
681        mMarkerMap[pablo_and] = expr;
682        // return expr;
683    }
684    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
685        Value * expr = b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
686        mMarkerMap[pablo_or] = expr;
687        // return expr;
688    }
689    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
690        Value * expr = b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
691        mMarkerMap[pablo_xor] = expr;
692        // return expr;
693    }
694    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
695        Value* ifMask = compileExpression(sel->getCondition());
696        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
697        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
698        Value * expr = b.CreateOr(ifTrue, ifFalse);
699        mMarkerMap[sel] = expr;
700        // return expr;
701    }
702    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
703        Value * expr = genNot(compileExpression(pablo_not->getExpr()));
704        mMarkerMap[pablo_not] = expr;
705        // return expr;
706    }
707    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
708        Value* strm_value = compileExpression(adv->getExpr());
709        int shift = adv->getAdvanceAmount();
710        unsigned advance_index = adv->getLocalAdvanceIndex();
711        Value * expr = genAdvanceWithCarry(strm_value, shift, advance_index, stmt->getParent());
712        mMarkerMap[adv] = expr;
713        // return expr;
714    }
715    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt))
716    {
717        Value * marker = compileExpression(mstar->getMarker());
718        Value * cc = compileExpression(mstar->getCharClass());
719        Value * marker_and_cc = b.CreateAnd(marker, cc);
720        unsigned carry_index = mstar->getLocalCarryIndex();
721        Value * expr = b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc, carry_index, stmt->getParent()), cc), marker, "matchstar");
722        mMarkerMap[mstar] = expr;
723        // return expr;
724    }
725    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt))
726    {
727        Value * marker_expr = compileExpression(sthru->getScanFrom());
728        Value * cc_expr = compileExpression(sthru->getScanThru());
729        unsigned carry_index = sthru->getLocalCarryIndex();
730        Value * expr = b.CreateAnd(genAddWithCarry(marker_expr, cc_expr, carry_index, stmt->getParent()), genNot(cc_expr), "scanthru");
731        mMarkerMap[sthru] = expr;
732        // return expr;
733    }
734    else {
735        PabloPrinter::print(stmt, std::cerr);
736        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
737    }
738}
739
740Value * PabloCompiler::compileExpression(const PabloAST * expr) {
741    if (isa<Ones>(expr)) {
742        return mOneInitializer;
743    }
744    else if (isa<Zeroes>(expr)) {
745        return mZeroInitializer;
746    }
747    else if (const Next * next = dyn_cast<Next>(expr)) {
748        expr = next->getInitial();
749    }
750    auto f = mMarkerMap.find(expr);
751    if (f == mMarkerMap.end()) {
752        std::stringstream str;
753        str << "\"";
754        PabloPrinter::print(expr, str);
755        str << "\" was used before definition!";
756        throw std::runtime_error(str.str());
757    }
758    return f->second;
759}
760
761
762#ifdef USE_UADD_OVERFLOW
763#ifdef USE_TWO_UADD_OVERFLOW
764PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
765    std::vector<Value*> struct_res_params;
766    struct_res_params.push_back(int128_e1);
767    struct_res_params.push_back(int128_e2);
768    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflow, struct_res_params, "uadd_overflow_res", mBasicBlock);
769    struct_res->setCallingConv(CallingConv::C);
770    struct_res->setTailCall(false);
771    AttributeSet struct_res_PAL;
772    struct_res->setAttributes(struct_res_PAL);
773
774    SumWithOverflowPack ret;
775
776    std::vector<unsigned> int128_sum_indices;
777    int128_sum_indices.push_back(0);
778    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
779
780    std::vector<unsigned> int1_obit_indices;
781    int1_obit_indices.push_back(1);
782    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
783
784    return ret;
785}
786#else
787PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
788    std::vector<Value*> struct_res_params;
789    struct_res_params.push_back(int128_e1);
790    struct_res_params.push_back(int128_e2);
791    struct_res_params.push_back(int1_cin);
792    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflowCarryin, struct_res_params, "uadd_overflow_res", mBasicBlock);
793    struct_res->setCallingConv(CallingConv::C);
794    struct_res->setTailCall(false);
795    AttributeSet struct_res_PAL;
796    struct_res->setAttributes(struct_res_PAL);
797
798    SumWithOverflowPack ret;
799
800    std::vector<unsigned> int128_sum_indices;
801    int128_sum_indices.push_back(0);
802    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
803
804    std::vector<unsigned> int1_obit_indices;
805    int1_obit_indices.push_back(1);
806    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
807
808    return ret;
809}
810#endif
811#endif
812
813
814Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2, unsigned localIndex, const PabloBlock * blk) {
815    IRBuilder<> b(mBasicBlock);
816
817    //CarryQ - carry in.
818    const int carryIdx = blk->getCarryIndexBase() + localIndex;
819    Value* carryq_value = genCarryDataLoad(carryIdx);
820#ifdef USE_TWO_UADD_OVERFLOW
821    //This is the ideal implementation, which uses two uadd.with.overflow
822    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
823    CastInst* int128_e1 = new BitCastInst(e1, b.getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
824    CastInst* int128_e2 = new BitCastInst(e2, b.getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
825    CastInst* int128_carryq_value = new BitCastInst(carryq_value, b.getIntNTy(BLOCK_SIZE), "carryq_128", mBasicBlock);
826
827    SumWithOverflowPack sumpack0, sumpack1;
828
829    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
830    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
831
832    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
833    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
834
835    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
836    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
837    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
838    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
839    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
840
841#elif defined USE_UADD_OVERFLOW
842    //use llvm.uadd.with.overflow.i128 or i256
843    CastInst* int128_e1 = new BitCastInst(e1, b.getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
844    CastInst* int128_e2 = new BitCastInst(e2, b.getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
845
846    //get i1 carryin from iBLOCK_SIZE
847    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
848    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
849    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
850
851    SumWithOverflowPack sumpack0;
852    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
853    Value* obit = sumpack0.obit;
854    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
855
856    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
857    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
858    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
859    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
860#elif (BLOCK_SIZE == 128)
861    //calculate carry through logical ops
862    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
863    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
864    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
865    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
866    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
867    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
868
869    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
870    Value* carry_out = genShiftHighbitToLow(BLOCK_SIZE, b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))));
871#else
872    //BLOCK_SIZE == 256, there is no other implementation
873    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
874#endif //USE_TWO_UADD_OVERFLOW
875
876    genCarryDataStore(carry_out, carryIdx);
877    return sum;
878}
879//#define CARRY_DEBUG
880Value* PabloCompiler::genCarryDataLoad(const unsigned index) {
881    assert (index < mCarryDataVector.size());
882    if (mNestingDepth == 0) {
883        IRBuilder<> b(mBasicBlock);
884        mCarryDataVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
885    }
886#ifdef CARRY_DEBUG
887    genPrintRegister("carry_in_" + std::to_string(index), mCarryDataVector[index]);
888#endif
889    return mCarryDataVector[index];
890}
891
892void PabloCompiler::genCarryDataStore(Value* carryOut, const unsigned index ) {
893    assert (carryOut);
894    assert (index < mCarryDataVector.size());
895    if (mNestingDepth == 0) {
896        IRBuilder<> b(mBasicBlock);
897        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
898    }
899    mCarryDataSummaryIdx[index] = -1;
900#ifdef CARRY_DEBUG
901    genPrintRegister("carry_out_" + std::to_string(index), mCarryDataVector[index]);
902#endif
903    mCarryDataVector[index] = carryOut;
904}
905
906inline Value* PabloCompiler::genBitBlockAny(Value* test) {
907    IRBuilder<> b(mBasicBlock);
908    Value* cast_marker_value_1 = b.CreateBitCast(test, b.getIntNTy(BLOCK_SIZE));
909    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(b.getIntNTy(BLOCK_SIZE), 0));
910}
911
912Value * PabloCompiler::genShiftHighbitToLow(unsigned FieldWidth, Value * op) {
913    unsigned FieldCount = BLOCK_SIZE/FieldWidth;
914    IRBuilder<> b(mBasicBlock);
915    VectorType * vType = VectorType::get(IntegerType::get(mMod->getContext(), FieldWidth), FieldCount);
916    Value * v = b.CreateBitCast(op, vType);
917    return b.CreateBitCast(b.CreateLShr(v, FieldWidth - 1), mBitBlockType);
918}
919
920Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
921    IRBuilder<> b(mBasicBlock);
922    Value* i128_val = b.CreateBitCast(e, b.getIntNTy(BLOCK_SIZE));
923    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
924}
925
926inline Value* PabloCompiler::genNot(Value* expr) {
927    IRBuilder<> b(mBasicBlock);
928    return b.CreateXor(expr, mOneInitializer, "not");
929}
930Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
931    IRBuilder<> b(mBasicBlock);
932    int advEntries = (shift_amount - 1) / BLOCK_SIZE + 1;
933    int block_shift = shift_amount % BLOCK_SIZE;
934    const auto advanceIndex = blk->getCarryIndexBase() + blk->getLocalCarryCount() + localIndex;
935    const auto storeIdx = advanceIndex;
936    const auto loadIdx = advanceIndex + advEntries - 1;
937    Value* result_value;
938   
939    if (advEntries == 1) {
940        if (block_shift == 0) { 
941            result_value = genCarryDataLoad(loadIdx);
942            //b.CreateCall(mFunc_print_register, result_value);
943        }
944#if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
945        if (block_shift == 1) {
946            Value* advanceq_value = genShiftHighbitToLow(BLOCK_SIZE, genCarryDataLoad(loadIdx));
947            Value* srli_1_value = b.CreateLShr(strm_value, 63);
948            Value* packed_shuffle;
949            Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
950            Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
951            packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
952
953            Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
954            Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
955
956            Value* shl_value = b.CreateShl(strm_value, const_packed_2);
957            result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
958        }
959        else { //if (block_shift < BLOCK_SIZE) {
960            // This is the preferred logic, but is too slow for the general case.
961            // We need to speed up our custom LLVM for this code.
962            Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), b.getIntNTy(BLOCK_SIZE));
963            Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
964            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
965            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
966        }
967#else
968        Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), b.getIntNTy(BLOCK_SIZE));
969        Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
970        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
971        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
972
973#endif
974    }
975    else {
976        if (block_shift == 0) {
977            result_value = genCarryDataLoad(loadIdx);
978        }
979        else { 
980            // The advance is based on the two oldest bit blocks in the advance queue.
981            Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), b.getIntNTy(BLOCK_SIZE));
982            Value* strm_longint = b.CreateBitCast(genCarryDataLoad(loadIdx-1), b.getIntNTy(BLOCK_SIZE));
983            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
984            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
985            //b.CreateCall(mFunc_print_register, genCarryDataLoad(loadIdx));
986            //b.CreateCall(mFunc_print_register, genCarryDataLoad(loadIdx-1));
987            //b.CreateCall(mFunc_print_register, result_value);
988        }
989        // copy entries from previous blocks forward
990        for (int i = loadIdx; i > storeIdx; i--) {
991            genCarryDataStore(genCarryDataLoad(i-1), i);
992        }
993    }
994    genCarryDataStore(strm_value, storeIdx);
995    return result_value;
996}
997
998void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
999    IRBuilder<> b(mBasicBlock);
1000    if (marker->getType()->isPointerTy()) {
1001        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
1002    }
1003    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
1004    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
1005    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
1006}
1007
1008CompiledPabloFunction::CompiledPabloFunction(size_t carryDataSize, Function * function, ExecutionEngine * executionEngine)
1009: CarryDataSize(carryDataSize)
1010, FunctionPointer(executionEngine->getPointerToFunction(function))
1011, mFunction(function)
1012, mExecutionEngine(executionEngine)
1013{
1014
1015}
1016
1017// Clean up the memory for the compiled function once we're finished using it.
1018CompiledPabloFunction::~CompiledPabloFunction() {
1019    if (mExecutionEngine) {
1020        assert (mFunction);
1021        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
1022        delete mExecutionEngine;
1023    }
1024}
1025
1026}
Note: See TracBrowser for help on using the repository browser.