source: icGREP/icgrep-devel/icgrep-1.00/pablo/pablo_compiler.cpp @ 4529

Last change on this file since 4529 was 4516, checked in by nmedfort, 4 years ago

More memory leak fixes. All known leaks accounted for.

File size: 48.0 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7/*
8 *  Copyright (c) 2014 International Characters.
9 *  This software is licensed to the public under the Open Software License 3.0.
10 *  icgrep is a trademark of International Characters.
11 */
12
13#include <pablo/pablo_compiler.h>
14#include <pablo/codegenstate.h>
15#include <pablo/printer_pablos.h>
16#include <cc/cc_namemap.hpp>
17#include <re/re_name.h>
18#include <stdexcept>
19#include <include/simd-lib/bitblock.hpp>
20#include <sstream>
21
22#ifdef USE_LLVM_3_4
23#include <llvm/Analysis/Verifier.h>
24#include <llvm/Assembly/PrintModulePass.h>
25#include <llvm/Linker.h>
26#endif
27#ifdef USE_LLVM_3_5
28#include <llvm/IR/Verifier.h>
29#endif
30
31#include <llvm/Pass.h>
32#include <llvm/PassManager.h>
33#include <llvm/ADT/SmallVector.h>
34#include <llvm/Analysis/Passes.h>
35#include <llvm/IR/BasicBlock.h>
36#include <llvm/IR/CallingConv.h>
37#include <llvm/IR/Constants.h>
38#include <llvm/IR/DataLayout.h>
39#include <llvm/IR/DerivedTypes.h>
40#include <llvm/IR/Function.h>
41#include <llvm/IR/GlobalVariable.h>
42#include <llvm/IR/InlineAsm.h>
43#include <llvm/IR/Instructions.h>
44#include <llvm/IR/LLVMContext.h>
45#include <llvm/IR/Module.h>
46#include <llvm/Support/FormattedStream.h>
47#include <llvm/Support/MathExtras.h>
48#include <llvm/Support/Casting.h>
49#include <llvm/Support/Compiler.h>
50#include <llvm/Support/Debug.h>
51#include <llvm/Support/TargetSelect.h>
52#include <llvm/Support/Host.h>
53#include <llvm/Transforms/Scalar.h>
54#include <llvm/ExecutionEngine/ExecutionEngine.h>
55#include <llvm/ExecutionEngine/MCJIT.h>
56#include <llvm/IRReader/IRReader.h>
57#include <llvm/Bitcode/ReaderWriter.h>
58#include <llvm/Support/MemoryBuffer.h>
59#include <llvm/IR/IRBuilder.h>
60#include <llvm/Support/CommandLine.h>
61#include <llvm/ADT/Twine.h>
62#include <iostream>
63
64cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
65static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
66
67extern "C" {
68  void wrapped_print_register(BitBlock bit_block) {
69      print_register<BitBlock>("", bit_block);
70  }
71}
72
73namespace pablo {
74
75PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
76: mBasisBits(basisBits)
77, mMod(new Module("icgrep", getGlobalContext()))
78, mBasicBlock(nullptr)
79, mExecutionEngine(nullptr)
80, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
81, mBasisBitsInputPtr(nullptr)
82, mCarryQueueIdx(0)
83, mCarryQueuePtr(nullptr)
84, mNestingDepth(0)
85, mCarryQueueSize(0)
86, mAdvanceQueueIdx(0)
87, mAdvanceQueuePtr(nullptr)
88, mAdvanceQueueSize(0)
89, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
90, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
91, mFunctionType(nullptr)
92, mFunction(nullptr)
93, mBasisBitsAddr(nullptr)
94, mOutputAddrPtr(nullptr)
95, mMaxNestingDepth(0)
96, mPrintRegisterFunction(nullptr)
97{
98    //Create the jit execution engine.up
99    InitializeNativeTarget();
100    InitializeNativeTargetAsmPrinter();
101    InitializeNativeTargetAsmParser();
102    DefineTypes();
103}
104
105PabloCompiler::~PabloCompiler()
106{
107
108}
109   
110void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
111    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
112}
113
114
115CompiledPabloFunction PabloCompiler::compile(PabloBlock & pb)
116{
117    mNestingDepth = 0;
118    mMaxNestingDepth = 0;
119    mCarryQueueSize = 0;
120    mAdvanceQueueSize = 0;
121    Examine(pb.statements());
122    mCarryQueueVector.resize(mCarryQueueSize);
123    mAdvanceQueueVector.resize(mAdvanceQueueSize);
124    mCarryQueueSummaryIdx.resize(mCarryQueueSize);
125    mAdvanceQueueSummaryIdx.resize(mAdvanceQueueSize);
126    std::string errMessage;
127    EngineBuilder builder(mMod);
128    builder.setErrorStr(&errMessage);
129    builder.setMCPU(sys::getHostCPUName());
130    builder.setUseMCJIT(true);
131    builder.setOptLevel(mMaxNestingDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
132    mExecutionEngine = builder.create();
133    if (mExecutionEngine == nullptr) {
134        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
135    }
136    DeclareFunctions();
137
138    DeclareCallFunctions();
139
140    Function::arg_iterator args = mFunction->arg_begin();
141    mBasisBitsAddr = args++;
142    mBasisBitsAddr->setName("basis_bits");
143    mCarryQueuePtr = args++;
144    mCarryQueuePtr->setName("carry_q");
145    mAdvanceQueuePtr = args++;
146    mAdvanceQueuePtr->setName("advance_q");
147    mOutputAddrPtr = args++;
148    mOutputAddrPtr->setName("output");
149
150    //Create the carry and advance queues.
151    mCarryQueueIdx = 0;
152    mAdvanceQueueIdx = 0;
153    mNestingDepth = 0;
154    mMaxNestingDepth = 0;
155    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
156
157    //The basis bits structure
158    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
159        IRBuilder<> b(mBasicBlock);
160        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
161        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
162        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, mBasisBits[i]->getName()->to_string());
163        mMarkerMap.insert(std::make_pair(mBasisBits[i], basisBit));
164    }
165
166    //Generate the IR instructions for the function.
167    compileStatements(pb.statements());
168
169    if (LLVM_UNLIKELY(mCarryQueueIdx != mCarryQueueSize)) {
170        throw std::runtime_error("Actual carry queue size (" + std::to_string(mCarryQueueIdx) + ") does not match expected (" + std::to_string(mCarryQueueSize) + ")");
171    }
172    if (LLVM_UNLIKELY(mAdvanceQueueIdx != mAdvanceQueueSize)) {
173        throw std::runtime_error("Actual advance queue size (" + std::to_string(mAdvanceQueueIdx) + ") does not match expected (" + std::to_string(mAdvanceQueueSize) + ")");
174    }
175    if (LLVM_UNLIKELY(mNestingDepth != 0)) {
176        throw std::runtime_error("Non-zero nesting depth error (" + std::to_string(mNestingDepth) + ")");
177    }
178
179    //Terminate the block
180    ReturnInst::Create(mMod->getContext(), mBasicBlock);
181
182    //Display the IR that has been generated by this module.
183    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
184        mMod->dump();
185    }
186    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
187    #ifdef USE_LLVM_3_5
188    verifyModule(*mMod, &dbgs());
189    #endif
190    #ifdef USE_LLVM_3_4
191    verifyModule(*mMod, PrintMessageAction);
192    #endif
193
194    mExecutionEngine->finalizeObject();
195
196    //Return the required size of the carry queue and a pointer to the process_block function.
197    return CompiledPabloFunction(mCarryQueueSize, mAdvanceQueueSize, mFunction, mExecutionEngine);
198}
199
200void PabloCompiler::DefineTypes()
201{
202    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
203    if (structBasisBits == nullptr) {
204        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
205    }
206    std::vector<Type*>StructTy_struct_Basis_bits_fields;
207    for (int i = 0; i != mBasisBits.size(); i++)
208    {
209        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
210    }
211    if (structBasisBits->isOpaque()) {
212        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
213    }
214    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
215
216    std::vector<Type*>functionTypeArgs;
217    functionTypeArgs.push_back(mBasisBitsInputPtr);
218
219    //The carry q array.
220    //A pointer to the BitBlock vector.
221    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
222    // Advance q array
223    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
224
225    //The output structure.
226    StructType * outputStruct = mMod->getTypeByName("struct.Output");
227    if (!outputStruct) {
228        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
229    }
230    if (outputStruct->isOpaque()) {
231        std::vector<Type*>fields;
232        fields.push_back(mBitBlockType);
233        fields.push_back(mBitBlockType);
234        outputStruct->setBody(fields, /*isPacked=*/false);
235    }
236    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
237
238    //The &output parameter.
239    functionTypeArgs.push_back(outputStructPtr);
240
241    mFunctionType = FunctionType::get(
242     /*Result=*/Type::getVoidTy(mMod->getContext()),
243     /*Params=*/functionTypeArgs,
244     /*isVarArg=*/false);
245}
246
247void PabloCompiler::DeclareFunctions()
248{
249    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
250    mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), mBitBlockType, NULL);
251    mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
252    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
253
254#ifdef USE_UADD_OVERFLOW
255#ifdef USE_TWO_UADD_OVERFLOW
256    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
257    std::vector<Type*>StructTy_0_fields;
258    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
259    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
260    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
261
262    std::vector<Type*>FuncTy_1_args;
263    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
264    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
265    FunctionType* FuncTy_1 = FunctionType::get(
266                                              /*Result=*/StructTy_0,
267                                              /*Params=*/FuncTy_1_args,
268                                              /*isVarArg=*/false);
269
270    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
271                                              std::to_string(BLOCK_SIZE));
272    if (!mFunctionUaddOverflow) {
273        mFunctionUaddOverflow= Function::Create(
274          /*Type=*/ FuncTy_1,
275          /*Linkage=*/ GlobalValue::ExternalLinkage,
276          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
277        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
278    }
279    AttributeSet mFunctionUaddOverflowPAL;
280    {
281        SmallVector<AttributeSet, 4> Attrs;
282        AttributeSet PAS;
283        {
284          AttrBuilder B;
285          B.addAttribute(Attribute::NoUnwind);
286          B.addAttribute(Attribute::ReadNone);
287          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
288        }
289
290        Attrs.push_back(PAS);
291        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
292    }
293    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
294#else
295    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
296    std::vector<Type*>StructTy_0_fields;
297    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
298    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
299    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
300
301    std::vector<Type*>FuncTy_1_args;
302    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
303    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
304    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
305    FunctionType* FuncTy_1 = FunctionType::get(
306                                              /*Result=*/StructTy_0,
307                                              /*Params=*/FuncTy_1_args,
308                                              /*isVarArg=*/false);
309
310    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
311                                              std::to_string(BLOCK_SIZE));
312    if (!mFunctionUaddOverflowCarryin) {
313        mFunctionUaddOverflowCarryin = Function::Create(
314          /*Type=*/ FuncTy_1,
315          /*Linkage=*/ GlobalValue::ExternalLinkage,
316          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
317        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
318    }
319    AttributeSet mFunctionUaddOverflowCarryinPAL;
320    {
321        SmallVector<AttributeSet, 4> Attrs;
322        AttributeSet PAS;
323        {
324          AttrBuilder B;
325          B.addAttribute(Attribute::NoUnwind);
326          B.addAttribute(Attribute::ReadNone);
327          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
328        }
329
330        Attrs.push_back(PAS);
331        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
332    }
333    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
334#endif
335#endif
336
337    //Starts on process_block
338    SmallVector<AttributeSet, 5> Attrs;
339    AttributeSet PAS;
340    {
341        AttrBuilder B;
342        B.addAttribute(Attribute::ReadOnly);
343        B.addAttribute(Attribute::NoCapture);
344        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
345    }
346    Attrs.push_back(PAS);
347    {
348        AttrBuilder B;
349        B.addAttribute(Attribute::NoCapture);
350        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
351    }
352    Attrs.push_back(PAS);
353    {
354        AttrBuilder B;
355        B.addAttribute(Attribute::NoCapture);
356        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
357    }
358    Attrs.push_back(PAS);
359    {
360        AttrBuilder B;
361        B.addAttribute(Attribute::NoCapture);
362        PAS = AttributeSet::get(mMod->getContext(), 4U, B);
363    }
364    Attrs.push_back(PAS);
365    {
366        AttrBuilder B;
367        B.addAttribute(Attribute::NoUnwind);
368        B.addAttribute(Attribute::UWTable);
369        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
370    }
371    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
372
373    //Create the function that will be generated.
374    mFunction = mMod->getFunction("process_block");
375    if (!mFunction) {
376        mFunction = Function::Create(
377            /*Type=*/mFunctionType,
378            /*Linkage=*/GlobalValue::ExternalLinkage,
379            /*Name=*/"process_block", mMod);
380        mFunction->setCallingConv(CallingConv::C);
381    }
382    mFunction->setAttributes(AttrSet);
383}
384
385//
386// CarryNumbering: sequential numbers associated with each
387// carry-generating operation encountered in a traversal of the
388// Pablo AST.    Carry-generating operations are MatchStar, ScanThru,
389// and so on.
390// AdvanceNumbering: sequential numbers associated with each Advance
391// operation encountered in tree traversal, with the following modifications.
392//   (a) an additional AdvanceQueue entry is created for each if-statement
393//       having more than one carry or advance opreation within it.  This
394//       additional entry is a summary entry which must be nonzero to
395//       indicate that there are carry or advance bits associated with
396//       any operation within the if-structure (at any nesting level).
397//   (b) advancing by a large amount may require multiple advance entries.
398//       the number of advance entries for an operation Adv(x, n) is
399//       (n - 1) / BLOCK_SIZE + 1
400//
401// Note that the initial carry/advance numbering is determined by the
402// Examine function.  The values determined at this stage must be consistent
403// with the later numbering calculated during actual statement compilation.
404//
405// Examine precomputes some CarryNumbering and AdvanceNumbering, as
406// well as mMaxNestingDepth of while loops.
407//
408void PabloCompiler::Examine(StatementList & stmts) {
409    for (Statement * stmt : stmts) {
410
411        if (Advance * adv = dyn_cast<Advance>(stmt)) {
412            mAdvanceQueueSize += (((adv->getAdvanceAmount() - 1) / BLOCK_SIZE) + 1);
413        }
414        else if (isa<MatchStar>(stmt) || isa<ScanThru>(stmt)) {
415            ++mCarryQueueSize;
416        }
417        if (Call * call = dyn_cast<Call>(stmt)) {
418            mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
419        }
420        else if (If * ifStatement = dyn_cast<If>(stmt)) {
421            const auto preIfCarryCount = mCarryQueueSize;
422            const auto preIfAdvanceCount = mAdvanceQueueSize;
423            Examine(ifStatement->getBody());
424            int ifCarryCount = mCarryQueueSize - preIfCarryCount;
425            int ifAdvanceCount = mAdvanceQueueSize - preIfAdvanceCount;
426            if ((ifCarryCount + ifAdvanceCount) > 1) {
427              ++mAdvanceQueueSize;
428              ++ifAdvanceCount;
429            }
430            ifStatement->setInclusiveCarryCount(ifCarryCount);
431            ifStatement->setInclusiveAdvanceCount(ifAdvanceCount);
432        }
433        else if (While * whileStatement = dyn_cast<While>(stmt)) {
434            const auto preWhileCarryCount = mCarryQueueSize;
435            const auto preWhileAdvanceCount = mAdvanceQueueSize;
436            mMaxNestingDepth = std::max(mMaxNestingDepth, ++mNestingDepth);
437            Examine(whileStatement->getBody());
438            --mNestingDepth;
439            whileStatement->setInclusiveCarryCount(mCarryQueueSize - preWhileCarryCount);
440            whileStatement->setInclusiveAdvanceCount(mAdvanceQueueSize - preWhileAdvanceCount);
441        }
442    }
443}
444
445void PabloCompiler::DeclareCallFunctions() {
446    for (auto mapping : mCalleeMap) {
447        const String * callee = mapping.first;
448        //std::cerr << callee->str() << " to be declared\n";
449        auto ei = mExternalMap.find(callee->value());
450        if (ei != mExternalMap.end()) {
451            void * fn_ptr = ei->second;
452            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
453            Value * externalValue = mMod->getOrInsertFunction(callee->value(), mBitBlockType, mBasisBitsInputPtr, NULL);
454            if (LLVM_UNLIKELY(externalValue == nullptr)) {
455                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
456            }
457            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
458            mCalleeMap[callee] = externalValue;
459        }
460        else {
461            throw std::runtime_error("External function \"" + callee->to_string() + "\" not installed");
462        }
463    }
464}
465
466void PabloCompiler::compileStatements(const StatementList & stmts) {
467    for (const Statement * statement : stmts) {
468        compileStatement(statement);
469    }
470}
471
472void PabloCompiler::compileStatement(const Statement * stmt)
473{
474    IRBuilder<> b(mBasicBlock);
475    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
476        Value * expr = compileExpression(assign->getExpr());
477        mMarkerMap[assign] = expr;
478        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
479            SetOutputValue(expr, assign->getOutputIndex());
480        }
481    }
482    else if (const Next * next = dyn_cast<const Next>(stmt)) {
483        Value * expr = compileExpression(next->getExpr());
484        mMarkerMap[next->getInitial()] = expr;
485    }
486    else if (const If * ifStatement = dyn_cast<const If>(stmt))
487    {
488        //
489        //  The If-ElseZero stmt:
490        //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
491        //  If the value of the predicate is nonzero, then determine the values of variables
492        //  <var>* by executing the given statements.  Otherwise, the value of the
493        //  variables are all zero.  Requirements: (a) no variable that is defined within
494        //  the body of the if may be accessed outside unless it is explicitly
495        //  listed in the variable list, (b) every variable in the defined list receives
496        //  a value within the body, and (c) the logical consequence of executing
497        //  the statements in the event that the predicate is zero is that the
498        //  values of all defined variables indeed work out to be 0.
499        //
500        //  Simple Implementation with Phi nodes:  a phi node in the if exit block
501        //  is inserted for each variable in the defined variable list.  It receives
502        //  a zero value from the ifentry block and the defined value from the if
503        //  body.
504        //
505
506        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
507        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
508        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
509       
510        const auto baseCarryQueueIdx = mCarryQueueIdx;
511        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
512       
513        int ifCarryCount = ifStatement->getInclusiveCarryCount();
514        int ifAdvanceCount = ifStatement->getInclusiveAdvanceCount();
515        //  Carry/Advance queue strategy.   
516        //  If there are any carries or advances at any nesting level within the
517        //  if statement, then the statement must be executed.   A "summary"
518        //  carryover variable is determined for this purpose, consisting of the
519        //  or of all of the carry and advance variables within the if.
520        //  This variable is determined as follows.
521        //  (a)  If the CarryCount and AdvanceCount are both 0, there is no summary variable.
522        //  (b)  If the CarryCount is 1 and the AdvanceCount is 0, then the summary
523        //       carryover variable is just the single carry queue entry.
524        //  (c)  If the CarryCount is 0 and the AdvanceCount is 1, then the summary
525        //       carryover variable is just the advance carry queue entry.
526        //  (d)  Otherwise, an additional advance queue entry is created for the
527        //       summary variable.
528        //  Note that the test for cases (c) and (d) may be combined: the summary carryover
529        //  variable is just last advance queue entry.
530        //
531       
532        IRBuilder<> b_entry(ifEntryBlock);
533        mBasicBlock = ifEntryBlock;
534        Value* if_test_value = compileExpression(ifStatement->getCondition());
535       
536        if ((ifCarryCount == 1) && (ifAdvanceCount == 0)) {
537            Value* last_if_pending_carries = genCarryInLoad(baseCarryQueueIdx);
538            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_carries);
539        }
540        else if ((ifCarryCount > 0) || (ifAdvanceCount > 0)) {
541            Value* last_if_pending_advances = genAdvanceInLoad(baseAdvanceQueueIdx + ifAdvanceCount - 1);
542            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_advances);
543        }
544        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
545
546        // Entry processing is complete, now handle the body of the if.
547        mBasicBlock = ifBodyBlock;
548        compileStatements(ifStatement->getBody());
549
550        // If we compiled an If or a While statement, we won't be in the same basic block as before.
551        // Create the branch from the current basic block to the end block.
552        IRBuilder<> bIfBody(mBasicBlock);
553        // After the recursive compile, now insert the code to compute the summary
554        // carry over variable.
555       
556        if ((ifCarryCount + ifAdvanceCount) > 1) {
557            // A summary variable is needed.
558
559            Value * carry_summary = mZeroInitializer;
560            for (int c = baseCarryQueueIdx; c < baseCarryQueueIdx + ifCarryCount; c++) {
561                int s = mCarryQueueSummaryIdx[c];
562                if (s == -1) {
563                    Value* carryq_value = mCarryQueueVector[c];
564                    if (carry_summary == mZeroInitializer) {
565                        carry_summary = carryq_value;
566                    }
567                    else {
568                        carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
569                    }
570                    mCarryQueueSummaryIdx[c] = mAdvanceQueueIdx;
571                }
572            }
573            // Note that the limit in the following uses -1, because
574            // last entry of the advance queue is for the summary variable.
575            for (int c = baseAdvanceQueueIdx; c < baseAdvanceQueueIdx + ifAdvanceCount - 1; c++) {
576                int s = mAdvanceQueueSummaryIdx[c];
577                if (s == -1 ) {
578                    Value* advance_q_value = mAdvanceQueueVector[c];
579                    if (carry_summary == mZeroInitializer) {
580                        carry_summary = advance_q_value;
581                    }
582                    else {
583                        carry_summary = bIfBody.CreateOr(carry_summary, advance_q_value);
584                    }
585                    mAdvanceQueueSummaryIdx[c] = mAdvanceQueueIdx;
586                }
587            }
588            genAdvanceOutStore(carry_summary, mAdvanceQueueIdx++);
589        }
590        bIfBody.CreateBr(ifEndBlock);
591        //End Block
592        IRBuilder<> bEnd(ifEndBlock);
593        for (const PabloAST * node : ifStatement->getDefined()) {
594            const Assign * assign = cast<Assign>(node);
595            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, assign->getName()->value());
596            auto f = mMarkerMap.find(assign);
597            assert (f != mMarkerMap.end());
598            phi->addIncoming(mZeroInitializer, ifEntryBlock);
599            phi->addIncoming(f->second, mBasicBlock);
600            mMarkerMap[assign] = phi;
601        }
602        // Create the phi Node for the summary variable.
603        if (ifAdvanceCount >= 1) {
604            // final AdvanceQ entry is summary variable.
605            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
606            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
607            summary_phi->addIncoming(mAdvanceQueueVector[mAdvanceQueueIdx-1], mBasicBlock);
608            mAdvanceQueueVector[mAdvanceQueueIdx-1] = summary_phi;
609        }
610        else if (ifCarryCount == 1) {
611            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
612            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
613            summary_phi->addIncoming(mCarryQueueVector[baseCarryQueueIdx], mBasicBlock);
614            mCarryQueueVector[baseCarryQueueIdx] = summary_phi;
615        }
616       
617        // Set the basic block to the new end block
618        mBasicBlock = ifEndBlock;
619    }
620    else if (const While * whileStatement = dyn_cast<const While>(stmt))
621    {
622        const auto baseCarryQueueIdx = mCarryQueueIdx;
623        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
624        if (mNestingDepth == 0) {
625            for (auto i = 0; i != whileStatement->getInclusiveCarryCount(); ++i) {
626                genCarryInLoad(baseCarryQueueIdx + i);
627            }
628            for (auto i = 0; i != whileStatement->getInclusiveAdvanceCount(); ++i) {
629                genAdvanceInLoad(baseAdvanceQueueIdx + i);
630            }
631        }
632
633        SmallVector<const Next*, 4> nextNodes;
634        for (const PabloAST * node : whileStatement->getBody()) {
635            if (isa<Next>(node)) {
636                nextNodes.push_back(cast<Next>(node));
637            }
638        }
639
640        // Compile the initial iteration statements; the calls to genCarryOutStore will update the
641        // mCarryQueueVector with the appropriate values. Although we're not actually entering a new basic
642        // block yet, increment the nesting depth so that any calls to genCarryInLoad or genCarryOutStore
643        // will refer to the previous value.
644
645        ++mNestingDepth;
646
647        compileStatements(whileStatement->getBody());
648
649        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
650        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
651        // but works for now.
652        mCarryQueueIdx = baseCarryQueueIdx;
653        mAdvanceQueueIdx = baseAdvanceQueueIdx;
654
655        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
656        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
657        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
658
659        // Note: compileStatements may update the mBasicBlock pointer if the body contains nested loops. It
660        // may not be same one that we entered the function with.
661        IRBuilder<> bEntry(mBasicBlock);
662        bEntry.CreateBr(whileCondBlock);
663
664        // CONDITION BLOCK
665        IRBuilder<> bCond(whileCondBlock);
666        // generate phi nodes for any carry propogating instruction
667        int whileCarryCount = whileStatement->getInclusiveCarryCount();
668        int whileAdvanceCount = whileStatement->getInclusiveAdvanceCount();
669        std::vector<PHINode*> phiNodes(whileCarryCount + whileAdvanceCount + nextNodes.size());
670        unsigned index = 0;
671        for (index = 0; index != whileCarryCount; ++index) {
672            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
673            phi->addIncoming(mCarryQueueVector[baseCarryQueueIdx + index], mBasicBlock);
674            mCarryQueueVector[baseCarryQueueIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
675            phiNodes[index] = phi;
676        }
677        for (int i = 0; i != whileAdvanceCount; ++i) {
678            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
679            phi->addIncoming(mAdvanceQueueVector[baseAdvanceQueueIdx + i], mBasicBlock);
680            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = mZeroInitializer; // (use phi for multi-carry mode.)
681            phiNodes[index++] = phi;
682        }
683        // and for any Next nodes in the loop body
684        for (const Next * n : nextNodes) {
685            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->value());
686            auto f = mMarkerMap.find(n->getInitial());
687            assert (f != mMarkerMap.end());
688            phi->addIncoming(f->second, mBasicBlock);
689            mMarkerMap[n->getInitial()] = phi;
690            phiNodes[index++] = phi;
691        }
692
693        mBasicBlock = whileCondBlock;
694        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
695
696        // BODY BLOCK
697        mBasicBlock = whileBodyBlock;
698        compileStatements(whileStatement->getBody());
699        // update phi nodes for any carry propogating instruction
700        IRBuilder<> bWhileBody(mBasicBlock);
701        for (index = 0; index != whileStatement->getInclusiveCarryCount(); ++index) {
702            Value * carryOut = bWhileBody.CreateOr(phiNodes[index], mCarryQueueVector[baseCarryQueueIdx + index]);
703            PHINode * phi = phiNodes[index];
704            phi->addIncoming(carryOut, mBasicBlock);
705            mCarryQueueVector[baseCarryQueueIdx + index] = phi;
706        }
707        for (int i = 0; i != whileAdvanceCount; ++i) {
708            Value * advOut = bWhileBody.CreateOr(phiNodes[index], mAdvanceQueueVector[baseAdvanceQueueIdx + i]);
709            PHINode * phi = phiNodes[index++];
710            phi->addIncoming(advOut, mBasicBlock);
711            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = phi;
712        }
713        // and for any Next nodes in the loop body
714        for (const Next * n : nextNodes) {
715            auto f = mMarkerMap.find(n->getInitial());
716            assert (f != mMarkerMap.end());
717            PHINode * phi = phiNodes[index++];
718            phi->addIncoming(f->second, mBasicBlock);
719            mMarkerMap[n->getInitial()] = phi;
720        }
721
722        bWhileBody.CreateBr(whileCondBlock);
723
724        // EXIT BLOCK
725        mBasicBlock = whileEndBlock;
726        if (--mNestingDepth == 0) {
727            for (index = 0; index != whileCarryCount; ++index) {
728                genCarryOutStore(phiNodes[index], baseCarryQueueIdx + index);
729            }
730            for (index = 0; index != whileAdvanceCount; ++index) {
731                genAdvanceOutStore(phiNodes[whileCarryCount + index], baseAdvanceQueueIdx + index);
732            }
733        }
734    }
735    else if (const Call* call = dyn_cast<Call>(stmt)) {
736        //Call the callee once and store the result in the marker map.
737        auto mi = mMarkerMap.find(call);
738        if (mi == mMarkerMap.end()) {
739            auto ci = mCalleeMap.find(call->getCallee());
740            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
741                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->to_string() + "\"");
742            }
743            mi = mMarkerMap.insert(std::make_pair(call, b.CreateCall(ci->second, mBasisBitsAddr))).first;
744        }
745        // return mi->second;
746    }
747    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
748        Value * expr = b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
749        mMarkerMap[pablo_and] = expr;
750        // return expr;
751    }
752    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
753        Value * expr = b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
754        mMarkerMap[pablo_or] = expr;
755        // return expr;
756    }
757    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
758        Value * expr = b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
759        mMarkerMap[pablo_xor] = expr;
760        // return expr;
761    }
762    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
763        Value* ifMask = compileExpression(sel->getCondition());
764        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
765        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
766        Value * expr = b.CreateOr(ifTrue, ifFalse);
767        mMarkerMap[sel] = expr;
768        // return expr;
769    }
770    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
771        Value * expr = genNot(compileExpression(pablo_not->getExpr()));
772        mMarkerMap[pablo_not] = expr;
773        // return expr;
774    }
775    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
776        Value* strm_value = compileExpression(adv->getExpr());
777        int shift = adv->getAdvanceAmount();
778        Value * expr = genAdvanceWithCarry(strm_value, shift);
779        mMarkerMap[adv] = expr;
780        // return expr;
781    }
782    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt))
783    {
784        Value * marker = compileExpression(mstar->getMarker());
785        Value * cc = compileExpression(mstar->getCharClass());
786        Value * marker_and_cc = b.CreateAnd(marker, cc);
787        Value * expr = b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc), cc), marker, "matchstar");
788        mMarkerMap[mstar] = expr;
789        // return expr;
790    }
791    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt))
792    {
793        Value * marker_expr = compileExpression(sthru->getScanFrom());
794        Value * cc_expr = compileExpression(sthru->getScanThru());
795        Value * expr = b.CreateAnd(genAddWithCarry(marker_expr, cc_expr), genNot(cc_expr), "scanthru");
796        mMarkerMap[sthru] = expr;
797        // return expr;
798    }
799    else {
800        PabloPrinter::print(stmt, std::cerr);
801        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
802    }
803}
804
805Value * PabloCompiler::compileExpression(const PabloAST * expr) {
806    if (isa<Ones>(expr)) {
807        return mOneInitializer;
808    }
809    else if (isa<Zeroes>(expr)) {
810        return mZeroInitializer;
811    }
812    else if (const Next * next = dyn_cast<Next>(expr)) {
813        expr = next->getInitial();
814    }
815    auto f = mMarkerMap.find(expr);
816    if (f == mMarkerMap.end()) {
817        std::stringstream str;
818        str << "\"";
819        PabloPrinter::print(expr, str);
820        str << "\" was used before definition!";
821        throw std::runtime_error(str.str());
822    }
823    return f->second;
824}
825
#ifdef USE_UADD_OVERFLOW
#ifdef USE_TWO_UADD_OVERFLOW
/**
 * Append a call to mFunctionUaddOverflow at the end of the current basic
 * block and unpack the aggregate it returns.
 *
 * @param int128_e1  first addend.
 * @param int128_e2  second addend.
 * @return a pack whose `sum` is field 0 and whose `obit` is field 1 of the
 *         call result.
 */
PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
    const std::vector<Value*> operands{int128_e1, int128_e2};
    CallInst* const call = CallInst::Create(mFunctionUaddOverflow, operands, "uadd_overflow_res", mBasicBlock);
    call->setCallingConv(CallingConv::C);
    call->setTailCall(false);
    call->setAttributes(AttributeSet());

    const std::vector<unsigned> sumField{0u};
    const std::vector<unsigned> overflowField{1u};

    SumWithOverflowPack pack;
    pack.sum = ExtractValueInst::Create(call, sumField, "sum", mBasicBlock);
    pack.obit = ExtractValueInst::Create(call, overflowField, "obit", mBasicBlock);
    return pack;
}
#else
/**
 * Append a call to mFunctionUaddOverflowCarryin at the end of the current
 * basic block and unpack the aggregate it returns.
 *
 * @param int128_e1  first addend.
 * @param int128_e2  second addend.
 * @param int1_cin   incoming carry, passed as the third call argument.
 * @return a pack whose `sum` is field 0 and whose `obit` is field 1 of the
 *         call result.
 */
PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
    const std::vector<Value*> operands{int128_e1, int128_e2, int1_cin};
    CallInst* const call = CallInst::Create(mFunctionUaddOverflowCarryin, operands, "uadd_overflow_res", mBasicBlock);
    call->setCallingConv(CallingConv::C);
    call->setTailCall(false);
    call->setAttributes(AttributeSet());

    const std::vector<unsigned> sumField{0u};
    const std::vector<unsigned> overflowField{1u};

    SumWithOverflowPack pack;
    pack.sum = ExtractValueInst::Create(call, sumField, "sum", mBasicBlock);
    pack.obit = ExtractValueInst::Create(call, overflowField, "obit", mBasicBlock);
    return pack;
}
#endif
#endif
876
877Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2) {
878    IRBuilder<> b(mBasicBlock);
879
880    //CarryQ - carry in.
881    const int carryIdx = mCarryQueueIdx++;
882    Value* carryq_value = genCarryInLoad(carryIdx);
883#ifdef USE_TWO_UADD_OVERFLOW
884    //This is the ideal implementation, which uses two uadd.with.overflow
885    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
886    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
887    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
888    CastInst* int128_carryq_value = new BitCastInst(carryq_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "carryq_128", mBasicBlock);
889
890    SumWithOverflowPack sumpack0, sumpack1;
891
892    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
893    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
894
895    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
896    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
897
898    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
899    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
900    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
901    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
902    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
903
904#elif defined USE_UADD_OVERFLOW
905    //use llvm.uadd.with.overflow.i128 or i256
906    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
907    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
908
909    //get i1 carryin from iBLOCK_SIZE
910    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
911    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
912    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
913
914    SumWithOverflowPack sumpack0;
915    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
916    Value* obit = sumpack0.obit;
917    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
918
919    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
920    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
921    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
922    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
923#elif (BLOCK_SIZE == 128)
924    //calculate carry through logical ops
925    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
926    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
927    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
928    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
929    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
930    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
931
932    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
933    Value* carry_out = genShiftHighbitToLow(b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))), "carry_out");
934#else
935    //BLOCK_SIZE == 256, there is no other implementation
936    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
937#endif //USE_TWO_UADD_OVERFLOW
938
939    genCarryOutStore(carry_out, carryIdx);
940    return sum;
941}
942
943Value* PabloCompiler::genCarryInLoad(const unsigned index) {
944    assert (index < mCarryQueueVector.size());
945    if (mNestingDepth == 0) {
946        IRBuilder<> b(mBasicBlock);
947        mCarryQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
948    }
949    return mCarryQueueVector[index];
950}
951
952void PabloCompiler::genCarryOutStore(Value* carryOut, const unsigned index ) {
953    assert (carryOut);
954    assert (index < mCarryQueueVector.size());
955    if (mNestingDepth == 0) {
956        IRBuilder<> b(mBasicBlock);
957        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
958    }
959    mCarryQueueSummaryIdx[index] = -1;
960    mCarryQueueVector[index] = carryOut;
961}
962
963Value* PabloCompiler::genAdvanceInLoad(const unsigned index) {
964    assert (index < mAdvanceQueueVector.size());
965    if (mNestingDepth == 0) {
966        IRBuilder<> b(mBasicBlock);
967        mAdvanceQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
968    }
969    return mAdvanceQueueVector[index];
970}
971
972void PabloCompiler::genAdvanceOutStore(Value* advanceOut, const unsigned index ) {
973    assert (advanceOut);
974    assert (index < mAdvanceQueueVector.size());
975    if (mNestingDepth == 0) {
976        IRBuilder<> b(mBasicBlock);
977        b.CreateAlignedStore(advanceOut, b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
978    }
979    mAdvanceQueueSummaryIdx[index] = -1;
980    mAdvanceQueueVector[index] = advanceOut;
981}
982
983inline Value* PabloCompiler::genBitBlockAny(Value* test) {
984    IRBuilder<> b(mBasicBlock);
985    Value* cast_marker_value_1 = b.CreateBitCast(test, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
986    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(IntegerType::get(mMod->getContext(), BLOCK_SIZE), 0));
987}
988
989Value* PabloCompiler::genShiftHighbitToLow(Value* e, const Twine &namehint) {
990    IRBuilder<> b(mBasicBlock);
991    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
992    return b.CreateBitCast(b.CreateLShr(i128_val, BLOCK_SIZE - 1, namehint), mBitBlockType);
993}
994
995Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
996    IRBuilder<> b(mBasicBlock);
997    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
998    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
999}
1000
1001inline Value* PabloCompiler::genNot(Value* expr) {
1002    IRBuilder<> b(mBasicBlock);
1003    return b.CreateXor(expr, mOneInitializer, "not");
1004}
1005
1006Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount) {
1007    IRBuilder<> b(mBasicBlock);
1008    int advEntries = (shift_amount - 1) / BLOCK_SIZE + 1;
1009    int block_shift = shift_amount % BLOCK_SIZE;
1010    const auto storeIdx = mAdvanceQueueIdx;
1011    const auto loadIdx = mAdvanceQueueIdx + advEntries - 1;
1012    mAdvanceQueueIdx += advEntries;
1013    Value* result_value;
1014   
1015#ifdef USE_LONG_INTEGER_SHIFT
1016    Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1017    Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1018    Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
1019    result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1020    genAdvanceOutStore(strm_value, storeIdx);
1021
1022    return result_value;
1023#elif (BLOCK_SIZE == 128)
1024    if (advEntries == 1) {
1025        if (block_shift == 0) { 
1026            result_value = genAdvanceInLoad(loadIdx);
1027            //b.CreateCall(mFunc_print_register, result_value);
1028        }
1029        if (block_shift == 1) {
1030            Value* advanceq_value = genShiftHighbitToLow(genAdvanceInLoad(loadIdx));
1031            Value* srli_1_value = b.CreateLShr(strm_value, 63);
1032            Value* packed_shuffle;
1033            Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
1034            Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
1035            packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
1036
1037            Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
1038            Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
1039
1040            Value* shl_value = b.CreateShl(strm_value, const_packed_2);
1041            result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
1042        }
1043        else { //if (block_shift < BLOCK_SIZE) {
1044            // This is the preferred logic, but is too slow for the general case.
1045            // We need to speed up our custom LLVM for this code.
1046            Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1047            Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1048            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
1049            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1050        }
1051    }
1052    else {
1053        if (block_shift == 0) {
1054            result_value = genAdvanceInLoad(loadIdx);
1055        }
1056        else { 
1057            // The advance is based on the two oldest bit blocks in the advance queue.
1058            Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1059            Value* strm_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx-1), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1060            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
1061            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1062            //b.CreateCall(mFunc_print_register, genAdvanceInLoad(loadIdx));
1063            //b.CreateCall(mFunc_print_register, genAdvanceInLoad(loadIdx-1));
1064            //b.CreateCall(mFunc_print_register, result_value);
1065        }
1066        // copy entries from previous blocks forward
1067        for (int i = loadIdx; i > storeIdx; i--) {
1068            genAdvanceOutStore(genAdvanceInLoad(i-1), i);
1069        }
1070    }
1071    genAdvanceOutStore(strm_value, storeIdx);
1072    return result_value;
1073#else
1074    //BLOCK_SIZE == 256
1075    static_assert(false, "Advance with carry on 256-bit bitblock requires long integer shifts (USE_LONG_INTEGER_SHIFT).");
1076#endif //USE_LONG_INTEGER_SHIFT
1077}
1078
1079void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
1080    IRBuilder<> b(mBasicBlock);
1081    if (marker->getType()->isPointerTy()) {
1082        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
1083    }
1084    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
1085    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
1086    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
1087}
1088
1089CompiledPabloFunction::CompiledPabloFunction(unsigned carryQSize, unsigned advanceQSize, Function * function, ExecutionEngine * executionEngine)
1090: CarryQueueSize(carryQSize)
1091, AdvanceQueueSize(advanceQSize)
1092, FunctionPointer(executionEngine->getPointerToFunction(function))
1093, mFunction(function)
1094, mExecutionEngine(executionEngine)
1095{
1096
1097}
1098
1099// Clean up the memory for the compiled function once we're finished using it.
1100CompiledPabloFunction::~CompiledPabloFunction() {
1101    if (mExecutionEngine) {
1102        assert (mFunction);
1103        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
1104        delete mExecutionEngine;
1105    }
1106}
1107
1108}
Note: See TracBrowser for help on using the repository browser.