source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4415

Last change on this file since 4415 was 4415, checked in by nmedfort, 4 years ago

More work on usedef info.

File size: 45.7 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7/*
8 *  Copyright (c) 2014 International Characters.
9 *  This software is licensed to the public under the Open Software License 3.0.
10 *  icgrep is a trademark of International Characters.
11 */
12
13#include <pablo/pablo_compiler.h>
14#include <pablo/codegenstate.h>
15#include <pablo/printer_pablos.h>
16#include <cc/cc_namemap.hpp>
17#include <re/re_name.h>
18#include <stdexcept>
19#include <include/simd-lib/bitblock.hpp>
20
21#ifdef USE_LLVM_3_4
22#include <llvm/Analysis/Verifier.h>
23#include <llvm/Assembly/PrintModulePass.h>
24#include <llvm/Linker.h>
25#endif
26#ifdef USE_LLVM_3_5
27#include <llvm/IR/Verifier.h>
28#endif
29
30#include <llvm/Pass.h>
31#include <llvm/PassManager.h>
32#include <llvm/ADT/SmallVector.h>
33#include <llvm/Analysis/Passes.h>
34#include <llvm/IR/BasicBlock.h>
35#include <llvm/IR/CallingConv.h>
36#include <llvm/IR/Constants.h>
37#include <llvm/IR/DataLayout.h>
38#include <llvm/IR/DerivedTypes.h>
39#include <llvm/IR/Function.h>
40#include <llvm/IR/GlobalVariable.h>
41#include <llvm/IR/InlineAsm.h>
42#include <llvm/IR/Instructions.h>
43#include <llvm/IR/LLVMContext.h>
44#include <llvm/IR/Module.h>
45#include <llvm/Support/FormattedStream.h>
46#include <llvm/Support/MathExtras.h>
47#include <llvm/Support/Casting.h>
48#include <llvm/Support/Compiler.h>
49#include <llvm/Support/Debug.h>
50#include <llvm/Support/TargetSelect.h>
51#include <llvm/Support/Host.h>
52#include <llvm/Transforms/Scalar.h>
53#include <llvm/ExecutionEngine/ExecutionEngine.h>
54#include <llvm/ExecutionEngine/MCJIT.h>
55#include <llvm/IRReader/IRReader.h>
56#include <llvm/Bitcode/ReaderWriter.h>
57#include <llvm/Support/MemoryBuffer.h>
58#include <llvm/IR/IRBuilder.h>
59
60#include "llvm/Support/CommandLine.h"
61
62
63cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
64static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
65
66extern "C" {
67  void wrapped_print_register(BitBlock bit_block) {
68      print_register<BitBlock>("", bit_block);
69  }
70}
71
72namespace pablo {
73
74PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
75: mBasisBits(basisBits)
76, mMod(new Module("icgrep", getGlobalContext()))
77, mBasicBlock(nullptr)
78, mExecutionEngine(nullptr)
79, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
80, mBasisBitsInputPtr(nullptr)
81, mCarryQueueIdx(0)
82, mCarryQueuePtr(nullptr)
83, mNestingDepth(0)
84, mCarryQueueSize(0)
85, mAdvanceQueueIdx(0)
86, mAdvanceQueuePtr(nullptr)
87, mAdvanceQueueSize(0)
88, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
89, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
90, mFunctionType(nullptr)
91, mFunction(nullptr)
92, mBasisBitsAddr(nullptr)
93, mOutputAddrPtr(nullptr)
94, mMaxNestingDepth(0)
95{
96    //Create the jit execution engine.up
97    InitializeNativeTarget();
98    InitializeNativeTargetAsmPrinter();
99    InitializeNativeTargetAsmParser();
100    DefineTypes();
101    DeclareFunctions();
102}
103
104PabloCompiler::~PabloCompiler()
105{
106    delete mMod;
107}
108   
109void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
110    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
111}
112
113
114LLVM_Gen_RetVal PabloCompiler::compile(PabloBlock & pb)
115{
116    mNestingDepth = 0;
117    mMaxNestingDepth = 0;
118    mCarryQueueSize = 0;
119    mAdvanceQueueSize = 0;
120    Examine(pb.statements());
121    mCarryQueueVector.resize(mCarryQueueSize);
122    mAdvanceQueueVector.resize(mAdvanceQueueSize);
123    mCarryQueueSummaryIdx.resize(mCarryQueueSize);
124    mAdvanceQueueSummaryIdx.resize(mAdvanceQueueSize);
125    std::string errMessage;
126    EngineBuilder builder(mMod);
127    builder.setErrorStr(&errMessage);
128    builder.setMCPU(sys::getHostCPUName());
129    builder.setUseMCJIT(true);
130    builder.setOptLevel(mMaxNestingDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
131    mExecutionEngine = builder.create();
132    if (mExecutionEngine == nullptr) {
133        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
134    }
135
136    if (!mCalleeMap.empty()) {
137        DeclareCallFunctions();
138    }
139
140    Function::arg_iterator args = mFunction->arg_begin();
141    mBasisBitsAddr = args++;
142    mBasisBitsAddr->setName("basis_bits");
143    mCarryQueuePtr = args++;
144    mCarryQueuePtr->setName("carry_q");
145    mAdvanceQueuePtr = args++;
146    mAdvanceQueuePtr->setName("advance_q");
147    mOutputAddrPtr = args++;
148    mOutputAddrPtr->setName("output");
149
150    //Create the carry and advance queues.
151    mCarryQueueIdx = 0;
152    mAdvanceQueueIdx = 0;
153    mNestingDepth = 0;
154    mMaxNestingDepth = 0;
155    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
156
157    //The basis bits structure
158    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
159        IRBuilder<> b(mBasicBlock);
160        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
161        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
162        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, mBasisBits[i]->getName()->str());
163        mMarkerMap.insert(std::make_pair(mBasisBits[i], basisBit));
164    }
165
166    //Generate the IR instructions for the function.
167    compileStatements(pb.statements());
168
169    assert (mCarryQueueIdx <= mCarryQueueSize);
170    assert (mAdvanceQueueIdx <= mAdvanceQueueSize);
171    assert (mNestingDepth == 0);
172    //Terminate the block
173    ReturnInst::Create(mMod->getContext(), mBasicBlock);
174
175    //Display the IR that has been generated by this module.
176    if (DumpGeneratedIR) {
177      mMod->dump();
178    }
179
180
181
182    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
183    #ifdef USE_LLVM_3_5
184    verifyModule(*mMod, &dbgs());
185    #endif
186    #ifdef USE_LLVM_3_4
187    verifyModule(*mMod, PrintMessageAction);
188    #endif
189
190    //Use the pass manager to run optimizations on the function.
191    FunctionPassManager fpm(mMod);
192 #ifdef USE_LLVM_3_5
193    mMod->setDataLayout(mExecutionEngine->getDataLayout());
194    // Set up the optimizer pipeline.  Start with registering info about how the target lays out data structures.
195    fpm.add(new DataLayoutPass(mMod));
196#endif
197#ifdef USE_LLVM_3_4
198    fpm.add(new DataLayout(*mExecutionEngine->getDataLayout()));
199#endif
200    fpm.doInitialization();
201    fpm.run(*mFunction);
202
203    mExecutionEngine->finalizeObject();
204
205    LLVM_Gen_RetVal retVal;
206    //Return the required size of the carry queue and a pointer to the process_block function.
207    retVal.carry_q_size = mCarryQueueVector.size();
208    retVal.advance_q_size = mAdvanceQueueVector.size();
209    retVal.process_block_fptr = mExecutionEngine->getPointerToFunction(mFunction);
210
211    return retVal;
212}
213
214void PabloCompiler::DefineTypes()
215{
216    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
217    if (structBasisBits == nullptr) {
218        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
219    }
220    std::vector<Type*>StructTy_struct_Basis_bits_fields;
221    for (int i = 0; i != mBasisBits.size(); i++)
222    {
223        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
224    }
225    if (structBasisBits->isOpaque()) {
226        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
227    }
228    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
229
230    std::vector<Type*>functionTypeArgs;
231    functionTypeArgs.push_back(mBasisBitsInputPtr);
232
233    //The carry q array.
234    //A pointer to the BitBlock vector.
235    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
236    // Advance q array
237    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
238
239    //The output structure.
240    StructType * outputStruct = mMod->getTypeByName("struct.Output");
241    if (!outputStruct) {
242        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
243    }
244    if (outputStruct->isOpaque()) {
245        std::vector<Type*>fields;
246        fields.push_back(mBitBlockType);
247        fields.push_back(mBitBlockType);
248        outputStruct->setBody(fields, /*isPacked=*/false);
249    }
250    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
251
252    //The &output parameter.
253    functionTypeArgs.push_back(outputStructPtr);
254
255    mFunctionType = FunctionType::get(
256     /*Result=*/Type::getVoidTy(mMod->getContext()),
257     /*Params=*/functionTypeArgs,
258     /*isVarArg=*/false);
259}
260
261void PabloCompiler::DeclareFunctions()
262{
263    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
264    //mFunc_print_register = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), mXi64Vect, NULL);
265    //mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mFunc_print_register), (void *)&wrapped_print_register);
266    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
267
268#ifdef USE_UADD_OVERFLOW
269#ifdef USE_TWO_UADD_OVERFLOW
270    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
271    std::vector<Type*>StructTy_0_fields;
272    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
273    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
274    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
275
276    std::vector<Type*>FuncTy_1_args;
277    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
278    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
279    FunctionType* FuncTy_1 = FunctionType::get(
280                                              /*Result=*/StructTy_0,
281                                              /*Params=*/FuncTy_1_args,
282                                              /*isVarArg=*/false);
283
284    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
285                                              std::to_string(BLOCK_SIZE));
286    if (!mFunctionUaddOverflow) {
287        mFunctionUaddOverflow= Function::Create(
288          /*Type=*/ FuncTy_1,
289          /*Linkage=*/ GlobalValue::ExternalLinkage,
290          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
291        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
292    }
293    AttributeSet mFunctionUaddOverflowPAL;
294    {
295        SmallVector<AttributeSet, 4> Attrs;
296        AttributeSet PAS;
297        {
298          AttrBuilder B;
299          B.addAttribute(Attribute::NoUnwind);
300          B.addAttribute(Attribute::ReadNone);
301          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
302        }
303
304        Attrs.push_back(PAS);
305        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
306    }
307    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
308#else
309    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
310    std::vector<Type*>StructTy_0_fields;
311    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
312    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
313    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
314
315    std::vector<Type*>FuncTy_1_args;
316    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
317    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
318    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
319    FunctionType* FuncTy_1 = FunctionType::get(
320                                              /*Result=*/StructTy_0,
321                                              /*Params=*/FuncTy_1_args,
322                                              /*isVarArg=*/false);
323
324    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
325                                              std::to_string(BLOCK_SIZE));
326    if (!mFunctionUaddOverflowCarryin) {
327        mFunctionUaddOverflowCarryin = Function::Create(
328          /*Type=*/ FuncTy_1,
329          /*Linkage=*/ GlobalValue::ExternalLinkage,
330          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
331        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
332    }
333    AttributeSet mFunctionUaddOverflowCarryinPAL;
334    {
335        SmallVector<AttributeSet, 4> Attrs;
336        AttributeSet PAS;
337        {
338          AttrBuilder B;
339          B.addAttribute(Attribute::NoUnwind);
340          B.addAttribute(Attribute::ReadNone);
341          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
342        }
343
344        Attrs.push_back(PAS);
345        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
346    }
347    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
348#endif
349#endif
350
351    //Starts on process_block
352    SmallVector<AttributeSet, 5> Attrs;
353    AttributeSet PAS;
354    {
355        AttrBuilder B;
356        B.addAttribute(Attribute::ReadOnly);
357        B.addAttribute(Attribute::NoCapture);
358        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
359    }
360    Attrs.push_back(PAS);
361    {
362        AttrBuilder B;
363        B.addAttribute(Attribute::NoCapture);
364        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
365    }
366    Attrs.push_back(PAS);
367    {
368        AttrBuilder B;
369        B.addAttribute(Attribute::NoCapture);
370        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
371    }
372    Attrs.push_back(PAS);
373    {
374        AttrBuilder B;
375        B.addAttribute(Attribute::NoCapture);
376        PAS = AttributeSet::get(mMod->getContext(), 4U, B);
377    }
378    Attrs.push_back(PAS);
379    {
380        AttrBuilder B;
381        B.addAttribute(Attribute::NoUnwind);
382        B.addAttribute(Attribute::UWTable);
383        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
384    }
385    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
386
387    //Create the function that will be generated.
388    mFunction = mMod->getFunction("process_block");
389    if (!mFunction) {
390        mFunction = Function::Create(
391            /*Type=*/mFunctionType,
392            /*Linkage=*/GlobalValue::ExternalLinkage,
393            /*Name=*/"process_block", mMod);
394        mFunction->setCallingConv(CallingConv::C);
395    }
396    mFunction->setAttributes(AttrSet);
397}
398
399void PabloCompiler::Examine(StatementList & stmts) {
400    for (Statement * stmt : stmts) {
401        if (Assign * assign = dyn_cast<Assign>(stmt)) {
402            Examine(assign->getExpr());
403        }
404        if (Next * next = dyn_cast<Next>(stmt)) {
405            Examine(next->getExpr());
406        }
407        else if (If * ifStatement = dyn_cast<If>(stmt)) {
408            const auto preIfCarryCount = mCarryQueueSize;
409            const auto preIfAdvanceCount = mAdvanceQueueSize;
410            Examine(ifStatement->getCondition());
411            Examine(ifStatement->getBody());
412            int ifCarryCount = mCarryQueueSize - preIfCarryCount;
413            int ifAdvanceCount = mAdvanceQueueSize - preIfAdvanceCount;
414            if ((ifCarryCount + ifAdvanceCount) > 1) {
415              ++mAdvanceQueueSize;
416              ++ifAdvanceCount;
417            }
418            ifStatement->setInclusiveCarryCount(ifCarryCount);
419            ifStatement->setInclusiveAdvanceCount(ifAdvanceCount);
420        }
421        else if (While * whileStatement = dyn_cast<While>(stmt)) {
422            const auto preWhileCarryCount = mCarryQueueSize;
423            const auto preWhileAdvanceCount = mAdvanceQueueSize;
424            Examine(whileStatement->getCondition());
425            mMaxNestingDepth = std::max(mMaxNestingDepth, ++mNestingDepth);
426            Examine(whileStatement->getBody());
427            --mNestingDepth;
428            whileStatement->setInclusiveCarryCount(mCarryQueueSize - preWhileCarryCount);
429            whileStatement->setInclusiveAdvanceCount(mAdvanceQueueSize - preWhileAdvanceCount);
430        }
431    }
432}
433
434void PabloCompiler::Examine(PabloAST *expr)
435{
436    if (Call * call = dyn_cast<Call>(expr)) {
437        mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
438    }
439    else if (And * pablo_and = dyn_cast<And>(expr)) {
440        Examine(pablo_and->getExpr1());
441        Examine(pablo_and->getExpr2());
442    }
443    else if (Or * pablo_or = dyn_cast<Or>(expr)) {
444        Examine(pablo_or->getExpr1());
445        Examine(pablo_or->getExpr2());
446    }
447    else if (Sel * pablo_sel = dyn_cast<Sel>(expr)) {
448        Examine(pablo_sel->getCondition());
449        Examine(pablo_sel->getTrueExpr());
450        Examine(pablo_sel->getFalseExpr());
451    }
452    else if (Not * pablo_not = dyn_cast<Not>(expr)) {
453        Examine(pablo_not->getExpr());
454    }
455    else if (Advance * adv = dyn_cast<Advance>(expr)) {
456        ++mAdvanceQueueSize;
457        Examine(adv->getExpr());
458    }
459    else if (MatchStar * mstar = dyn_cast<MatchStar>(expr)) {
460        ++mCarryQueueSize;
461        Examine(mstar->getMarker());
462        Examine(mstar->getCharClass());
463    }
464    else if (ScanThru * sthru = dyn_cast<ScanThru>(expr)) {
465        ++mCarryQueueSize;
466        Examine(sthru->getScanFrom());
467        Examine(sthru->getScanThru());
468    }
469}
470
471void PabloCompiler::DeclareCallFunctions() {
472    for (auto mapping : mCalleeMap) {
473        const String * callee = mapping.first;
474        //std::cerr << callee->str() << " to be declared\n";
475        auto ei = mExternalMap.find(callee->str());
476        if (ei != mExternalMap.end()) {
477            void * fn_ptr = ei->second;
478            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
479            Value * externalValue = mMod->getOrInsertFunction(callee->str(), mBitBlockType, mBasisBitsInputPtr, NULL);
480            if (LLVM_UNLIKELY(externalValue == nullptr)) {
481                throw std::runtime_error("Could not create static method call for external function \"" + callee->str() + "\"");
482            }
483            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
484            mCalleeMap[callee] = externalValue;
485        }
486        else {
487            throw std::runtime_error("External function \"" + callee->str() + "\" not installed");
488        }
489    }
490}
491
492void PabloCompiler::compileStatements(const StatementList & stmts) {
493    for (const Statement * statement : stmts) {
494        compileStatement(statement);
495    }
496}
497
498void PabloCompiler::compileStatement(const Statement * stmt)
499{
500    IRBuilder<> b(mBasicBlock);
501    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
502        Value * expr = compileExpression(assign->getExpr());
503        mMarkerMap[assign] = expr;
504        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
505            SetOutputValue(expr, assign->getOutputIndex());
506        }
507    }
508    else if (const Next * next = dyn_cast<const Next>(stmt)) {
509        Value * expr = compileExpression(next->getExpr());
510        mMarkerMap[next->getInitial()] = expr;
511    }
512    else if (const If * ifStatement = dyn_cast<const If>(stmt))
513    {
514        //
515        //  The If-ElseZero stmt:
516        //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
517        //  If the value of the predicate is nonzero, then determine the values of variables
518        //  <var>* by executing the given statements.  Otherwise, the value of the
519        //  variables are all zero.  Requirements: (a) no variable that is defined within
520        //  the body of the if may be accessed outside unless it is explicitly
521        //  listed in the variable list, (b) every variable in the defined list receives
522        //  a value within the body, and (c) the logical consequence of executing
523        //  the statements in the event that the predicate is zero is that the
524        //  values of all defined variables indeed work out to be 0.
525        //
526        //  Simple Implementation with Phi nodes:  a phi node in the if exit block
527        //  is inserted for each variable in the defined variable list.  It receives
528        //  a zero value from the ifentry block and the defined value from the if
529        //  body.
530        //
531
532        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
533        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
534        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
535       
536        const auto baseCarryQueueIdx = mCarryQueueIdx;
537        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
538       
539        int ifCarryCount = ifStatement->getInclusiveCarryCount();
540        int ifAdvanceCount = ifStatement->getInclusiveAdvanceCount();
541        //  Carry/Advance queue strategy.   
542        //  If there are any carries or advances at any nesting level within the
543        //  if statement, then the statement must be executed.   A "summary"
544        //  carryover variable is determined for this purpose, consisting of the
545        //  or of all of the carry and advance variables within the if.
546        //  This variable is determined as follows.
547        //  (a)  If the CarryCount and AdvanceCount are both 0, there is no summary variable.
548        //  (b)  If the CarryCount is 1 and the AdvanceCount is 0, then the summary
549        //       carryover variable is just the single carry queue entry.
550        //  (c)  If the CarryCount is 0 and the AdvanceCount is 1, then the summary
551        //       carryover variable is just the advance carry queue entry.
552        //  (d)  Otherwise, an additional advance queue entry is created for the
553        //       summary variable.
554        //  Note that the test for cases (c) and (d) may be combined: the summary carryover
555        //  variable is just last advance queue entry.
556        //
557       
558        IRBuilder<> b_entry(ifEntryBlock);
559        mBasicBlock = ifEntryBlock;
560        Value* if_test_value = compileExpression(ifStatement->getCondition());
561       
562        if ((ifCarryCount == 1) && (ifAdvanceCount == 0)) {
563            Value* last_if_pending_carries = genCarryInLoad(baseCarryQueueIdx);
564            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_carries);
565        }
566        else if ((ifCarryCount > 0) || (ifAdvanceCount > 0)) {
567            Value* last_if_pending_advances = genAdvanceInLoad(baseAdvanceQueueIdx + ifAdvanceCount - 1);
568            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_advances);
569        }
570        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
571
572        // Entry processing is complete, now handle the body of the if.
573        mBasicBlock = ifBodyBlock;
574        compileStatements(ifStatement->getBody());
575
576        // If we compiled an If or a While statement, we won't be in the same basic block as before.
577        // Create the branch from the current basic block to the end block.
578        IRBuilder<> bIfBody(mBasicBlock);
579        // After the recursive compile, now insert the code to compute the summary
580        // carry over variable.
581       
582        if ((ifCarryCount + ifAdvanceCount) > 1) {
583            // A summary variable is needed.
584
585            Value * carry_summary = mZeroInitializer;
586            for (int c = baseCarryQueueIdx; c < baseCarryQueueIdx + ifCarryCount; c++) {
587                int s = mCarryQueueSummaryIdx[c];
588                if (s == -1) {
589                    Value* carryq_value = mCarryQueueVector[c];
590                    if (carry_summary == mZeroInitializer) {
591                        carry_summary = carryq_value;
592                    }
593                    else {
594                        carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
595                    }
596                    mCarryQueueSummaryIdx[c] = mAdvanceQueueIdx;
597                }
598            }
599            // Note that the limit in the following uses -1, because
600            // last entry of the advance queue is for the summary variable.
601            for (int c = baseAdvanceQueueIdx; c < baseAdvanceQueueIdx + ifAdvanceCount - 1; c++) {
602                int s = mAdvanceQueueSummaryIdx[c];
603                if (s == -1 ) {
604                    Value* advance_q_value = mAdvanceQueueVector[c];
605                    if (carry_summary == mZeroInitializer) {
606                        carry_summary = advance_q_value;
607                    }
608                    else {
609                        carry_summary = bIfBody.CreateOr(carry_summary, advance_q_value);
610                    }
611                    mAdvanceQueueSummaryIdx[c] = mAdvanceQueueIdx;
612                }
613            }
614            genAdvanceOutStore(carry_summary, mAdvanceQueueIdx++);
615        }
616        bIfBody.CreateBr(ifEndBlock);
617        //End Block
618        IRBuilder<> bEnd(ifEndBlock);
619        for (const Assign * a : ifStatement->getDefined()) {
620            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, a->getName()->str());
621            auto f = mMarkerMap.find(a);
622            assert (f != mMarkerMap.end());
623            phi->addIncoming(mZeroInitializer, ifEntryBlock);
624            phi->addIncoming(f->second, mBasicBlock);
625            mMarkerMap[a] = phi;
626        }
627        // Create the phi Node for the summary variable.
628        if (ifAdvanceCount >= 1) {
629            // final AdvanceQ entry is summary variable.
630            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
631            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
632            summary_phi->addIncoming(mAdvanceQueueVector[mAdvanceQueueIdx-1], mBasicBlock);
633            mAdvanceQueueVector[mAdvanceQueueIdx-1] = summary_phi;
634        }
635        else if (ifCarryCount == 1) {
636            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
637            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
638            summary_phi->addIncoming(mCarryQueueVector[baseCarryQueueIdx], mBasicBlock);
639            mCarryQueueVector[baseCarryQueueIdx] = summary_phi;
640        }
641       
642        // Set the basic block to the new end block
643        mBasicBlock = ifEndBlock;
644    }
645    else if (const While * whileStatement = dyn_cast<const While>(stmt))
646    {
647        const auto baseCarryQueueIdx = mCarryQueueIdx;
648        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
649        if (mNestingDepth == 0) {
650            for (auto i = 0; i != whileStatement->getInclusiveCarryCount(); ++i) {
651                genCarryInLoad(baseCarryQueueIdx + i);
652            }
653            for (auto i = 0; i != whileStatement->getInclusiveAdvanceCount(); ++i) {
654                genAdvanceInLoad(baseAdvanceQueueIdx + i);
655            }
656        }
657
658        SmallVector<const Next*, 4> nextNodes;
659        for (const PabloAST * node : whileStatement->getBody()) {
660            if (isa<Next>(node)) {
661                nextNodes.push_back(cast<Next>(node));
662            }
663        }
664
665        // Compile the initial iteration statements; the calls to genCarryOutStore will update the
666        // mCarryQueueVector with the appropriate values. Although we're not actually entering a new basic
667        // block yet, increment the nesting depth so that any calls to genCarryInLoad or genCarryOutStore
668        // will refer to the previous value.
669
670        ++mNestingDepth;
671
672        compileStatements(whileStatement->getBody());
673
674        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
675        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
676        // but works for now.
677        mCarryQueueIdx = baseCarryQueueIdx;
678        mAdvanceQueueIdx = baseAdvanceQueueIdx;
679
680        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
681        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
682        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
683
684        // Note: compileStatements may update the mBasicBlock pointer if the body contains nested loops. It
685        // may not be same one that we entered the function with.
686        IRBuilder<> bEntry(mBasicBlock);
687        bEntry.CreateBr(whileCondBlock);
688
689        // CONDITION BLOCK
690        IRBuilder<> bCond(whileCondBlock);
691        // generate phi nodes for any carry propogating instruction
692        int whileCarryCount = whileStatement->getInclusiveCarryCount();
693        int whileAdvanceCount = whileStatement->getInclusiveAdvanceCount();
694        std::vector<PHINode*> phiNodes(whileCarryCount + whileAdvanceCount + nextNodes.size());
695        unsigned index = 0;
696        for (index = 0; index != whileCarryCount; ++index) {
697            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
698            phi->addIncoming(mCarryQueueVector[baseCarryQueueIdx + index], mBasicBlock);
699            mCarryQueueVector[baseCarryQueueIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
700            phiNodes[index] = phi;
701        }
702        for (int i = 0; i != whileAdvanceCount; ++i) {
703            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
704            phi->addIncoming(mAdvanceQueueVector[baseAdvanceQueueIdx + i], mBasicBlock);
705            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = mZeroInitializer; // (use phi for multi-carry mode.)
706            phiNodes[index++] = phi;
707        }
708        // and for any Next nodes in the loop body
709        for (const Next * n : nextNodes) {
710            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->str());
711            auto f = mMarkerMap.find(n->getInitial());
712            assert (f != mMarkerMap.end());
713            phi->addIncoming(f->second, mBasicBlock);
714            mMarkerMap[n->getInitial()] = phi;
715            phiNodes[index++] = phi;
716        }
717
718        mBasicBlock = whileCondBlock;
719        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
720
721        // BODY BLOCK
722        mBasicBlock = whileBodyBlock;
723        compileStatements(whileStatement->getBody());
724        // update phi nodes for any carry-propagating instruction
725        IRBuilder<> bWhileBody(mBasicBlock);
726        for (index = 0; index != whileStatement->getInclusiveCarryCount(); ++index) {
727            Value * carryOut = bWhileBody.CreateOr(phiNodes[index], mCarryQueueVector[baseCarryQueueIdx + index]);
728            PHINode * phi = phiNodes[index];
729            phi->addIncoming(carryOut, mBasicBlock);
730            mCarryQueueVector[baseCarryQueueIdx + index] = phi;
731        }
732        for (int i = 0; i != whileAdvanceCount; ++i) {
733            Value * advOut = bWhileBody.CreateOr(phiNodes[index], mAdvanceQueueVector[baseAdvanceQueueIdx + i]);
734            PHINode * phi = phiNodes[index++];
735            phi->addIncoming(advOut, mBasicBlock);
736            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = phi;
737        }
738        // and for any Next nodes in the loop body
739        for (const Next * n : nextNodes) {
740            auto f = mMarkerMap.find(n->getInitial());
741            assert (f != mMarkerMap.end());
742            PHINode * phi = phiNodes[index++];
743            phi->addIncoming(f->second, mBasicBlock);
744            mMarkerMap[n->getInitial()] = phi;
745        }
746
747        bWhileBody.CreateBr(whileCondBlock);
748
749        // EXIT BLOCK
750        mBasicBlock = whileEndBlock;
751        if (--mNestingDepth == 0) {
752            for (index = 0; index != whileCarryCount; ++index) {
753                genCarryOutStore(phiNodes[index], baseCarryQueueIdx + index);
754            }
755            for (index = 0; index != whileAdvanceCount; ++index) {
756                genAdvanceOutStore(phiNodes[whileCarryCount + index], baseAdvanceQueueIdx + index);
757            }
758        }
759    }
760    else if (const Call* call = dyn_cast<Call>(stmt)) {
761        //Call the callee once and store the result in the marker map.
762        auto mi = mMarkerMap.find(call);
763        if (mi == mMarkerMap.end()) {
764            auto ci = mCalleeMap.find(call->getCallee());
765            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
766                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->str() + "\"");
767            }
768            mi = mMarkerMap.insert(std::make_pair(call, b.CreateCall(ci->second, mBasisBitsAddr))).first;
769        }
770        // return mi->second;
771    }
772    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
773        Value * expr = b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
774        mMarkerMap[pablo_and] = expr;
775        // return expr;
776    }
777    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
778        Value * expr = b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
779        mMarkerMap[pablo_or] = expr;
780        // return expr;
781    }
782    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
783        Value * expr = b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
784        mMarkerMap[pablo_xor] = expr;
785        // return expr;
786    }
787    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
788        Value* ifMask = compileExpression(sel->getCondition());
789        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
790        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
791        Value * expr = b.CreateOr(ifTrue, ifFalse);
792        mMarkerMap[sel] = expr;
793        // return expr;
794    }
795    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
796        Value * expr = genNot(compileExpression(pablo_not->getExpr()));
797        mMarkerMap[pablo_not] = expr;
798        // return expr;
799    }
800    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
801        Value* strm_value = compileExpression(adv->getExpr());
802        int shift = adv->getAdvanceAmount();
803        Value * expr = genAdvanceWithCarry(strm_value, shift);
804        mMarkerMap[adv] = expr;
805        // return expr;
806    }
807    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt))
808    {
809        Value * marker = compileExpression(mstar->getMarker());
810        Value * cc = compileExpression(mstar->getCharClass());
811        Value * marker_and_cc = b.CreateAnd(marker, cc);
812        Value * expr = b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc), cc), marker, "matchstar");
813        mMarkerMap[mstar] = expr;
814        // return expr;
815    }
816    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt))
817    {
818        Value * marker_expr = compileExpression(sthru->getScanFrom());
819        Value * cc_expr = compileExpression(sthru->getScanThru());
820        Value * expr = b.CreateAnd(genAddWithCarry(marker_expr, cc_expr), genNot(cc_expr), "scanthru");
821        mMarkerMap[sthru] = expr;
822        // return expr;
823    }
824    else {
825        PabloPrinter::print(stmt, std::cerr);
826        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
827    }
828}
829
830Value * PabloCompiler::compileExpression(const PabloAST * expr) {
831    if (isa<Ones>(expr)) {
832        return mOneInitializer;
833    }
834    else if (isa<Zeroes>(expr)) {
835        return mZeroInitializer;
836    }
837    else if (const Next * next = dyn_cast<Next>(expr)) {
838        expr = next->getInitial();
839    }
840    auto f = mMarkerMap.find(expr);
841    if (f == mMarkerMap.end()) {
842        throw std::runtime_error("Unrecognized Pablo expression type; can't compile.");
843    }
844    return f->second;
845}
846
847#ifdef USE_UADD_OVERFLOW
848#ifdef USE_TWO_UADD_OVERFLOW
849PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
850    std::vector<Value*> struct_res_params;
851    struct_res_params.push_back(int128_e1);
852    struct_res_params.push_back(int128_e2);
853    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflow, struct_res_params, "uadd_overflow_res", mBasicBlock);
854    struct_res->setCallingConv(CallingConv::C);
855    struct_res->setTailCall(false);
856    AttributeSet struct_res_PAL;
857    struct_res->setAttributes(struct_res_PAL);
858
859    SumWithOverflowPack ret;
860
861    std::vector<unsigned> int128_sum_indices;
862    int128_sum_indices.push_back(0);
863    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
864
865    std::vector<unsigned> int1_obit_indices;
866    int1_obit_indices.push_back(1);
867    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
868
869    return ret;
870}
871#else
872PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
873    std::vector<Value*> struct_res_params;
874    struct_res_params.push_back(int128_e1);
875    struct_res_params.push_back(int128_e2);
876    struct_res_params.push_back(int1_cin);
877    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflowCarryin, struct_res_params, "uadd_overflow_res", mBasicBlock);
878    struct_res->setCallingConv(CallingConv::C);
879    struct_res->setTailCall(false);
880    AttributeSet struct_res_PAL;
881    struct_res->setAttributes(struct_res_PAL);
882
883    SumWithOverflowPack ret;
884
885    std::vector<unsigned> int128_sum_indices;
886    int128_sum_indices.push_back(0);
887    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
888
889    std::vector<unsigned> int1_obit_indices;
890    int1_obit_indices.push_back(1);
891    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
892
893    return ret;
894}
895#endif
896#endif
897
898Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2) {
899    IRBuilder<> b(mBasicBlock);
900
901    //CarryQ - carry in.
902    const int carryIdx = mCarryQueueIdx++;
903    Value* carryq_value = genCarryInLoad(carryIdx);
904#ifdef USE_TWO_UADD_OVERFLOW
905    //This is the ideal implementation, which uses two uadd.with.overflow
906    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
907    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
908    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
909    CastInst* int128_carryq_value = new BitCastInst(carryq_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "carryq_128", mBasicBlock);
910
911    SumWithOverflowPack sumpack0, sumpack1;
912
913    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
914    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
915
916    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
917    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
918
919    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
920    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
921    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
922    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
923    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
924
925#elif defined USE_UADD_OVERFLOW
926    //use llvm.uadd.with.overflow.i128 or i256
927    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
928    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
929
930    //get i1 carryin from iBLOCK_SIZE
931    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
932    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
933    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
934
935    SumWithOverflowPack sumpack0;
936    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
937    Value* obit = sumpack0.obit;
938    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
939
940    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
941    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
942    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
943    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
944#elif (BLOCK_SIZE == 128)
945    //calculate carry through logical ops
946    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
947    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
948    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
949    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
950    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
951    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
952
953    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
954    Value* carry_out = genShiftHighbitToLow(b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))), "carry_out");
955#else
956    //BLOCK_SIZE == 256, there is no other implementation
957    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
958#endif //USE_TWO_UADD_OVERFLOW
959
960    genCarryOutStore(carry_out, carryIdx);
961    return sum;
962}
963
964Value* PabloCompiler::genCarryInLoad(const unsigned index) {
965    assert (index < mCarryQueueVector.size());
966    if (mNestingDepth == 0) {
967        IRBuilder<> b(mBasicBlock);
968        mCarryQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
969    }
970    return mCarryQueueVector[index];
971}
972
973void PabloCompiler::genCarryOutStore(Value* carryOut, const unsigned index ) {
974    assert (carryOut);
975    assert (index < mCarryQueueVector.size());
976    if (mNestingDepth == 0) {
977        IRBuilder<> b(mBasicBlock);
978        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
979    }
980    mCarryQueueSummaryIdx[index] = -1;
981    mCarryQueueVector[index] = carryOut;
982}
983
984Value* PabloCompiler::genAdvanceInLoad(const unsigned index) {
985    assert (index < mAdvanceQueueVector.size());
986    if (mNestingDepth == 0) {
987        IRBuilder<> b(mBasicBlock);
988        mAdvanceQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
989    }
990    return mAdvanceQueueVector[index];
991}
992
993void PabloCompiler::genAdvanceOutStore(Value* advanceOut, const unsigned index ) {
994    assert (advanceOut);
995    assert (index < mAdvanceQueueVector.size());
996    if (mNestingDepth == 0) {
997        IRBuilder<> b(mBasicBlock);
998        b.CreateAlignedStore(advanceOut, b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
999    }
1000    mAdvanceQueueSummaryIdx[index] = -1;
1001    mAdvanceQueueVector[index] = advanceOut;
1002}
1003
1004inline Value* PabloCompiler::genBitBlockAny(Value* test) {
1005    IRBuilder<> b(mBasicBlock);
1006    Value* cast_marker_value_1 = b.CreateBitCast(test, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1007    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(IntegerType::get(mMod->getContext(), BLOCK_SIZE), 0));
1008}
1009
1010Value* PabloCompiler::genShiftHighbitToLow(Value* e, const Twine &namehint) {
1011    IRBuilder<> b(mBasicBlock);
1012    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1013    return b.CreateBitCast(b.CreateLShr(i128_val, BLOCK_SIZE - 1, namehint), mBitBlockType);
1014}
1015
1016Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
1017    IRBuilder<> b(mBasicBlock);
1018    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1019    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
1020}
1021
1022inline Value* PabloCompiler::genNot(Value* expr) {
1023    IRBuilder<> b(mBasicBlock);
1024    return b.CreateXor(expr, mOneInitializer, "not");
1025}
1026
1027Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount) {
1028
1029    IRBuilder<> b(mBasicBlock);
1030
1031    const auto advanceIdx = mAdvanceQueueIdx++;
1032#ifdef USE_LONG_INTEGER_SHIFT
1033    Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(advanceIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1034    Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1035    Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
1036    Value* result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1037    genAdvanceOutStore(strm_value, advanceIdx);
1038
1039    return result_value;
1040#elif (BLOCK_SIZE == 128)
1041    if (shift_amount == 1) {
1042        Value* advanceq_value = genShiftHighbitToLow(genAdvanceInLoad(advanceIdx));
1043        Value* srli_1_value = b.CreateLShr(strm_value, 63);
1044        Value* packed_shuffle;
1045        Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
1046        Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
1047        packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
1048
1049        Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
1050        Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
1051
1052        Value* shl_value = b.CreateShl(strm_value, const_packed_2);
1053        Value* result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
1054
1055        //CarryQ - carry out:
1056        genAdvanceOutStore(strm_value, advanceIdx);
1057
1058        return result_value;
1059    }
1060    else if (shift_amount < 64) {
1061        // This is the preferred logic, but is too slow for the general case.
1062        // We need to speed up our custom LLVM for this code.
1063        Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(advanceIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1064        Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1065        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
1066        Value* result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1067        genAdvanceOutStore(strm_value, advanceIdx);
1068
1069        return result_value;
1070    }
1071    else {//if (shift_amount >= 64) {
1072        throw std::runtime_error("Shift amount >= 64 in Advance is currently unsupported.");
1073    }
1074#else
1075    //BLOCK_SIZE == 256
1076    static_assert(false, "Advance with carry on 256-bit bitblock requires long integer shifts (USE_LONG_INTEGER_SHIFT).");
1077#endif //USE_LONG_INTEGER_SHIFT
1078}
1079
1080void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
1081    IRBuilder<> b(mBasicBlock);
1082    if (marker->getType()->isPointerTy()) {
1083        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
1084    }
1085    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
1086    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
1087    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
1088}
1089
1090}
Note: See TracBrowser for help on using the repository browser.