source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4382

Last change on this file since 4382 was 4382, checked in by cameron, 5 years ago

Create ability to install arbitrary external functions

File size: 43.6 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7/*
8 *  Copyright (c) 2014 International Characters.
9 *  This software is licensed to the public under the Open Software License 3.0.
10 *  icgrep is a trademark of International Characters.
11 */
12
13#include <pablo/pablo_compiler.h>
14#include <pablo/codegenstate.h>
15#include <pablo/printer_pablos.h>
16#include <cc/cc_namemap.hpp>
17#include <re/re_name.h>
18#include <stdexcept>
19#include <include/simd-lib/bitblock.hpp>
20
21#ifdef USE_LLVM_3_4
22#include <llvm/Analysis/Verifier.h>
23#include <llvm/Assembly/PrintModulePass.h>
24#include <llvm/Linker.h>
25#endif
26#ifdef USE_LLVM_3_5
27#include <llvm/IR/Verifier.h>
28#endif
29
30#include <llvm/Pass.h>
31#include <llvm/PassManager.h>
32#include <llvm/ADT/SmallVector.h>
33#include <llvm/Analysis/Passes.h>
34#include <llvm/IR/BasicBlock.h>
35#include <llvm/IR/CallingConv.h>
36#include <llvm/IR/Constants.h>
37#include <llvm/IR/DataLayout.h>
38#include <llvm/IR/DerivedTypes.h>
39#include <llvm/IR/Function.h>
40#include <llvm/IR/GlobalVariable.h>
41#include <llvm/IR/InlineAsm.h>
42#include <llvm/IR/Instructions.h>
43#include <llvm/IR/LLVMContext.h>
44#include <llvm/IR/Module.h>
45#include <llvm/Support/FormattedStream.h>
46#include <llvm/Support/MathExtras.h>
47#include <llvm/Support/Casting.h>
48#include <llvm/Support/Compiler.h>
49#include <llvm/Support/Debug.h>
50#include <llvm/Support/TargetSelect.h>
51#include <llvm/Support/Host.h>
52#include <llvm/Transforms/Scalar.h>
53#include <llvm/ExecutionEngine/ExecutionEngine.h>
54#include <llvm/ExecutionEngine/MCJIT.h>
55#include <llvm/IRReader/IRReader.h>
56#include <llvm/Bitcode/ReaderWriter.h>
57#include <llvm/Support/MemoryBuffer.h>
58#include <llvm/IR/IRBuilder.h>
59
60#include "llvm/Support/CommandLine.h"
61
62
// Command-line option category under which the LLVM IR dump flag below is registered.
63cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
// Boolean flag "dump-generated-IR" (initialised to false). PabloCompiler::compile()
// dumps the generated module when this flag is set.
64static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
65
// C-linkage wrapper that forwards a BitBlock to print_register<BitBlock> with an
// empty label string. The commented-out code at the top of
// PabloCompiler::DeclareFunctions() shows how it can be mapped into JIT'd code
// to print a register while debugging.
66extern "C" {
67  void wrapped_print_register(BitBlock bit_block) {
68      print_register<BitBlock>("", bit_block);
69  }
70}
71
72namespace pablo {
73
// Constructs a compiler for the given basis-bit variables.
//
// Allocates the LLVM module "icgrep" in the global context, defines the bit
// block type as a vector of (BLOCK_SIZE / 64) 64-bit integers, zeroes all queue
// indices/sizes and nesting counters, and then declares the types and functions
// used by the generated code (DefineTypes / DeclareFunctions).
//
// basisBits: the variables whose names are bound to the fields of the
//            basis-bits input structure when compile() runs.
74PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
75: mBasisBits(basisBits)
76, mMod(new Module("icgrep", getGlobalContext()))
77, mBasicBlock(nullptr)
78, mExecutionEngine(nullptr)
79, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
80, mBasisBitsInputPtr(nullptr)
81, mCarryQueueIdx(0)
82, mCarryQueuePtr(nullptr)
83, mNestingDepth(0)
84, mCarryQueueSize(0)
85, mAdvanceQueueIdx(0)
86, mAdvanceQueuePtr(nullptr)
87, mAdvanceQueueSize(0)
88, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
89, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
90, mFunctionType(nullptr)
91, mFunction(nullptr)
92, mBasisBitsAddr(nullptr)
93, mOutputAddrPtr(nullptr)
94, mMaxNestingDepth(0)
95{
96    // Initialize the native target together with its asm printer and asm parser; the JIT execution engine itself is created later, in compile().
97    InitializeNativeTarget();
98    InitializeNativeTargetAsmPrinter();
99    InitializeNativeTargetAsmParser();
      // DefineTypes() must run first: DeclareFunctions() creates process_block
      // from the mFunctionType that DefineTypes() builds.
100    DefineTypes();
101    DeclareFunctions();
102}
103
// Destroys the compiler, deleting the LLVM module allocated in the constructor.
// NOTE(review): compile() hands mMod to an EngineBuilder and the resulting
// mExecutionEngine is not released here -- confirm which object is expected to
// own the module and the engine.
104PabloCompiler::~PabloCompiler()
105{
106    delete mMod;
107}
108   
109void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
110    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
111}
112
113
114LLVM_Gen_RetVal PabloCompiler::compile(PabloBlock & pb)
115{
116    mNestingDepth = 0;
117    mMaxNestingDepth = 0;
118    mCarryQueueSize = 0;
119    mAdvanceQueueSize = 0;
120    Examine(pb.statements());
121    mCarryQueueVector.resize(mCarryQueueSize);
122    mAdvanceQueueVector.resize(mAdvanceQueueSize);
123    std::string errMessage;
124    EngineBuilder builder(mMod);
125    builder.setErrorStr(&errMessage);
126    builder.setMCPU(sys::getHostCPUName());
127    builder.setUseMCJIT(true);
128    builder.setOptLevel(mMaxNestingDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
129    mExecutionEngine = builder.create();
130    if (mExecutionEngine == nullptr) {
131        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
132    }
133
134    if (!mCalleeMap.empty()) {
135        DeclareCallFunctions();
136    }
137
138    Function::arg_iterator args = mFunction->arg_begin();
139    mBasisBitsAddr = args++;
140    mBasisBitsAddr->setName("basis_bits");
141    mCarryQueuePtr = args++;
142    mCarryQueuePtr->setName("carry_q");
143    mAdvanceQueuePtr = args++;
144    mAdvanceQueuePtr->setName("advance_q");
145    mOutputAddrPtr = args++;
146    mOutputAddrPtr->setName("output");
147
148    //Create the carry and advance queues.
149    mCarryQueueIdx = 0;
150    mAdvanceQueueIdx = 0;
151    mNestingDepth = 0;
152    mMaxNestingDepth = 0;
153    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
154
155    //The basis bits structure
156    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
157        IRBuilder<> b(mBasicBlock);
158        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
159        const String * const name = mBasisBits[i]->getName();
160        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
161        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, name->str());
162        mMarkerMap.insert(std::make_pair(name, basisBit));
163    }
164
165    //Generate the IR instructions for the function.
166    compileStatements(pb.statements());
167
168    assert (mCarryQueueIdx == mCarryQueueSize);
169    assert (mAdvanceQueueIdx == mAdvanceQueueSize);
170    assert (mNestingDepth == 0);
171    //Terminate the block
172    ReturnInst::Create(mMod->getContext(), mBasicBlock);
173
174    //Display the IR that has been generated by this module.
175    if (DumpGeneratedIR) {
176      mMod->dump();
177    }
178
179
180
181    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
182    #ifdef USE_LLVM_3_5
183    verifyModule(*mMod, &dbgs());
184    #endif
185    #ifdef USE_LLVM_3_4
186    verifyModule(*mMod, PrintMessageAction);
187    #endif
188
189    //Use the pass manager to run optimizations on the function.
190    FunctionPassManager fpm(mMod);
191 #ifdef USE_LLVM_3_5
192    mMod->setDataLayout(mExecutionEngine->getDataLayout());
193    // Set up the optimizer pipeline.  Start with registering info about how the target lays out data structures.
194    fpm.add(new DataLayoutPass(mMod));
195#endif
196#ifdef USE_LLVM_3_4
197    fpm.add(new DataLayout(*mExecutionEngine->getDataLayout()));
198#endif
199    fpm.doInitialization();
200    fpm.run(*mFunction);
201
202    mExecutionEngine->finalizeObject();
203
204    LLVM_Gen_RetVal retVal;
205    //Return the required size of the carry queue and a pointer to the process_block function.
206    retVal.carry_q_size = mCarryQueueVector.size();
207    retVal.advance_q_size = mAdvanceQueueVector.size();
208    retVal.process_block_fptr = mExecutionEngine->getPointerToFunction(mFunction);
209
210    return retVal;
211}
212
213void PabloCompiler::DefineTypes()
214{
215    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
216    if (structBasisBits == nullptr) {
217        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
218    }
219    std::vector<Type*>StructTy_struct_Basis_bits_fields;
220    for (int i = 0; i != mBasisBits.size(); i++)
221    {
222        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
223    }
224    if (structBasisBits->isOpaque()) {
225        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
226    }
227    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
228
229    std::vector<Type*>functionTypeArgs;
230    functionTypeArgs.push_back(mBasisBitsInputPtr);
231
232    //The carry q array.
233    //A pointer to the BitBlock vector.
234    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
235    // Advance q array
236    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
237
238    //The output structure.
239    StructType * outputStruct = mMod->getTypeByName("struct.Output");
240    if (!outputStruct) {
241        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
242    }
243    if (outputStruct->isOpaque()) {
244        std::vector<Type*>fields;
245        fields.push_back(mBitBlockType);
246        fields.push_back(mBitBlockType);
247        outputStruct->setBody(fields, /*isPacked=*/false);
248    }
249    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
250
251    //The &output parameter.
252    functionTypeArgs.push_back(outputStructPtr);
253
254    mFunctionType = FunctionType::get(
255     /*Result=*/Type::getVoidTy(mMod->getContext()),
256     /*Params=*/functionTypeArgs,
257     /*isVarArg=*/false);
258}
259
260void PabloCompiler::DeclareFunctions()
261{
262    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
263    //mFunc_print_register = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), mXi64Vect, NULL);
264    //mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mFunc_print_register), (void *)&wrapped_print_register);
265    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
266
267#ifdef USE_UADD_OVERFLOW
268#ifdef USE_TWO_UADD_OVERFLOW
269    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
270    std::vector<Type*>StructTy_0_fields;
271    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
272    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
273    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
274
275    std::vector<Type*>FuncTy_1_args;
276    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
277    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
278    FunctionType* FuncTy_1 = FunctionType::get(
279                                              /*Result=*/StructTy_0,
280                                              /*Params=*/FuncTy_1_args,
281                                              /*isVarArg=*/false);
282
283    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
284                                              std::to_string(BLOCK_SIZE));
285    if (!mFunctionUaddOverflow) {
286        mFunctionUaddOverflow= Function::Create(
287          /*Type=*/ FuncTy_1,
288          /*Linkage=*/ GlobalValue::ExternalLinkage,
289          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
290        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
291    }
292    AttributeSet mFunctionUaddOverflowPAL;
293    {
294        SmallVector<AttributeSet, 4> Attrs;
295        AttributeSet PAS;
296        {
297          AttrBuilder B;
298          B.addAttribute(Attribute::NoUnwind);
299          B.addAttribute(Attribute::ReadNone);
300          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
301        }
302
303        Attrs.push_back(PAS);
304        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
305    }
306    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
307#else
308    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
309    std::vector<Type*>StructTy_0_fields;
310    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
311    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
312    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
313
314    std::vector<Type*>FuncTy_1_args;
315    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
316    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
317    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
318    FunctionType* FuncTy_1 = FunctionType::get(
319                                              /*Result=*/StructTy_0,
320                                              /*Params=*/FuncTy_1_args,
321                                              /*isVarArg=*/false);
322
323    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
324                                              std::to_string(BLOCK_SIZE));
325    if (!mFunctionUaddOverflowCarryin) {
326        mFunctionUaddOverflowCarryin = Function::Create(
327          /*Type=*/ FuncTy_1,
328          /*Linkage=*/ GlobalValue::ExternalLinkage,
329          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
330        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
331    }
332    AttributeSet mFunctionUaddOverflowCarryinPAL;
333    {
334        SmallVector<AttributeSet, 4> Attrs;
335        AttributeSet PAS;
336        {
337          AttrBuilder B;
338          B.addAttribute(Attribute::NoUnwind);
339          B.addAttribute(Attribute::ReadNone);
340          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
341        }
342
343        Attrs.push_back(PAS);
344        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
345    }
346    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
347#endif
348#endif
349
350    //Starts on process_block
351    SmallVector<AttributeSet, 5> Attrs;
352    AttributeSet PAS;
353    {
354        AttrBuilder B;
355        B.addAttribute(Attribute::ReadOnly);
356        B.addAttribute(Attribute::NoCapture);
357        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
358    }
359    Attrs.push_back(PAS);
360    {
361        AttrBuilder B;
362        B.addAttribute(Attribute::NoCapture);
363        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
364    }
365    Attrs.push_back(PAS);
366    {
367        AttrBuilder B;
368        B.addAttribute(Attribute::NoCapture);
369        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
370    }
371    Attrs.push_back(PAS);
372    {
373        AttrBuilder B;
374        B.addAttribute(Attribute::NoCapture);
375        PAS = AttributeSet::get(mMod->getContext(), 4U, B);
376    }
377    Attrs.push_back(PAS);
378    {
379        AttrBuilder B;
380        B.addAttribute(Attribute::NoUnwind);
381        B.addAttribute(Attribute::UWTable);
382        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
383    }
384    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
385
386    //Create the function that will be generated.
387    mFunction = mMod->getFunction("process_block");
388    if (!mFunction) {
389        mFunction = Function::Create(
390            /*Type=*/mFunctionType,
391            /*Linkage=*/GlobalValue::ExternalLinkage,
392            /*Name=*/"process_block", mMod);
393        mFunction->setCallingConv(CallingConv::C);
394    }
395    mFunction->setAttributes(AttrSet);
396}
397
398void PabloCompiler::Examine(StatementList & stmts) {
399    for (Statement * stmt : stmts) {
400        if (Assign * assign = dyn_cast<Assign>(stmt)) {
401            Examine(assign->getExpr());
402        }
403        if (Next * next = dyn_cast<Next>(stmt)) {
404            Examine(next->getExpr());
405        }
406        else if (If * ifStatement = dyn_cast<If>(stmt)) {
407            const auto preIfCarryCount = mCarryQueueSize;
408            const auto preIfAdvanceCount = mAdvanceQueueSize;
409            Examine(ifStatement->getCondition());
410            Examine(ifStatement->getBody());
411            int ifCarryCount = mCarryQueueSize - preIfCarryCount;
412            int ifAdvanceCount = mAdvanceQueueSize - preIfAdvanceCount;
413            if ((ifCarryCount + ifAdvanceCount) > 1) {
414              ++mAdvanceQueueSize;
415              ++ifAdvanceCount;
416            }
417            ifStatement->setInclusiveCarryCount(ifCarryCount);
418            ifStatement->setInclusiveAdvanceCount(ifAdvanceCount);
419        }
420        else if (While * whileStatement = dyn_cast<While>(stmt)) {
421            const auto preWhileCarryCount = mCarryQueueSize;
422            const auto preWhileAdvanceCount = mAdvanceQueueSize;
423            Examine(whileStatement->getCondition());
424            mMaxNestingDepth = std::max(mMaxNestingDepth, ++mNestingDepth);
425            Examine(whileStatement->getBody());
426            --mNestingDepth;
427            whileStatement->setInclusiveCarryCount(mCarryQueueSize - preWhileCarryCount);
428            whileStatement->setInclusiveAdvanceCount(mAdvanceQueueSize - preWhileAdvanceCount);
429        }
430    }
431}
432
433void PabloCompiler::Examine(PabloAST *expr)
434{
435    if (Call * call = dyn_cast<Call>(expr)) {
436        mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
437    }
438    else if (And * pablo_and = dyn_cast<And>(expr)) {
439        Examine(pablo_and->getExpr1());
440        Examine(pablo_and->getExpr2());
441    }
442    else if (Or * pablo_or = dyn_cast<Or>(expr)) {
443        Examine(pablo_or->getExpr1());
444        Examine(pablo_or->getExpr2());
445    }
446    else if (Sel * pablo_sel = dyn_cast<Sel>(expr)) {
447        Examine(pablo_sel->getCondition());
448        Examine(pablo_sel->getTrueExpr());
449        Examine(pablo_sel->getFalseExpr());
450    }
451    else if (Not * pablo_not = dyn_cast<Not>(expr)) {
452        Examine(pablo_not->getExpr());
453    }
454    else if (Advance * adv = dyn_cast<Advance>(expr)) {
455        ++mAdvanceQueueSize;
456        Examine(adv->getExpr());
457    }
458    else if (MatchStar * mstar = dyn_cast<MatchStar>(expr)) {
459        ++mCarryQueueSize;
460        Examine(mstar->getMarker());
461        Examine(mstar->getCharClass());
462    }
463    else if (ScanThru * sthru = dyn_cast<ScanThru>(expr)) {
464        ++mCarryQueueSize;
465        Examine(sthru->getScanFrom());
466        Examine(sthru->getScanThru());
467    }
468}
469
470void PabloCompiler::DeclareCallFunctions() {
471    for (auto mapping : mCalleeMap) {
472        const String * callee = mapping.first;
473        //std::cerr << callee->str() << " to be declared\n";
474        auto ei = mExternalMap.find(callee->str());
475        if (ei != mExternalMap.end()) {
476            void * fn_ptr = ei->second;
477            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
478            Value * externalValue = mMod->getOrInsertFunction(callee->str(), mBitBlockType, mBasisBitsInputPtr, NULL);
479            if (LLVM_UNLIKELY(externalValue == nullptr)) {
480                throw std::runtime_error("Could not create static method call for external function \"" + callee->str() + "\"");
481            }
482            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
483            mCalleeMap[callee] = externalValue;
484        }
485        else {
486            throw std::runtime_error("External function \"" + callee->str() + "\" not installed");
487        }
488    }
489}
490
491void PabloCompiler::compileStatements(const StatementList & stmts) {
492    for (const PabloAST * statement : stmts) {
493        compileStatement(statement);
494    }
495}
496
// Generates IR for a single Pablo statement, appending to mBasicBlock.
//
//   Assign: compiles the expression, binds the result to the assignment's name
//           in mMarkerMap and, for output assignments, stores it through
//           SetOutputValue().
//   Next:   compiles the expression and rebinds the name in mMarkerMap.
//   If:     emits "if.body" / "if.end" blocks with phi nodes for the defined
//           variables (see the comments in that branch).
//   While:  compiles the body once in the current block, then emits
//           "while.cond" / "while.body" / "while.end" blocks and compiles the
//           body a second time inside the loop.
//
// On return mBasicBlock is the block in which code generation continues; for
// If and While statements that is the newly created end block.
//
// stmt: the statement to compile. Statements of any other kind are ignored.
497void PabloCompiler::compileStatement(const PabloAST * stmt)
498{
499    if (const Assign * assign = dyn_cast<const Assign>(stmt))
500    {
501        Value* expr = compileExpression(assign->getExpr());
502        mMarkerMap[assign->getName()] = expr;
503        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
504            SetOutputValue(expr, assign->getOutputIndex());
505        }
506    }
       // NOTE(review): this is a fresh `if`, not an `else if`, so an Assign
       // statement is still tested against the Next/If/While chain below.
507    if (const Next * next = dyn_cast<const Next>(stmt))
508    {
509        Value* expr = compileExpression(next->getExpr());
510        mMarkerMap[next->getName()] = expr;
511    }
512    else if (const If * ifStatement = dyn_cast<const If>(stmt))
513    //
514    //  The If-ElseZero stmt:
515    //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
516    //  If the value of the predicate is nonzero, then determine the values of variables
517    //  <var>* by executing the given statements.  Otherwise, the value of the
518    //  variables are all zero.  Requirements: (a) no variable that is defined within
519    //  the body of the if may be accessed outside unless it is explicitly 
520    //  listed in the variable list, (b) every variable in the defined list receives
521    //  a value within the body, and (c) the logical consequence of executing
522    //  the statements in the event that the predicate is zero is that the
523    //  values of all defined variables indeed work out to be 0.
524    //
525    //  Simple Implementation with Phi nodes:  a phi node in the if exit block
526    //  is inserted for each variable in the defined variable list.  It receives
527    //  a zero value from the ifentry block and the defined value from the if
528    //  body.
529    //
530    {
531        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
532        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
533        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
534       
           // Queue positions at which this if statement's entries start.
535        const auto baseCarryQueueIdx = mCarryQueueIdx;
536        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
537       
           // Counts recorded on the statement by Examine().
538        int ifCarryCount = ifStatement->getInclusiveCarryCount();
539        int ifAdvanceCount = ifStatement->getInclusiveAdvanceCount();
540        //  Carry/Advance queue strategy.   
541        //  If there are any carries or advances at any nesting level within the
542        //  if statement, then the statement must be executed.   A "summary"
543        //  carryover variable is determined for this purpose, consisting of the
544        //  or of all of the carry and advance variables within the if.
545        //  This variable is determined as follows.
546        //  (a)  If the CarryCount and AdvanceCount are both 0, there is no summary variable.
547        //  (b)  If the CarryCount is 1 and the AdvanceCount is 0, then the summary
548        //       carryover variable is just the single carry queue entry.
549        //  (c)  If the CarryCount is 0 and the AdvanceCount is 1, then the summary
550        //       carryover variable is just the advance carry queue entry.
551        //  (d)  Otherwise, an additional advance queue entry is created for the
552        //       summary variable.
553        //  Note that the test for cases (c) and (d) may be combined: the summary carryover
554        //  variable is just last advance queue entry.
555        //
556       
557        IRBuilder<> b_entry(ifEntryBlock);
558        mBasicBlock = ifEntryBlock;
559        Value* if_test_value = compileExpression(ifStatement->getCondition());
560       
           // OR the summary carryover variable (if any) into the test value.
561        if ((ifCarryCount == 1) && (ifAdvanceCount == 0)) {
562            Value* last_if_pending_carries = genCarryInLoad(baseCarryQueueIdx);
563            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_carries);
564        }
565        else if ((ifCarryCount > 0) || (ifAdvanceCount > 0)) {
566            Value* last_if_pending_advances = genAdvanceInLoad(baseAdvanceQueueIdx + ifAdvanceCount - 1);
567            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_advances);
568        }
           // NOTE(review): the "true" edge goes to the end block here and in the
           // while loop below, so genBitBlockAny() presumably yields true when
           // the block is all zero -- confirm against its definition.
569        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
570
571        // Entry processing is complete, now handle the body of the if.
572       
573        mBasicBlock = ifBodyBlock;
574       
575        compileStatements(ifStatement->getBody());
576
577        // If we compiled an If or a While statement, we won't be in the same basic block as before.
578        // Create the branch from the current basic block to the end block.
579        IRBuilder<> bIfBody(mBasicBlock);
580        // After the recursive compile, now insert the code to compute the summary
581        // carry over variable.
582       
583        if ((ifCarryCount + ifAdvanceCount) > 1) {
584            // A summary variable is needed.
585
586            Value * carry_summary = mZeroInitializer;
587            for (int c = baseCarryQueueIdx; c < baseCarryQueueIdx + ifCarryCount; c++)
588            {
589                Value* carryq_value = genCarryInLoad(c);
590                carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
591            }
592            // Note that the limit in the following uses -1, because
593            // last entry of the advance queue is for the summary variable.
594            for (int c = baseAdvanceQueueIdx; c < baseAdvanceQueueIdx + ifAdvanceCount - 1; c++)
595            {
596                Value* advance_q_value = genAdvanceInLoad(c);
597                carry_summary = bIfBody.CreateOr(advance_q_value, carry_summary);
598            }
               // The summary takes the next advance queue slot and advances the cursor past it.
599            genAdvanceOutStore(carry_summary, mAdvanceQueueIdx++); //baseAdvanceQueueIdx + ifAdvanceCount - 1);
600        }
601        bIfBody.CreateBr(ifEndBlock);
602        //End Block
603        IRBuilder<> bEnd(ifEndBlock);
           // One phi per defined variable: zero when the body was skipped,
           // otherwise the value computed in the body.
604        for (const Assign * a : ifStatement->getDefined()) {
605            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, a->getName()->str());
606            auto f = mMarkerMap.find(a->getName());
607            assert (f != mMarkerMap.end());
608            phi->addIncoming(mZeroInitializer, ifEntryBlock);
609            phi->addIncoming(f->second, mBasicBlock);
610            mMarkerMap[a->getName()] = phi;
611        }
612        // Set the basic block to the new end block
613        mBasicBlock = ifEndBlock;
614    }
615    else if (const While * whileStatement = dyn_cast<const While>(stmt))
616    {
617        const auto baseCarryQueueIdx = mCarryQueueIdx;
618        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
           // Outermost loop only: issue the carry-in/advance-in loads for every
           // queue entry this loop uses before any loop code is generated.
619        if (mNestingDepth == 0) {
620            for (auto i = 0; i != whileStatement->getInclusiveCarryCount(); ++i) {
621                genCarryInLoad(baseCarryQueueIdx + i);
622            }
623            for (auto i = 0; i != whileStatement->getInclusiveAdvanceCount(); ++i) {
624                genAdvanceInLoad(baseAdvanceQueueIdx + i);
625            }
626        }
627
           // Collect the Next statements that appear directly in the loop body;
           // each one gets a phi node in the condition block.
628        SmallVector<const Next*, 4> nextNodes;
629        for (const PabloAST * node : whileStatement->getBody()) {
630            if (isa<Next>(node)) {
631                nextNodes.push_back(cast<Next>(node));
632            }
633        }
634
635        // Compile the initial iteration statements; the calls to genCarryOutStore will update the
636        // mCarryQueueVector with the appropriate values. Although we're not actually entering a new basic
637        // block yet, increment the nesting depth so that any calls to genCarryInLoad or genCarryOutStore
638        // will refer to the previous value.
639
640        ++mNestingDepth;
641
642        compileStatements(whileStatement->getBody());
643
644        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
645        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
646        // but works for now.
647        mCarryQueueIdx = baseCarryQueueIdx;
648        mAdvanceQueueIdx = baseAdvanceQueueIdx;
649
650        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
651        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
652        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
653
654        // Note: compileStatements may update the mBasicBlock pointer if the body contains nested loops. It
655        // may not be same one that we entered the function with.
656        IRBuilder<> bEntry(mBasicBlock);
657        bEntry.CreateBr(whileCondBlock);
658
659        // CONDITION BLOCK
660        IRBuilder<> bCond(whileCondBlock);
661        // generate phi nodes for any carry propagating instruction
662        int whileCarryCount = whileStatement->getInclusiveCarryCount();
663        int whileAdvanceCount = whileStatement->getInclusiveAdvanceCount();
           // phiNodes layout: [carries | advances | Next variables].
664        std::vector<PHINode*> phiNodes(whileCarryCount + whileAdvanceCount + nextNodes.size());
665        unsigned index = 0;
666        for (index = 0; index != whileCarryCount; ++index) {
667            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
668            phi->addIncoming(mCarryQueueVector[baseCarryQueueIdx + index], mBasicBlock);
669            mCarryQueueVector[baseCarryQueueIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
670            phiNodes[index] = phi;
671        }
672        for (int i = 0; i != whileAdvanceCount; ++i) {
673            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
674            phi->addIncoming(mAdvanceQueueVector[baseAdvanceQueueIdx + i], mBasicBlock);
675            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = mZeroInitializer; // (use phi for multi-carry mode.)
676            phiNodes[index++] = phi;
677        }
678        // and for any Next nodes in the loop body
679        for (const Next * n : nextNodes) {
680            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->str());
681            auto f = mMarkerMap.find(n->getName());
682            assert (f != mMarkerMap.end());
683            phi->addIncoming(f->second, mBasicBlock);
684            mMarkerMap[n->getName()] = phi;
685            phiNodes[index++] = phi;
686        }
687
688        mBasicBlock = whileCondBlock;
689        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
690
691        // BODY BLOCK
692        mBasicBlock = whileBodyBlock;
693        compileStatements(whileStatement->getBody());
694        // update phi nodes for any carry propagating instruction
695        IRBuilder<> bWhileBody(mBasicBlock);
           // The value fed back to each carry/advance phi is the OR of the phi
           // itself and the value produced by this iteration of the body.
696        for (index = 0; index != whileStatement->getInclusiveCarryCount(); ++index) {
697            Value * carryOut = bWhileBody.CreateOr(phiNodes[index], mCarryQueueVector[baseCarryQueueIdx + index]);
698            PHINode * phi = phiNodes[index];
699            phi->addIncoming(carryOut, mBasicBlock);
700            mCarryQueueVector[baseCarryQueueIdx + index] = phi;
701        }
702        for (int i = 0; i != whileAdvanceCount; ++i) {
703            Value * advOut = bWhileBody.CreateOr(phiNodes[index], mAdvanceQueueVector[baseAdvanceQueueIdx + i]);
704            PHINode * phi = phiNodes[index++];
705            phi->addIncoming(advOut, mBasicBlock);
706            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = phi;
707        }
708        // and for any Next nodes in the loop body
709        for (const Next * n : nextNodes) {
710            auto f = mMarkerMap.find(n->getName());
711            assert (f != mMarkerMap.end());
712            PHINode * phi = phiNodes[index++];
713            phi->addIncoming(f->second, mBasicBlock);
714            mMarkerMap[n->getName()] = phi;
715        }
716
717        bWhileBody.CreateBr(whileCondBlock);
718
719        // EXIT BLOCK
720        mBasicBlock = whileEndBlock;
           // Only when leaving the outermost loop are the accumulated phi values
           // written out through genCarryOutStore / genAdvanceOutStore.
721        if (--mNestingDepth == 0) {
722            for (index = 0; index != whileCarryCount; ++index) {
723                genCarryOutStore(phiNodes[index], baseCarryQueueIdx + index);
724            }
725            for (index = 0; index != whileAdvanceCount; ++index) {
726                genAdvanceOutStore(phiNodes[whileCarryCount + index], baseAdvanceQueueIdx + index);
727            }
728        }
729    }
730}
731
732Value * PabloCompiler::compileExpression(const PabloAST * expr)
733{
734    IRBuilder<> b(mBasicBlock);
735    if (isa<Ones>(expr)) {
736        return mOneInitializer;
737    }
738    else if (isa<Zeroes>(expr)) {
739        return mZeroInitializer;
740    }
741    else if (const Call* call = dyn_cast<Call>(expr)) {
742        //Call the callee once and store the result in the marker map.
743        auto mi = mMarkerMap.find(call->getCallee());
744        if (mi == mMarkerMap.end()) {
745            auto ci = mCalleeMap.find(call->getCallee());
746            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
747                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->str() + "\"");
748            }
749            mi = mMarkerMap.insert(std::make_pair(call->getCallee(), b.CreateCall(ci->second, mBasisBitsAddr))).first;
750        }
751        return mi->second;
752    }
753    else if (const Var * var = dyn_cast<Var>(expr))
754    {
755        auto f = mMarkerMap.find(var->getName());
756        if (LLVM_UNLIKELY(f == mMarkerMap.end())) {
757            throw std::runtime_error((var->getName()->str()) + " used before creation.");
758        }
759        return f->second;
760    }
761    else if (const And * pablo_and = dyn_cast<And>(expr))
762    {
763        return b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
764    }
765    else if (const Or * pablo_or = dyn_cast<Or>(expr))
766    {
767        return b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
768    }
769    else if (const Xor * pablo_xor = dyn_cast<Xor>(expr))
770    {
771        return b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
772    }
773    else if (const Sel * sel = dyn_cast<Sel>(expr))
774    {
775        Value* ifMask = compileExpression(sel->getCondition());
776        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
777        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
778        return b.CreateOr(ifTrue, ifFalse);
779    }
780    else if (const Not * pablo_not = dyn_cast<Not>(expr))
781    {
782        return genNot(compileExpression(pablo_not->getExpr()));
783    }
784    else if (const Advance * adv = dyn_cast<Advance>(expr))
785    {
786        Value* strm_value = compileExpression(adv->getExpr());
787        int shift = adv->getAdvanceAmount();
788        return genAdvanceWithCarry(strm_value, shift);
789    }
790    else if (const MatchStar * mstar = dyn_cast<MatchStar>(expr))
791    {
792        Value* marker = compileExpression(mstar->getMarker());
793        Value* cc = compileExpression(mstar->getCharClass());
794        Value* marker_and_cc = b.CreateAnd(marker, cc);
795        return b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc), cc), marker, "matchstar");
796    }
797    else if (const ScanThru * sthru = dyn_cast<ScanThru>(expr))
798    {
799        Value* marker_expr = compileExpression(sthru->getScanFrom());
800        Value* cc_expr = compileExpression(sthru->getScanThru());
801        return b.CreateAnd(genAddWithCarry(marker_expr, cc_expr), genNot(cc_expr), "scanthru");
802    }
803    else {
804        throw std::runtime_error("Unrecognized Pablo expression type; can't compile.");
805    }
806
807}
808
809#ifdef USE_UADD_OVERFLOW
810#ifdef USE_TWO_UADD_OVERFLOW
811PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
812    std::vector<Value*> struct_res_params;
813    struct_res_params.push_back(int128_e1);
814    struct_res_params.push_back(int128_e2);
815    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflow, struct_res_params, "uadd_overflow_res", mBasicBlock);
816    struct_res->setCallingConv(CallingConv::C);
817    struct_res->setTailCall(false);
818    AttributeSet struct_res_PAL;
819    struct_res->setAttributes(struct_res_PAL);
820
821    SumWithOverflowPack ret;
822
823    std::vector<unsigned> int128_sum_indices;
824    int128_sum_indices.push_back(0);
825    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
826
827    std::vector<unsigned> int1_obit_indices;
828    int1_obit_indices.push_back(1);
829    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
830
831    return ret;
832}
833#else
834PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
835    std::vector<Value*> struct_res_params;
836    struct_res_params.push_back(int128_e1);
837    struct_res_params.push_back(int128_e2);
838    struct_res_params.push_back(int1_cin);
839    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflowCarryin, struct_res_params, "uadd_overflow_res", mBasicBlock);
840    struct_res->setCallingConv(CallingConv::C);
841    struct_res->setTailCall(false);
842    AttributeSet struct_res_PAL;
843    struct_res->setAttributes(struct_res_PAL);
844
845    SumWithOverflowPack ret;
846
847    std::vector<unsigned> int128_sum_indices;
848    int128_sum_indices.push_back(0);
849    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
850
851    std::vector<unsigned> int1_obit_indices;
852    int1_obit_indices.push_back(1);
853    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
854
855    return ret;
856}
857#endif
858#endif
859
860Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2) {
861    IRBuilder<> b(mBasicBlock);
862
863    //CarryQ - carry in.
864    const int carryIdx = mCarryQueueIdx++;
865    Value* carryq_value = genCarryInLoad(carryIdx);
866#ifdef USE_TWO_UADD_OVERFLOW
867    //This is the ideal implementation, which uses two uadd.with.overflow
868    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
869    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
870    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
871    CastInst* int128_carryq_value = new BitCastInst(carryq_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "carryq_128", mBasicBlock);
872
873    SumWithOverflowPack sumpack0, sumpack1;
874
875    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
876    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
877
878    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
879    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
880
881    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
882    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
883    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
884    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
885    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
886
887#elif defined USE_UADD_OVERFLOW
888    //use llvm.uadd.with.overflow.i128 or i256
889    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
890    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
891
892    //get i1 carryin from iBLOCK_SIZE
893    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
894    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
895    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
896
897    SumWithOverflowPack sumpack0;
898    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
899    Value* obit = sumpack0.obit;
900    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
901
902    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
903    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
904    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
905    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
906#elif (BLOCK_SIZE == 128)
907    //calculate carry through logical ops
908    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
909    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
910    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
911    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
912    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
913    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
914
915    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
916    Value* carry_out = genShiftHighbitToLow(b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))), "carry_out");
917#else
918    //BLOCK_SIZE == 256, there is no other implementation
919    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
920#endif //USE_TWO_UADD_OVERFLOW
921
922    genCarryOutStore(carry_out, carryIdx);
923    return sum;
924}
925
926Value* PabloCompiler::genCarryInLoad(const unsigned index) {
927    assert (index < mCarryQueueVector.size());
928    if (mNestingDepth == 0) {
929        IRBuilder<> b(mBasicBlock);
930        mCarryQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
931    }
932    return mCarryQueueVector[index];
933}
934
935void PabloCompiler::genCarryOutStore(Value* carryOut, const unsigned index ) {
936    assert (carryOut);
937    assert (index < mCarryQueueVector.size());
938    if (mNestingDepth == 0) {
939        IRBuilder<> b(mBasicBlock);
940        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
941    }
942    mCarryQueueVector[index] = carryOut;
943}
944
945Value* PabloCompiler::genAdvanceInLoad(const unsigned index) {
946    assert (index < mAdvanceQueueVector.size());
947    if (mNestingDepth == 0) {
948        IRBuilder<> b(mBasicBlock);
949        mAdvanceQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
950    }
951    return mAdvanceQueueVector[index];
952}
953
954void PabloCompiler::genAdvanceOutStore(Value* advanceOut, const unsigned index ) {
955    assert (advanceOut);
956    assert (index < mAdvanceQueueVector.size());
957    if (mNestingDepth == 0) {
958        IRBuilder<> b(mBasicBlock);
959        b.CreateAlignedStore(advanceOut, b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
960    }
961    mAdvanceQueueVector[index] = advanceOut;
962}
963
964inline Value* PabloCompiler::genBitBlockAny(Value* test) {
965    IRBuilder<> b(mBasicBlock);
966    Value* cast_marker_value_1 = b.CreateBitCast(test, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
967    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(IntegerType::get(mMod->getContext(), BLOCK_SIZE), 0));
968}
969
970Value* PabloCompiler::genShiftHighbitToLow(Value* e, const Twine &namehint) {
971    IRBuilder<> b(mBasicBlock);
972    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
973    return b.CreateBitCast(b.CreateLShr(i128_val, BLOCK_SIZE - 1, namehint), mBitBlockType);
974}
975
976Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
977    IRBuilder<> b(mBasicBlock);
978    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
979    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
980}
981
982inline Value* PabloCompiler::genNot(Value* expr) {
983    IRBuilder<> b(mBasicBlock);
984    return b.CreateXor(expr, mOneInitializer, "not");
985}
986
987Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount) {
988
989    IRBuilder<> b(mBasicBlock);
990
991    const auto advanceIdx = mAdvanceQueueIdx++;
992#ifdef USE_LONG_INTEGER_SHIFT
993    Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(advanceIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
994    Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
995    Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
996    Value* result_value = b.CreateBitCast(adv_longint, mBitBlockType);
997    genAdvanceOutStore(strm_value, advanceIdx);
998
999    return result_value;
1000#elif (BLOCK_SIZE == 128)
1001    if (shift_amount == 1) {
1002        Value* advanceq_value = genShiftHighbitToLow(genAdvanceInLoad(advanceIdx));
1003        Value* srli_1_value = b.CreateLShr(strm_value, 63);
1004        Value* packed_shuffle;
1005        Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
1006        Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
1007        packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
1008
1009        Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
1010        Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
1011
1012        Value* shl_value = b.CreateShl(strm_value, const_packed_2);
1013        Value* result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
1014
1015        //CarryQ - carry out:
1016        genAdvanceOutStore(strm_value, advanceIdx);
1017
1018        return result_value;
1019    }
1020    else if (shift_amount < 64) {
1021        // This is the preferred logic, but is too slow for the general case.
1022        // We need to speed up our custom LLVM for this code.
1023        Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(advanceIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1024        Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1025        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
1026        Value* result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1027        genAdvanceOutStore(strm_value, advanceIdx);
1028
1029        return result_value;
1030    }
1031    else {//if (shift_amount >= 64) {
1032        throw std::runtime_error("Shift amount >= 64 in Advance is currently unsupported.");
1033    }
1034#else
1035    //BLOCK_SIZE == 256
1036    static_assert(false, "Advance with carry on 256-bit bitblock requires long integer shifts (USE_LONG_INTEGER_SHIFT).");
1037#endif //USE_LONG_INTEGER_SHIFT
1038}
1039
1040void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
1041    IRBuilder<> b(mBasicBlock);
1042    if (marker->getType()->isPointerTy()) {
1043        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
1044    }
1045    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
1046    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
1047    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
1048}
1049
1050}
Note: See TracBrowser for help on using the repository browser.