source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4378

Last change on this file since 4378 was 4378, checked in by cameron, 5 years ago

Incremental step towards general property support

File size: 44.6 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7/*
8 *  Copyright (c) 2014 International Characters.
9 *  This software is licensed to the public under the Open Software License 3.0.
10 *  icgrep is a trademark of International Characters.
11 */
12
13#include <pablo/pablo_compiler.h>
14#include <pablo/codegenstate.h>
15#include <pablo/printer_pablos.h>
16#include <cc/cc_namemap.hpp>
17#include <re/re_name.h>
18#include <stdexcept>
19#include <include/simd-lib/bitblock.hpp>
20
21#ifdef USE_LLVM_3_4
22#include <llvm/Analysis/Verifier.h>
23#include <llvm/Assembly/PrintModulePass.h>
24#include <llvm/Linker.h>
25#endif
26#ifdef USE_LLVM_3_5
27#include <llvm/IR/Verifier.h>
28#endif
29
30#include <llvm/Pass.h>
31#include <llvm/PassManager.h>
32#include <llvm/ADT/SmallVector.h>
33#include <llvm/Analysis/Passes.h>
34#include <llvm/IR/BasicBlock.h>
35#include <llvm/IR/CallingConv.h>
36#include <llvm/IR/Constants.h>
37#include <llvm/IR/DataLayout.h>
38#include <llvm/IR/DerivedTypes.h>
39#include <llvm/IR/Function.h>
40#include <llvm/IR/GlobalVariable.h>
41#include <llvm/IR/InlineAsm.h>
42#include <llvm/IR/Instructions.h>
43#include <llvm/IR/LLVMContext.h>
44#include <llvm/IR/Module.h>
45#include <llvm/Support/FormattedStream.h>
46#include <llvm/Support/MathExtras.h>
47#include <llvm/Support/Casting.h>
48#include <llvm/Support/Compiler.h>
49#include <llvm/Support/Debug.h>
50#include <llvm/Support/TargetSelect.h>
51#include <llvm/Support/Host.h>
52#include <llvm/Transforms/Scalar.h>
53#include <llvm/ExecutionEngine/ExecutionEngine.h>
54#include <llvm/ExecutionEngine/MCJIT.h>
55#include <llvm/IRReader/IRReader.h>
56#include <llvm/Bitcode/ReaderWriter.h>
57#include <llvm/Support/MemoryBuffer.h>
58#include <llvm/IR/IRBuilder.h>
59
60#include "llvm/Support/CommandLine.h"
61
62#include "unicode_categories.h"
63
64cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
65static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
66
67extern "C" {
68  void wrapped_print_register(BitBlock bit_block) {
69      print_register<BitBlock>("", bit_block);
70  }
71}
72
73namespace pablo {
74
75PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
76: mBasisBits(basisBits)
77, mMod(new Module("icgrep", getGlobalContext()))
78, mBasicBlock(nullptr)
79, mExecutionEngine(nullptr)
80, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
81, mBasisBitsInputPtr(nullptr)
82, mCarryQueueIdx(0)
83, mCarryQueuePtr(nullptr)
84, mNestingDepth(0)
85, mCarryQueueSize(0)
86, mAdvanceQueueIdx(0)
87, mAdvanceQueuePtr(nullptr)
88, mAdvanceQueueSize(0)
89, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
90, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
91, mFunctionType(nullptr)
92, mFunction(nullptr)
93, mBasisBitsAddr(nullptr)
94, mOutputAddrPtr(nullptr)
95, mMaxNestingDepth(0)
96{
97    //Create the jit execution engine.up
98    InitializeNativeTarget();
99    InitializeNativeTargetAsmPrinter();
100    InitializeNativeTargetAsmParser();
101    DefineTypes();
102    DeclareFunctions();
103}
104
105PabloCompiler::~PabloCompiler()
106{
107    delete mMod;
108}
109
110LLVM_Gen_RetVal PabloCompiler::compile(PabloBlock & pb)
111{
112    mNestingDepth = 0;
113    mMaxNestingDepth = 0;
114    mCarryQueueSize = 0;
115    mAdvanceQueueSize = 0;
116    Examine(pb.statements());
117    mCarryQueueVector.resize(mCarryQueueSize);
118    mAdvanceQueueVector.resize(mAdvanceQueueSize);
119    std::string errMessage;
120    EngineBuilder builder(mMod);
121    builder.setErrorStr(&errMessage);
122    builder.setMCPU(sys::getHostCPUName());
123    builder.setUseMCJIT(true);
124    builder.setOptLevel(mMaxNestingDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
125    mExecutionEngine = builder.create();
126    if (mExecutionEngine == nullptr) {
127        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
128    }
129
130    if (!mCalleeMap.empty()) {
131        DeclareCallFunctions();
132    }
133
134    Function::arg_iterator args = mFunction->arg_begin();
135    mBasisBitsAddr = args++;
136    mBasisBitsAddr->setName("basis_bits");
137    mCarryQueuePtr = args++;
138    mCarryQueuePtr->setName("carry_q");
139    mAdvanceQueuePtr = args++;
140    mAdvanceQueuePtr->setName("advance_q");
141    mOutputAddrPtr = args++;
142    mOutputAddrPtr->setName("output");
143
144    //Create the carry and advance queues.
145    mCarryQueueIdx = 0;
146    mAdvanceQueueIdx = 0;
147    mNestingDepth = 0;
148    mMaxNestingDepth = 0;
149    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
150
151    //The basis bits structure
152    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
153        IRBuilder<> b(mBasicBlock);
154        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
155        const String * const name = mBasisBits[i]->getName();
156        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
157        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, name->str());
158        mMarkerMap.insert(std::make_pair(name, basisBit));
159    }
160
161    //Generate the IR instructions for the function.
162    compileStatements(pb.statements());
163
164    assert (mCarryQueueIdx == mCarryQueueSize);
165    assert (mAdvanceQueueIdx == mAdvanceQueueSize);
166    assert (mNestingDepth == 0);
167    //Terminate the block
168    ReturnInst::Create(mMod->getContext(), mBasicBlock);
169
170    //Display the IR that has been generated by this module.
171    if (DumpGeneratedIR) {
172      mMod->dump();
173    }
174
175
176
177    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
178    #ifdef USE_LLVM_3_5
179    verifyModule(*mMod, &dbgs());
180    #endif
181    #ifdef USE_LLVM_3_4
182    verifyModule(*mMod, PrintMessageAction);
183    #endif
184
185    //Use the pass manager to run optimizations on the function.
186    FunctionPassManager fpm(mMod);
187 #ifdef USE_LLVM_3_5
188    mMod->setDataLayout(mExecutionEngine->getDataLayout());
189    // Set up the optimizer pipeline.  Start with registering info about how the target lays out data structures.
190    fpm.add(new DataLayoutPass(mMod));
191#endif
192#ifdef USE_LLVM_3_4
193    fpm.add(new DataLayout(*mExecutionEngine->getDataLayout()));
194#endif
195    fpm.doInitialization();
196    fpm.run(*mFunction);
197
198    mExecutionEngine->finalizeObject();
199
200    LLVM_Gen_RetVal retVal;
201    //Return the required size of the carry queue and a pointer to the process_block function.
202    retVal.carry_q_size = mCarryQueueVector.size();
203    retVal.advance_q_size = mAdvanceQueueVector.size();
204    retVal.process_block_fptr = mExecutionEngine->getPointerToFunction(mFunction);
205
206    return retVal;
207}
208
209void PabloCompiler::DefineTypes()
210{
211    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
212    if (structBasisBits == nullptr) {
213        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
214    }
215    std::vector<Type*>StructTy_struct_Basis_bits_fields;
216    for (int i = 0; i != mBasisBits.size(); i++)
217    {
218        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
219    }
220    if (structBasisBits->isOpaque()) {
221        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
222    }
223    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
224
225    std::vector<Type*>functionTypeArgs;
226    functionTypeArgs.push_back(mBasisBitsInputPtr);
227
228    //The carry q array.
229    //A pointer to the BitBlock vector.
230    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
231    // Advance q array
232    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
233
234    //The output structure.
235    StructType * outputStruct = mMod->getTypeByName("struct.Output");
236    if (!outputStruct) {
237        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
238    }
239    if (outputStruct->isOpaque()) {
240        std::vector<Type*>fields;
241        fields.push_back(mBitBlockType);
242        fields.push_back(mBitBlockType);
243        outputStruct->setBody(fields, /*isPacked=*/false);
244    }
245    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
246
247    //The &output parameter.
248    functionTypeArgs.push_back(outputStructPtr);
249
250    mFunctionType = FunctionType::get(
251     /*Result=*/Type::getVoidTy(mMod->getContext()),
252     /*Params=*/functionTypeArgs,
253     /*isVarArg=*/false);
254}
255
256void PabloCompiler::DeclareFunctions()
257{
258    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
259    //mFunc_print_register = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), mXi64Vect, NULL);
260    //mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mFunc_print_register), (void *)&wrapped_print_register);
261    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
262
263#ifdef USE_UADD_OVERFLOW
264#ifdef USE_TWO_UADD_OVERFLOW
265    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
266    std::vector<Type*>StructTy_0_fields;
267    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
268    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
269    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
270
271    std::vector<Type*>FuncTy_1_args;
272    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
273    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
274    FunctionType* FuncTy_1 = FunctionType::get(
275                                              /*Result=*/StructTy_0,
276                                              /*Params=*/FuncTy_1_args,
277                                              /*isVarArg=*/false);
278
279    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
280                                              std::to_string(BLOCK_SIZE));
281    if (!mFunctionUaddOverflow) {
282        mFunctionUaddOverflow= Function::Create(
283          /*Type=*/ FuncTy_1,
284          /*Linkage=*/ GlobalValue::ExternalLinkage,
285          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
286        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
287    }
288    AttributeSet mFunctionUaddOverflowPAL;
289    {
290        SmallVector<AttributeSet, 4> Attrs;
291        AttributeSet PAS;
292        {
293          AttrBuilder B;
294          B.addAttribute(Attribute::NoUnwind);
295          B.addAttribute(Attribute::ReadNone);
296          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
297        }
298
299        Attrs.push_back(PAS);
300        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
301    }
302    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
303#else
304    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
305    std::vector<Type*>StructTy_0_fields;
306    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
307    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
308    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
309
310    std::vector<Type*>FuncTy_1_args;
311    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
312    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
313    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
314    FunctionType* FuncTy_1 = FunctionType::get(
315                                              /*Result=*/StructTy_0,
316                                              /*Params=*/FuncTy_1_args,
317                                              /*isVarArg=*/false);
318
319    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
320                                              std::to_string(BLOCK_SIZE));
321    if (!mFunctionUaddOverflowCarryin) {
322        mFunctionUaddOverflowCarryin = Function::Create(
323          /*Type=*/ FuncTy_1,
324          /*Linkage=*/ GlobalValue::ExternalLinkage,
325          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
326        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
327    }
328    AttributeSet mFunctionUaddOverflowCarryinPAL;
329    {
330        SmallVector<AttributeSet, 4> Attrs;
331        AttributeSet PAS;
332        {
333          AttrBuilder B;
334          B.addAttribute(Attribute::NoUnwind);
335          B.addAttribute(Attribute::ReadNone);
336          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
337        }
338
339        Attrs.push_back(PAS);
340        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
341    }
342    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
343#endif
344#endif
345
346    //Starts on process_block
347    SmallVector<AttributeSet, 5> Attrs;
348    AttributeSet PAS;
349    {
350        AttrBuilder B;
351        B.addAttribute(Attribute::ReadOnly);
352        B.addAttribute(Attribute::NoCapture);
353        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
354    }
355    Attrs.push_back(PAS);
356    {
357        AttrBuilder B;
358        B.addAttribute(Attribute::NoCapture);
359        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
360    }
361    Attrs.push_back(PAS);
362    {
363        AttrBuilder B;
364        B.addAttribute(Attribute::NoCapture);
365        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
366    }
367    Attrs.push_back(PAS);
368    {
369        AttrBuilder B;
370        B.addAttribute(Attribute::NoCapture);
371        PAS = AttributeSet::get(mMod->getContext(), 4U, B);
372    }
373    Attrs.push_back(PAS);
374    {
375        AttrBuilder B;
376        B.addAttribute(Attribute::NoUnwind);
377        B.addAttribute(Attribute::UWTable);
378        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
379    }
380    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
381
382    //Create the function that will be generated.
383    mFunction = mMod->getFunction("process_block");
384    if (!mFunction) {
385        mFunction = Function::Create(
386            /*Type=*/mFunctionType,
387            /*Linkage=*/GlobalValue::ExternalLinkage,
388            /*Name=*/"process_block", mMod);
389        mFunction->setCallingConv(CallingConv::C);
390    }
391    mFunction->setAttributes(AttrSet);
392}
393
394void PabloCompiler::Examine(StatementList & stmts) {
395    for (Statement * stmt : stmts) {
396        if (Assign * assign = dyn_cast<Assign>(stmt)) {
397            Examine(assign->getExpr());
398        }
399        if (Next * next = dyn_cast<Next>(stmt)) {
400            Examine(next->getExpr());
401        }
402        else if (If * ifStatement = dyn_cast<If>(stmt)) {
403            const auto preIfCarryCount = mCarryQueueSize;
404            const auto preIfAdvanceCount = mAdvanceQueueSize;
405            Examine(ifStatement->getCondition());
406            Examine(ifStatement->getBody());
407            int ifCarryCount = mCarryQueueSize - preIfCarryCount;
408            int ifAdvanceCount = mAdvanceQueueSize - preIfAdvanceCount;
409            if ((ifCarryCount + ifAdvanceCount) > 1) {
410              ++mAdvanceQueueSize;
411              ++ifAdvanceCount;
412            }
413            ifStatement->setInclusiveCarryCount(ifCarryCount);
414            ifStatement->setInclusiveAdvanceCount(ifAdvanceCount);
415        }
416        else if (While * whileStatement = dyn_cast<While>(stmt)) {
417            const auto preWhileCarryCount = mCarryQueueSize;
418            const auto preWhileAdvanceCount = mAdvanceQueueSize;
419            Examine(whileStatement->getCondition());
420            mMaxNestingDepth = std::max(mMaxNestingDepth, ++mNestingDepth);
421            Examine(whileStatement->getBody());
422            --mNestingDepth;
423            whileStatement->setInclusiveCarryCount(mCarryQueueSize - preWhileCarryCount);
424            whileStatement->setInclusiveAdvanceCount(mAdvanceQueueSize - preWhileAdvanceCount);
425        }
426    }
427}
428
429void PabloCompiler::Examine(PabloAST *expr)
430{
431    if (Call * call = dyn_cast<Call>(expr)) {
432        mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
433    }
434    else if (And * pablo_and = dyn_cast<And>(expr)) {
435        Examine(pablo_and->getExpr1());
436        Examine(pablo_and->getExpr2());
437    }
438    else if (Or * pablo_or = dyn_cast<Or>(expr)) {
439        Examine(pablo_or->getExpr1());
440        Examine(pablo_or->getExpr2());
441    }
442    else if (Sel * pablo_sel = dyn_cast<Sel>(expr)) {
443        Examine(pablo_sel->getCondition());
444        Examine(pablo_sel->getTrueExpr());
445        Examine(pablo_sel->getFalseExpr());
446    }
447    else if (Not * pablo_not = dyn_cast<Not>(expr)) {
448        Examine(pablo_not->getExpr());
449    }
450    else if (Advance * adv = dyn_cast<Advance>(expr)) {
451        ++mAdvanceQueueSize;
452        Examine(adv->getExpr());
453    }
454    else if (MatchStar * mstar = dyn_cast<MatchStar>(expr)) {
455        ++mCarryQueueSize;
456        Examine(mstar->getMarker());
457        Examine(mstar->getCharClass());
458    }
459    else if (ScanThru * sthru = dyn_cast<ScanThru>(expr)) {
460        ++mCarryQueueSize;
461        Examine(sthru->getScanFrom());
462        Examine(sthru->getScanThru());
463    }
464}
465
466void PabloCompiler::DeclareCallFunctions() {
467    for (auto mapping : mCalleeMap) {
468        const String * callee = mapping.first;
469        void * callee_ptr = nullptr;
470        #define CHECK_GENERAL_CODE_CATEGORY(SUFFIX) \
471            if (callee->str() == #SUFFIX) { \
472                callee_ptr = (void*)&__get_gc_##SUFFIX; \
473            } else
474        CHECK_GENERAL_CODE_CATEGORY(Cc)
475        CHECK_GENERAL_CODE_CATEGORY(Cf)
476        CHECK_GENERAL_CODE_CATEGORY(Cn)
477        CHECK_GENERAL_CODE_CATEGORY(Co)
478        CHECK_GENERAL_CODE_CATEGORY(Cs)
479        CHECK_GENERAL_CODE_CATEGORY(Ll)
480        CHECK_GENERAL_CODE_CATEGORY(Lm)
481        CHECK_GENERAL_CODE_CATEGORY(Lo)
482        CHECK_GENERAL_CODE_CATEGORY(Lt)
483        CHECK_GENERAL_CODE_CATEGORY(Lu)
484        CHECK_GENERAL_CODE_CATEGORY(Mc)
485        CHECK_GENERAL_CODE_CATEGORY(Me)
486        CHECK_GENERAL_CODE_CATEGORY(Mn)
487        CHECK_GENERAL_CODE_CATEGORY(Nd)
488        CHECK_GENERAL_CODE_CATEGORY(Nl)
489        CHECK_GENERAL_CODE_CATEGORY(No)
490        CHECK_GENERAL_CODE_CATEGORY(Pc)
491        CHECK_GENERAL_CODE_CATEGORY(Pd)
492        CHECK_GENERAL_CODE_CATEGORY(Pe)
493        CHECK_GENERAL_CODE_CATEGORY(Pf)
494        CHECK_GENERAL_CODE_CATEGORY(Pi)
495        CHECK_GENERAL_CODE_CATEGORY(Po)
496        CHECK_GENERAL_CODE_CATEGORY(Ps)
497        CHECK_GENERAL_CODE_CATEGORY(Sc)
498        CHECK_GENERAL_CODE_CATEGORY(Sk)
499        CHECK_GENERAL_CODE_CATEGORY(Sm)
500        CHECK_GENERAL_CODE_CATEGORY(So)
501        CHECK_GENERAL_CODE_CATEGORY(Zl)
502        CHECK_GENERAL_CODE_CATEGORY(Zp)
503        CHECK_GENERAL_CODE_CATEGORY(Zs)
504        // OTHERWISE ...
505        throw std::runtime_error("Unknown unicode category \"" + callee->str() + "\"");
506        #undef CHECK_GENERAL_CODE_CATEGORY
507        Value * unicodeCategory = mMod->getOrInsertFunction("__get_gc_" + callee->str(), mBitBlockType, mBasisBitsInputPtr, NULL);
508        if (LLVM_UNLIKELY(unicodeCategory == nullptr)) {
509            throw std::runtime_error("Could not create static method call for unicode category \"" + callee->str() + "\"");
510        }
511        mExecutionEngine->addGlobalMapping(cast<GlobalValue>(unicodeCategory), callee_ptr);
512        mCalleeMap[callee] = unicodeCategory;
513    }
514}
515
516void PabloCompiler::compileStatements(const StatementList & stmts) {
517    for (const PabloAST * statement : stmts) {
518        compileStatement(statement);
519    }
520}
521
522void PabloCompiler::compileStatement(const PabloAST * stmt)
523{
524    if (const Assign * assign = dyn_cast<const Assign>(stmt))
525    {
526        Value* expr = compileExpression(assign->getExpr());
527        mMarkerMap[assign->getName()] = expr;
528        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
529            SetOutputValue(expr, assign->getOutputIndex());
530        }
531    }
532    if (const Next * next = dyn_cast<const Next>(stmt))
533    {
534        Value* expr = compileExpression(next->getExpr());
535        mMarkerMap[next->getName()] = expr;
536    }
537    else if (const If * ifStatement = dyn_cast<const If>(stmt))
538    //
539    //  The If-ElseZero stmt:
540    //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
541    //  If the value of the predicate is nonzero, then determine the values of variables
542    //  <var>* by executing the given statements.  Otherwise, the value of the
543    //  variables are all zero.  Requirements: (a) no variable that is defined within
544    //  the body of the if may be accessed outside unless it is explicitly 
545    //  listed in the variable list, (b) every variable in the defined list receives
546    //  a value within the body, and (c) the logical consequence of executing
547    //  the statements in the event that the predicate is zero is that the
548    //  values of all defined variables indeed work out to be 0.
549    //
550    //  Simple Implementation with Phi nodes:  a phi node in the if exit block
551    //  is inserted for each variable in the defined variable list.  It receives
552    //  a zero value from the ifentry block and the defined value from the if
553    //  body.
554    //
555    {
556        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
557        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
558        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
559       
560        const auto baseCarryQueueIdx = mCarryQueueIdx;
561        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
562       
563        int ifCarryCount = ifStatement->getInclusiveCarryCount();
564        int ifAdvanceCount = ifStatement->getInclusiveAdvanceCount();
565        //  Carry/Advance queue strategy.   
566        //  If there are any carries or advances at any nesting level within the
567        //  if statement, then the statement must be executed.   A "summary"
568        //  carryover variable is determined for this purpose, consisting of the
569        //  or of all of the carry and advance variables within the if.
570        //  This variable is determined as follows.
571        //  (a)  If the CarryCount and AdvanceCount are both 0, there is no summary variable.
572        //  (b)  If the CarryCount is 1 and the AdvanceCount is 0, then the summary
573        //       carryover variable is just the single carry queue entry.
574        //  (c)  If the CarryCount is 0 and the AdvanceCount is 1, then the summary
575        //       carryover variable is just the advance carry queue entry.
576        //  (d)  Otherwise, an additional advance queue entry is created for the
577        //       summary variable.
578        //  Note that the test for cases (c) and (d) may be combined: the summary carryover
579        //  variable is just last advance queue entry.
580        //
581       
582        IRBuilder<> b_entry(ifEntryBlock);
583        mBasicBlock = ifEntryBlock;
584        Value* if_test_value = compileExpression(ifStatement->getCondition());
585       
586        if ((ifCarryCount == 1) && (ifAdvanceCount == 0)) {
587            Value* last_if_pending_carries = genCarryInLoad(baseCarryQueueIdx);
588            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_carries);
589        }
590        else if ((ifCarryCount > 0) || (ifAdvanceCount > 0)) {
591            Value* last_if_pending_advances = genAdvanceInLoad(baseAdvanceQueueIdx + ifAdvanceCount - 1);
592            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_advances);
593        }
594        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
595
596        // Entry processing is complete, now handle the body of the if.
597       
598        mBasicBlock = ifBodyBlock;
599       
600        compileStatements(ifStatement->getBody());
601
602        // If we compiled an If or a While statement, we won't be in the same basic block as before.
603        // Create the branch from the current basic block to the end block.
604        IRBuilder<> bIfBody(mBasicBlock);
605        // After the recursive compile, now insert the code to compute the summary
606        // carry over variable.
607       
608        if ((ifCarryCount + ifAdvanceCount) > 1) {
609            // A summary variable is needed.
610
611            Value * carry_summary = mZeroInitializer;
612            for (int c = baseCarryQueueIdx; c < baseCarryQueueIdx + ifCarryCount; c++)
613            {
614                Value* carryq_value = genCarryInLoad(c);
615                carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
616            }
617            // Note that the limit in the following uses -1, because
618            // last entry of the advance queue is for the summary variable.
619            for (int c = baseAdvanceQueueIdx; c < baseAdvanceQueueIdx + ifAdvanceCount - 1; c++)
620            {
621                Value* advance_q_value = genAdvanceInLoad(c);
622                carry_summary = bIfBody.CreateOr(advance_q_value, carry_summary);
623            }
624            genAdvanceOutStore(carry_summary, mAdvanceQueueIdx++); //baseAdvanceQueueIdx + ifAdvanceCount - 1);
625        }
626        bIfBody.CreateBr(ifEndBlock);
627        //End Block
628        IRBuilder<> bEnd(ifEndBlock);
629        for (const Assign * a : ifStatement->getDefined()) {
630            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, a->getName()->str());
631            auto f = mMarkerMap.find(a->getName());
632            assert (f != mMarkerMap.end());
633            phi->addIncoming(mZeroInitializer, ifEntryBlock);
634            phi->addIncoming(f->second, mBasicBlock);
635            mMarkerMap[a->getName()] = phi;
636        }
637        // Set the basic block to the new end block
638        mBasicBlock = ifEndBlock;
639    }
640    else if (const While * whileStatement = dyn_cast<const While>(stmt))
641    {
642        const auto baseCarryQueueIdx = mCarryQueueIdx;
643        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
644        if (mNestingDepth == 0) {
645            for (auto i = 0; i != whileStatement->getInclusiveCarryCount(); ++i) {
646                genCarryInLoad(baseCarryQueueIdx + i);
647            }
648            for (auto i = 0; i != whileStatement->getInclusiveAdvanceCount(); ++i) {
649                genAdvanceInLoad(baseAdvanceQueueIdx + i);
650            }
651        }
652
653        SmallVector<const Next*, 4> nextNodes;
654        for (const PabloAST * node : whileStatement->getBody()) {
655            if (isa<Next>(node)) {
656                nextNodes.push_back(cast<Next>(node));
657            }
658        }
659
660        // Compile the initial iteration statements; the calls to genCarryOutStore will update the
661        // mCarryQueueVector with the appropriate values. Although we're not actually entering a new basic
662        // block yet, increment the nesting depth so that any calls to genCarryInLoad or genCarryOutStore
663        // will refer to the previous value.
664
665        ++mNestingDepth;
666
667        compileStatements(whileStatement->getBody());
668
669        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
670        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
671        // but works for now.
672        mCarryQueueIdx = baseCarryQueueIdx;
673        mAdvanceQueueIdx = baseAdvanceQueueIdx;
674
675        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
676        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
677        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
678
679        // Note: compileStatements may update the mBasicBlock pointer if the body contains nested loops. It
680        // may not be same one that we entered the function with.
681        IRBuilder<> bEntry(mBasicBlock);
682        bEntry.CreateBr(whileCondBlock);
683
684        // CONDITION BLOCK
685        IRBuilder<> bCond(whileCondBlock);
686        // generate phi nodes for any carry propogating instruction
687        int whileCarryCount = whileStatement->getInclusiveCarryCount();
688        int whileAdvanceCount = whileStatement->getInclusiveAdvanceCount();
689        std::vector<PHINode*> phiNodes(whileCarryCount + whileAdvanceCount + nextNodes.size());
690        unsigned index = 0;
691        for (index = 0; index != whileCarryCount; ++index) {
692            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
693            phi->addIncoming(mCarryQueueVector[baseCarryQueueIdx + index], mBasicBlock);
694            mCarryQueueVector[baseCarryQueueIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
695            phiNodes[index] = phi;
696        }
697        for (int i = 0; i != whileAdvanceCount; ++i) {
698            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
699            phi->addIncoming(mAdvanceQueueVector[baseAdvanceQueueIdx + i], mBasicBlock);
700            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = mZeroInitializer; // (use phi for multi-carry mode.)
701            phiNodes[index++] = phi;
702        }
703        // and for any Next nodes in the loop body
704        for (const Next * n : nextNodes) {
705            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->str());
706            auto f = mMarkerMap.find(n->getName());
707            assert (f != mMarkerMap.end());
708            phi->addIncoming(f->second, mBasicBlock);
709            mMarkerMap[n->getName()] = phi;
710            phiNodes[index++] = phi;
711        }
712
713        mBasicBlock = whileCondBlock;
714        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
715
716        // BODY BLOCK
717        mBasicBlock = whileBodyBlock;
718        compileStatements(whileStatement->getBody());
719        // update phi nodes for any carry propogating instruction
720        IRBuilder<> bWhileBody(mBasicBlock);
721        for (index = 0; index != whileStatement->getInclusiveCarryCount(); ++index) {
722            Value * carryOut = bWhileBody.CreateOr(phiNodes[index], mCarryQueueVector[baseCarryQueueIdx + index]);
723            PHINode * phi = phiNodes[index];
724            phi->addIncoming(carryOut, mBasicBlock);
725            mCarryQueueVector[baseCarryQueueIdx + index] = phi;
726        }
727        for (int i = 0; i != whileAdvanceCount; ++i) {
728            Value * advOut = bWhileBody.CreateOr(phiNodes[index], mAdvanceQueueVector[baseAdvanceQueueIdx + i]);
729            PHINode * phi = phiNodes[index++];
730            phi->addIncoming(advOut, mBasicBlock);
731            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = phi;
732        }
733        // and for any Next nodes in the loop body
734        for (const Next * n : nextNodes) {
735            auto f = mMarkerMap.find(n->getName());
736            assert (f != mMarkerMap.end());
737            PHINode * phi = phiNodes[index++];
738            phi->addIncoming(f->second, mBasicBlock);
739            mMarkerMap[n->getName()] = phi;
740        }
741
742        bWhileBody.CreateBr(whileCondBlock);
743
744        // EXIT BLOCK
745        mBasicBlock = whileEndBlock;
746        if (--mNestingDepth == 0) {
747            for (index = 0; index != whileCarryCount; ++index) {
748                genCarryOutStore(phiNodes[index], baseCarryQueueIdx + index);
749            }
750            for (index = 0; index != whileAdvanceCount; ++index) {
751                genAdvanceOutStore(phiNodes[whileCarryCount + index], baseAdvanceQueueIdx + index);
752            }
753        }
754    }
755}
756
757Value * PabloCompiler::compileExpression(const PabloAST * expr)
758{
759    IRBuilder<> b(mBasicBlock);
760    if (isa<Ones>(expr)) {
761        return mOneInitializer;
762    }
763    else if (isa<Zeroes>(expr)) {
764        return mZeroInitializer;
765    }
766    else if (const Call* call = dyn_cast<Call>(expr)) {
767        //Call the callee once and store the result in the marker map.
768        auto mi = mMarkerMap.find(call->getCallee());
769        if (mi == mMarkerMap.end()) {
770            auto ci = mCalleeMap.find(call->getCallee());
771            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
772                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->str() + "\"");
773            }
774            mi = mMarkerMap.insert(std::make_pair(call->getCallee(), b.CreateCall(ci->second, mBasisBitsAddr))).first;
775        }
776        return mi->second;
777    }
778    else if (const Var * var = dyn_cast<Var>(expr))
779    {
780        auto f = mMarkerMap.find(var->getName());
781        if (LLVM_UNLIKELY(f == mMarkerMap.end())) {
782            throw std::runtime_error((var->getName()->str()) + " used before creation.");
783        }
784        return f->second;
785    }
786    else if (const And * pablo_and = dyn_cast<And>(expr))
787    {
788        return b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
789    }
790    else if (const Or * pablo_or = dyn_cast<Or>(expr))
791    {
792        return b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
793    }
794    else if (const Xor * pablo_xor = dyn_cast<Xor>(expr))
795    {
796        return b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
797    }
798    else if (const Sel * sel = dyn_cast<Sel>(expr))
799    {
800        Value* ifMask = compileExpression(sel->getCondition());
801        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
802        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
803        return b.CreateOr(ifTrue, ifFalse);
804    }
805    else if (const Not * pablo_not = dyn_cast<Not>(expr))
806    {
807        return genNot(compileExpression(pablo_not->getExpr()));
808    }
809    else if (const Advance * adv = dyn_cast<Advance>(expr))
810    {
811        Value* strm_value = compileExpression(adv->getExpr());
812        int shift = adv->getAdvanceAmount();
813        return genAdvanceWithCarry(strm_value, shift);
814    }
815    else if (const MatchStar * mstar = dyn_cast<MatchStar>(expr))
816    {
817        Value* marker = compileExpression(mstar->getMarker());
818        Value* cc = compileExpression(mstar->getCharClass());
819        Value* marker_and_cc = b.CreateAnd(marker, cc);
820        return b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc), cc), marker, "matchstar");
821    }
822    else if (const ScanThru * sthru = dyn_cast<ScanThru>(expr))
823    {
824        Value* marker_expr = compileExpression(sthru->getScanFrom());
825        Value* cc_expr = compileExpression(sthru->getScanThru());
826        return b.CreateAnd(genAddWithCarry(marker_expr, cc_expr), genNot(cc_expr), "scanthru");
827    }
828    else {
829        throw std::runtime_error("Unrecognized Pablo expression type; can't compile.");
830    }
831
832}
833
834#ifdef USE_UADD_OVERFLOW
835#ifdef USE_TWO_UADD_OVERFLOW
836PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
837    std::vector<Value*> struct_res_params;
838    struct_res_params.push_back(int128_e1);
839    struct_res_params.push_back(int128_e2);
840    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflow, struct_res_params, "uadd_overflow_res", mBasicBlock);
841    struct_res->setCallingConv(CallingConv::C);
842    struct_res->setTailCall(false);
843    AttributeSet struct_res_PAL;
844    struct_res->setAttributes(struct_res_PAL);
845
846    SumWithOverflowPack ret;
847
848    std::vector<unsigned> int128_sum_indices;
849    int128_sum_indices.push_back(0);
850    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
851
852    std::vector<unsigned> int1_obit_indices;
853    int1_obit_indices.push_back(1);
854    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
855
856    return ret;
857}
858#else
859PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
860    std::vector<Value*> struct_res_params;
861    struct_res_params.push_back(int128_e1);
862    struct_res_params.push_back(int128_e2);
863    struct_res_params.push_back(int1_cin);
864    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflowCarryin, struct_res_params, "uadd_overflow_res", mBasicBlock);
865    struct_res->setCallingConv(CallingConv::C);
866    struct_res->setTailCall(false);
867    AttributeSet struct_res_PAL;
868    struct_res->setAttributes(struct_res_PAL);
869
870    SumWithOverflowPack ret;
871
872    std::vector<unsigned> int128_sum_indices;
873    int128_sum_indices.push_back(0);
874    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
875
876    std::vector<unsigned> int1_obit_indices;
877    int1_obit_indices.push_back(1);
878    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
879
880    return ret;
881}
882#endif
883#endif
884
885Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2) {
886    IRBuilder<> b(mBasicBlock);
887
888    //CarryQ - carry in.
889    const int carryIdx = mCarryQueueIdx++;
890    Value* carryq_value = genCarryInLoad(carryIdx);
891#ifdef USE_TWO_UADD_OVERFLOW
892    //This is the ideal implementation, which uses two uadd.with.overflow
893    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
894    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
895    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
896    CastInst* int128_carryq_value = new BitCastInst(carryq_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "carryq_128", mBasicBlock);
897
898    SumWithOverflowPack sumpack0, sumpack1;
899
900    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
901    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
902
903    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
904    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
905
906    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
907    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
908    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
909    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
910    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
911
912#elif defined USE_UADD_OVERFLOW
913    //use llvm.uadd.with.overflow.i128 or i256
914    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
915    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
916
917    //get i1 carryin from iBLOCK_SIZE
918    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
919    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
920    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
921
922    SumWithOverflowPack sumpack0;
923    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
924    Value* obit = sumpack0.obit;
925    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
926
927    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
928    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
929    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
930    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
931#elif (BLOCK_SIZE == 128)
932    //calculate carry through logical ops
933    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
934    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
935    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
936    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
937    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
938    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
939
940    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
941    Value* carry_out = genShiftHighbitToLow(b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))), "carry_out");
942#else
943    //BLOCK_SIZE == 256, there is no other implementation
944    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
945#endif //USE_TWO_UADD_OVERFLOW
946
947    genCarryOutStore(carry_out, carryIdx);
948    return sum;
949}
950
951Value* PabloCompiler::genCarryInLoad(const unsigned index) {
952    assert (index < mCarryQueueVector.size());
953    if (mNestingDepth == 0) {
954        IRBuilder<> b(mBasicBlock);
955        mCarryQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
956    }
957    return mCarryQueueVector[index];
958}
959
960void PabloCompiler::genCarryOutStore(Value* carryOut, const unsigned index ) {
961    assert (carryOut);
962    assert (index < mCarryQueueVector.size());
963    if (mNestingDepth == 0) {
964        IRBuilder<> b(mBasicBlock);
965        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
966    }
967    mCarryQueueVector[index] = carryOut;
968}
969
970Value* PabloCompiler::genAdvanceInLoad(const unsigned index) {
971    assert (index < mAdvanceQueueVector.size());
972    if (mNestingDepth == 0) {
973        IRBuilder<> b(mBasicBlock);
974        mAdvanceQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
975    }
976    return mAdvanceQueueVector[index];
977}
978
979void PabloCompiler::genAdvanceOutStore(Value* advanceOut, const unsigned index ) {
980    assert (advanceOut);
981    assert (index < mAdvanceQueueVector.size());
982    if (mNestingDepth == 0) {
983        IRBuilder<> b(mBasicBlock);
984        b.CreateAlignedStore(advanceOut, b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
985    }
986    mAdvanceQueueVector[index] = advanceOut;
987}
988
989inline Value* PabloCompiler::genBitBlockAny(Value* test) {
990    IRBuilder<> b(mBasicBlock);
991    Value* cast_marker_value_1 = b.CreateBitCast(test, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
992    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(IntegerType::get(mMod->getContext(), BLOCK_SIZE), 0));
993}
994
995Value* PabloCompiler::genShiftHighbitToLow(Value* e, const Twine &namehint) {
996    IRBuilder<> b(mBasicBlock);
997    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
998    return b.CreateBitCast(b.CreateLShr(i128_val, BLOCK_SIZE - 1, namehint), mBitBlockType);
999}
1000
1001Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
1002    IRBuilder<> b(mBasicBlock);
1003    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1004    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
1005}
1006
1007inline Value* PabloCompiler::genNot(Value* expr) {
1008    IRBuilder<> b(mBasicBlock);
1009    return b.CreateXor(expr, mOneInitializer, "not");
1010}
1011
1012Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount) {
1013
1014    IRBuilder<> b(mBasicBlock);
1015
1016    const auto advanceIdx = mAdvanceQueueIdx++;
1017#ifdef USE_LONG_INTEGER_SHIFT
1018    Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(advanceIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1019    Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1020    Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
1021    Value* result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1022    genAdvanceOutStore(strm_value, advanceIdx);
1023
1024    return result_value;
1025#elif (BLOCK_SIZE == 128)
1026    if (shift_amount == 1) {
1027        Value* advanceq_value = genShiftHighbitToLow(genAdvanceInLoad(advanceIdx));
1028        Value* srli_1_value = b.CreateLShr(strm_value, 63);
1029        Value* packed_shuffle;
1030        Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
1031        Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
1032        packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
1033
1034        Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
1035        Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
1036
1037        Value* shl_value = b.CreateShl(strm_value, const_packed_2);
1038        Value* result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
1039
1040        //CarryQ - carry out:
1041        genAdvanceOutStore(strm_value, advanceIdx);
1042
1043        return result_value;
1044    }
1045    else if (shift_amount < 64) {
1046        // This is the preferred logic, but is too slow for the general case.
1047        // We need to speed up our custom LLVM for this code.
1048        Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(advanceIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1049        Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1050        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
1051        Value* result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1052        genAdvanceOutStore(strm_value, advanceIdx);
1053
1054        return result_value;
1055    }
1056    else {//if (shift_amount >= 64) {
1057        throw std::runtime_error("Shift amount >= 64 in Advance is currently unsupported.");
1058    }
1059#else
1060    //BLOCK_SIZE == 256
1061    static_assert(false, "Advance with carry on 256-bit bitblock requires long integer shifts (USE_LONG_INTEGER_SHIFT).");
1062#endif //USE_LONG_INTEGER_SHIFT
1063}
1064
1065void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
1066    IRBuilder<> b(mBasicBlock);
1067    if (marker->getType()->isPointerTy()) {
1068        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
1069    }
1070    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
1071    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
1072    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
1073}
1074
1075}
Note: See TracBrowser for help on using the repository browser.