source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4539

Last change on this file since 4539 was 4539, checked in by cameron, 4 years ago

Slight refactoring to have access to PabloBlock during Examine/compile

File size: 47.3 KB
Line 
1/*
2 *  Copyright (c) 2014-15 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <pablo/pablo_compiler.h>
8#include <pablo/codegenstate.h>
9#include <pablo/printer_pablos.h>
10#include <cc/cc_namemap.hpp>
11#include <re/re_name.h>
12#include <stdexcept>
13#include <include/simd-lib/bitblock.hpp>
14#include <sstream>
15#include <llvm/IR/Verifier.h>
16#include <llvm/Pass.h>
17#include <llvm/PassManager.h>
18#include <llvm/ADT/SmallVector.h>
19#include <llvm/Analysis/Passes.h>
20#include <llvm/IR/BasicBlock.h>
21#include <llvm/IR/CallingConv.h>
22#include <llvm/IR/Constants.h>
23#include <llvm/IR/DataLayout.h>
24#include <llvm/IR/DerivedTypes.h>
25#include <llvm/IR/Function.h>
26#include <llvm/IR/GlobalVariable.h>
27#include <llvm/IR/InlineAsm.h>
28#include <llvm/IR/Instructions.h>
29#include <llvm/IR/LLVMContext.h>
30#include <llvm/IR/Module.h>
31#include <llvm/Support/FormattedStream.h>
32#include <llvm/Support/MathExtras.h>
33#include <llvm/Support/Casting.h>
34#include <llvm/Support/Compiler.h>
35#include <llvm/Support/Debug.h>
36#include <llvm/Support/TargetSelect.h>
37#include <llvm/Support/Host.h>
38#include <llvm/Transforms/Scalar.h>
39#include <llvm/ExecutionEngine/ExecutionEngine.h>
40#include <llvm/ExecutionEngine/MCJIT.h>
41#include <llvm/IRReader/IRReader.h>
42#include <llvm/Bitcode/ReaderWriter.h>
43#include <llvm/Support/MemoryBuffer.h>
44#include <llvm/IR/IRBuilder.h>
45#include <llvm/Support/CommandLine.h>
46#include <llvm/ADT/Twine.h>
47#include <iostream>
48
49cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
50static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
51
52extern "C" {
53  void wrapped_print_register(BitBlock bit_block) {
54      print_register<BitBlock>("", bit_block);
55  }
56}
57
58namespace pablo {
59
60PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
61: mBasisBits(basisBits)
62, mMod(new Module("icgrep", getGlobalContext()))
63, mBasicBlock(nullptr)
64, mExecutionEngine(nullptr)
65, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
66, mBasisBitsInputPtr(nullptr)
67, mCarryQueueIdx(0)
68, mCarryDataPtr(nullptr)
69, mNestingDepth(0)
70, mCarryQueueSize(0)
71, mAdvanceQueueIdx(0)
72, mAdvanceQueueSize(0)
73, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
74, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
75, mFunctionType(nullptr)
76, mFunction(nullptr)
77, mBasisBitsAddr(nullptr)
78, mOutputAddrPtr(nullptr)
79, mMaxNestingDepth(0)
80, mPrintRegisterFunction(nullptr)
81{
82    //Create the jit execution engine.up
83    InitializeNativeTarget();
84    InitializeNativeTargetAsmPrinter();
85    InitializeNativeTargetAsmParser();
86    DefineTypes();
87}
88
89PabloCompiler::~PabloCompiler()
90{
91
92}
93   
94void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
95    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
96}
97
98
99CompiledPabloFunction PabloCompiler::compile(PabloBlock & pb)
100{
101    mNestingDepth = 0;
102    mMaxNestingDepth = 0;
103    mCarryQueueSize = 0;
104    mAdvanceQueueSize = 0;
105    Examine(pb);
106    mCarryQueueVector.resize(mCarryQueueSize);
107    mAdvanceQueueVector.resize(mAdvanceQueueSize);
108    mCarryQueueSummaryIdx.resize(mCarryQueueSize);
109    mAdvanceQueueSummaryIdx.resize(mAdvanceQueueSize);
110    std::string errMessage;
111    EngineBuilder builder(mMod);
112    builder.setErrorStr(&errMessage);
113    builder.setMCPU(sys::getHostCPUName());
114    builder.setUseMCJIT(true);
115    builder.setOptLevel(mMaxNestingDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
116    mExecutionEngine = builder.create();
117    if (mExecutionEngine == nullptr) {
118        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
119    }
120    DeclareFunctions();
121
122    DeclareCallFunctions();
123
124    Function::arg_iterator args = mFunction->arg_begin();
125    mBasisBitsAddr = args++;
126    mBasisBitsAddr->setName("basis_bits");
127    mCarryDataPtr = args++;
128    mCarryDataPtr->setName("carry_data");
129    mOutputAddrPtr = args++;
130    mOutputAddrPtr->setName("output");
131
132    //Create the carry and advance queues.
133    mCarryQueueIdx = 0;
134    mAdvanceQueueIdx = 0;
135    mNestingDepth = 0;
136    mMaxNestingDepth = 0;
137    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
138
139    //The basis bits structure
140    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
141        IRBuilder<> b(mBasicBlock);
142        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
143        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
144        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, mBasisBits[i]->getName()->to_string());
145        mMarkerMap.insert(std::make_pair(mBasisBits[i], basisBit));
146    }
147
148    //Generate the IR instructions for the function.
149    compileBlock(pb);
150
151    if (LLVM_UNLIKELY(mCarryQueueIdx != mCarryQueueSize)) {
152        throw std::runtime_error("Actual carry queue size (" + std::to_string(mCarryQueueIdx) + ") does not match expected (" + std::to_string(mCarryQueueSize) + ")");
153    }
154    if (LLVM_UNLIKELY(mAdvanceQueueIdx != mAdvanceQueueSize)) {
155        throw std::runtime_error("Actual advance queue size (" + std::to_string(mAdvanceQueueIdx) + ") does not match expected (" + std::to_string(mAdvanceQueueSize) + ")");
156    }
157    if (LLVM_UNLIKELY(mNestingDepth != 0)) {
158        throw std::runtime_error("Non-zero nesting depth error (" + std::to_string(mNestingDepth) + ")");
159    }
160
161    //Terminate the block
162    ReturnInst::Create(mMod->getContext(), mBasicBlock);
163
164    //Display the IR that has been generated by this module.
165    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
166        mMod->dump();
167    }
168    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
169    verifyModule(*mMod, &dbgs());
170
171    mExecutionEngine->finalizeObject();
172
173    //Return the required size of the carry data area to the process_block function.
174    return CompiledPabloFunction((mCarryQueueSize + mAdvanceQueueSize) * sizeof(BitBlock), mFunction, mExecutionEngine);
175}
176
177void PabloCompiler::DefineTypes()
178{
179    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
180    if (structBasisBits == nullptr) {
181        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
182    }
183    std::vector<Type*>StructTy_struct_Basis_bits_fields;
184    for (int i = 0; i != mBasisBits.size(); i++)
185    {
186        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
187    }
188    if (structBasisBits->isOpaque()) {
189        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
190    }
191    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
192
193    std::vector<Type*>functionTypeArgs;
194    functionTypeArgs.push_back(mBasisBitsInputPtr);
195
196    //The carry data array.
197    //A pointer to the BitBlock vector.
198    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
199
200    //The output structure.
201    StructType * outputStruct = mMod->getTypeByName("struct.Output");
202    if (!outputStruct) {
203        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
204    }
205    if (outputStruct->isOpaque()) {
206        std::vector<Type*>fields;
207        fields.push_back(mBitBlockType);
208        fields.push_back(mBitBlockType);
209        outputStruct->setBody(fields, /*isPacked=*/false);
210    }
211    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
212
213    //The &output parameter.
214    functionTypeArgs.push_back(outputStructPtr);
215
216    mFunctionType = FunctionType::get(
217     /*Result=*/Type::getVoidTy(mMod->getContext()),
218     /*Params=*/functionTypeArgs,
219     /*isVarArg=*/false);
220}
221
222void PabloCompiler::DeclareFunctions()
223{
224    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
225    mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), mBitBlockType, NULL);
226    mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
227    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
228
229#ifdef USE_UADD_OVERFLOW
230#ifdef USE_TWO_UADD_OVERFLOW
231    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
232    std::vector<Type*>StructTy_0_fields;
233    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
234    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
235    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
236
237    std::vector<Type*>FuncTy_1_args;
238    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
239    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
240    FunctionType* FuncTy_1 = FunctionType::get(
241                                              /*Result=*/StructTy_0,
242                                              /*Params=*/FuncTy_1_args,
243                                              /*isVarArg=*/false);
244
245    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
246                                              std::to_string(BLOCK_SIZE));
247    if (!mFunctionUaddOverflow) {
248        mFunctionUaddOverflow= Function::Create(
249          /*Type=*/ FuncTy_1,
250          /*Linkage=*/ GlobalValue::ExternalLinkage,
251          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
252        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
253    }
254    AttributeSet mFunctionUaddOverflowPAL;
255    {
256        SmallVector<AttributeSet, 4> Attrs;
257        AttributeSet PAS;
258        {
259          AttrBuilder B;
260          B.addAttribute(Attribute::NoUnwind);
261          B.addAttribute(Attribute::ReadNone);
262          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
263        }
264
265        Attrs.push_back(PAS);
266        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
267    }
268    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
269#else
270    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
271    std::vector<Type*>StructTy_0_fields;
272    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
273    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
274    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
275
276    std::vector<Type*>FuncTy_1_args;
277    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
278    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
279    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
280    FunctionType* FuncTy_1 = FunctionType::get(
281                                              /*Result=*/StructTy_0,
282                                              /*Params=*/FuncTy_1_args,
283                                              /*isVarArg=*/false);
284
285    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
286                                              std::to_string(BLOCK_SIZE));
287    if (!mFunctionUaddOverflowCarryin) {
288        mFunctionUaddOverflowCarryin = Function::Create(
289          /*Type=*/ FuncTy_1,
290          /*Linkage=*/ GlobalValue::ExternalLinkage,
291          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
292        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
293    }
294    AttributeSet mFunctionUaddOverflowCarryinPAL;
295    {
296        SmallVector<AttributeSet, 4> Attrs;
297        AttributeSet PAS;
298        {
299          AttrBuilder B;
300          B.addAttribute(Attribute::NoUnwind);
301          B.addAttribute(Attribute::ReadNone);
302          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
303        }
304
305        Attrs.push_back(PAS);
306        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
307    }
308    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
309#endif
310#endif
311
312    //Starts on process_block
313    SmallVector<AttributeSet, 4> Attrs;
314    AttributeSet PAS;
315    {
316        AttrBuilder B;
317        B.addAttribute(Attribute::ReadOnly);
318        B.addAttribute(Attribute::NoCapture);
319        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
320    }
321    Attrs.push_back(PAS);
322    {
323        AttrBuilder B;
324        B.addAttribute(Attribute::NoCapture);
325        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
326    }
327    Attrs.push_back(PAS);
328    {
329        AttrBuilder B;
330        B.addAttribute(Attribute::NoCapture);
331        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
332    }
333    Attrs.push_back(PAS);
334    {
335        AttrBuilder B;
336        B.addAttribute(Attribute::NoUnwind);
337        B.addAttribute(Attribute::UWTable);
338        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
339    }
340    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
341
342    //Create the function that will be generated.
343    mFunction = mMod->getFunction("process_block");
344    if (!mFunction) {
345        mFunction = Function::Create(
346            /*Type=*/mFunctionType,
347            /*Linkage=*/GlobalValue::ExternalLinkage,
348            /*Name=*/"process_block", mMod);
349        mFunction->setCallingConv(CallingConv::C);
350    }
351    mFunction->setAttributes(AttrSet);
352}
353
354//
355// CarryNumbering: sequential numbers associated with each
356// carry-generating operation encountered in a traversal of the
357// Pablo AST.    Carry-generating operations are MatchStar, ScanThru,
358// and so on.
359// AdvanceNumbering: sequential numbers associated with each Advance
360// operation encountered in tree traversal, with the following modifications.
361//   (a) an additional AdvanceQueue entry is created for each if-statement
362//       having more than one carry or advance operation within it.  This
363//       additional entry is a summary entry which must be nonzero to
364//       indicate that there are carry or advance bits associated with
365//       any operation within the if-structure (at any nesting level).
366//   (b) advancing by a large amount may require multiple advance entries.
367//       the number of advance entries for an operation Adv(x, n) is
368//       (n - 1) / BLOCK_SIZE + 1
369//
370// Note that the initial carry/advance numbering is determined by the
371// Examine function.  The values determined at this stage must be consistent
372// with the later numbering calculated during actual statement compilation.
373//
374// Examine precomputes some CarryNumbering and AdvanceNumbering, as
375// well as mMaxNestingDepth of while loops.
376//
377void PabloCompiler::Examine(PabloBlock & blk) {
378    // Count local carries and advances at this level.
379    unsigned localCarries = 0;
380    unsigned localAdvances = 0;
381    for (Statement * stmt : blk) {
382        if (Advance * adv = dyn_cast<Advance>(stmt)) {
383            localAdvances += (adv->getAdvanceAmount() + BLOCK_SIZE - 1) / BLOCK_SIZE;
384        }
385        else if (isa<MatchStar>(stmt) || isa<ScanThru>(stmt)) {
386            ++localCarries;
387        }
388    }
389    mCarryQueueSize += localCarries;
390    mAdvanceQueueSize += localAdvances;
391    for (Statement * stmt : blk) {
392        if (Call * call = dyn_cast<Call>(stmt)) {
393            mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
394        }
395        else if (If * ifStatement = dyn_cast<If>(stmt)) {
396            const auto preIfCarryCount = mCarryQueueSize;
397            const auto preIfAdvanceCount = mAdvanceQueueSize;
398            Examine(ifStatement->getBody());
399            int ifCarryCount = mCarryQueueSize - preIfCarryCount;
400            int ifAdvanceCount = mAdvanceQueueSize - preIfAdvanceCount;
401            if ((ifCarryCount + ifAdvanceCount) > 1) {
402                ++mAdvanceQueueSize;
403                ++ifAdvanceCount;
404            }
405            ifStatement->setInclusiveCarryCount(ifCarryCount);
406            ifStatement->setInclusiveAdvanceCount(ifAdvanceCount);
407        }
408        else if (While * whileStatement = dyn_cast<While>(stmt)) {
409            const auto preWhileCarryCount = mCarryQueueSize;
410            const auto preWhileAdvanceCount = mAdvanceQueueSize;
411            mMaxNestingDepth = std::max(mMaxNestingDepth, ++mNestingDepth);
412            Examine(whileStatement->getBody());
413            --mNestingDepth;
414            whileStatement->setInclusiveCarryCount(mCarryQueueSize - preWhileCarryCount);
415            whileStatement->setInclusiveAdvanceCount(mAdvanceQueueSize - preWhileAdvanceCount);
416        }
417    }
418}
419
420void PabloCompiler::DeclareCallFunctions() {
421    for (auto mapping : mCalleeMap) {
422        const String * callee = mapping.first;
423        //std::cerr << callee->str() << " to be declared\n";
424        auto ei = mExternalMap.find(callee->value());
425        if (ei != mExternalMap.end()) {
426            void * fn_ptr = ei->second;
427            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
428            Value * externalValue = mMod->getOrInsertFunction(callee->value(), mBitBlockType, mBasisBitsInputPtr, NULL);
429            if (LLVM_UNLIKELY(externalValue == nullptr)) {
430                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
431            }
432            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
433            mCalleeMap[callee] = externalValue;
434        }
435        else {
436            throw std::runtime_error("External function \"" + callee->to_string() + "\" not installed");
437        }
438    }
439}
440
441void PabloCompiler::compileBlock(const PabloBlock & blk) {
442    for (const Statement * statement : blk) {
443        compileStatement(statement);
444    }
445}
446
447void PabloCompiler::compileIf(const If * ifStatement) {
448        //
449        //  The If-ElseZero stmt:
450        //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
451        //  If the value of the predicate is nonzero, then determine the values of variables
452        //  <var>* by executing the given statements.  Otherwise, the value of the
453        //  variables are all zero.  Requirements: (a) no variable that is defined within
454        //  the body of the if may be accessed outside unless it is explicitly
455        //  listed in the variable list, (b) every variable in the defined list receives
456        //  a value within the body, and (c) the logical consequence of executing
457        //  the statements in the event that the predicate is zero is that the
458        //  values of all defined variables indeed work out to be 0.
459        //
460        //  Simple Implementation with Phi nodes:  a phi node in the if exit block
461        //  is inserted for each variable in the defined variable list.  It receives
462        //  a zero value from the ifentry block and the defined value from the if
463        //  body.
464        //
465        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
466        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
467        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
468       
469        const auto baseCarryQueueIdx = mCarryQueueIdx;
470        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
471       
472        int ifCarryCount = ifStatement->getInclusiveCarryCount();
473        int ifAdvanceCount = ifStatement->getInclusiveAdvanceCount();
474        //  Carry/Advance queue strategy.   
475        //  If there are any carries or advances at any nesting level within the
476        //  if statement, then the statement must be executed.   A "summary"
477        //  carryover variable is determined for this purpose, consisting of the
478        //  or of all of the carry and advance variables within the if.
479        //  This variable is determined as follows.
480        //  (a)  If the CarryCount and AdvanceCount are both 0, there is no summary variable.
481        //  (b)  If the CarryCount is 1 and the AdvanceCount is 0, then the summary
482        //       carryover variable is just the single carry queue entry.
483        //  (c)  If the CarryCount is 0 and the AdvanceCount is 1, then the summary
484        //       carryover variable is just the advance carry queue entry.
485        //  (d)  Otherwise, an additional advance queue entry is created for the
486        //       summary variable.
487        //  Note that the test for cases (c) and (d) may be combined: the summary carryover
488        //  variable is just last advance queue entry.
489        //
490       
491        IRBuilder<> b_entry(ifEntryBlock);
492        mBasicBlock = ifEntryBlock;
493        Value* if_test_value = compileExpression(ifStatement->getCondition());
494       
495        if ((ifCarryCount == 1) && (ifAdvanceCount == 0)) {
496            Value* last_if_pending_carries = genCarryInLoad(baseCarryQueueIdx);
497            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_carries);
498        }
499        else if ((ifCarryCount > 0) || (ifAdvanceCount > 0)) {
500            Value* last_if_pending_advances = genAdvanceInLoad(baseAdvanceQueueIdx + ifAdvanceCount - 1);
501            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_advances);
502        }
503        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
504
505        // Entry processing is complete, now handle the body of the if.
506        mBasicBlock = ifBodyBlock;
507        compileBlock(ifStatement->getBody());
508
509        // If we compiled an If or a While statement, we won't be in the same basic block as before.
510        // Create the branch from the current basic block to the end block.
511        IRBuilder<> bIfBody(mBasicBlock);
512        // After the recursive compile, now insert the code to compute the summary
513        // carry over variable.
514       
515        if ((ifCarryCount + ifAdvanceCount) > 1) {
516            // A summary variable is needed.
517
518            Value * carry_summary = mZeroInitializer;
519            for (int c = baseCarryQueueIdx; c < baseCarryQueueIdx + ifCarryCount; c++) {
520                int s = mCarryQueueSummaryIdx[c];
521                if (s == -1) {
522                    Value* carryq_value = mCarryQueueVector[c];
523                    if (carry_summary == mZeroInitializer) {
524                        carry_summary = carryq_value;
525                    }
526                    else {
527                        carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
528                    }
529                    mCarryQueueSummaryIdx[c] = mAdvanceQueueIdx;
530                }
531            }
532            // Note that the limit in the following uses -1, because
533            // last entry of the advance queue is for the summary variable.
534            for (int c = baseAdvanceQueueIdx; c < baseAdvanceQueueIdx + ifAdvanceCount - 1; c++) {
535                int s = mAdvanceQueueSummaryIdx[c];
536                if (s == -1 ) {
537                    Value* advance_q_value = mAdvanceQueueVector[c];
538                    if (carry_summary == mZeroInitializer) {
539                        carry_summary = advance_q_value;
540                    }
541                    else {
542                        carry_summary = bIfBody.CreateOr(carry_summary, advance_q_value);
543                    }
544                    mAdvanceQueueSummaryIdx[c] = mAdvanceQueueIdx;
545                }
546            }
547            genAdvanceOutStore(carry_summary, mAdvanceQueueIdx++);
548        }
549        bIfBody.CreateBr(ifEndBlock);
550        //End Block
551        IRBuilder<> bEnd(ifEndBlock);
552        for (const PabloAST * node : ifStatement->getDefined()) {
553            const Assign * assign = cast<Assign>(node);
554            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, assign->getName()->value());
555            auto f = mMarkerMap.find(assign);
556            assert (f != mMarkerMap.end());
557            phi->addIncoming(mZeroInitializer, ifEntryBlock);
558            phi->addIncoming(f->second, mBasicBlock);
559            mMarkerMap[assign] = phi;
560        }
561        // Create the phi Node for the summary variable.
562        if (ifAdvanceCount >= 1) {
563            // final AdvanceQ entry is summary variable.
564            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
565            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
566            summary_phi->addIncoming(mAdvanceQueueVector[mAdvanceQueueIdx-1], mBasicBlock);
567            mAdvanceQueueVector[mAdvanceQueueIdx-1] = summary_phi;
568        }
569        else if (ifCarryCount == 1) {
570            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
571            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
572            summary_phi->addIncoming(mCarryQueueVector[baseCarryQueueIdx], mBasicBlock);
573            mCarryQueueVector[baseCarryQueueIdx] = summary_phi;
574        }
575       
576        // Set the basic block to the new end block
577        mBasicBlock = ifEndBlock;
578}
579
// Compiles a While statement.
//
// The loop body is compiled twice: once inline in the current block (the
// initial iteration) and once inside "while.body". In between, the carry and
// advance queue indices are rewound so that both compilations use the same
// queue slots. "while.cond" holds one phi node per carry entry, per advance
// entry and per Next node of the loop; each phi merges the value from the
// initial iteration with the value produced by the loop body.
580void PabloCompiler::compileWhile(const While * whileStatement) {
        // Queue positions at which this loop's entries start.
581        const auto baseCarryQueueIdx = mCarryQueueIdx;
582        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
        // Only for an outermost loop: issue the carry-in / advance-in loads for
        // every entry of the loop (including nested ones) before compiling it.
583        if (mNestingDepth == 0) {
584            for (auto i = 0; i != whileStatement->getInclusiveCarryCount(); ++i) {
585                genCarryInLoad(baseCarryQueueIdx + i);
586            }
587            for (auto i = 0; i != whileStatement->getInclusiveAdvanceCount(); ++i) {
588                genAdvanceInLoad(baseAdvanceQueueIdx + i);
589            }
590        }
591
        // Collect the Next nodes that appear directly in the loop body.
592        SmallVector<const Next*, 4> nextNodes;
593        for (const PabloAST * node : whileStatement->getBody()) {
594            if (isa<Next>(node)) {
595                nextNodes.push_back(cast<Next>(node));
596            }
597        }
598
599        // Compile the initial iteration statements; the calls to genCarryOutStore will update the
600        // mCarryQueueVector with the appropriate values. Although we're not actually entering a new basic
601        // block yet, increment the nesting depth so that any calls to genCarryInLoad or genCarryOutStore
602        // will refer to the previous value.
603
604        ++mNestingDepth;
605
606        compileBlock(whileStatement->getBody());
607
608        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
609        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
610        // but works for now.
611        mCarryQueueIdx = baseCarryQueueIdx;
612        mAdvanceQueueIdx = baseAdvanceQueueIdx;
613
614        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
615        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
616        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
617
618        // Note: compileBlock may update the mBasicBlock pointer if the body contains nested loops. It
619        // may not be same one that we entered the function with.
620        IRBuilder<> bEntry(mBasicBlock);
621        bEntry.CreateBr(whileCondBlock);
622
623        // CONDITION BLOCK
624        IRBuilder<> bCond(whileCondBlock);
625        // generate phi nodes for any carry propagating instruction
626        int whileCarryCount = whileStatement->getInclusiveCarryCount();
627        int whileAdvanceCount = whileStatement->getInclusiveAdvanceCount();
        // phiNodes layout: [carry phis | advance phis | Next phis]; `index` walks
        // through it and is deliberately carried over from one loop to the next.
628        std::vector<PHINode*> phiNodes(whileCarryCount + whileAdvanceCount + nextNodes.size());
629        unsigned index = 0;
630        for (index = 0; index != whileCarryCount; ++index) {
631            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
            // First incoming value: the carry produced by the initial iteration.
632            phi->addIncoming(mCarryQueueVector[baseCarryQueueIdx + index], mBasicBlock);
633            mCarryQueueVector[baseCarryQueueIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
634            phiNodes[index] = phi;
635        }
636        for (int i = 0; i != whileAdvanceCount; ++i) {
637            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
638            phi->addIncoming(mAdvanceQueueVector[baseAdvanceQueueIdx + i], mBasicBlock);
639            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = mZeroInitializer; // (use phi for multi-carry mode.)
640            phiNodes[index++] = phi;
641        }
642        // and for any Next nodes in the loop body
643        for (const Next * n : nextNodes) {
644            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->value());
645            auto f = mMarkerMap.find(n->getInitial());
646            assert (f != mMarkerMap.end());
647            phi->addIncoming(f->second, mBasicBlock);
            // From here on the loop variable is read through its phi node.
648            mMarkerMap[n->getInitial()] = phi;
649            phiNodes[index++] = phi;
650        }
651
652        mBasicBlock = whileCondBlock;
        // Branches to while.end when genBitBlockAny() yields true, else to while.body.
653        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
654
655        // BODY BLOCK
656        mBasicBlock = whileBodyBlock;
657        compileBlock(whileStatement->getBody());
658        // update phi nodes for any carry propagating instruction
659        IRBuilder<> bWhileBody(mBasicBlock);
660        for (index = 0; index != whileStatement->getInclusiveCarryCount(); ++index) {
            // Accumulate: carry so far (phi) OR the carry of this iteration.
661            Value * carryOut = bWhileBody.CreateOr(phiNodes[index], mCarryQueueVector[baseCarryQueueIdx + index]);
662            PHINode * phi = phiNodes[index];
663            phi->addIncoming(carryOut, mBasicBlock);
664            mCarryQueueVector[baseCarryQueueIdx + index] = phi;
665        }
        // `index` now equals the carry count, i.e. the first advance phi.
666        for (int i = 0; i != whileAdvanceCount; ++i) {
667            Value * advOut = bWhileBody.CreateOr(phiNodes[index], mAdvanceQueueVector[baseAdvanceQueueIdx + i]);
668            PHINode * phi = phiNodes[index++];
669            phi->addIncoming(advOut, mBasicBlock);
670            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = phi;
671        }
672        // and for any Next nodes in the loop body
673        for (const Next * n : nextNodes) {
674            auto f = mMarkerMap.find(n->getInitial());
675            assert (f != mMarkerMap.end());
676            PHINode * phi = phiNodes[index++];
677            phi->addIncoming(f->second, mBasicBlock);
678            mMarkerMap[n->getInitial()] = phi;
679        }
680
        // Back edge to the condition block.
681        bWhileBody.CreateBr(whileCondBlock);
682
683        // EXIT BLOCK
684        mBasicBlock = whileEndBlock;
        // Only when leaving an outermost loop: pass the accumulated carry and
        // advance values (the phi nodes) to the carry-out / advance-out stores.
685        if (--mNestingDepth == 0) {
686            for (index = 0; index != whileCarryCount; ++index) {
687                genCarryOutStore(phiNodes[index], baseCarryQueueIdx + index);
688            }
689            for (index = 0; index != whileAdvanceCount; ++index) {
690                genAdvanceOutStore(phiNodes[whileCarryCount + index], baseAdvanceQueueIdx + index);
691            }
692        }
693 
694}
695
696void PabloCompiler::compileStatement(const Statement * stmt)
697{
698    IRBuilder<> b(mBasicBlock);
699    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
700        Value * expr = compileExpression(assign->getExpr());
701        mMarkerMap[assign] = expr;
702        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
703            SetOutputValue(expr, assign->getOutputIndex());
704        }
705    }
706    else if (const Next * next = dyn_cast<const Next>(stmt)) {
707        Value * expr = compileExpression(next->getExpr());
708        mMarkerMap[next->getInitial()] = expr;
709    }
710    else if (const If * ifStatement = dyn_cast<const If>(stmt))
711    {
712        compileIf(ifStatement);
713    }
714    else if (const While * whileStatement = dyn_cast<const While>(stmt))
715    {
716        compileWhile(whileStatement);
717    }
718    else if (const Call* call = dyn_cast<Call>(stmt)) {
719        //Call the callee once and store the result in the marker map.
720        auto mi = mMarkerMap.find(call);
721        if (mi == mMarkerMap.end()) {
722            auto ci = mCalleeMap.find(call->getCallee());
723            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
724                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->to_string() + "\"");
725            }
726            mi = mMarkerMap.insert(std::make_pair(call, b.CreateCall(ci->second, mBasisBitsAddr))).first;
727        }
728        // return mi->second;
729    }
730    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
731        Value * expr = b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
732        mMarkerMap[pablo_and] = expr;
733        // return expr;
734    }
735    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
736        Value * expr = b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
737        mMarkerMap[pablo_or] = expr;
738        // return expr;
739    }
740    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
741        Value * expr = b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
742        mMarkerMap[pablo_xor] = expr;
743        // return expr;
744    }
745    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
746        Value* ifMask = compileExpression(sel->getCondition());
747        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
748        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
749        Value * expr = b.CreateOr(ifTrue, ifFalse);
750        mMarkerMap[sel] = expr;
751        // return expr;
752    }
753    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
754        Value * expr = genNot(compileExpression(pablo_not->getExpr()));
755        mMarkerMap[pablo_not] = expr;
756        // return expr;
757    }
758    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
759        Value* strm_value = compileExpression(adv->getExpr());
760        int shift = adv->getAdvanceAmount();
761        Value * expr = genAdvanceWithCarry(strm_value, shift);
762        mMarkerMap[adv] = expr;
763        // return expr;
764    }
765    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt))
766    {
767        Value * marker = compileExpression(mstar->getMarker());
768        Value * cc = compileExpression(mstar->getCharClass());
769        Value * marker_and_cc = b.CreateAnd(marker, cc);
770        Value * expr = b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc), cc), marker, "matchstar");
771        mMarkerMap[mstar] = expr;
772        // return expr;
773    }
774    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt))
775    {
776        Value * marker_expr = compileExpression(sthru->getScanFrom());
777        Value * cc_expr = compileExpression(sthru->getScanThru());
778        Value * expr = b.CreateAnd(genAddWithCarry(marker_expr, cc_expr), genNot(cc_expr), "scanthru");
779        mMarkerMap[sthru] = expr;
780        // return expr;
781    }
782    else {
783        PabloPrinter::print(stmt, std::cerr);
784        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
785    }
786}
787
788Value * PabloCompiler::compileExpression(const PabloAST * expr) {
789    if (isa<Ones>(expr)) {
790        return mOneInitializer;
791    }
792    else if (isa<Zeroes>(expr)) {
793        return mZeroInitializer;
794    }
795    else if (const Next * next = dyn_cast<Next>(expr)) {
796        expr = next->getInitial();
797    }
798    auto f = mMarkerMap.find(expr);
799    if (f == mMarkerMap.end()) {
800        std::stringstream str;
801        str << "\"";
802        PabloPrinter::print(expr, str);
803        str << "\" was used before definition!";
804        throw std::runtime_error(str.str());
805    }
806    return f->second;
807}
808
809
810#ifdef USE_UADD_OVERFLOW
811#ifdef USE_TWO_UADD_OVERFLOW
812PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
813    std::vector<Value*> struct_res_params;
814    struct_res_params.push_back(int128_e1);
815    struct_res_params.push_back(int128_e2);
816    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflow, struct_res_params, "uadd_overflow_res", mBasicBlock);
817    struct_res->setCallingConv(CallingConv::C);
818    struct_res->setTailCall(false);
819    AttributeSet struct_res_PAL;
820    struct_res->setAttributes(struct_res_PAL);
821
822    SumWithOverflowPack ret;
823
824    std::vector<unsigned> int128_sum_indices;
825    int128_sum_indices.push_back(0);
826    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
827
828    std::vector<unsigned> int1_obit_indices;
829    int1_obit_indices.push_back(1);
830    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
831
832    return ret;
833}
834#else
835PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
836    std::vector<Value*> struct_res_params;
837    struct_res_params.push_back(int128_e1);
838    struct_res_params.push_back(int128_e2);
839    struct_res_params.push_back(int1_cin);
840    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflowCarryin, struct_res_params, "uadd_overflow_res", mBasicBlock);
841    struct_res->setCallingConv(CallingConv::C);
842    struct_res->setTailCall(false);
843    AttributeSet struct_res_PAL;
844    struct_res->setAttributes(struct_res_PAL);
845
846    SumWithOverflowPack ret;
847
848    std::vector<unsigned> int128_sum_indices;
849    int128_sum_indices.push_back(0);
850    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
851
852    std::vector<unsigned> int1_obit_indices;
853    int1_obit_indices.push_back(1);
854    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
855
856    return ret;
857}
858#endif
859#endif
860
861
862Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2) {
863    IRBuilder<> b(mBasicBlock);
864
865    //CarryQ - carry in.
866    const int carryIdx = mCarryQueueIdx++;
867    Value* carryq_value = genCarryInLoad(carryIdx);
868#ifdef USE_TWO_UADD_OVERFLOW
869    //This is the ideal implementation, which uses two uadd.with.overflow
870    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
871    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
872    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
873    CastInst* int128_carryq_value = new BitCastInst(carryq_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "carryq_128", mBasicBlock);
874
875    SumWithOverflowPack sumpack0, sumpack1;
876
877    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
878    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
879
880    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
881    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
882
883    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
884    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
885    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
886    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
887    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
888
889#elif defined USE_UADD_OVERFLOW
890    //use llvm.uadd.with.overflow.i128 or i256
891    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
892    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
893
894    //get i1 carryin from iBLOCK_SIZE
895    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
896    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
897    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
898
899    SumWithOverflowPack sumpack0;
900    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
901    Value* obit = sumpack0.obit;
902    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
903
904    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
905    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
906    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
907    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
908#elif (BLOCK_SIZE == 128)
909    //calculate carry through logical ops
910    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
911    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
912    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
913    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
914    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
915    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
916
917    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
918    Value* carry_out = genShiftHighbitToLow(b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))), "carry_out");
919#else
920    //BLOCK_SIZE == 256, there is no other implementation
921    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
922#endif //USE_TWO_UADD_OVERFLOW
923
924    genCarryOutStore(carry_out, carryIdx);
925    return sum;
926}
927
928Value* PabloCompiler::genCarryInLoad(const unsigned index) {
929    assert (index < mCarryQueueVector.size());
930    if (mNestingDepth == 0) {
931        IRBuilder<> b(mBasicBlock);
932        mCarryQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
933    }
934    return mCarryQueueVector[index];
935}
936
937void PabloCompiler::genCarryOutStore(Value* carryOut, const unsigned index ) {
938    assert (carryOut);
939    assert (index < mCarryQueueVector.size());
940    if (mNestingDepth == 0) {
941        IRBuilder<> b(mBasicBlock);
942        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
943    }
944    mCarryQueueSummaryIdx[index] = -1;
945    mCarryQueueVector[index] = carryOut;
946}
947
948Value* PabloCompiler::genAdvanceInLoad(const unsigned index) {
949    assert (index < mAdvanceQueueVector.size());
950    if (mNestingDepth == 0) {
951        IRBuilder<> b(mBasicBlock);
952        mAdvanceQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryDataPtr, b.getInt64(mCarryQueueSize + index)), BLOCK_SIZE/8, false);
953    }
954    return mAdvanceQueueVector[index];
955}
956
957void PabloCompiler::genAdvanceOutStore(Value* advanceOut, const unsigned index ) {
958    assert (advanceOut);
959    assert (index < mAdvanceQueueVector.size());
960    if (mNestingDepth == 0) {
961        IRBuilder<> b(mBasicBlock);
962        b.CreateAlignedStore(advanceOut, b.CreateGEP(mCarryDataPtr, b.getInt64(mCarryQueueSize + index)), BLOCK_SIZE/8, false);
963    }
964    mAdvanceQueueSummaryIdx[index] = -1;
965    mAdvanceQueueVector[index] = advanceOut;
966}
967
968inline Value* PabloCompiler::genBitBlockAny(Value* test) {
969    IRBuilder<> b(mBasicBlock);
970    Value* cast_marker_value_1 = b.CreateBitCast(test, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
971    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(IntegerType::get(mMod->getContext(), BLOCK_SIZE), 0));
972}
973
974Value* PabloCompiler::genShiftHighbitToLow(Value* e, const Twine &namehint) {
975    IRBuilder<> b(mBasicBlock);
976    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
977    return b.CreateBitCast(b.CreateLShr(i128_val, BLOCK_SIZE - 1, namehint), mBitBlockType);
978}
979
980Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
981    IRBuilder<> b(mBasicBlock);
982    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
983    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
984}
985
986inline Value* PabloCompiler::genNot(Value* expr) {
987    IRBuilder<> b(mBasicBlock);
988    return b.CreateXor(expr, mOneInitializer, "not");
989}
990Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount) {
991    IRBuilder<> b(mBasicBlock);
992    int advEntries = (shift_amount - 1) / BLOCK_SIZE + 1;
993    int block_shift = shift_amount % BLOCK_SIZE;
994    const auto storeIdx = mAdvanceQueueIdx;
995    const auto loadIdx = mAdvanceQueueIdx + advEntries - 1;
996    mAdvanceQueueIdx += advEntries;
997    Value* result_value;
998   
999    if (advEntries == 1) {
1000        if (block_shift == 0) { 
1001            result_value = genAdvanceInLoad(loadIdx);
1002            //b.CreateCall(mFunc_print_register, result_value);
1003        }
1004#if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
1005        if (block_shift == 1) {
1006            Value* advanceq_value = genShiftHighbitToLow(genAdvanceInLoad(loadIdx));
1007            Value* srli_1_value = b.CreateLShr(strm_value, 63);
1008            Value* packed_shuffle;
1009            Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
1010            Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
1011            packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
1012
1013            Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
1014            Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
1015
1016            Value* shl_value = b.CreateShl(strm_value, const_packed_2);
1017            result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
1018        }
1019        else { //if (block_shift < BLOCK_SIZE) {
1020            // This is the preferred logic, but is too slow for the general case.
1021            // We need to speed up our custom LLVM for this code.
1022            Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1023            Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1024            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
1025            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1026        }
1027#else
1028        Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1029        Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1030        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
1031        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1032
1033#endif
1034    }
1035    else {
1036        if (block_shift == 0) {
1037            result_value = genAdvanceInLoad(loadIdx);
1038        }
1039        else { 
1040            // The advance is based on the two oldest bit blocks in the advance queue.
1041            Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1042            Value* strm_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx-1), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1043            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
1044            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1045            //b.CreateCall(mFunc_print_register, genAdvanceInLoad(loadIdx));
1046            //b.CreateCall(mFunc_print_register, genAdvanceInLoad(loadIdx-1));
1047            //b.CreateCall(mFunc_print_register, result_value);
1048        }
1049        // copy entries from previous blocks forward
1050        for (int i = loadIdx; i > storeIdx; i--) {
1051            genAdvanceOutStore(genAdvanceInLoad(i-1), i);
1052        }
1053    }
1054    genAdvanceOutStore(strm_value, storeIdx);
1055    return result_value;
1056}
1057
1058void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
1059    IRBuilder<> b(mBasicBlock);
1060    if (marker->getType()->isPointerTy()) {
1061        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
1062    }
1063    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
1064    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
1065    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
1066}
1067
1068CompiledPabloFunction::CompiledPabloFunction(size_t carryDataSize, Function * function, ExecutionEngine * executionEngine)
1069: CarryDataSize(carryDataSize)
1070, FunctionPointer(executionEngine->getPointerToFunction(function))
1071, mFunction(function)
1072, mExecutionEngine(executionEngine)
1073{
1074
1075}
1076
1077// Clean up the memory for the compiled function once we're finished using it.
1078CompiledPabloFunction::~CompiledPabloFunction() {
1079    if (mExecutionEngine) {
1080        assert (mFunction);
1081        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
1082        delete mExecutionEngine;
1083    }
1084}
1085
1086}
Note: See TracBrowser for help on using the repository browser.