source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4538

Last change on this file since 4538 was 4538, checked in by cameron, 4 years ago

Restructure to use a single process_block_state data area

File size: 47.1 KB
Line 
1/*
2 *  Copyright (c) 2014-15 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <pablo/pablo_compiler.h>
8#include <pablo/codegenstate.h>
9#include <pablo/printer_pablos.h>
10#include <cc/cc_namemap.hpp>
11#include <re/re_name.h>
12#include <stdexcept>
13#include <include/simd-lib/bitblock.hpp>
14#include <sstream>
15#include <llvm/IR/Verifier.h>
16#include <llvm/Pass.h>
17#include <llvm/PassManager.h>
18#include <llvm/ADT/SmallVector.h>
19#include <llvm/Analysis/Passes.h>
20#include <llvm/IR/BasicBlock.h>
21#include <llvm/IR/CallingConv.h>
22#include <llvm/IR/Constants.h>
23#include <llvm/IR/DataLayout.h>
24#include <llvm/IR/DerivedTypes.h>
25#include <llvm/IR/Function.h>
26#include <llvm/IR/GlobalVariable.h>
27#include <llvm/IR/InlineAsm.h>
28#include <llvm/IR/Instructions.h>
29#include <llvm/IR/LLVMContext.h>
30#include <llvm/IR/Module.h>
31#include <llvm/Support/FormattedStream.h>
32#include <llvm/Support/MathExtras.h>
33#include <llvm/Support/Casting.h>
34#include <llvm/Support/Compiler.h>
35#include <llvm/Support/Debug.h>
36#include <llvm/Support/TargetSelect.h>
37#include <llvm/Support/Host.h>
38#include <llvm/Transforms/Scalar.h>
39#include <llvm/ExecutionEngine/ExecutionEngine.h>
40#include <llvm/ExecutionEngine/MCJIT.h>
41#include <llvm/IRReader/IRReader.h>
42#include <llvm/Bitcode/ReaderWriter.h>
43#include <llvm/Support/MemoryBuffer.h>
44#include <llvm/IR/IRBuilder.h>
45#include <llvm/Support/CommandLine.h>
46#include <llvm/ADT/Twine.h>
47#include <iostream>
48
49cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
50static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
51
52extern "C" {
53  void wrapped_print_register(BitBlock bit_block) {
54      print_register<BitBlock>("", bit_block);
55  }
56}
57
58namespace pablo {
59
60PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
61: mBasisBits(basisBits)
62, mMod(new Module("icgrep", getGlobalContext()))
63, mBasicBlock(nullptr)
64, mExecutionEngine(nullptr)
65, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
66, mBasisBitsInputPtr(nullptr)
67, mCarryQueueIdx(0)
68, mCarryDataPtr(nullptr)
69, mNestingDepth(0)
70, mCarryQueueSize(0)
71, mAdvanceQueueIdx(0)
72, mAdvanceQueueSize(0)
73, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
74, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
75, mFunctionType(nullptr)
76, mFunction(nullptr)
77, mBasisBitsAddr(nullptr)
78, mOutputAddrPtr(nullptr)
79, mMaxNestingDepth(0)
80, mPrintRegisterFunction(nullptr)
81{
82    //Create the jit execution engine.up
83    InitializeNativeTarget();
84    InitializeNativeTargetAsmPrinter();
85    InitializeNativeTargetAsmParser();
86    DefineTypes();
87}
88
89PabloCompiler::~PabloCompiler()
90{
91
92}
93   
94void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
95    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
96}
97
98
99CompiledPabloFunction PabloCompiler::compile(PabloBlock & pb)
100{
101    mNestingDepth = 0;
102    mMaxNestingDepth = 0;
103    mCarryQueueSize = 0;
104    mAdvanceQueueSize = 0;
105    Examine(pb.statements());
106    mCarryQueueVector.resize(mCarryQueueSize);
107    mAdvanceQueueVector.resize(mAdvanceQueueSize);
108    mCarryQueueSummaryIdx.resize(mCarryQueueSize);
109    mAdvanceQueueSummaryIdx.resize(mAdvanceQueueSize);
110    std::string errMessage;
111    EngineBuilder builder(mMod);
112    builder.setErrorStr(&errMessage);
113    builder.setMCPU(sys::getHostCPUName());
114    builder.setUseMCJIT(true);
115    builder.setOptLevel(mMaxNestingDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
116    mExecutionEngine = builder.create();
117    if (mExecutionEngine == nullptr) {
118        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
119    }
120    DeclareFunctions();
121
122    DeclareCallFunctions();
123
124    Function::arg_iterator args = mFunction->arg_begin();
125    mBasisBitsAddr = args++;
126    mBasisBitsAddr->setName("basis_bits");
127    mCarryDataPtr = args++;
128    mCarryDataPtr->setName("carry_data");
129    mOutputAddrPtr = args++;
130    mOutputAddrPtr->setName("output");
131
132    //Create the carry and advance queues.
133    mCarryQueueIdx = 0;
134    mAdvanceQueueIdx = 0;
135    mNestingDepth = 0;
136    mMaxNestingDepth = 0;
137    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
138
139    //The basis bits structure
140    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
141        IRBuilder<> b(mBasicBlock);
142        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
143        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
144        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, mBasisBits[i]->getName()->to_string());
145        mMarkerMap.insert(std::make_pair(mBasisBits[i], basisBit));
146    }
147
148    //Generate the IR instructions for the function.
149    compileStatements(pb.statements());
150
151    if (LLVM_UNLIKELY(mCarryQueueIdx != mCarryQueueSize)) {
152        throw std::runtime_error("Actual carry queue size (" + std::to_string(mCarryQueueIdx) + ") does not match expected (" + std::to_string(mCarryQueueSize) + ")");
153    }
154    if (LLVM_UNLIKELY(mAdvanceQueueIdx != mAdvanceQueueSize)) {
155        throw std::runtime_error("Actual advance queue size (" + std::to_string(mAdvanceQueueIdx) + ") does not match expected (" + std::to_string(mAdvanceQueueSize) + ")");
156    }
157    if (LLVM_UNLIKELY(mNestingDepth != 0)) {
158        throw std::runtime_error("Non-zero nesting depth error (" + std::to_string(mNestingDepth) + ")");
159    }
160
161    //Terminate the block
162    ReturnInst::Create(mMod->getContext(), mBasicBlock);
163
164    //Display the IR that has been generated by this module.
165    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
166        mMod->dump();
167    }
168    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
169    verifyModule(*mMod, &dbgs());
170
171    mExecutionEngine->finalizeObject();
172
173    //Return the required size of the carry data area to the process_block function.
174    return CompiledPabloFunction((mCarryQueueSize + mAdvanceQueueSize) * sizeof(BitBlock), mFunction, mExecutionEngine);
175}
176
177void PabloCompiler::DefineTypes()
178{
179    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
180    if (structBasisBits == nullptr) {
181        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
182    }
183    std::vector<Type*>StructTy_struct_Basis_bits_fields;
184    for (int i = 0; i != mBasisBits.size(); i++)
185    {
186        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
187    }
188    if (structBasisBits->isOpaque()) {
189        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
190    }
191    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
192
193    std::vector<Type*>functionTypeArgs;
194    functionTypeArgs.push_back(mBasisBitsInputPtr);
195
196    //The carry data array.
197    //A pointer to the BitBlock vector.
198    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
199
200    //The output structure.
201    StructType * outputStruct = mMod->getTypeByName("struct.Output");
202    if (!outputStruct) {
203        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
204    }
205    if (outputStruct->isOpaque()) {
206        std::vector<Type*>fields;
207        fields.push_back(mBitBlockType);
208        fields.push_back(mBitBlockType);
209        outputStruct->setBody(fields, /*isPacked=*/false);
210    }
211    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
212
213    //The &output parameter.
214    functionTypeArgs.push_back(outputStructPtr);
215
216    mFunctionType = FunctionType::get(
217     /*Result=*/Type::getVoidTy(mMod->getContext()),
218     /*Params=*/functionTypeArgs,
219     /*isVarArg=*/false);
220}
221
222void PabloCompiler::DeclareFunctions()
223{
224    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
225    mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), mBitBlockType, NULL);
226    mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
227    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
228
229#ifdef USE_UADD_OVERFLOW
230#ifdef USE_TWO_UADD_OVERFLOW
231    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
232    std::vector<Type*>StructTy_0_fields;
233    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
234    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
235    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
236
237    std::vector<Type*>FuncTy_1_args;
238    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
239    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
240    FunctionType* FuncTy_1 = FunctionType::get(
241                                              /*Result=*/StructTy_0,
242                                              /*Params=*/FuncTy_1_args,
243                                              /*isVarArg=*/false);
244
245    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
246                                              std::to_string(BLOCK_SIZE));
247    if (!mFunctionUaddOverflow) {
248        mFunctionUaddOverflow= Function::Create(
249          /*Type=*/ FuncTy_1,
250          /*Linkage=*/ GlobalValue::ExternalLinkage,
251          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
252        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
253    }
254    AttributeSet mFunctionUaddOverflowPAL;
255    {
256        SmallVector<AttributeSet, 4> Attrs;
257        AttributeSet PAS;
258        {
259          AttrBuilder B;
260          B.addAttribute(Attribute::NoUnwind);
261          B.addAttribute(Attribute::ReadNone);
262          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
263        }
264
265        Attrs.push_back(PAS);
266        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
267    }
268    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
269#else
270    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
271    std::vector<Type*>StructTy_0_fields;
272    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
273    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
274    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
275
276    std::vector<Type*>FuncTy_1_args;
277    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
278    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
279    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
280    FunctionType* FuncTy_1 = FunctionType::get(
281                                              /*Result=*/StructTy_0,
282                                              /*Params=*/FuncTy_1_args,
283                                              /*isVarArg=*/false);
284
285    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
286                                              std::to_string(BLOCK_SIZE));
287    if (!mFunctionUaddOverflowCarryin) {
288        mFunctionUaddOverflowCarryin = Function::Create(
289          /*Type=*/ FuncTy_1,
290          /*Linkage=*/ GlobalValue::ExternalLinkage,
291          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
292        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
293    }
294    AttributeSet mFunctionUaddOverflowCarryinPAL;
295    {
296        SmallVector<AttributeSet, 4> Attrs;
297        AttributeSet PAS;
298        {
299          AttrBuilder B;
300          B.addAttribute(Attribute::NoUnwind);
301          B.addAttribute(Attribute::ReadNone);
302          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
303        }
304
305        Attrs.push_back(PAS);
306        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
307    }
308    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
309#endif
310#endif
311
312    //Starts on process_block
313    SmallVector<AttributeSet, 4> Attrs;
314    AttributeSet PAS;
315    {
316        AttrBuilder B;
317        B.addAttribute(Attribute::ReadOnly);
318        B.addAttribute(Attribute::NoCapture);
319        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
320    }
321    Attrs.push_back(PAS);
322    {
323        AttrBuilder B;
324        B.addAttribute(Attribute::NoCapture);
325        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
326    }
327    Attrs.push_back(PAS);
328    {
329        AttrBuilder B;
330        B.addAttribute(Attribute::NoCapture);
331        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
332    }
333    Attrs.push_back(PAS);
334    {
335        AttrBuilder B;
336        B.addAttribute(Attribute::NoUnwind);
337        B.addAttribute(Attribute::UWTable);
338        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
339    }
340    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
341
342    //Create the function that will be generated.
343    mFunction = mMod->getFunction("process_block");
344    if (!mFunction) {
345        mFunction = Function::Create(
346            /*Type=*/mFunctionType,
347            /*Linkage=*/GlobalValue::ExternalLinkage,
348            /*Name=*/"process_block", mMod);
349        mFunction->setCallingConv(CallingConv::C);
350    }
351    mFunction->setAttributes(AttrSet);
352}
353
354//
355// CarryNumbering: sequential numbers associated with each
356// carry-generating operation encountered in a traversal of the
357// Pablo AST.    Carry-generating operations are MatchStar, ScanThru,
358// and so on.
359// AdvanceNumbering: sequential numbers associated with each Advance
360// operation encountered in tree traversal, with the following modifications.
361//   (a) an additional AdvanceQueue entry is created for each if-statement
362//       having more than one carry or advance operation within it.  This
363//       additional entry is a summary entry which must be nonzero to
364//       indicate that there are carry or advance bits associated with
365//       any operation within the if-structure (at any nesting level).
366//   (b) advancing by a large amount may require multiple advance entries.
367//       the number of advance entries for an operation Adv(x, n) is
368//       (n - 1) / BLOCK_SIZE + 1
369//
370// Note that the initial carry/advance numbering is determined by the
371// Examine function.  The values determined at this stage must be consistent
372// with the later numbering calculated during actual statement compilation.
373//
374// Examine precomputes some CarryNumbering and AdvanceNumbering, as
375// well as mMaxNestingDepth of while loops.
376//
377void PabloCompiler::Examine(StatementList & stmts) {
378    for (Statement * stmt : stmts) {
379
380        if (Advance * adv = dyn_cast<Advance>(stmt)) {
381            mAdvanceQueueSize += (((adv->getAdvanceAmount() - 1) / BLOCK_SIZE) + 1);
382        }
383        else if (isa<MatchStar>(stmt) || isa<ScanThru>(stmt)) {
384            ++mCarryQueueSize;
385        }
386        if (Call * call = dyn_cast<Call>(stmt)) {
387            mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
388        }
389        else if (If * ifStatement = dyn_cast<If>(stmt)) {
390            const auto preIfCarryCount = mCarryQueueSize;
391            const auto preIfAdvanceCount = mAdvanceQueueSize;
392            Examine(ifStatement->getBody());
393            int ifCarryCount = mCarryQueueSize - preIfCarryCount;
394            int ifAdvanceCount = mAdvanceQueueSize - preIfAdvanceCount;
395            if ((ifCarryCount + ifAdvanceCount) > 1) {
396              ++mAdvanceQueueSize;
397              ++ifAdvanceCount;
398            }
399            ifStatement->setInclusiveCarryCount(ifCarryCount);
400            ifStatement->setInclusiveAdvanceCount(ifAdvanceCount);
401        }
402        else if (While * whileStatement = dyn_cast<While>(stmt)) {
403            const auto preWhileCarryCount = mCarryQueueSize;
404            const auto preWhileAdvanceCount = mAdvanceQueueSize;
405            mMaxNestingDepth = std::max(mMaxNestingDepth, ++mNestingDepth);
406            Examine(whileStatement->getBody());
407            --mNestingDepth;
408            whileStatement->setInclusiveCarryCount(mCarryQueueSize - preWhileCarryCount);
409            whileStatement->setInclusiveAdvanceCount(mAdvanceQueueSize - preWhileAdvanceCount);
410        }
411    }
412}
413
414void PabloCompiler::DeclareCallFunctions() {
415    for (auto mapping : mCalleeMap) {
416        const String * callee = mapping.first;
417        //std::cerr << callee->str() << " to be declared\n";
418        auto ei = mExternalMap.find(callee->value());
419        if (ei != mExternalMap.end()) {
420            void * fn_ptr = ei->second;
421            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
422            Value * externalValue = mMod->getOrInsertFunction(callee->value(), mBitBlockType, mBasisBitsInputPtr, NULL);
423            if (LLVM_UNLIKELY(externalValue == nullptr)) {
424                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
425            }
426            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
427            mCalleeMap[callee] = externalValue;
428        }
429        else {
430            throw std::runtime_error("External function \"" + callee->to_string() + "\" not installed");
431        }
432    }
433}
434
435void PabloCompiler::compileStatements(const StatementList & stmts) {
436    for (const Statement * statement : stmts) {
437        compileStatement(statement);
438    }
439}
440
441void PabloCompiler::compileIf(const If * ifStatement) {
442        //
443        //  The If-ElseZero stmt:
444        //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
445        //  If the value of the predicate is nonzero, then determine the values of variables
446        //  <var>* by executing the given statements.  Otherwise, the value of the
447        //  variables are all zero.  Requirements: (a) no variable that is defined within
448        //  the body of the if may be accessed outside unless it is explicitly
449        //  listed in the variable list, (b) every variable in the defined list receives
450        //  a value within the body, and (c) the logical consequence of executing
451        //  the statements in the event that the predicate is zero is that the
452        //  values of all defined variables indeed work out to be 0.
453        //
454        //  Simple Implementation with Phi nodes:  a phi node in the if exit block
455        //  is inserted for each variable in the defined variable list.  It receives
456        //  a zero value from the ifentry block and the defined value from the if
457        //  body.
458        //
459        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
460        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
461        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
462       
463        const auto baseCarryQueueIdx = mCarryQueueIdx;
464        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
465       
466        int ifCarryCount = ifStatement->getInclusiveCarryCount();
467        int ifAdvanceCount = ifStatement->getInclusiveAdvanceCount();
468        //  Carry/Advance queue strategy.   
469        //  If there are any carries or advances at any nesting level within the
470        //  if statement, then the statement must be executed.   A "summary"
471        //  carryover variable is determined for this purpose, consisting of the
472        //  or of all of the carry and advance variables within the if.
473        //  This variable is determined as follows.
474        //  (a)  If the CarryCount and AdvanceCount are both 0, there is no summary variable.
475        //  (b)  If the CarryCount is 1 and the AdvanceCount is 0, then the summary
476        //       carryover variable is just the single carry queue entry.
477        //  (c)  If the CarryCount is 0 and the AdvanceCount is 1, then the summary
478        //       carryover variable is just the advance carry queue entry.
479        //  (d)  Otherwise, an additional advance queue entry is created for the
480        //       summary variable.
481        //  Note that the test for cases (c) and (d) may be combined: the summary carryover
482        //  variable is just last advance queue entry.
483        //
484       
485        IRBuilder<> b_entry(ifEntryBlock);
486        mBasicBlock = ifEntryBlock;
487        Value* if_test_value = compileExpression(ifStatement->getCondition());
488       
489        if ((ifCarryCount == 1) && (ifAdvanceCount == 0)) {
490            Value* last_if_pending_carries = genCarryInLoad(baseCarryQueueIdx);
491            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_carries);
492        }
493        else if ((ifCarryCount > 0) || (ifAdvanceCount > 0)) {
494            Value* last_if_pending_advances = genAdvanceInLoad(baseAdvanceQueueIdx + ifAdvanceCount - 1);
495            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_advances);
496        }
497        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
498
499        // Entry processing is complete, now handle the body of the if.
500        mBasicBlock = ifBodyBlock;
501        compileStatements(ifStatement->getBody());
502
503        // If we compiled an If or a While statement, we won't be in the same basic block as before.
504        // Create the branch from the current basic block to the end block.
505        IRBuilder<> bIfBody(mBasicBlock);
506        // After the recursive compile, now insert the code to compute the summary
507        // carry over variable.
508       
509        if ((ifCarryCount + ifAdvanceCount) > 1) {
510            // A summary variable is needed.
511
512            Value * carry_summary = mZeroInitializer;
513            for (int c = baseCarryQueueIdx; c < baseCarryQueueIdx + ifCarryCount; c++) {
514                int s = mCarryQueueSummaryIdx[c];
515                if (s == -1) {
516                    Value* carryq_value = mCarryQueueVector[c];
517                    if (carry_summary == mZeroInitializer) {
518                        carry_summary = carryq_value;
519                    }
520                    else {
521                        carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
522                    }
523                    mCarryQueueSummaryIdx[c] = mAdvanceQueueIdx;
524                }
525            }
526            // Note that the limit in the following uses -1, because
527            // last entry of the advance queue is for the summary variable.
528            for (int c = baseAdvanceQueueIdx; c < baseAdvanceQueueIdx + ifAdvanceCount - 1; c++) {
529                int s = mAdvanceQueueSummaryIdx[c];
530                if (s == -1 ) {
531                    Value* advance_q_value = mAdvanceQueueVector[c];
532                    if (carry_summary == mZeroInitializer) {
533                        carry_summary = advance_q_value;
534                    }
535                    else {
536                        carry_summary = bIfBody.CreateOr(carry_summary, advance_q_value);
537                    }
538                    mAdvanceQueueSummaryIdx[c] = mAdvanceQueueIdx;
539                }
540            }
541            genAdvanceOutStore(carry_summary, mAdvanceQueueIdx++);
542        }
543        bIfBody.CreateBr(ifEndBlock);
544        //End Block
545        IRBuilder<> bEnd(ifEndBlock);
546        for (const PabloAST * node : ifStatement->getDefined()) {
547            const Assign * assign = cast<Assign>(node);
548            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, assign->getName()->value());
549            auto f = mMarkerMap.find(assign);
550            assert (f != mMarkerMap.end());
551            phi->addIncoming(mZeroInitializer, ifEntryBlock);
552            phi->addIncoming(f->second, mBasicBlock);
553            mMarkerMap[assign] = phi;
554        }
555        // Create the phi Node for the summary variable.
556        if (ifAdvanceCount >= 1) {
557            // final AdvanceQ entry is summary variable.
558            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
559            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
560            summary_phi->addIncoming(mAdvanceQueueVector[mAdvanceQueueIdx-1], mBasicBlock);
561            mAdvanceQueueVector[mAdvanceQueueIdx-1] = summary_phi;
562        }
563        else if (ifCarryCount == 1) {
564            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
565            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
566            summary_phi->addIncoming(mCarryQueueVector[baseCarryQueueIdx], mBasicBlock);
567            mCarryQueueVector[baseCarryQueueIdx] = summary_phi;
568        }
569       
570        // Set the basic block to the new end block
571        mBasicBlock = ifEndBlock;
572}
573
574void PabloCompiler::compileWhile(const While * whileStatement) {
575        const auto baseCarryQueueIdx = mCarryQueueIdx;
576        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
577        if (mNestingDepth == 0) {
578            for (auto i = 0; i != whileStatement->getInclusiveCarryCount(); ++i) {
579                genCarryInLoad(baseCarryQueueIdx + i);
580            }
581            for (auto i = 0; i != whileStatement->getInclusiveAdvanceCount(); ++i) {
582                genAdvanceInLoad(baseAdvanceQueueIdx + i);
583            }
584        }
585
586        SmallVector<const Next*, 4> nextNodes;
587        for (const PabloAST * node : whileStatement->getBody()) {
588            if (isa<Next>(node)) {
589                nextNodes.push_back(cast<Next>(node));
590            }
591        }
592
593        // Compile the initial iteration statements; the calls to genCarryOutStore will update the
594        // mCarryQueueVector with the appropriate values. Although we're not actually entering a new basic
595        // block yet, increment the nesting depth so that any calls to genCarryInLoad or genCarryOutStore
596        // will refer to the previous value.
597
598        ++mNestingDepth;
599
600        compileStatements(whileStatement->getBody());
601
602        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
603        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
604        // but works for now.
605        mCarryQueueIdx = baseCarryQueueIdx;
606        mAdvanceQueueIdx = baseAdvanceQueueIdx;
607
608        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
609        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
610        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
611
612        // Note: compileStatements may update the mBasicBlock pointer if the body contains nested loops. It
613        // may not be same one that we entered the function with.
614        IRBuilder<> bEntry(mBasicBlock);
615        bEntry.CreateBr(whileCondBlock);
616
617        // CONDITION BLOCK
618        IRBuilder<> bCond(whileCondBlock);
619        // generate phi nodes for any carry propogating instruction
620        int whileCarryCount = whileStatement->getInclusiveCarryCount();
621        int whileAdvanceCount = whileStatement->getInclusiveAdvanceCount();
622        std::vector<PHINode*> phiNodes(whileCarryCount + whileAdvanceCount + nextNodes.size());
623        unsigned index = 0;
624        for (index = 0; index != whileCarryCount; ++index) {
625            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
626            phi->addIncoming(mCarryQueueVector[baseCarryQueueIdx + index], mBasicBlock);
627            mCarryQueueVector[baseCarryQueueIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
628            phiNodes[index] = phi;
629        }
630        for (int i = 0; i != whileAdvanceCount; ++i) {
631            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
632            phi->addIncoming(mAdvanceQueueVector[baseAdvanceQueueIdx + i], mBasicBlock);
633            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = mZeroInitializer; // (use phi for multi-carry mode.)
634            phiNodes[index++] = phi;
635        }
636        // and for any Next nodes in the loop body
637        for (const Next * n : nextNodes) {
638            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->value());
639            auto f = mMarkerMap.find(n->getInitial());
640            assert (f != mMarkerMap.end());
641            phi->addIncoming(f->second, mBasicBlock);
642            mMarkerMap[n->getInitial()] = phi;
643            phiNodes[index++] = phi;
644        }
645
646        mBasicBlock = whileCondBlock;
647        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
648
649        // BODY BLOCK
650        mBasicBlock = whileBodyBlock;
651        compileStatements(whileStatement->getBody());
652        // update phi nodes for any carry propogating instruction
653        IRBuilder<> bWhileBody(mBasicBlock);
654        for (index = 0; index != whileStatement->getInclusiveCarryCount(); ++index) {
655            Value * carryOut = bWhileBody.CreateOr(phiNodes[index], mCarryQueueVector[baseCarryQueueIdx + index]);
656            PHINode * phi = phiNodes[index];
657            phi->addIncoming(carryOut, mBasicBlock);
658            mCarryQueueVector[baseCarryQueueIdx + index] = phi;
659        }
660        for (int i = 0; i != whileAdvanceCount; ++i) {
661            Value * advOut = bWhileBody.CreateOr(phiNodes[index], mAdvanceQueueVector[baseAdvanceQueueIdx + i]);
662            PHINode * phi = phiNodes[index++];
663            phi->addIncoming(advOut, mBasicBlock);
664            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = phi;
665        }
666        // and for any Next nodes in the loop body
667        for (const Next * n : nextNodes) {
668            auto f = mMarkerMap.find(n->getInitial());
669            assert (f != mMarkerMap.end());
670            PHINode * phi = phiNodes[index++];
671            phi->addIncoming(f->second, mBasicBlock);
672            mMarkerMap[n->getInitial()] = phi;
673        }
674
675        bWhileBody.CreateBr(whileCondBlock);
676
677        // EXIT BLOCK
678        mBasicBlock = whileEndBlock;
679        if (--mNestingDepth == 0) {
680            for (index = 0; index != whileCarryCount; ++index) {
681                genCarryOutStore(phiNodes[index], baseCarryQueueIdx + index);
682            }
683            for (index = 0; index != whileAdvanceCount; ++index) {
684                genAdvanceOutStore(phiNodes[whileCarryCount + index], baseAdvanceQueueIdx + index);
685            }
686        }
687 
688}
689
690void PabloCompiler::compileStatement(const Statement * stmt)
691{
692    IRBuilder<> b(mBasicBlock);
693    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
694        Value * expr = compileExpression(assign->getExpr());
695        mMarkerMap[assign] = expr;
696        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
697            SetOutputValue(expr, assign->getOutputIndex());
698        }
699    }
700    else if (const Next * next = dyn_cast<const Next>(stmt)) {
701        Value * expr = compileExpression(next->getExpr());
702        mMarkerMap[next->getInitial()] = expr;
703    }
704    else if (const If * ifStatement = dyn_cast<const If>(stmt))
705    {
706        compileIf(ifStatement);
707    }
708    else if (const While * whileStatement = dyn_cast<const While>(stmt))
709    {
710        compileWhile(whileStatement);
711    }
712    else if (const Call* call = dyn_cast<Call>(stmt)) {
713        //Call the callee once and store the result in the marker map.
714        auto mi = mMarkerMap.find(call);
715        if (mi == mMarkerMap.end()) {
716            auto ci = mCalleeMap.find(call->getCallee());
717            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
718                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->to_string() + "\"");
719            }
720            mi = mMarkerMap.insert(std::make_pair(call, b.CreateCall(ci->second, mBasisBitsAddr))).first;
721        }
722        // return mi->second;
723    }
724    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
725        Value * expr = b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
726        mMarkerMap[pablo_and] = expr;
727        // return expr;
728    }
729    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
730        Value * expr = b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
731        mMarkerMap[pablo_or] = expr;
732        // return expr;
733    }
734    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
735        Value * expr = b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
736        mMarkerMap[pablo_xor] = expr;
737        // return expr;
738    }
739    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
740        Value* ifMask = compileExpression(sel->getCondition());
741        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
742        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
743        Value * expr = b.CreateOr(ifTrue, ifFalse);
744        mMarkerMap[sel] = expr;
745        // return expr;
746    }
747    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
748        Value * expr = genNot(compileExpression(pablo_not->getExpr()));
749        mMarkerMap[pablo_not] = expr;
750        // return expr;
751    }
752    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
753        Value* strm_value = compileExpression(adv->getExpr());
754        int shift = adv->getAdvanceAmount();
755        Value * expr = genAdvanceWithCarry(strm_value, shift);
756        mMarkerMap[adv] = expr;
757        // return expr;
758    }
759    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt))
760    {
761        Value * marker = compileExpression(mstar->getMarker());
762        Value * cc = compileExpression(mstar->getCharClass());
763        Value * marker_and_cc = b.CreateAnd(marker, cc);
764        Value * expr = b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc), cc), marker, "matchstar");
765        mMarkerMap[mstar] = expr;
766        // return expr;
767    }
768    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt))
769    {
770        Value * marker_expr = compileExpression(sthru->getScanFrom());
771        Value * cc_expr = compileExpression(sthru->getScanThru());
772        Value * expr = b.CreateAnd(genAddWithCarry(marker_expr, cc_expr), genNot(cc_expr), "scanthru");
773        mMarkerMap[sthru] = expr;
774        // return expr;
775    }
776    else {
777        PabloPrinter::print(stmt, std::cerr);
778        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
779    }
780}
781
782Value * PabloCompiler::compileExpression(const PabloAST * expr) {
783    if (isa<Ones>(expr)) {
784        return mOneInitializer;
785    }
786    else if (isa<Zeroes>(expr)) {
787        return mZeroInitializer;
788    }
789    else if (const Next * next = dyn_cast<Next>(expr)) {
790        expr = next->getInitial();
791    }
792    auto f = mMarkerMap.find(expr);
793    if (f == mMarkerMap.end()) {
794        std::stringstream str;
795        str << "\"";
796        PabloPrinter::print(expr, str);
797        str << "\" was used before definition!";
798        throw std::runtime_error(str.str());
799    }
800    return f->second;
801}
802
803
804#ifdef USE_UADD_OVERFLOW
805#ifdef USE_TWO_UADD_OVERFLOW
806PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
807    std::vector<Value*> struct_res_params;
808    struct_res_params.push_back(int128_e1);
809    struct_res_params.push_back(int128_e2);
810    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflow, struct_res_params, "uadd_overflow_res", mBasicBlock);
811    struct_res->setCallingConv(CallingConv::C);
812    struct_res->setTailCall(false);
813    AttributeSet struct_res_PAL;
814    struct_res->setAttributes(struct_res_PAL);
815
816    SumWithOverflowPack ret;
817
818    std::vector<unsigned> int128_sum_indices;
819    int128_sum_indices.push_back(0);
820    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
821
822    std::vector<unsigned> int1_obit_indices;
823    int1_obit_indices.push_back(1);
824    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
825
826    return ret;
827}
828#else
829PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
830    std::vector<Value*> struct_res_params;
831    struct_res_params.push_back(int128_e1);
832    struct_res_params.push_back(int128_e2);
833    struct_res_params.push_back(int1_cin);
834    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflowCarryin, struct_res_params, "uadd_overflow_res", mBasicBlock);
835    struct_res->setCallingConv(CallingConv::C);
836    struct_res->setTailCall(false);
837    AttributeSet struct_res_PAL;
838    struct_res->setAttributes(struct_res_PAL);
839
840    SumWithOverflowPack ret;
841
842    std::vector<unsigned> int128_sum_indices;
843    int128_sum_indices.push_back(0);
844    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
845
846    std::vector<unsigned> int1_obit_indices;
847    int1_obit_indices.push_back(1);
848    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
849
850    return ret;
851}
852#endif
853#endif
854
855
856Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2) {
857    IRBuilder<> b(mBasicBlock);
858
859    //CarryQ - carry in.
860    const int carryIdx = mCarryQueueIdx++;
861    Value* carryq_value = genCarryInLoad(carryIdx);
862#ifdef USE_TWO_UADD_OVERFLOW
863    //This is the ideal implementation, which uses two uadd.with.overflow
864    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
865    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
866    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
867    CastInst* int128_carryq_value = new BitCastInst(carryq_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "carryq_128", mBasicBlock);
868
869    SumWithOverflowPack sumpack0, sumpack1;
870
871    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
872    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
873
874    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
875    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
876
877    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
878    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
879    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
880    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
881    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
882
883#elif defined USE_UADD_OVERFLOW
884    //use llvm.uadd.with.overflow.i128 or i256
885    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
886    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
887
888    //get i1 carryin from iBLOCK_SIZE
889    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
890    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
891    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
892
893    SumWithOverflowPack sumpack0;
894    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
895    Value* obit = sumpack0.obit;
896    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
897
898    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
899    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
900    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
901    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
902#elif (BLOCK_SIZE == 128)
903    //calculate carry through logical ops
904    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
905    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
906    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
907    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
908    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
909    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
910
911    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
912    Value* carry_out = genShiftHighbitToLow(b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))), "carry_out");
913#else
914    //BLOCK_SIZE == 256, there is no other implementation
915    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
916#endif //USE_TWO_UADD_OVERFLOW
917
918    genCarryOutStore(carry_out, carryIdx);
919    return sum;
920}
921
922Value* PabloCompiler::genCarryInLoad(const unsigned index) {
923    assert (index < mCarryQueueVector.size());
924    if (mNestingDepth == 0) {
925        IRBuilder<> b(mBasicBlock);
926        mCarryQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
927    }
928    return mCarryQueueVector[index];
929}
930
931void PabloCompiler::genCarryOutStore(Value* carryOut, const unsigned index ) {
932    assert (carryOut);
933    assert (index < mCarryQueueVector.size());
934    if (mNestingDepth == 0) {
935        IRBuilder<> b(mBasicBlock);
936        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
937    }
938    mCarryQueueSummaryIdx[index] = -1;
939    mCarryQueueVector[index] = carryOut;
940}
941
942Value* PabloCompiler::genAdvanceInLoad(const unsigned index) {
943    assert (index < mAdvanceQueueVector.size());
944    if (mNestingDepth == 0) {
945        IRBuilder<> b(mBasicBlock);
946        mAdvanceQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryDataPtr, b.getInt64(mCarryQueueSize + index)), BLOCK_SIZE/8, false);
947    }
948    return mAdvanceQueueVector[index];
949}
950
951void PabloCompiler::genAdvanceOutStore(Value* advanceOut, const unsigned index ) {
952    assert (advanceOut);
953    assert (index < mAdvanceQueueVector.size());
954    if (mNestingDepth == 0) {
955        IRBuilder<> b(mBasicBlock);
956        b.CreateAlignedStore(advanceOut, b.CreateGEP(mCarryDataPtr, b.getInt64(mCarryQueueSize + index)), BLOCK_SIZE/8, false);
957    }
958    mAdvanceQueueSummaryIdx[index] = -1;
959    mAdvanceQueueVector[index] = advanceOut;
960}
961
962inline Value* PabloCompiler::genBitBlockAny(Value* test) {
963    IRBuilder<> b(mBasicBlock);
964    Value* cast_marker_value_1 = b.CreateBitCast(test, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
965    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(IntegerType::get(mMod->getContext(), BLOCK_SIZE), 0));
966}
967
968Value* PabloCompiler::genShiftHighbitToLow(Value* e, const Twine &namehint) {
969    IRBuilder<> b(mBasicBlock);
970    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
971    return b.CreateBitCast(b.CreateLShr(i128_val, BLOCK_SIZE - 1, namehint), mBitBlockType);
972}
973
974Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
975    IRBuilder<> b(mBasicBlock);
976    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
977    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
978}
979
980inline Value* PabloCompiler::genNot(Value* expr) {
981    IRBuilder<> b(mBasicBlock);
982    return b.CreateXor(expr, mOneInitializer, "not");
983}
984Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount) {
985    IRBuilder<> b(mBasicBlock);
986    int advEntries = (shift_amount - 1) / BLOCK_SIZE + 1;
987    int block_shift = shift_amount % BLOCK_SIZE;
988    const auto storeIdx = mAdvanceQueueIdx;
989    const auto loadIdx = mAdvanceQueueIdx + advEntries - 1;
990    mAdvanceQueueIdx += advEntries;
991    Value* result_value;
992   
993    if (advEntries == 1) {
994        if (block_shift == 0) { 
995            result_value = genAdvanceInLoad(loadIdx);
996            //b.CreateCall(mFunc_print_register, result_value);
997        }
998#if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
999        if (block_shift == 1) {
1000            Value* advanceq_value = genShiftHighbitToLow(genAdvanceInLoad(loadIdx));
1001            Value* srli_1_value = b.CreateLShr(strm_value, 63);
1002            Value* packed_shuffle;
1003            Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
1004            Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
1005            packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
1006
1007            Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
1008            Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
1009
1010            Value* shl_value = b.CreateShl(strm_value, const_packed_2);
1011            result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
1012        }
1013        else { //if (block_shift < BLOCK_SIZE) {
1014            // This is the preferred logic, but is too slow for the general case.
1015            // We need to speed up our custom LLVM for this code.
1016            Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1017            Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1018            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
1019            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1020        }
1021#else
1022        Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1023        Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1024        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
1025        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1026
1027#endif
1028    }
1029    else {
1030        if (block_shift == 0) {
1031            result_value = genAdvanceInLoad(loadIdx);
1032        }
1033        else { 
1034            // The advance is based on the two oldest bit blocks in the advance queue.
1035            Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1036            Value* strm_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx-1), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1037            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
1038            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1039            //b.CreateCall(mFunc_print_register, genAdvanceInLoad(loadIdx));
1040            //b.CreateCall(mFunc_print_register, genAdvanceInLoad(loadIdx-1));
1041            //b.CreateCall(mFunc_print_register, result_value);
1042        }
1043        // copy entries from previous blocks forward
1044        for (int i = loadIdx; i > storeIdx; i--) {
1045            genAdvanceOutStore(genAdvanceInLoad(i-1), i);
1046        }
1047    }
1048    genAdvanceOutStore(strm_value, storeIdx);
1049    return result_value;
1050}
1051
1052void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
1053    IRBuilder<> b(mBasicBlock);
1054    if (marker->getType()->isPointerTy()) {
1055        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
1056    }
1057    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
1058    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
1059    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
1060}
1061
1062CompiledPabloFunction::CompiledPabloFunction(size_t carryDataSize, Function * function, ExecutionEngine * executionEngine)
1063: CarryDataSize(carryDataSize)
1064, FunctionPointer(executionEngine->getPointerToFunction(function))
1065, mFunction(function)
1066, mExecutionEngine(executionEngine)
1067{
1068
1069}
1070
1071// Clean up the memory for the compiled function once we're finished using it.
1072CompiledPabloFunction::~CompiledPabloFunction() {
1073    if (mExecutionEngine) {
1074        assert (mFunction);
1075        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
1076        delete mExecutionEngine;
1077    }
1078}
1079
1080}
Note: See TracBrowser for help on using the repository browser.