source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4534

Last change on this file since 4534 was 4534, checked in by cameron, 4 years ago

Remove last LLVM_3_4 reference

File size: 47.5 KB
Line 
1/*
2 *  Copyright (c) 2014-15 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <pablo/pablo_compiler.h>
8#include <pablo/codegenstate.h>
9#include <pablo/printer_pablos.h>
10#include <cc/cc_namemap.hpp>
11#include <re/re_name.h>
12#include <stdexcept>
13#include <include/simd-lib/bitblock.hpp>
14#include <sstream>
15#include <llvm/IR/Verifier.h>
16#include <llvm/Pass.h>
17#include <llvm/PassManager.h>
18#include <llvm/ADT/SmallVector.h>
19#include <llvm/Analysis/Passes.h>
20#include <llvm/IR/BasicBlock.h>
21#include <llvm/IR/CallingConv.h>
22#include <llvm/IR/Constants.h>
23#include <llvm/IR/DataLayout.h>
24#include <llvm/IR/DerivedTypes.h>
25#include <llvm/IR/Function.h>
26#include <llvm/IR/GlobalVariable.h>
27#include <llvm/IR/InlineAsm.h>
28#include <llvm/IR/Instructions.h>
29#include <llvm/IR/LLVMContext.h>
30#include <llvm/IR/Module.h>
31#include <llvm/Support/FormattedStream.h>
32#include <llvm/Support/MathExtras.h>
33#include <llvm/Support/Casting.h>
34#include <llvm/Support/Compiler.h>
35#include <llvm/Support/Debug.h>
36#include <llvm/Support/TargetSelect.h>
37#include <llvm/Support/Host.h>
38#include <llvm/Transforms/Scalar.h>
39#include <llvm/ExecutionEngine/ExecutionEngine.h>
40#include <llvm/ExecutionEngine/MCJIT.h>
41#include <llvm/IRReader/IRReader.h>
42#include <llvm/Bitcode/ReaderWriter.h>
43#include <llvm/Support/MemoryBuffer.h>
44#include <llvm/IR/IRBuilder.h>
45#include <llvm/Support/CommandLine.h>
46#include <llvm/ADT/Twine.h>
47#include <iostream>
48
49cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
50static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
51
52extern "C" {
53  void wrapped_print_register(BitBlock bit_block) {
54      print_register<BitBlock>("", bit_block);
55  }
56}
57
58namespace pablo {
59
60PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
61: mBasisBits(basisBits)
62, mMod(new Module("icgrep", getGlobalContext()))
63, mBasicBlock(nullptr)
64, mExecutionEngine(nullptr)
65, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
66, mBasisBitsInputPtr(nullptr)
67, mCarryQueueIdx(0)
68, mCarryQueuePtr(nullptr)
69, mNestingDepth(0)
70, mCarryQueueSize(0)
71, mAdvanceQueueIdx(0)
72, mAdvanceQueuePtr(nullptr)
73, mAdvanceQueueSize(0)
74, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
75, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
76, mFunctionType(nullptr)
77, mFunction(nullptr)
78, mBasisBitsAddr(nullptr)
79, mOutputAddrPtr(nullptr)
80, mMaxNestingDepth(0)
81, mPrintRegisterFunction(nullptr)
82{
83    //Create the jit execution engine.up
84    InitializeNativeTarget();
85    InitializeNativeTargetAsmPrinter();
86    InitializeNativeTargetAsmParser();
87    DefineTypes();
88}
89
90PabloCompiler::~PabloCompiler()
91{
92
93}
94   
95void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
96    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
97}
98
99
100CompiledPabloFunction PabloCompiler::compile(PabloBlock & pb)
101{
102    mNestingDepth = 0;
103    mMaxNestingDepth = 0;
104    mCarryQueueSize = 0;
105    mAdvanceQueueSize = 0;
106    Examine(pb.statements());
107    mCarryQueueVector.resize(mCarryQueueSize);
108    mAdvanceQueueVector.resize(mAdvanceQueueSize);
109    mCarryQueueSummaryIdx.resize(mCarryQueueSize);
110    mAdvanceQueueSummaryIdx.resize(mAdvanceQueueSize);
111    std::string errMessage;
112    EngineBuilder builder(mMod);
113    builder.setErrorStr(&errMessage);
114    builder.setMCPU(sys::getHostCPUName());
115    builder.setUseMCJIT(true);
116    builder.setOptLevel(mMaxNestingDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
117    mExecutionEngine = builder.create();
118    if (mExecutionEngine == nullptr) {
119        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
120    }
121    DeclareFunctions();
122
123    DeclareCallFunctions();
124
125    Function::arg_iterator args = mFunction->arg_begin();
126    mBasisBitsAddr = args++;
127    mBasisBitsAddr->setName("basis_bits");
128    mCarryQueuePtr = args++;
129    mCarryQueuePtr->setName("carry_q");
130    mAdvanceQueuePtr = args++;
131    mAdvanceQueuePtr->setName("advance_q");
132    mOutputAddrPtr = args++;
133    mOutputAddrPtr->setName("output");
134
135    //Create the carry and advance queues.
136    mCarryQueueIdx = 0;
137    mAdvanceQueueIdx = 0;
138    mNestingDepth = 0;
139    mMaxNestingDepth = 0;
140    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
141
142    //The basis bits structure
143    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
144        IRBuilder<> b(mBasicBlock);
145        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
146        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
147        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, mBasisBits[i]->getName()->to_string());
148        mMarkerMap.insert(std::make_pair(mBasisBits[i], basisBit));
149    }
150
151    //Generate the IR instructions for the function.
152    compileStatements(pb.statements());
153
154    if (LLVM_UNLIKELY(mCarryQueueIdx != mCarryQueueSize)) {
155        throw std::runtime_error("Actual carry queue size (" + std::to_string(mCarryQueueIdx) + ") does not match expected (" + std::to_string(mCarryQueueSize) + ")");
156    }
157    if (LLVM_UNLIKELY(mAdvanceQueueIdx != mAdvanceQueueSize)) {
158        throw std::runtime_error("Actual advance queue size (" + std::to_string(mAdvanceQueueIdx) + ") does not match expected (" + std::to_string(mAdvanceQueueSize) + ")");
159    }
160    if (LLVM_UNLIKELY(mNestingDepth != 0)) {
161        throw std::runtime_error("Non-zero nesting depth error (" + std::to_string(mNestingDepth) + ")");
162    }
163
164    //Terminate the block
165    ReturnInst::Create(mMod->getContext(), mBasicBlock);
166
167    //Display the IR that has been generated by this module.
168    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
169        mMod->dump();
170    }
171    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
172    verifyModule(*mMod, &dbgs());
173
174    mExecutionEngine->finalizeObject();
175
176    //Return the required size of the carry queue and a pointer to the process_block function.
177    return CompiledPabloFunction(mCarryQueueSize, mAdvanceQueueSize, mFunction, mExecutionEngine);
178}
179
180void PabloCompiler::DefineTypes()
181{
182    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
183    if (structBasisBits == nullptr) {
184        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
185    }
186    std::vector<Type*>StructTy_struct_Basis_bits_fields;
187    for (int i = 0; i != mBasisBits.size(); i++)
188    {
189        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
190    }
191    if (structBasisBits->isOpaque()) {
192        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
193    }
194    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
195
196    std::vector<Type*>functionTypeArgs;
197    functionTypeArgs.push_back(mBasisBitsInputPtr);
198
199    //The carry q array.
200    //A pointer to the BitBlock vector.
201    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
202    // Advance q array
203    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
204
205    //The output structure.
206    StructType * outputStruct = mMod->getTypeByName("struct.Output");
207    if (!outputStruct) {
208        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
209    }
210    if (outputStruct->isOpaque()) {
211        std::vector<Type*>fields;
212        fields.push_back(mBitBlockType);
213        fields.push_back(mBitBlockType);
214        outputStruct->setBody(fields, /*isPacked=*/false);
215    }
216    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
217
218    //The &output parameter.
219    functionTypeArgs.push_back(outputStructPtr);
220
221    mFunctionType = FunctionType::get(
222     /*Result=*/Type::getVoidTy(mMod->getContext()),
223     /*Params=*/functionTypeArgs,
224     /*isVarArg=*/false);
225}
226
227void PabloCompiler::DeclareFunctions()
228{
229    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
230    mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), mBitBlockType, NULL);
231    mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
232    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
233
234#ifdef USE_UADD_OVERFLOW
235#ifdef USE_TWO_UADD_OVERFLOW
236    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
237    std::vector<Type*>StructTy_0_fields;
238    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
239    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
240    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
241
242    std::vector<Type*>FuncTy_1_args;
243    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
244    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
245    FunctionType* FuncTy_1 = FunctionType::get(
246                                              /*Result=*/StructTy_0,
247                                              /*Params=*/FuncTy_1_args,
248                                              /*isVarArg=*/false);
249
250    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
251                                              std::to_string(BLOCK_SIZE));
252    if (!mFunctionUaddOverflow) {
253        mFunctionUaddOverflow= Function::Create(
254          /*Type=*/ FuncTy_1,
255          /*Linkage=*/ GlobalValue::ExternalLinkage,
256          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
257        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
258    }
259    AttributeSet mFunctionUaddOverflowPAL;
260    {
261        SmallVector<AttributeSet, 4> Attrs;
262        AttributeSet PAS;
263        {
264          AttrBuilder B;
265          B.addAttribute(Attribute::NoUnwind);
266          B.addAttribute(Attribute::ReadNone);
267          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
268        }
269
270        Attrs.push_back(PAS);
271        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
272    }
273    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
274#else
275    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
276    std::vector<Type*>StructTy_0_fields;
277    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
278    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
279    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
280
281    std::vector<Type*>FuncTy_1_args;
282    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
283    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
284    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
285    FunctionType* FuncTy_1 = FunctionType::get(
286                                              /*Result=*/StructTy_0,
287                                              /*Params=*/FuncTy_1_args,
288                                              /*isVarArg=*/false);
289
290    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
291                                              std::to_string(BLOCK_SIZE));
292    if (!mFunctionUaddOverflowCarryin) {
293        mFunctionUaddOverflowCarryin = Function::Create(
294          /*Type=*/ FuncTy_1,
295          /*Linkage=*/ GlobalValue::ExternalLinkage,
296          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
297        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
298    }
299    AttributeSet mFunctionUaddOverflowCarryinPAL;
300    {
301        SmallVector<AttributeSet, 4> Attrs;
302        AttributeSet PAS;
303        {
304          AttrBuilder B;
305          B.addAttribute(Attribute::NoUnwind);
306          B.addAttribute(Attribute::ReadNone);
307          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
308        }
309
310        Attrs.push_back(PAS);
311        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
312    }
313    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
314#endif
315#endif
316
317    //Starts on process_block
318    SmallVector<AttributeSet, 5> Attrs;
319    AttributeSet PAS;
320    {
321        AttrBuilder B;
322        B.addAttribute(Attribute::ReadOnly);
323        B.addAttribute(Attribute::NoCapture);
324        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
325    }
326    Attrs.push_back(PAS);
327    {
328        AttrBuilder B;
329        B.addAttribute(Attribute::NoCapture);
330        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
331    }
332    Attrs.push_back(PAS);
333    {
334        AttrBuilder B;
335        B.addAttribute(Attribute::NoCapture);
336        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
337    }
338    Attrs.push_back(PAS);
339    {
340        AttrBuilder B;
341        B.addAttribute(Attribute::NoCapture);
342        PAS = AttributeSet::get(mMod->getContext(), 4U, B);
343    }
344    Attrs.push_back(PAS);
345    {
346        AttrBuilder B;
347        B.addAttribute(Attribute::NoUnwind);
348        B.addAttribute(Attribute::UWTable);
349        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
350    }
351    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
352
353    //Create the function that will be generated.
354    mFunction = mMod->getFunction("process_block");
355    if (!mFunction) {
356        mFunction = Function::Create(
357            /*Type=*/mFunctionType,
358            /*Linkage=*/GlobalValue::ExternalLinkage,
359            /*Name=*/"process_block", mMod);
360        mFunction->setCallingConv(CallingConv::C);
361    }
362    mFunction->setAttributes(AttrSet);
363}
364
365//
366// CarryNumbering: sequential numbers associated with each
367// carry-generating operation encountered in a traversal of the
368// Pablo AST.    Carry-generating operations are MatchStar, ScanThru,
369// and so on.
370// AdvanceNumbering: sequential numbers associated with each Advance
371// operation encountered in tree traversal, with the following modifications.
372//   (a) an additional AdvanceQueue entry is created for each if-statement
373//       having more than one carry or advance opreation within it.  This
374//       additional entry is a summary entry which must be nonzero to
375//       indicate that there are carry or advance bits associated with
376//       any operation within the if-structure (at any nesting level).
377//   (b) advancing by a large amount may require multiple advance entries.
378//       the number of advance entries for an operation Adv(x, n) is
379//       (n - 1) / BLOCK_SIZE + 1
380//
381// Note that the initial carry/advance numbering is determined by the
382// Examine function.  The values determined at this stage must be consistent
383// with the later numbering calculated during actual statement compilation.
384//
385// Examine precomputes some CarryNumbering and AdvanceNumbering, as
386// well as mMaxNestingDepth of while loops.
387//
388void PabloCompiler::Examine(StatementList & stmts) {
389    for (Statement * stmt : stmts) {
390
391        if (Advance * adv = dyn_cast<Advance>(stmt)) {
392            mAdvanceQueueSize += (((adv->getAdvanceAmount() - 1) / BLOCK_SIZE) + 1);
393        }
394        else if (isa<MatchStar>(stmt) || isa<ScanThru>(stmt)) {
395            ++mCarryQueueSize;
396        }
397        if (Call * call = dyn_cast<Call>(stmt)) {
398            mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
399        }
400        else if (If * ifStatement = dyn_cast<If>(stmt)) {
401            const auto preIfCarryCount = mCarryQueueSize;
402            const auto preIfAdvanceCount = mAdvanceQueueSize;
403            Examine(ifStatement->getBody());
404            int ifCarryCount = mCarryQueueSize - preIfCarryCount;
405            int ifAdvanceCount = mAdvanceQueueSize - preIfAdvanceCount;
406            if ((ifCarryCount + ifAdvanceCount) > 1) {
407              ++mAdvanceQueueSize;
408              ++ifAdvanceCount;
409            }
410            ifStatement->setInclusiveCarryCount(ifCarryCount);
411            ifStatement->setInclusiveAdvanceCount(ifAdvanceCount);
412        }
413        else if (While * whileStatement = dyn_cast<While>(stmt)) {
414            const auto preWhileCarryCount = mCarryQueueSize;
415            const auto preWhileAdvanceCount = mAdvanceQueueSize;
416            mMaxNestingDepth = std::max(mMaxNestingDepth, ++mNestingDepth);
417            Examine(whileStatement->getBody());
418            --mNestingDepth;
419            whileStatement->setInclusiveCarryCount(mCarryQueueSize - preWhileCarryCount);
420            whileStatement->setInclusiveAdvanceCount(mAdvanceQueueSize - preWhileAdvanceCount);
421        }
422    }
423}
424
425void PabloCompiler::DeclareCallFunctions() {
426    for (auto mapping : mCalleeMap) {
427        const String * callee = mapping.first;
428        //std::cerr << callee->str() << " to be declared\n";
429        auto ei = mExternalMap.find(callee->value());
430        if (ei != mExternalMap.end()) {
431            void * fn_ptr = ei->second;
432            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
433            Value * externalValue = mMod->getOrInsertFunction(callee->value(), mBitBlockType, mBasisBitsInputPtr, NULL);
434            if (LLVM_UNLIKELY(externalValue == nullptr)) {
435                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
436            }
437            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
438            mCalleeMap[callee] = externalValue;
439        }
440        else {
441            throw std::runtime_error("External function \"" + callee->to_string() + "\" not installed");
442        }
443    }
444}
445
446void PabloCompiler::compileStatements(const StatementList & stmts) {
447    for (const Statement * statement : stmts) {
448        compileStatement(statement);
449    }
450}
451
452void PabloCompiler::compileStatement(const Statement * stmt)
453{
454    IRBuilder<> b(mBasicBlock);
455    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
456        Value * expr = compileExpression(assign->getExpr());
457        mMarkerMap[assign] = expr;
458        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
459            SetOutputValue(expr, assign->getOutputIndex());
460        }
461    }
462    else if (const Next * next = dyn_cast<const Next>(stmt)) {
463        Value * expr = compileExpression(next->getExpr());
464        mMarkerMap[next->getInitial()] = expr;
465    }
466    else if (const If * ifStatement = dyn_cast<const If>(stmt))
467    {
468        //
469        //  The If-ElseZero stmt:
470        //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
471        //  If the value of the predicate is nonzero, then determine the values of variables
472        //  <var>* by executing the given statements.  Otherwise, the value of the
473        //  variables are all zero.  Requirements: (a) no variable that is defined within
474        //  the body of the if may be accessed outside unless it is explicitly
475        //  listed in the variable list, (b) every variable in the defined list receives
476        //  a value within the body, and (c) the logical consequence of executing
477        //  the statements in the event that the predicate is zero is that the
478        //  values of all defined variables indeed work out to be 0.
479        //
480        //  Simple Implementation with Phi nodes:  a phi node in the if exit block
481        //  is inserted for each variable in the defined variable list.  It receives
482        //  a zero value from the ifentry block and the defined value from the if
483        //  body.
484        //
485
486        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
487        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
488        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
489       
490        const auto baseCarryQueueIdx = mCarryQueueIdx;
491        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
492       
493        int ifCarryCount = ifStatement->getInclusiveCarryCount();
494        int ifAdvanceCount = ifStatement->getInclusiveAdvanceCount();
495        //  Carry/Advance queue strategy.   
496        //  If there are any carries or advances at any nesting level within the
497        //  if statement, then the statement must be executed.   A "summary"
498        //  carryover variable is determined for this purpose, consisting of the
499        //  or of all of the carry and advance variables within the if.
500        //  This variable is determined as follows.
501        //  (a)  If the CarryCount and AdvanceCount are both 0, there is no summary variable.
502        //  (b)  If the CarryCount is 1 and the AdvanceCount is 0, then the summary
503        //       carryover variable is just the single carry queue entry.
504        //  (c)  If the CarryCount is 0 and the AdvanceCount is 1, then the summary
505        //       carryover variable is just the advance carry queue entry.
506        //  (d)  Otherwise, an additional advance queue entry is created for the
507        //       summary variable.
508        //  Note that the test for cases (c) and (d) may be combined: the summary carryover
509        //  variable is just last advance queue entry.
510        //
511       
512        IRBuilder<> b_entry(ifEntryBlock);
513        mBasicBlock = ifEntryBlock;
514        Value* if_test_value = compileExpression(ifStatement->getCondition());
515       
516        if ((ifCarryCount == 1) && (ifAdvanceCount == 0)) {
517            Value* last_if_pending_carries = genCarryInLoad(baseCarryQueueIdx);
518            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_carries);
519        }
520        else if ((ifCarryCount > 0) || (ifAdvanceCount > 0)) {
521            Value* last_if_pending_advances = genAdvanceInLoad(baseAdvanceQueueIdx + ifAdvanceCount - 1);
522            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_advances);
523        }
524        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
525
526        // Entry processing is complete, now handle the body of the if.
527        mBasicBlock = ifBodyBlock;
528        compileStatements(ifStatement->getBody());
529
530        // If we compiled an If or a While statement, we won't be in the same basic block as before.
531        // Create the branch from the current basic block to the end block.
532        IRBuilder<> bIfBody(mBasicBlock);
533        // After the recursive compile, now insert the code to compute the summary
534        // carry over variable.
535       
536        if ((ifCarryCount + ifAdvanceCount) > 1) {
537            // A summary variable is needed.
538
539            Value * carry_summary = mZeroInitializer;
540            for (int c = baseCarryQueueIdx; c < baseCarryQueueIdx + ifCarryCount; c++) {
541                int s = mCarryQueueSummaryIdx[c];
542                if (s == -1) {
543                    Value* carryq_value = mCarryQueueVector[c];
544                    if (carry_summary == mZeroInitializer) {
545                        carry_summary = carryq_value;
546                    }
547                    else {
548                        carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
549                    }
550                    mCarryQueueSummaryIdx[c] = mAdvanceQueueIdx;
551                }
552            }
553            // Note that the limit in the following uses -1, because
554            // last entry of the advance queue is for the summary variable.
555            for (int c = baseAdvanceQueueIdx; c < baseAdvanceQueueIdx + ifAdvanceCount - 1; c++) {
556                int s = mAdvanceQueueSummaryIdx[c];
557                if (s == -1 ) {
558                    Value* advance_q_value = mAdvanceQueueVector[c];
559                    if (carry_summary == mZeroInitializer) {
560                        carry_summary = advance_q_value;
561                    }
562                    else {
563                        carry_summary = bIfBody.CreateOr(carry_summary, advance_q_value);
564                    }
565                    mAdvanceQueueSummaryIdx[c] = mAdvanceQueueIdx;
566                }
567            }
568            genAdvanceOutStore(carry_summary, mAdvanceQueueIdx++);
569        }
570        bIfBody.CreateBr(ifEndBlock);
571        //End Block
572        IRBuilder<> bEnd(ifEndBlock);
573        for (const PabloAST * node : ifStatement->getDefined()) {
574            const Assign * assign = cast<Assign>(node);
575            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, assign->getName()->value());
576            auto f = mMarkerMap.find(assign);
577            assert (f != mMarkerMap.end());
578            phi->addIncoming(mZeroInitializer, ifEntryBlock);
579            phi->addIncoming(f->second, mBasicBlock);
580            mMarkerMap[assign] = phi;
581        }
582        // Create the phi Node for the summary variable.
583        if (ifAdvanceCount >= 1) {
584            // final AdvanceQ entry is summary variable.
585            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
586            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
587            summary_phi->addIncoming(mAdvanceQueueVector[mAdvanceQueueIdx-1], mBasicBlock);
588            mAdvanceQueueVector[mAdvanceQueueIdx-1] = summary_phi;
589        }
590        else if (ifCarryCount == 1) {
591            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
592            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
593            summary_phi->addIncoming(mCarryQueueVector[baseCarryQueueIdx], mBasicBlock);
594            mCarryQueueVector[baseCarryQueueIdx] = summary_phi;
595        }
596       
597        // Set the basic block to the new end block
598        mBasicBlock = ifEndBlock;
599    }
600    else if (const While * whileStatement = dyn_cast<const While>(stmt))
601    {
602        const auto baseCarryQueueIdx = mCarryQueueIdx;
603        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
604        if (mNestingDepth == 0) {
605            for (auto i = 0; i != whileStatement->getInclusiveCarryCount(); ++i) {
606                genCarryInLoad(baseCarryQueueIdx + i);
607            }
608            for (auto i = 0; i != whileStatement->getInclusiveAdvanceCount(); ++i) {
609                genAdvanceInLoad(baseAdvanceQueueIdx + i);
610            }
611        }
612
613        SmallVector<const Next*, 4> nextNodes;
614        for (const PabloAST * node : whileStatement->getBody()) {
615            if (isa<Next>(node)) {
616                nextNodes.push_back(cast<Next>(node));
617            }
618        }
619
620        // Compile the initial iteration statements; the calls to genCarryOutStore will update the
621        // mCarryQueueVector with the appropriate values. Although we're not actually entering a new basic
622        // block yet, increment the nesting depth so that any calls to genCarryInLoad or genCarryOutStore
623        // will refer to the previous value.
624
625        ++mNestingDepth;
626
627        compileStatements(whileStatement->getBody());
628
629        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
630        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
631        // but works for now.
632        mCarryQueueIdx = baseCarryQueueIdx;
633        mAdvanceQueueIdx = baseAdvanceQueueIdx;
634
635        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
636        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
637        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
638
639        // Note: compileStatements may update the mBasicBlock pointer if the body contains nested loops. It
640        // may not be same one that we entered the function with.
641        IRBuilder<> bEntry(mBasicBlock);
642        bEntry.CreateBr(whileCondBlock);
643
644        // CONDITION BLOCK
645        IRBuilder<> bCond(whileCondBlock);
646        // generate phi nodes for any carry propogating instruction
647        int whileCarryCount = whileStatement->getInclusiveCarryCount();
648        int whileAdvanceCount = whileStatement->getInclusiveAdvanceCount();
649        std::vector<PHINode*> phiNodes(whileCarryCount + whileAdvanceCount + nextNodes.size());
650        unsigned index = 0;
651        for (index = 0; index != whileCarryCount; ++index) {
652            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
653            phi->addIncoming(mCarryQueueVector[baseCarryQueueIdx + index], mBasicBlock);
654            mCarryQueueVector[baseCarryQueueIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
655            phiNodes[index] = phi;
656        }
657        for (int i = 0; i != whileAdvanceCount; ++i) {
658            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
659            phi->addIncoming(mAdvanceQueueVector[baseAdvanceQueueIdx + i], mBasicBlock);
660            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = mZeroInitializer; // (use phi for multi-carry mode.)
661            phiNodes[index++] = phi;
662        }
663        // and for any Next nodes in the loop body
664        for (const Next * n : nextNodes) {
665            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->value());
666            auto f = mMarkerMap.find(n->getInitial());
667            assert (f != mMarkerMap.end());
668            phi->addIncoming(f->second, mBasicBlock);
669            mMarkerMap[n->getInitial()] = phi;
670            phiNodes[index++] = phi;
671        }
672
673        mBasicBlock = whileCondBlock;
674        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
675
676        // BODY BLOCK
677        mBasicBlock = whileBodyBlock;
678        compileStatements(whileStatement->getBody());
679        // update phi nodes for any carry propogating instruction
680        IRBuilder<> bWhileBody(mBasicBlock);
681        for (index = 0; index != whileStatement->getInclusiveCarryCount(); ++index) {
682            Value * carryOut = bWhileBody.CreateOr(phiNodes[index], mCarryQueueVector[baseCarryQueueIdx + index]);
683            PHINode * phi = phiNodes[index];
684            phi->addIncoming(carryOut, mBasicBlock);
685            mCarryQueueVector[baseCarryQueueIdx + index] = phi;
686        }
687        for (int i = 0; i != whileAdvanceCount; ++i) {
688            Value * advOut = bWhileBody.CreateOr(phiNodes[index], mAdvanceQueueVector[baseAdvanceQueueIdx + i]);
689            PHINode * phi = phiNodes[index++];
690            phi->addIncoming(advOut, mBasicBlock);
691            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = phi;
692        }
693        // and for any Next nodes in the loop body
694        for (const Next * n : nextNodes) {
695            auto f = mMarkerMap.find(n->getInitial());
696            assert (f != mMarkerMap.end());
697            PHINode * phi = phiNodes[index++];
698            phi->addIncoming(f->second, mBasicBlock);
699            mMarkerMap[n->getInitial()] = phi;
700        }
701
702        bWhileBody.CreateBr(whileCondBlock);
703
704        // EXIT BLOCK
705        mBasicBlock = whileEndBlock;
706        if (--mNestingDepth == 0) {
707            for (index = 0; index != whileCarryCount; ++index) {
708                genCarryOutStore(phiNodes[index], baseCarryQueueIdx + index);
709            }
710            for (index = 0; index != whileAdvanceCount; ++index) {
711                genAdvanceOutStore(phiNodes[whileCarryCount + index], baseAdvanceQueueIdx + index);
712            }
713        }
714    }
715    else if (const Call* call = dyn_cast<Call>(stmt)) {
716        //Call the callee once and store the result in the marker map.
717        auto mi = mMarkerMap.find(call);
718        if (mi == mMarkerMap.end()) {
719            auto ci = mCalleeMap.find(call->getCallee());
720            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
721                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->to_string() + "\"");
722            }
723            mi = mMarkerMap.insert(std::make_pair(call, b.CreateCall(ci->second, mBasisBitsAddr))).first;
724        }
725        // return mi->second;
726    }
727    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
728        Value * expr = b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
729        mMarkerMap[pablo_and] = expr;
730        // return expr;
731    }
732    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
733        Value * expr = b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
734        mMarkerMap[pablo_or] = expr;
735        // return expr;
736    }
737    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
738        Value * expr = b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
739        mMarkerMap[pablo_xor] = expr;
740        // return expr;
741    }
742    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
743        Value* ifMask = compileExpression(sel->getCondition());
744        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
745        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
746        Value * expr = b.CreateOr(ifTrue, ifFalse);
747        mMarkerMap[sel] = expr;
748        // return expr;
749    }
750    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
751        Value * expr = genNot(compileExpression(pablo_not->getExpr()));
752        mMarkerMap[pablo_not] = expr;
753        // return expr;
754    }
755    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
756        Value* strm_value = compileExpression(adv->getExpr());
757        int shift = adv->getAdvanceAmount();
758        Value * expr = genAdvanceWithCarry(strm_value, shift);
759        mMarkerMap[adv] = expr;
760        // return expr;
761    }
762    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt))
763    {
764        Value * marker = compileExpression(mstar->getMarker());
765        Value * cc = compileExpression(mstar->getCharClass());
766        Value * marker_and_cc = b.CreateAnd(marker, cc);
767        Value * expr = b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc), cc), marker, "matchstar");
768        mMarkerMap[mstar] = expr;
769        // return expr;
770    }
771    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt))
772    {
773        Value * marker_expr = compileExpression(sthru->getScanFrom());
774        Value * cc_expr = compileExpression(sthru->getScanThru());
775        Value * expr = b.CreateAnd(genAddWithCarry(marker_expr, cc_expr), genNot(cc_expr), "scanthru");
776        mMarkerMap[sthru] = expr;
777        // return expr;
778    }
779    else {
780        PabloPrinter::print(stmt, std::cerr);
781        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
782    }
783}
784
785Value * PabloCompiler::compileExpression(const PabloAST * expr) {
786    if (isa<Ones>(expr)) {
787        return mOneInitializer;
788    }
789    else if (isa<Zeroes>(expr)) {
790        return mZeroInitializer;
791    }
792    else if (const Next * next = dyn_cast<Next>(expr)) {
793        expr = next->getInitial();
794    }
795    auto f = mMarkerMap.find(expr);
796    if (f == mMarkerMap.end()) {
797        std::stringstream str;
798        str << "\"";
799        PabloPrinter::print(expr, str);
800        str << "\" was used before definition!";
801        throw std::runtime_error(str.str());
802    }
803    return f->second;
804}
805
#ifdef USE_UADD_OVERFLOW
#ifdef USE_TWO_UADD_OVERFLOW
// Append a call to mFunctionUaddOverflow(int128_e1, int128_e2) to the current
// basic block and unpack its aggregate result: field 0 is returned as `sum`,
// field 1 as `obit` (the overflow bit).
PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
    Value * operands[] = {int128_e1, int128_e2};
    CallInst * const overflowCall = CallInst::Create(mFunctionUaddOverflow, operands, "uadd_overflow_res", mBasicBlock);
    overflowCall->setCallingConv(CallingConv::C);
    overflowCall->setTailCall(false);
    // Explicitly attach an empty attribute set to the call.
    overflowCall->setAttributes(AttributeSet());

    const unsigned sumField[] = {0};
    const unsigned overflowField[] = {1};

    SumWithOverflowPack unpacked;
    unpacked.sum = ExtractValueInst::Create(overflowCall, sumField, "sum", mBasicBlock);
    unpacked.obit = ExtractValueInst::Create(overflowCall, overflowField, "obit", mBasicBlock);
    return unpacked;
}
#else
// Append a call to mFunctionUaddOverflowCarryin(int128_e1, int128_e2, int1_cin)
// to the current basic block and unpack its aggregate result: field 0 is
// returned as `sum`, field 1 as `obit` (the overflow bit).
PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
    Value * operands[] = {int128_e1, int128_e2, int1_cin};
    CallInst * const overflowCall = CallInst::Create(mFunctionUaddOverflowCarryin, operands, "uadd_overflow_res", mBasicBlock);
    overflowCall->setCallingConv(CallingConv::C);
    overflowCall->setTailCall(false);
    // Explicitly attach an empty attribute set to the call.
    overflowCall->setAttributes(AttributeSet());

    const unsigned sumField[] = {0};
    const unsigned overflowField[] = {1};

    SumWithOverflowPack unpacked;
    unpacked.sum = ExtractValueInst::Create(overflowCall, sumField, "sum", mBasicBlock);
    unpacked.obit = ExtractValueInst::Create(overflowCall, overflowField, "obit", mBasicBlock);
    return unpacked;
}
#endif
#endif
856
857Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2) {
858    IRBuilder<> b(mBasicBlock);
859
860    //CarryQ - carry in.
861    const int carryIdx = mCarryQueueIdx++;
862    Value* carryq_value = genCarryInLoad(carryIdx);
863#ifdef USE_TWO_UADD_OVERFLOW
864    //This is the ideal implementation, which uses two uadd.with.overflow
865    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
866    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
867    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
868    CastInst* int128_carryq_value = new BitCastInst(carryq_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "carryq_128", mBasicBlock);
869
870    SumWithOverflowPack sumpack0, sumpack1;
871
872    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
873    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
874
875    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
876    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
877
878    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
879    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
880    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
881    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
882    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
883
884#elif defined USE_UADD_OVERFLOW
885    //use llvm.uadd.with.overflow.i128 or i256
886    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
887    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
888
889    //get i1 carryin from iBLOCK_SIZE
890    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
891    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
892    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
893
894    SumWithOverflowPack sumpack0;
895    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
896    Value* obit = sumpack0.obit;
897    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
898
899    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
900    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
901    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
902    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
903#elif (BLOCK_SIZE == 128)
904    //calculate carry through logical ops
905    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
906    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
907    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
908    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
909    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
910    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
911
912    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
913    Value* carry_out = genShiftHighbitToLow(b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))), "carry_out");
914#else
915    //BLOCK_SIZE == 256, there is no other implementation
916    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
917#endif //USE_TWO_UADD_OVERFLOW
918
919    genCarryOutStore(carry_out, carryIdx);
920    return sum;
921}
922
923Value* PabloCompiler::genCarryInLoad(const unsigned index) {
924    assert (index < mCarryQueueVector.size());
925    if (mNestingDepth == 0) {
926        IRBuilder<> b(mBasicBlock);
927        mCarryQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
928    }
929    return mCarryQueueVector[index];
930}
931
932void PabloCompiler::genCarryOutStore(Value* carryOut, const unsigned index ) {
933    assert (carryOut);
934    assert (index < mCarryQueueVector.size());
935    if (mNestingDepth == 0) {
936        IRBuilder<> b(mBasicBlock);
937        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
938    }
939    mCarryQueueSummaryIdx[index] = -1;
940    mCarryQueueVector[index] = carryOut;
941}
942
943Value* PabloCompiler::genAdvanceInLoad(const unsigned index) {
944    assert (index < mAdvanceQueueVector.size());
945    if (mNestingDepth == 0) {
946        IRBuilder<> b(mBasicBlock);
947        mAdvanceQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
948    }
949    return mAdvanceQueueVector[index];
950}
951
952void PabloCompiler::genAdvanceOutStore(Value* advanceOut, const unsigned index ) {
953    assert (advanceOut);
954    assert (index < mAdvanceQueueVector.size());
955    if (mNestingDepth == 0) {
956        IRBuilder<> b(mBasicBlock);
957        b.CreateAlignedStore(advanceOut, b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
958    }
959    mAdvanceQueueSummaryIdx[index] = -1;
960    mAdvanceQueueVector[index] = advanceOut;
961}
962
963inline Value* PabloCompiler::genBitBlockAny(Value* test) {
964    IRBuilder<> b(mBasicBlock);
965    Value* cast_marker_value_1 = b.CreateBitCast(test, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
966    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(IntegerType::get(mMod->getContext(), BLOCK_SIZE), 0));
967}
968
969Value* PabloCompiler::genShiftHighbitToLow(Value* e, const Twine &namehint) {
970    IRBuilder<> b(mBasicBlock);
971    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
972    return b.CreateBitCast(b.CreateLShr(i128_val, BLOCK_SIZE - 1, namehint), mBitBlockType);
973}
974
975Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
976    IRBuilder<> b(mBasicBlock);
977    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
978    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
979}
980
981inline Value* PabloCompiler::genNot(Value* expr) {
982    IRBuilder<> b(mBasicBlock);
983    return b.CreateXor(expr, mOneInitializer, "not");
984}
985
986Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount) {
987    IRBuilder<> b(mBasicBlock);
988    int advEntries = (shift_amount - 1) / BLOCK_SIZE + 1;
989    int block_shift = shift_amount % BLOCK_SIZE;
990    const auto storeIdx = mAdvanceQueueIdx;
991    const auto loadIdx = mAdvanceQueueIdx + advEntries - 1;
992    mAdvanceQueueIdx += advEntries;
993    Value* result_value;
994   
995#ifdef USE_LONG_INTEGER_SHIFT
996    Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
997    Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
998    Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
999    result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1000    genAdvanceOutStore(strm_value, storeIdx);
1001
1002    return result_value;
1003#elif (BLOCK_SIZE == 128)
1004    if (advEntries == 1) {
1005        if (block_shift == 0) { 
1006            result_value = genAdvanceInLoad(loadIdx);
1007            //b.CreateCall(mFunc_print_register, result_value);
1008        }
1009        if (block_shift == 1) {
1010            Value* advanceq_value = genShiftHighbitToLow(genAdvanceInLoad(loadIdx));
1011            Value* srli_1_value = b.CreateLShr(strm_value, 63);
1012            Value* packed_shuffle;
1013            Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
1014            Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
1015            packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
1016
1017            Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
1018            Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
1019
1020            Value* shl_value = b.CreateShl(strm_value, const_packed_2);
1021            result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
1022        }
1023        else { //if (block_shift < BLOCK_SIZE) {
1024            // This is the preferred logic, but is too slow for the general case.
1025            // We need to speed up our custom LLVM for this code.
1026            Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1027            Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1028            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
1029            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1030        }
1031    }
1032    else {
1033        if (block_shift == 0) {
1034            result_value = genAdvanceInLoad(loadIdx);
1035        }
1036        else { 
1037            // The advance is based on the two oldest bit blocks in the advance queue.
1038            Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1039            Value* strm_longint = b.CreateBitCast(genAdvanceInLoad(loadIdx-1), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1040            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
1041            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1042            //b.CreateCall(mFunc_print_register, genAdvanceInLoad(loadIdx));
1043            //b.CreateCall(mFunc_print_register, genAdvanceInLoad(loadIdx-1));
1044            //b.CreateCall(mFunc_print_register, result_value);
1045        }
1046        // copy entries from previous blocks forward
1047        for (int i = loadIdx; i > storeIdx; i--) {
1048            genAdvanceOutStore(genAdvanceInLoad(i-1), i);
1049        }
1050    }
1051    genAdvanceOutStore(strm_value, storeIdx);
1052    return result_value;
1053#else
1054    //BLOCK_SIZE == 256
1055    static_assert(false, "Advance with carry on 256-bit bitblock requires long integer shifts (USE_LONG_INTEGER_SHIFT).");
1056#endif //USE_LONG_INTEGER_SHIFT
1057}
1058
1059void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
1060    IRBuilder<> b(mBasicBlock);
1061    if (marker->getType()->isPointerTy()) {
1062        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
1063    }
1064    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
1065    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
1066    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
1067}
1068
1069CompiledPabloFunction::CompiledPabloFunction(unsigned carryQSize, unsigned advanceQSize, Function * function, ExecutionEngine * executionEngine)
1070: CarryQueueSize(carryQSize)
1071, AdvanceQueueSize(advanceQSize)
1072, FunctionPointer(executionEngine->getPointerToFunction(function))
1073, mFunction(function)
1074, mExecutionEngine(executionEngine)
1075{
1076
1077}
1078
1079// Clean up the memory for the compiled function once we're finished using it.
1080CompiledPabloFunction::~CompiledPabloFunction() {
1081    if (mExecutionEngine) {
1082        assert (mFunction);
1083        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
1084        delete mExecutionEngine;
1085    }
1086}
1087
1088}
Note: See TracBrowser for help on using the repository browser.