source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4552

Last change on this file since 4552 was 4552, checked in by nmedfort, 4 years ago

Possible fix for 256-bit mode

File size: 47.5 KB
Line 
1/*
2 *  Copyright (c) 2014-15 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <pablo/pablo_compiler.h>
8#include <pablo/codegenstate.h>
9#include <pablo/printer_pablos.h>
10#include <cc/cc_namemap.hpp>
11#include <re/re_name.h>
12#include <stdexcept>
13#include <include/simd-lib/bitblock.hpp>
14#include <sstream>
15#include <llvm/IR/Verifier.h>
16#include <llvm/Pass.h>
17#include <llvm/PassManager.h>
18#include <llvm/ADT/SmallVector.h>
19#include <llvm/Analysis/Passes.h>
20#include <llvm/IR/BasicBlock.h>
21#include <llvm/IR/CallingConv.h>
22#include <llvm/IR/Constants.h>
23#include <llvm/IR/DataLayout.h>
24#include <llvm/IR/DerivedTypes.h>
25#include <llvm/IR/Function.h>
26#include <llvm/IR/GlobalVariable.h>
27#include <llvm/IR/InlineAsm.h>
28#include <llvm/IR/Instructions.h>
29#include <llvm/IR/LLVMContext.h>
30#include <llvm/IR/Module.h>
31#include <llvm/Support/FormattedStream.h>
32#include <llvm/Support/MathExtras.h>
33#include <llvm/Support/Casting.h>
34#include <llvm/Support/Compiler.h>
35#include <llvm/Support/Debug.h>
36#include <llvm/Support/TargetSelect.h>
37#include <llvm/Support/Host.h>
38#include <llvm/Transforms/Scalar.h>
39#include <llvm/ExecutionEngine/ExecutionEngine.h>
40#include <llvm/ExecutionEngine/MCJIT.h>
41#include <llvm/IRReader/IRReader.h>
42#include <llvm/Bitcode/ReaderWriter.h>
43#include <llvm/Support/MemoryBuffer.h>
44#include <llvm/IR/IRBuilder.h>
45#include <llvm/Support/CommandLine.h>
46#include <llvm/ADT/Twine.h>
47#include <iostream>
48
49static cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
50static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
51
52static cl::OptionCategory fTracingOptions("Run-time Tracing Options", "These options control execution traces.");
53static cl::opt<bool> TraceNext("trace-next-nodes", cl::init(false), cl::desc("Generate dynamic traces of executed Next nodes (while control variables)."), cl::cat(fTracingOptions));
54static cl::opt<bool> DumpTrace("dump-trace", cl::init(false), cl::desc("Generate dynamic traces of executed assignments."), cl::cat(fTracingOptions));
55
56extern "C" {
57  void wrapped_print_register(char * regName, BitBlock bit_block) {
58      print_register<BitBlock>(regName, bit_block);
59  }
60}
61
62namespace pablo {
63
64PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
65: mBasisBits(basisBits)
66, mMod(new Module("icgrep", getGlobalContext()))
67, mBasicBlock(nullptr)
68, mExecutionEngine(nullptr)
69, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
70, mBasisBitsInputPtr(nullptr)
71, mCarryDataPtr(nullptr)
72, mBlockNo(nullptr)
73, mWhileDepth(0)
74, mIfDepth(0)
75, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
76, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
77, mFunctionType(nullptr)
78, mFunction(nullptr)
79, mBasisBitsAddr(nullptr)
80, mOutputAddrPtr(nullptr)
81, mMaxWhileDepth(0)
82, mPrintRegisterFunction(nullptr)
83{
84    //Create the jit execution engine.up
85    InitializeNativeTarget();
86    InitializeNativeTargetAsmPrinter();
87    InitializeNativeTargetAsmParser();
88    DefineTypes();
89}
90
91PabloCompiler::~PabloCompiler()
92{
93
94}
95   
96void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
97    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
98}
99
100void PabloCompiler::genPrintRegister(std::string regName, Value * bitblockValue) {
101    IRBuilder <> b(mBasicBlock);
102    Constant * regNameData = ConstantDataArray::getString(mMod->getContext(), regName);
103    GlobalVariable *regStrVar = new GlobalVariable(*mMod, 
104                                                   ArrayType::get(IntegerType::get(mMod->getContext(), 8), regName.length()+1),
105                                                   /*isConstant=*/ true,
106                                                   /*Linkage=*/ GlobalValue::PrivateLinkage,
107                                                   /*Initializer=*/ regNameData);
108    Value * regStrPtr = b.CreateGEP(regStrVar, {b.getInt64(0), b.getInt32(0)});
109    b.CreateCall(mPrintRegisterFunction, {regStrPtr, bitblockValue});
110}
111
112CompiledPabloFunction PabloCompiler::compile(PabloBlock & pb)
113{
114    mWhileDepth = 0;
115    mIfDepth = 0;
116    mMaxWhileDepth = 0;
117    unsigned totalCarryDataSize = Examine(pb, 0); 
118    mCarryInVector.resize(totalCarryDataSize);
119    mCarryOutVector.resize(totalCarryDataSize);
120    mCarryDataSummaryIdx.resize(totalCarryDataSize);
121    std::string errMessage;
122    EngineBuilder builder(mMod);
123    builder.setErrorStr(&errMessage);
124    builder.setMCPU(sys::getHostCPUName());
125    builder.setUseMCJIT(true);
126    builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
127    mExecutionEngine = builder.create();
128    if (mExecutionEngine == nullptr) {
129        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
130    }
131    DeclareFunctions();
132
133    DeclareCallFunctions();
134
135    Function::arg_iterator args = mFunction->arg_begin();
136    mBasisBitsAddr = args++;
137    mBasisBitsAddr->setName("basis_bits");
138    mCarryDataPtr = args++;
139    mCarryDataPtr->setName("carry_data");
140    mOutputAddrPtr = args++;
141    mOutputAddrPtr->setName("output");
142
143    mWhileDepth = 0;
144    mIfDepth = 0;
145    mMaxWhileDepth = 0;
146    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
147    IRBuilder<> b(mBasicBlock);
148
149    //The basis bits structure
150    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
151        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
152        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
153        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, mBasisBits[i]->getName()->to_string());
154        mMarkerMap.insert(std::make_pair(mBasisBits[i], basisBit));
155    }
156   
157    // The block number is a 64-bit integer at the end of the carry data area.
158    Value * blockNoPtr = b.CreateBitCast(b.CreateGEP(mCarryDataPtr, b.getInt64(totalCarryDataSize)), Type::getInt64PtrTy(b.getContext()));
159    mBlockNo = b.CreateLoad(blockNoPtr);
160    //Generate the IR instructions for the function.
161    compileBlock(pb);
162    {   IRBuilder<> b(mBasicBlock);  // may be in new basic block, set builder
163        b.CreateStore(b.CreateAdd(mBlockNo, b.getInt64(1)), blockNoPtr);
164    }
165
166    if (DumpTrace || TraceNext) {
167        genPrintRegister("blockNo", genCarryDataLoad(totalCarryDataSize));
168    }
169    if (LLVM_UNLIKELY(mWhileDepth != 0)) {
170        throw std::runtime_error("Non-zero nesting depth error (" + std::to_string(mWhileDepth) + ")");
171    }
172
173    //Terminate the block
174    ReturnInst::Create(mMod->getContext(), mBasicBlock);
175
176    //Display the IR that has been generated by this module.
177    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
178        mMod->dump();
179    }
180    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
181    verifyModule(*mMod, &dbgs());
182
183    mExecutionEngine->finalizeObject();
184
185    //Return the required size of the carry data area to the process_block function.
186    // Reserve 1 element in the carry data area for current block number (future). TODO
187    return CompiledPabloFunction((totalCarryDataSize + 1) * sizeof(BitBlock), mFunction, mExecutionEngine);
188}
189
190void PabloCompiler::DefineTypes()
191{
192    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
193    if (structBasisBits == nullptr) {
194        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
195    }
196    std::vector<Type*>StructTy_struct_Basis_bits_fields;
197    for (int i = 0; i != mBasisBits.size(); i++)
198    {
199        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
200    }
201    if (structBasisBits->isOpaque()) {
202        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
203    }
204    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
205
206    std::vector<Type*>functionTypeArgs;
207    functionTypeArgs.push_back(mBasisBitsInputPtr);
208
209    //The carry data array.
210    //A pointer to the BitBlock vector.
211    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
212
213    //The output structure.
214    StructType * outputStruct = mMod->getTypeByName("struct.Output");
215    if (!outputStruct) {
216        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
217    }
218    if (outputStruct->isOpaque()) {
219        std::vector<Type*>fields;
220        fields.push_back(mBitBlockType);
221        fields.push_back(mBitBlockType);
222        outputStruct->setBody(fields, /*isPacked=*/false);
223    }
224    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
225
226    //The &output parameter.
227    functionTypeArgs.push_back(outputStructPtr);
228
229    mFunctionType = FunctionType::get(
230     /*Result=*/Type::getVoidTy(mMod->getContext()),
231     /*Params=*/functionTypeArgs,
232     /*isVarArg=*/false);
233}
234
235void PabloCompiler::DeclareFunctions()
236{
237    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
238    mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), Type::getInt8PtrTy(getGlobalContext()), mBitBlockType, NULL);
239    mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
240    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
241
242#ifdef USE_UADD_OVERFLOW
243#ifdef USE_TWO_UADD_OVERFLOW
244    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
245    std::vector<Type*>StructTy_0_fields;
246    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
247    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
248    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
249
250    std::vector<Type*>FuncTy_1_args;
251    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
252    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
253    FunctionType* FuncTy_1 = FunctionType::get(
254                                              /*Result=*/StructTy_0,
255                                              /*Params=*/FuncTy_1_args,
256                                              /*isVarArg=*/false);
257
258    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
259                                              std::to_string(BLOCK_SIZE));
260    if (!mFunctionUaddOverflow) {
261        mFunctionUaddOverflow= Function::Create(
262          /*Type=*/ FuncTy_1,
263          /*Linkage=*/ GlobalValue::ExternalLinkage,
264          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
265        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
266    }
267    AttributeSet mFunctionUaddOverflowPAL;
268    {
269        SmallVector<AttributeSet, 4> Attrs;
270        AttributeSet PAS;
271        {
272          AttrBuilder B;
273          B.addAttribute(Attribute::NoUnwind);
274          B.addAttribute(Attribute::ReadNone);
275          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
276        }
277
278        Attrs.push_back(PAS);
279        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
280    }
281    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
282#else
283    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
284    std::vector<Type*>StructTy_0_fields;
285    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
286    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
287    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
288
289    std::vector<Type*>FuncTy_1_args;
290    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
291    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
292    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
293    FunctionType* FuncTy_1 = FunctionType::get(
294                                              /*Result=*/StructTy_0,
295                                              /*Params=*/FuncTy_1_args,
296                                              /*isVarArg=*/false);
297
298    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
299                                              std::to_string(BLOCK_SIZE));
300    if (!mFunctionUaddOverflowCarryin) {
301        mFunctionUaddOverflowCarryin = Function::Create(
302          /*Type=*/ FuncTy_1,
303          /*Linkage=*/ GlobalValue::ExternalLinkage,
304          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
305        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
306    }
307    AttributeSet mFunctionUaddOverflowCarryinPAL;
308    {
309        SmallVector<AttributeSet, 4> Attrs;
310        AttributeSet PAS;
311        {
312          AttrBuilder B;
313          B.addAttribute(Attribute::NoUnwind);
314          B.addAttribute(Attribute::ReadNone);
315          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
316        }
317
318        Attrs.push_back(PAS);
319        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
320    }
321    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
322#endif
323#endif
324
325    //Starts on process_block
326    SmallVector<AttributeSet, 4> Attrs;
327    AttributeSet PAS;
328    {
329        AttrBuilder B;
330        B.addAttribute(Attribute::ReadOnly);
331        B.addAttribute(Attribute::NoCapture);
332        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
333    }
334    Attrs.push_back(PAS);
335    {
336        AttrBuilder B;
337        B.addAttribute(Attribute::NoCapture);
338        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
339    }
340    Attrs.push_back(PAS);
341    {
342        AttrBuilder B;
343        B.addAttribute(Attribute::NoCapture);
344        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
345    }
346    Attrs.push_back(PAS);
347    {
348        AttrBuilder B;
349        B.addAttribute(Attribute::NoUnwind);
350        B.addAttribute(Attribute::UWTable);
351        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
352    }
353    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
354
355    //Create the function that will be generated.
356    mFunction = mMod->getFunction("process_block");
357    if (!mFunction) {
358        mFunction = Function::Create(
359            /*Type=*/mFunctionType,
360            /*Linkage=*/GlobalValue::ExternalLinkage,
361            /*Name=*/"process_block", mMod);
362        mFunction->setCallingConv(CallingConv::C);
363    }
364    mFunction->setAttributes(AttrSet);
365}
366   
// Rounds v up to the nearest power of two and returns that power of two.
// Despite its name, this does NOT return a logarithm: log2ceil(5) == 8.
//
// v: value to round up; 0 and 1 both yield 1.
//
// Returns the smallest power of two that is >= v. Powers of two above 2^63 are
// not representable in uint64_t, so for v > 2^63 the result saturates at 2^63.
//
// The accumulator is 64 bits wide: a 32-bit accumulator wraps to zero once
// v exceeds 2^31, after which the loop would never terminate.
uint64_t log2ceil (uint64_t v) {
    const uint64_t largestPow2 = static_cast<uint64_t>(1) << 63;
    uint64_t pow2 = 1;
    while (pow2 < v && pow2 < largestPow2) {
        pow2 <<= 1;
    }
    return pow2;
}
372
373unsigned const LongAdvanceBase = BLOCK_SIZE;
374   
375
376   
377// CarryDataNumbering
378//
379// For each PabloBlock, a contiguous CarryData area holds carry,
380// and advance values that are generated in one block for use in the
381// next.  For a given block, the carry data area contains the
382// carries, the advances and the nested data for contained blocks,
383// if any.
384// Notes:
385//   (a) an additional data entry is created for each if-statement
386//       having more than one carry or advance opreation within it.  This
387//       additional entry is a summary entry which must be nonzero to
388//       indicate that there are carry or advance bits associated with
389//       any operation within the if-structure (at any nesting level).
390//   (b) advancing by a large amount may require multiple advance entries.
391//       the number of advance entries for an operation Adv(x, n) is
392//       (n + BLOCK_SIZE - 1) / BLOCK_SIZE
393//
394// Examine precomputes some CarryNumbering and AdvanceNumbering, as
395// well as mMaxWhileDepth of while loops.
396//
397unsigned PabloCompiler::Examine(PabloBlock & blk, unsigned carryDataIndexIn) {
398    // Count local carries and advances at this level.
399    unsigned carryDataIndex = carryDataIndexIn;
400    unsigned localCarries = 0;
401    unsigned localAdvances = 0;
402    unsigned nestedCarryDataSize = 0;
403    for (Statement * stmt : blk) {
404        if (Advance * adv = dyn_cast<Advance>(stmt)) {
405            adv->setLocalAdvanceIndex(localAdvances);
406            unsigned shift_amount = adv->getAdvanceAmount();
407            if (shift_amount >= LongAdvanceBase) {
408                int advEntries = (shift_amount + BLOCK_SIZE - 1) / BLOCK_SIZE;
409                int advCeil = log2ceil(advEntries);
410                localAdvances += advCeil;
411            }
412            else {
413                localAdvances += (shift_amount + BLOCK_SIZE - 1) / BLOCK_SIZE;
414            }
415        }
416        else if (MatchStar * m = dyn_cast<MatchStar>(stmt)) {
417            m->setLocalCarryIndex(localCarries);
418            ++localCarries;
419        }
420        else if (ScanThru * s = dyn_cast<ScanThru>(stmt)) {
421            s->setLocalCarryIndex(localCarries);
422            ++localCarries;
423        }
424    }
425    carryDataIndex += localCarries + localAdvances;
426    for (Statement * stmt : blk) {
427        if (Call * call = dyn_cast<Call>(stmt)) {
428            mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
429        }
430        else if (If * ifStatement = dyn_cast<If>(stmt)) {
431            ++mIfDepth;
432            const auto ifCarryDataSize = Examine(ifStatement->getBody(), carryDataIndex);
433            --mIfDepth;
434            nestedCarryDataSize += ifCarryDataSize;
435            carryDataIndex += ifCarryDataSize;
436        }
437        else if (While * whileStatement = dyn_cast<While>(stmt)) {
438            mMaxWhileDepth = std::max(mMaxWhileDepth, ++mWhileDepth);
439            const auto whileCarryDataSize = Examine(whileStatement->getBody(), carryDataIndex);
440            --mWhileDepth;
441            nestedCarryDataSize += whileCarryDataSize;
442            carryDataIndex += whileCarryDataSize;
443        }
444    }
445    blk.setCarryIndexBase(carryDataIndexIn);
446    blk.setLocalCarryCount(localCarries);
447    blk.setLocalAdvanceCount(localAdvances);
448    unsigned totalCarryDataSize = localCarries + localAdvances + nestedCarryDataSize;
449    if ((mIfDepth > 0) && (totalCarryDataSize > 1)) {
450        // Need extra space for the summary variable, always the last
451        // entry within an if block.
452        totalCarryDataSize += 1;
453    }
454    blk.setTotalCarryDataSize(totalCarryDataSize);
455    return totalCarryDataSize;
456}
457
458void PabloCompiler::DeclareCallFunctions() {
459    for (auto mapping : mCalleeMap) {
460        const String * callee = mapping.first;
461        //std::cerr << callee->str() << " to be declared\n";
462        auto ei = mExternalMap.find(callee->value());
463        if (ei != mExternalMap.end()) {
464            void * fn_ptr = ei->second;
465            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
466            Value * externalValue = mMod->getOrInsertFunction(callee->value(), mBitBlockType, mBasisBitsInputPtr, NULL);
467            if (LLVM_UNLIKELY(externalValue == nullptr)) {
468                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
469            }
470            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
471            mCalleeMap[callee] = externalValue;
472        }
473        else {
474            throw std::runtime_error("External function \"" + callee->to_string() + "\" not installed");
475        }
476    }
477}
478
479void PabloCompiler::compileBlock(const PabloBlock & blk) {
480    for (const Statement * statement : blk) {
481        compileStatement(statement);
482    }
483}
484
485
486
487
488void PabloCompiler::compileIf(const If * ifStatement) {       
489        //
490        //  The If-ElseZero stmt:
491        //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
492        //  If the value of the predicate is nonzero, then determine the values of variables
493        //  <var>* by executing the given statements.  Otherwise, the value of the
494        //  variables are all zero.  Requirements: (a) no variable that is defined within
495        //  the body of the if may be accessed outside unless it is explicitly
496        //  listed in the variable list, (b) every variable in the defined list receives
497        //  a value within the body, and (c) the logical consequence of executing
498        //  the statements in the event that the predicate is zero is that the
499        //  values of all defined variables indeed work out to be 0.
500        //
501        //  Simple Implementation with Phi nodes:  a phi node in the if exit block
502        //  is inserted for each variable in the defined variable list.  It receives
503        //  a zero value from the ifentry block and the defined value from the if
504        //  body.
505        //
506        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
507        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
508        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
509       
510        IRBuilder<> b_entry(ifEntryBlock);
511        mBasicBlock = ifEntryBlock;
512   
513        const unsigned baseCarryDataIdx = ifStatement->getBody().getCarryIndexBase();
514        const unsigned carryDataSize = ifStatement->getBody().getTotalCarryDataSize();
515        const unsigned carrySummaryIndex = baseCarryDataIdx + carryDataSize - 1;
516       
517        Value* if_test_value = compileExpression(ifStatement->getCondition());
518        if (carryDataSize > 0) {
519            // load the summary variable
520            Value* last_if_pending_data = genCarryDataLoad(carrySummaryIndex);
521            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_data);
522        }
523        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
524
525        // Entry processing is complete, now handle the body of the if.
526        mBasicBlock = ifBodyBlock;
527        compileBlock(ifStatement -> getBody());
528
529        // If we compiled an If or a While statement, we won't be in the same basic block as before.
530        // Create the branch from the current basic block to the end block.
531        IRBuilder<> bIfBody(mBasicBlock);
532        // After the recursive compile, now insert the code to compute the summary
533        // carry over variable.
534       
535        if (carryDataSize > 1) {
536            // If there was only one carry entry, then it also serves as the summary variable.
537            // Otherwise, we need to combine entries to compute the summary.
538            Value * carry_summary = mZeroInitializer;
539            for (int c = baseCarryDataIdx; c < carrySummaryIndex; c++) {
540                int s = mCarryDataSummaryIdx[c];
541                if (s == -1) {
542                    Value* carryq_value = mCarryOutVector[c];
543                    if (carry_summary == mZeroInitializer) {
544                        carry_summary = carryq_value;
545                    }
546                    else {
547                        carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
548                    }
549                    mCarryDataSummaryIdx[c] = carrySummaryIndex;
550                }
551            }
552            genCarryDataStore(carry_summary, carrySummaryIndex);
553        }
554        bIfBody.CreateBr(ifEndBlock);
555        //End Block
556        IRBuilder<> bEnd(ifEndBlock);
557        for (const PabloAST * node : ifStatement->getDefined()) {
558            const Assign * assign = cast<Assign>(node);
559            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, assign->getName()->value());
560            auto f = mMarkerMap.find(assign);
561            assert (f != mMarkerMap.end());
562            phi->addIncoming(mZeroInitializer, ifEntryBlock);
563            phi->addIncoming(f->second, mBasicBlock);
564            mMarkerMap[assign] = phi;
565        }
566        // Create the phi Node for the summary variable.
567        if (carryDataSize > 0) {
568            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
569            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
570            summary_phi->addIncoming(mCarryOutVector[carrySummaryIndex], mBasicBlock);
571            mCarryOutVector[carrySummaryIndex] = summary_phi;
572        }
573       
574        // Set the basic block to the new end block
575        mBasicBlock = ifEndBlock;
576}
577
578void PabloCompiler::compileWhile(const While * whileStatement) {
579        const unsigned baseCarryDataIdx = whileStatement->getBody().getCarryIndexBase();
580        const unsigned carryDataSize = whileStatement->getBody().getTotalCarryDataSize();
581   
582        if (mWhileDepth == 0) {
583            for (auto i = 0; i < carryDataSize; ++i) {
584                genCarryDataLoad(baseCarryDataIdx + i);
585            }
586        }
587
588        SmallVector<const Next*, 4> nextNodes;
589        for (const PabloAST * node : whileStatement->getBody()) {
590            if (isa<Next>(node)) {
591                nextNodes.push_back(cast<Next>(node));
592            }
593        }
594
595        // Compile the initial iteration statements; the calls to genCarryDataStore will update the
596        // mCarryOutVector with the appropriate values. Although we're not actually entering a new basic
597        // block yet, increment the nesting depth so that any calls to genCarryDataLoad or genCarryDataStore
598        // will refer to the previous value.
599
600        ++mWhileDepth;
601
602        compileBlock(whileStatement->getBody());
603
604        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
605        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
606        // but works for now.
607
608        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
609        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
610        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
611
612        // Note: compileBlock may update the mBasicBlock pointer if the body contains nested loops. It
613        // may not be same one that we entered the function with.
614        IRBuilder<> bEntry(mBasicBlock);
615        bEntry.CreateBr(whileCondBlock);
616
617        // CONDITION BLOCK
618        IRBuilder<> bCond(whileCondBlock);
619        // generate phi nodes for any carry propogating instruction
620        std::vector<PHINode*> phiNodes(carryDataSize + nextNodes.size());
621        unsigned index = 0;
622        for (index = 0; index < carryDataSize; ++index) {
623            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
624            phi->addIncoming(mCarryOutVector[baseCarryDataIdx + index], mBasicBlock);
625            mCarryInVector[baseCarryDataIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
626            phiNodes[index] = phi;
627        }
628        // and for any Next nodes in the loop body
629        for (const Next * n : nextNodes) {
630            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->value());
631            auto f = mMarkerMap.find(n->getInitial());
632            assert (f != mMarkerMap.end());
633            phi->addIncoming(f->second, mBasicBlock);
634            mMarkerMap[n->getInitial()] = phi;
635            phiNodes[index++] = phi;
636        }
637
638        mBasicBlock = whileCondBlock;
639        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
640
641        // BODY BLOCK
642        //std::cerr << "Compile loop body\n";
643        mBasicBlock = whileBodyBlock;
644        compileBlock(whileStatement->getBody());
645        // update phi nodes for any carry propogating instruction
646        IRBuilder<> bWhileBody(mBasicBlock);
647        for (index = 0; index < carryDataSize; ++index) {
648            PHINode * phi = phiNodes[index];
649            Value * carryOut = bWhileBody.CreateOr(phi, mCarryOutVector[baseCarryDataIdx + index]);
650            phi->addIncoming(carryOut, mBasicBlock);
651            mCarryOutVector[baseCarryDataIdx + index] = phi;
652        }
653       
654        // and for any Next nodes in the loop body
655        for (const Next * n : nextNodes) {
656            auto f = mMarkerMap.find(n->getInitial());
657            assert (f != mMarkerMap.end());
658            PHINode * phi = phiNodes[index++];
659            phi->addIncoming(f->second, mBasicBlock);
660            mMarkerMap[n->getInitial()] = phi;
661        }
662
663        bWhileBody.CreateBr(whileCondBlock);
664
665        // EXIT BLOCK
666        mBasicBlock = whileEndBlock;
667        if (--mWhileDepth == 0) {
668            for (index = 0; index < carryDataSize; ++index) {
669                genCarryDataStore(phiNodes[index], baseCarryDataIdx + index);
670            }
671        }
672 
673}
674
675void PabloCompiler::compileStatement(const Statement * stmt)
676{
677    IRBuilder<> b(mBasicBlock);
678    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
679        Value * expr = compileExpression(assign->getExpr());
680        if (DumpTrace) {
681            genPrintRegister(assign->getName()->to_string(), expr);
682        }
683        mMarkerMap[assign] = expr;
684        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
685            SetOutputValue(expr, assign->getOutputIndex());
686        }
687    }
688    else if (const Next * next = dyn_cast<const Next>(stmt)) {
689        Value * expr = compileExpression(next->getExpr());
690        if (TraceNext) {
691            genPrintRegister(next->getInitial()->getName()->to_string(), expr);
692        }
693        mMarkerMap[next->getInitial()] = expr;
694    }
695    else if (const If * ifStatement = dyn_cast<const If>(stmt))
696    {
697        compileIf(ifStatement);
698    }
699    else if (const While * whileStatement = dyn_cast<const While>(stmt))
700    {
701        compileWhile(whileStatement);
702    }
703    else if (const Call* call = dyn_cast<Call>(stmt)) {
704        //Call the callee once and store the result in the marker map.
705        auto mi = mMarkerMap.find(call);
706        if (mi == mMarkerMap.end()) {
707            auto ci = mCalleeMap.find(call->getCallee());
708            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
709                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->to_string() + "\"");
710            }
711            mi = mMarkerMap.insert(std::make_pair(call, b.CreateCall(ci->second, mBasisBitsAddr))).first;
712        }
713        // return mi->second;
714    }
715    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
716        Value * expr = b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
717        if (DumpTrace) {
718            genPrintRegister(stmt->getName()->to_string(), expr);
719        }
720        mMarkerMap[pablo_and] = expr;
721        // return expr;
722    }
723    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
724        Value * expr = b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
725        if (DumpTrace) {
726            genPrintRegister(stmt->getName()->to_string(), expr);
727        }
728        mMarkerMap[pablo_or] = expr;
729        // return expr;
730    }
731    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
732        Value * expr = b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
733        mMarkerMap[pablo_xor] = expr;
734        // return expr;
735    }
736    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
737        Value* ifMask = compileExpression(sel->getCondition());
738        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
739        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
740        Value * expr = b.CreateOr(ifTrue, ifFalse);
741        if (DumpTrace) {
742            genPrintRegister(stmt->getName()->to_string(), expr);
743        }
744        mMarkerMap[sel] = expr;
745        // return expr;
746    }
747    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
748        Value * expr = genNot(compileExpression(pablo_not->getExpr()));
749        if (DumpTrace) {
750            genPrintRegister(stmt->getName()->to_string(), expr);
751        }
752        mMarkerMap[pablo_not] = expr;
753        // return expr;
754    }
755    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
756        Value* strm_value = compileExpression(adv->getExpr());
757        int shift = adv->getAdvanceAmount();
758        unsigned advance_index = adv->getLocalAdvanceIndex();
759        Value * expr = genAdvanceWithCarry(strm_value, shift, advance_index, stmt->getParent());
760        if (DumpTrace) {
761            genPrintRegister(stmt->getName()->to_string(), expr);
762        }
763        mMarkerMap[adv] = expr;
764        // return expr;
765    }
766    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt))
767    {
768        Value * marker = compileExpression(mstar->getMarker());
769        Value * cc = compileExpression(mstar->getCharClass());
770        Value * marker_and_cc = b.CreateAnd(marker, cc);
771        unsigned carry_index = mstar->getLocalCarryIndex();
772        Value * expr = b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc, carry_index, stmt->getParent()), cc), marker, "matchstar");
773        if (DumpTrace) {
774            genPrintRegister(stmt->getName()->to_string(), expr);
775        }
776        mMarkerMap[mstar] = expr;
777        // return expr;
778    }
779    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt))
780    {
781        Value * marker_expr = compileExpression(sthru->getScanFrom());
782        Value * cc_expr = compileExpression(sthru->getScanThru());
783        unsigned carry_index = sthru->getLocalCarryIndex();
784        Value * expr = b.CreateAnd(genAddWithCarry(marker_expr, cc_expr, carry_index, stmt->getParent()), genNot(cc_expr), "scanthru");
785        if (DumpTrace) {
786            genPrintRegister(stmt->getName()->to_string(), expr);
787        }
788        mMarkerMap[sthru] = expr;
789        // return expr;
790    }
791    else {
792        PabloPrinter::print(stmt, std::cerr);
793        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
794    }
795}
796
797Value * PabloCompiler::compileExpression(const PabloAST * expr) {
798    if (isa<Ones>(expr)) {
799        return mOneInitializer;
800    }
801    else if (isa<Zeroes>(expr)) {
802        return mZeroInitializer;
803    }
804    else if (const Next * next = dyn_cast<Next>(expr)) {
805        expr = next->getInitial();
806    }
807    auto f = mMarkerMap.find(expr);
808    if (f == mMarkerMap.end()) {
809        std::stringstream str;
810        str << "\"";
811        PabloPrinter::print(expr, str);
812        str << "\" was used before definition!";
813        throw std::runtime_error(str.str());
814    }
815    return f->second;
816}
817
818
#ifdef USE_UADD_OVERFLOW
#ifdef USE_TWO_UADD_OVERFLOW
// Emit a call to mFunctionUaddOverflow on the two integer operands and unpack
// the returned aggregate: element 0 is the sum, element 1 the overflow bit.
// All instructions are appended to mBasicBlock.
PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
    Value * const args[] = { int128_e1, int128_e2 };
    CallInst * const call = CallInst::Create(mFunctionUaddOverflow, args, "uadd_overflow_res", mBasicBlock);
    call->setCallingConv(CallingConv::C);
    call->setTailCall(false);
    call->setAttributes(AttributeSet());

    const unsigned sumIndex[] = { 0 };
    const unsigned overflowIndex[] = { 1 };

    SumWithOverflowPack result;
    result.sum = ExtractValueInst::Create(call, sumIndex, "sum", mBasicBlock);
    result.obit = ExtractValueInst::Create(call, overflowIndex, "obit", mBasicBlock);
    return result;
}
#else
// Emit a call to mFunctionUaddOverflowCarryin on the two integer operands and
// the i1 carry-in, then unpack the returned aggregate: element 0 is the sum,
// element 1 the overflow bit. All instructions are appended to mBasicBlock.
PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
    Value * const args[] = { int128_e1, int128_e2, int1_cin };
    CallInst * const call = CallInst::Create(mFunctionUaddOverflowCarryin, args, "uadd_overflow_res", mBasicBlock);
    call->setCallingConv(CallingConv::C);
    call->setTailCall(false);
    call->setAttributes(AttributeSet());

    const unsigned sumIndex[] = { 0 };
    const unsigned overflowIndex[] = { 1 };

    SumWithOverflowPack result;
    result.sum = ExtractValueInst::Create(call, sumIndex, "sum", mBasicBlock);
    result.obit = ExtractValueInst::Create(call, overflowIndex, "obit", mBasicBlock);
    return result;
}
#endif
#endif
869
870
871Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2, unsigned localIndex, const PabloBlock * blk) {
872    IRBuilder<> b(mBasicBlock);
873
874    //CarryQ - carry in.
875    const int carryIdx = blk->getCarryIndexBase() + localIndex;
876    Value* carryq_value = genCarryDataLoad(carryIdx);
877#ifdef USE_TWO_UADD_OVERFLOW
878    //This is the ideal implementation, which uses two uadd.with.overflow
879    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
880    CastInst* int128_e1 = new BitCastInst(e1, b.getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
881    CastInst* int128_e2 = new BitCastInst(e2, b.getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
882    CastInst* int128_carryq_value = new BitCastInst(carryq_value, b.getIntNTy(BLOCK_SIZE), "carryq_128", mBasicBlock);
883
884    SumWithOverflowPack sumpack0, sumpack1;
885
886    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
887    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
888
889    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
890    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
891
892    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
893    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
894    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
895    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
896    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
897
898#elif defined USE_UADD_OVERFLOW
899    //use llvm.uadd.with.overflow.i128 or i256
900    CastInst* int128_e1 = new BitCastInst(e1, b.getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
901    CastInst* int128_e2 = new BitCastInst(e2, b.getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
902
903    //get i1 carryin from iBLOCK_SIZE
904    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
905    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
906    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
907
908    SumWithOverflowPack sumpack0;
909    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
910    Value* obit = sumpack0.obit;
911    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
912
913    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
914    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
915    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
916    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
917#elif (BLOCK_SIZE == 128)
918    //calculate carry through logical ops
919    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
920    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
921    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
922    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
923    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
924    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
925
926    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
927    Value* carry_out = genShiftHighbitToLow(BLOCK_SIZE, b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))));
928#else
929    //BLOCK_SIZE == 256, there is no other implementation
930    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
931#endif //USE_TWO_UADD_OVERFLOW
932
933    genCarryDataStore(carry_out, carryIdx);
934    return sum;
935}
936//#define CARRY_DEBUG
937Value* PabloCompiler::genCarryDataLoad(const unsigned index) {
938    assert (index < mCarryInVector.size());
939    if (mWhileDepth == 0) {
940        IRBuilder<> b(mBasicBlock);
941        mCarryInVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
942    }
943#ifdef CARRY_DEBUG
944    genPrintRegister("carry_in_" + std::to_string(index), mCarryInVector[index]);
945#endif
946    return mCarryInVector[index];
947}
948
949void PabloCompiler::genCarryDataStore(Value* carryOut, const unsigned index ) {
950    assert (carryOut);
951    assert (index < mCarryOutVector.size());
952    if (mWhileDepth == 0) {
953        IRBuilder<> b(mBasicBlock);
954        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
955    }
956    mCarryDataSummaryIdx[index] = -1;
957#ifdef CARRY_DEBUG
958    genPrintRegister("carry_out_" + std::to_string(index), mCarryOutVector[index]);
959#endif
960    mCarryOutVector[index] = carryOut;
961    //std::cerr << "mCarryOutVector[" << index << "]]\n";
962}
963
964inline Value* PabloCompiler::genBitBlockAny(Value* test) {
965    IRBuilder<> b(mBasicBlock);
966    Value* cast_marker_value_1 = b.CreateBitCast(test, b.getIntNTy(BLOCK_SIZE));
967    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(b.getIntNTy(BLOCK_SIZE), 0));
968}
969
970Value * PabloCompiler::genShiftHighbitToLow(unsigned FieldWidth, Value * op) {
971    unsigned FieldCount = BLOCK_SIZE/FieldWidth;
972    IRBuilder<> b(mBasicBlock);
973    VectorType * vType = VectorType::get(IntegerType::get(mMod->getContext(), FieldWidth), FieldCount);
974    Value * v = b.CreateBitCast(op, vType);
975    return b.CreateBitCast(b.CreateLShr(v, FieldWidth - 1), mBitBlockType);
976}
977
978Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
979    IRBuilder<> b(mBasicBlock);
980    Value* i128_val = b.CreateBitCast(e, b.getIntNTy(BLOCK_SIZE));
981    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
982}
983
984inline Value* PabloCompiler::genNot(Value* expr) {
985    IRBuilder<> b(mBasicBlock);
986    return b.CreateXor(expr, mOneInitializer, "not");
987}
988
989Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
990    if (shift_amount >= LongAdvanceBase) {
991        return genLongAdvanceWithCarry(strm_value, shift_amount, localIndex, blk);
992    }
993    IRBuilder<> b(mBasicBlock);
994    const auto advanceIndex = blk->getCarryIndexBase() + blk->getLocalCarryCount() + localIndex;
995    Value* result_value;
996   
997    if (shift_amount == 0) {
998        result_value = genCarryDataLoad(advanceIndex);
999        //b.CreateCall(mFunc_print_register, result_value);
1000    }
1001#if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
1002    if (shift_amount == 1) {
1003        Value* advanceq_value = genShiftHighbitToLow(BLOCK_SIZE, genCarryDataLoad(advanceIndex));
1004        Value* srli_1_value = b.CreateLShr(strm_value, 63);
1005        Value* packed_shuffle;
1006        Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
1007        Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
1008        packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
1009       
1010        Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
1011        Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
1012       
1013        Value* shl_value = b.CreateShl(strm_value, const_packed_2);
1014        result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
1015    }
1016    else { //if (block_shift < BLOCK_SIZE) {
1017        // This is the preferred logic, but is too slow for the general case.
1018        // We need to speed up our custom LLVM for this code.
1019        Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(advanceIndex), b.getIntNTy(BLOCK_SIZE));
1020        Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
1021        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
1022        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1023    }
1024#else
1025    Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(advanceIndex), b.getIntNTy(BLOCK_SIZE));
1026    Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
1027    Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
1028    result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1029   
1030#endif
1031    genCarryDataStore(strm_value, advanceIndex);
1032    return result_value;
1033}
1034
1035//
1036// Generate code for long advances >= LongAdvanceBase
1037//
1038Value* PabloCompiler::genLongAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
1039    IRBuilder<> b(mBasicBlock);
1040    const unsigned advanceIndex = blk->getCarryIndexBase() + blk->getLocalCarryCount() + localIndex;
1041    const unsigned advanceEntries = (shift_amount + BLOCK_SIZE - 1) / BLOCK_SIZE;
1042    const unsigned block_shift = shift_amount % BLOCK_SIZE;
1043    const unsigned advanceCeil = log2ceil(advanceEntries);
1044    Value * indexMask = b.getInt64(advanceCeil - 1);
1045    Value * advBaseIndex = b.getInt64(advanceIndex);
1046    Value * storeIndex = b.CreateAdd(b.CreateAnd(mBlockNo, indexMask), advBaseIndex);
1047    Value * loadIndex = b.CreateAdd(b.CreateAnd(b.CreateSub(mBlockNo, b.getInt64(advanceEntries)), indexMask), advBaseIndex);
1048    Value * storePtr = b.CreateGEP(mCarryDataPtr, storeIndex);
1049    Value * loadPtr = b.CreateGEP(mCarryDataPtr, loadIndex);
1050    Value* result_value;
1051
1052    if (block_shift == 0) {
1053        result_value = b.CreateAlignedLoad(loadPtr, BLOCK_SIZE/8);
1054    }
1055    else if (advanceEntries == 1) {
1056        Value* advanceq_longint = b.CreateBitCast(b.CreateAlignedLoad(loadPtr, BLOCK_SIZE/8), b.getIntNTy(BLOCK_SIZE));
1057        Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
1058        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
1059        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1060    }
1061    else {
1062        // The advance is based on the two oldest bit blocks in the advance buffer.
1063        // The buffer is maintained as a circular buffer of size advanceCeil.
1064        // Indexes within the buffer are computed by bitwise and with the indexMask.
1065        Value * loadIndex2 = b.CreateAdd(b.CreateAnd(b.CreateSub(mBlockNo, b.getInt64(advanceEntries-1)), indexMask), advBaseIndex);
1066        Value * loadPtr2 = b.CreateGEP(mCarryDataPtr, loadIndex2);
1067        Value* advanceq_longint = b.CreateBitCast(b.CreateAlignedLoad(loadPtr, BLOCK_SIZE/8), b.getIntNTy(BLOCK_SIZE));
1068        //genPrintRegister("advanceq_longint", b.CreateBitCast(advanceq_longint, mBitBlockType));
1069        Value* strm_longint = b.CreateBitCast(b.CreateAlignedLoad(loadPtr2, BLOCK_SIZE/8), b.getIntNTy(BLOCK_SIZE));
1070        //genPrintRegister("strm_longint", b.CreateBitCast(strm_longint, mBitBlockType));
1071        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
1072        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1073    }
1074    b.CreateAlignedStore(strm_value, storePtr, BLOCK_SIZE/8);
1075    return result_value;
1076}
1077   
1078void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
1079    IRBuilder<> b(mBasicBlock);
1080    if (marker->getType()->isPointerTy()) {
1081        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
1082    }
1083    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
1084    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
1085    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
1086}
1087
1088CompiledPabloFunction::CompiledPabloFunction(size_t carryDataSize, Function * function, ExecutionEngine * executionEngine)
1089: CarryDataSize(carryDataSize)
1090, FunctionPointer(executionEngine->getPointerToFunction(function))
1091, mFunction(function)
1092, mExecutionEngine(executionEngine)
1093{
1094
1095}
1096
1097// Clean up the memory for the compiled function once we're finished using it.
1098CompiledPabloFunction::~CompiledPabloFunction() {
1099    if (mExecutionEngine) {
1100        assert (mFunction);
1101        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
1102        delete mExecutionEngine;
1103    }
1104}
1105
1106}
Note: See TracBrowser for help on using the repository browser.