source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4567

Last change on this file since 4567 was 4567, checked in by cameron, 4 years ago

Use llvm:raw_ostream in Pablo printer

File size: 45.2 KB
Line 
1/*
2 *  Copyright (c) 2014-15 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <pablo/pablo_compiler.h>
8#include <pablo/codegenstate.h>
9#include <pablo/carry_data.h>
10#include <pablo/printer_pablos.h>
11#include <cc/cc_namemap.hpp>
12#include <re/re_name.h>
13#include <stdexcept>
14#include <include/simd-lib/bitblock.hpp>
15#include <sstream>
16#include <llvm/IR/Verifier.h>
17#include <llvm/Pass.h>
18#include <llvm/PassManager.h>
19#include <llvm/ADT/SmallVector.h>
20#include <llvm/Analysis/Passes.h>
21#include <llvm/IR/BasicBlock.h>
22#include <llvm/IR/CallingConv.h>
23#include <llvm/IR/Constants.h>
24#include <llvm/IR/DataLayout.h>
25#include <llvm/IR/DerivedTypes.h>
26#include <llvm/IR/Function.h>
27#include <llvm/IR/GlobalVariable.h>
28#include <llvm/IR/InlineAsm.h>
29#include <llvm/IR/Instructions.h>
30#include <llvm/IR/LLVMContext.h>
31#include <llvm/IR/Module.h>
32#include <llvm/Support/FormattedStream.h>
33#include <llvm/Support/MathExtras.h>
34#include <llvm/Support/Casting.h>
35#include <llvm/Support/Compiler.h>
36#include <llvm/Support/Debug.h>
37#include <llvm/Support/TargetSelect.h>
38#include <llvm/Support/Host.h>
39#include <llvm/Transforms/Scalar.h>
40#include <llvm/ExecutionEngine/ExecutionEngine.h>
41#include <llvm/ExecutionEngine/MCJIT.h>
42#include <llvm/IRReader/IRReader.h>
43#include <llvm/Bitcode/ReaderWriter.h>
44#include <llvm/Support/MemoryBuffer.h>
45#include <llvm/IR/IRBuilder.h>
46#include <llvm/Support/CommandLine.h>
47#include <llvm/ADT/Twine.h>
48#include <iostream>
49
50static cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
51static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
52
53static cl::OptionCategory fTracingOptions("Run-time Tracing Options", "These options control execution traces.");
54static cl::opt<bool> TraceNext("trace-next-nodes", cl::init(false), cl::desc("Generate dynamic traces of executed Next nodes (while control variables)."), cl::cat(fTracingOptions));
55static cl::opt<bool> DumpTrace("dump-trace", cl::init(false), cl::desc("Generate dynamic traces of executed assignments."), cl::cat(fTracingOptions));
56
57extern "C" {
58  void wrapped_print_register(char * regName, BitBlock bit_block) {
59      print_register<BitBlock>(regName, bit_block);
60  }
61}
62
63namespace pablo {
64
65PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
66: mBasisBits(basisBits)
67, mMod(new Module("icgrep", getGlobalContext()))
68, mBasicBlock(nullptr)
69, mExecutionEngine(nullptr)
70, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
71, mBasisBitsInputPtr(nullptr)
72, mCarryDataPtr(nullptr)
73, mBlockNo(nullptr)
74, mWhileDepth(0)
75, mIfDepth(0)
76, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
77, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
78, mFunctionType(nullptr)
79, mFunction(nullptr)
80, mBasisBitsAddr(nullptr)
81, mOutputAddrPtr(nullptr)
82, mMaxWhileDepth(0)
83, mPrintRegisterFunction(nullptr)
84{
85    //Create the jit execution engine.up
86    InitializeNativeTarget();
87    InitializeNativeTargetAsmPrinter();
88    InitializeNativeTargetAsmParser();
89    DefineTypes();
90}
91
92PabloCompiler::~PabloCompiler()
93{
94
95}
96   
97void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
98    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
99}
100
101void PabloCompiler::genPrintRegister(std::string regName, Value * bitblockValue) {
102    IRBuilder <> b(mBasicBlock);
103    Constant * regNameData = ConstantDataArray::getString(mMod->getContext(), regName);
104    GlobalVariable *regStrVar = new GlobalVariable(*mMod, 
105                                                   ArrayType::get(IntegerType::get(mMod->getContext(), 8), regName.length()+1),
106                                                   /*isConstant=*/ true,
107                                                   /*Linkage=*/ GlobalValue::PrivateLinkage,
108                                                   /*Initializer=*/ regNameData);
109    Value * regStrPtr = b.CreateGEP(regStrVar, {b.getInt64(0), b.getInt32(0)});
110    b.CreateCall(mPrintRegisterFunction, {regStrPtr, bitblockValue});
111}
112
113CompiledPabloFunction PabloCompiler::compile(PabloBlock & pb)
114{
115    mWhileDepth = 0;
116    mIfDepth = 0;
117    mMaxWhileDepth = 0;
118    // Get the total number of carry entries; add 1 extra element for the block number.
119    unsigned totalCarryDataSize = pb.carryData.enumerate(pb) + 1;
120    Examine(pb); 
121    mCarryInVector.resize(totalCarryDataSize);
122    mCarryOutVector.resize(totalCarryDataSize);
123    mCarryDataSummaryIdx.resize(totalCarryDataSize);
124    std::string errMessage;
125    EngineBuilder builder(mMod);
126    builder.setErrorStr(&errMessage);
127    builder.setMCPU(sys::getHostCPUName());
128    builder.setUseMCJIT(true);
129    builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
130    mExecutionEngine = builder.create();
131    if (mExecutionEngine == nullptr) {
132        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
133    }
134    DeclareFunctions();
135
136    DeclareCallFunctions();
137
138    Function::arg_iterator args = mFunction->arg_begin();
139    mBasisBitsAddr = args++;
140    mBasisBitsAddr->setName("basis_bits");
141    mCarryDataPtr = args++;
142    mCarryDataPtr->setName("carry_data");
143    mOutputAddrPtr = args++;
144    mOutputAddrPtr->setName("output");
145
146    mWhileDepth = 0;
147    mIfDepth = 0;
148    mMaxWhileDepth = 0;
149    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
150    IRBuilder<> b(mBasicBlock);
151
152    //The basis bits structure
153    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
154        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
155        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
156        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, mBasisBits[i]->getName()->to_string());
157        mMarkerMap.insert(std::make_pair(mBasisBits[i], basisBit));
158    }
159   
160    // The block number is a 64-bit integer at the end of the carry data area.
161    Value * blockNoPtr = b.CreateBitCast(b.CreateGEP(mCarryDataPtr, b.getInt64(totalCarryDataSize)), Type::getInt64PtrTy(b.getContext()));
162    mBlockNo = b.CreateLoad(blockNoPtr);
163    //Generate the IR instructions for the function.
164    compileBlock(pb);
165    {   IRBuilder<> b(mBasicBlock);  // may be in new basic block, set builder
166        b.CreateStore(b.CreateAdd(mBlockNo, b.getInt64(1)), blockNoPtr);
167    }
168
169    if (DumpTrace || TraceNext) {
170        genPrintRegister("blockNo", genCarryDataLoad(totalCarryDataSize));
171    }
172    if (LLVM_UNLIKELY(mWhileDepth != 0)) {
173        throw std::runtime_error("Non-zero nesting depth error (" + std::to_string(mWhileDepth) + ")");
174    }
175
176    //Terminate the block
177    ReturnInst::Create(mMod->getContext(), mBasicBlock);
178
179    //Display the IR that has been generated by this module.
180    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
181        mMod->dump();
182    }
183    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
184    verifyModule(*mMod, &dbgs());
185
186    mExecutionEngine->finalizeObject();
187
188    //Return the required size of the carry data area to the process_block function.
189    return CompiledPabloFunction(totalCarryDataSize * sizeof(BitBlock), mFunction, mExecutionEngine);
190}
191
192void PabloCompiler::DefineTypes()
193{
194    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
195    if (structBasisBits == nullptr) {
196        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
197    }
198    std::vector<Type*>StructTy_struct_Basis_bits_fields;
199    for (int i = 0; i != mBasisBits.size(); i++)
200    {
201        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
202    }
203    if (structBasisBits->isOpaque()) {
204        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
205    }
206    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
207
208    std::vector<Type*>functionTypeArgs;
209    functionTypeArgs.push_back(mBasisBitsInputPtr);
210
211    //The carry data array.
212    //A pointer to the BitBlock vector.
213    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
214
215    //The output structure.
216    StructType * outputStruct = mMod->getTypeByName("struct.Output");
217    if (!outputStruct) {
218        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
219    }
220    if (outputStruct->isOpaque()) {
221        std::vector<Type*>fields;
222        fields.push_back(mBitBlockType);
223        fields.push_back(mBitBlockType);
224        outputStruct->setBody(fields, /*isPacked=*/false);
225    }
226    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
227
228    //The &output parameter.
229    functionTypeArgs.push_back(outputStructPtr);
230
231    mFunctionType = FunctionType::get(
232     /*Result=*/Type::getVoidTy(mMod->getContext()),
233     /*Params=*/functionTypeArgs,
234     /*isVarArg=*/false);
235}
236
237void PabloCompiler::DeclareFunctions()
238{
239    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
240    mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), Type::getInt8PtrTy(getGlobalContext()), mBitBlockType, NULL);
241    mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
242    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
243
244#ifdef USE_UADD_OVERFLOW
245#ifdef USE_TWO_UADD_OVERFLOW
246    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
247    std::vector<Type*>StructTy_0_fields;
248    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
249    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
250    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
251
252    std::vector<Type*>FuncTy_1_args;
253    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
254    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
255    FunctionType* FuncTy_1 = FunctionType::get(
256                                              /*Result=*/StructTy_0,
257                                              /*Params=*/FuncTy_1_args,
258                                              /*isVarArg=*/false);
259
260    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
261                                              std::to_string(BLOCK_SIZE));
262    if (!mFunctionUaddOverflow) {
263        mFunctionUaddOverflow= Function::Create(
264          /*Type=*/ FuncTy_1,
265          /*Linkage=*/ GlobalValue::ExternalLinkage,
266          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
267        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
268    }
269    AttributeSet mFunctionUaddOverflowPAL;
270    {
271        SmallVector<AttributeSet, 4> Attrs;
272        AttributeSet PAS;
273        {
274          AttrBuilder B;
275          B.addAttribute(Attribute::NoUnwind);
276          B.addAttribute(Attribute::ReadNone);
277          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
278        }
279
280        Attrs.push_back(PAS);
281        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
282    }
283    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
284#else
285    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
286    std::vector<Type*>StructTy_0_fields;
287    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
288    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
289    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
290
291    std::vector<Type*>FuncTy_1_args;
292    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
293    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
294    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
295    FunctionType* FuncTy_1 = FunctionType::get(
296                                              /*Result=*/StructTy_0,
297                                              /*Params=*/FuncTy_1_args,
298                                              /*isVarArg=*/false);
299
300    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
301                                              std::to_string(BLOCK_SIZE));
302    if (!mFunctionUaddOverflowCarryin) {
303        mFunctionUaddOverflowCarryin = Function::Create(
304          /*Type=*/ FuncTy_1,
305          /*Linkage=*/ GlobalValue::ExternalLinkage,
306          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
307        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
308    }
309    AttributeSet mFunctionUaddOverflowCarryinPAL;
310    {
311        SmallVector<AttributeSet, 4> Attrs;
312        AttributeSet PAS;
313        {
314          AttrBuilder B;
315          B.addAttribute(Attribute::NoUnwind);
316          B.addAttribute(Attribute::ReadNone);
317          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
318        }
319
320        Attrs.push_back(PAS);
321        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
322    }
323    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
324#endif
325#endif
326
327    //Starts on process_block
328    SmallVector<AttributeSet, 4> Attrs;
329    AttributeSet PAS;
330    {
331        AttrBuilder B;
332        B.addAttribute(Attribute::ReadOnly);
333        B.addAttribute(Attribute::NoCapture);
334        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
335    }
336    Attrs.push_back(PAS);
337    {
338        AttrBuilder B;
339        B.addAttribute(Attribute::NoCapture);
340        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
341    }
342    Attrs.push_back(PAS);
343    {
344        AttrBuilder B;
345        B.addAttribute(Attribute::NoCapture);
346        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
347    }
348    Attrs.push_back(PAS);
349    {
350        AttrBuilder B;
351        B.addAttribute(Attribute::NoUnwind);
352        B.addAttribute(Attribute::UWTable);
353        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
354    }
355    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
356
357    //Create the function that will be generated.
358    mFunction = mMod->getFunction("process_block");
359    if (!mFunction) {
360        mFunction = Function::Create(
361            /*Type=*/mFunctionType,
362            /*Linkage=*/GlobalValue::ExternalLinkage,
363            /*Name=*/"process_block", mMod);
364        mFunction->setCallingConv(CallingConv::C);
365    }
366    mFunction->setAttributes(AttrSet);
367}
368   
369void PabloCompiler::Examine(PabloBlock & blk) {
370    for (Statement * stmt : blk) {
371        if (Call * call = dyn_cast<Call>(stmt)) {
372            mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
373        }
374        else if (If * ifStatement = dyn_cast<If>(stmt)) {
375            ++mIfDepth;
376            Examine(ifStatement->getBody());
377            --mIfDepth;
378        }
379        else if (While * whileStatement = dyn_cast<While>(stmt)) {
380            mMaxWhileDepth = std::max(mMaxWhileDepth, ++mWhileDepth);
381            Examine(whileStatement->getBody());
382            --mWhileDepth;
383        }
384    }
385}
386
387void PabloCompiler::DeclareCallFunctions() {
388    for (auto mapping : mCalleeMap) {
389        const String * callee = mapping.first;
390        //std::cerr << callee->str() << " to be declared\n";
391        auto ei = mExternalMap.find(callee->value());
392        if (ei != mExternalMap.end()) {
393            void * fn_ptr = ei->second;
394            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
395            Value * externalValue = mMod->getOrInsertFunction(callee->value(), mBitBlockType, mBasisBitsInputPtr, NULL);
396            if (LLVM_UNLIKELY(externalValue == nullptr)) {
397                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
398            }
399            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
400            mCalleeMap[callee] = externalValue;
401        }
402        else {
403            throw std::runtime_error("External function \"" + callee->to_string() + "\" not installed");
404        }
405    }
406}
407
408void PabloCompiler::compileBlock(const PabloBlock & blk) {
409    for (const Statement * statement : blk) {
410        compileStatement(statement);
411    }
412}
413
414
415
416
417void PabloCompiler::compileIf(const If * ifStatement) {       
418        //
419        //  The If-ElseZero stmt:
420        //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
421        //  If the value of the predicate is nonzero, then determine the values of variables
422        //  <var>* by executing the given statements.  Otherwise, the value of the
423        //  variables are all zero.  Requirements: (a) no variable that is defined within
424        //  the body of the if may be accessed outside unless it is explicitly
425        //  listed in the variable list, (b) every variable in the defined list receives
426        //  a value within the body, and (c) the logical consequence of executing
427        //  the statements in the event that the predicate is zero is that the
428        //  values of all defined variables indeed work out to be 0.
429        //
430        //  Simple Implementation with Phi nodes:  a phi node in the if exit block
431        //  is inserted for each variable in the defined variable list.  It receives
432        //  a zero value from the ifentry block and the defined value from the if
433        //  body.
434        //
435        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
436        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
437        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
438       
439        IRBuilder<> b_entry(ifEntryBlock);
440        mBasicBlock = ifEntryBlock;
441        const PabloBlockCarryData & cd = ifStatement -> getBody().carryData;
442   
443        const unsigned baseCarryDataIdx = cd.getBlockCarryDataIndex();
444        const unsigned carryDataSize = cd.getTotalCarryDataSize();
445        const unsigned carrySummaryIndex = cd.summaryCarryDataIndex();
446       
447        Value* if_test_value = compileExpression(ifStatement->getCondition());
448        if (carryDataSize > 0) {
449            // load the summary variable
450            Value* last_if_pending_data = genCarryDataLoad(carrySummaryIndex);
451            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_data);
452        }
453        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
454
455        // Entry processing is complete, now handle the body of the if.
456        mBasicBlock = ifBodyBlock;
457        compileBlock(ifStatement -> getBody());
458
459        // If we compiled an If or a While statement, we won't be in the same basic block as before.
460        // Create the branch from the current basic block to the end block.
461        IRBuilder<> bIfBody(mBasicBlock);
462        // After the recursive compile, now insert the code to compute the summary
463        // carry over variable.
464       
465        if (carryDataSize > 1) {
466            // If there was only one carry entry, then it also serves as the summary variable.
467            // Otherwise, we need to combine entries to compute the summary.
468            Value * carry_summary = mZeroInitializer;
469            for (int c = baseCarryDataIdx; c < carrySummaryIndex; c++) {
470                int s = mCarryDataSummaryIdx[c];
471                if (s == -1) {
472                    Value* carryq_value = mCarryOutVector[c];
473                    if (carry_summary == mZeroInitializer) {
474                        carry_summary = carryq_value;
475                    }
476                    else {
477                        carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
478                    }
479                    mCarryDataSummaryIdx[c] = carrySummaryIndex;
480                }
481            }
482            genCarryDataStore(carry_summary, carrySummaryIndex);
483        }
484        bIfBody.CreateBr(ifEndBlock);
485        //End Block
486        IRBuilder<> bEnd(ifEndBlock);
487        for (const PabloAST * node : ifStatement->getDefined()) {
488            const Assign * assign = cast<Assign>(node);
489            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, assign->getName()->value());
490            auto f = mMarkerMap.find(assign);
491            assert (f != mMarkerMap.end());
492            phi->addIncoming(mZeroInitializer, ifEntryBlock);
493            phi->addIncoming(f->second, mBasicBlock);
494            mMarkerMap[assign] = phi;
495        }
496        // Create the phi Node for the summary variable.
497        if (carryDataSize > 0) {
498            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
499            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
500            summary_phi->addIncoming(mCarryOutVector[carrySummaryIndex], mBasicBlock);
501            mCarryOutVector[carrySummaryIndex] = summary_phi;
502        }
503       
504        // Set the basic block to the new end block
505        mBasicBlock = ifEndBlock;
506}
507
508void PabloCompiler::compileWhile(const While * whileStatement) {
509        const PabloBlockCarryData & cd = whileStatement -> getBody().carryData;
510        const unsigned baseCarryDataIdx = cd.getBlockCarryDataIndex();
511        const unsigned carryDataSize = cd.getTotalCarryDataSize();
512   
513        if (mWhileDepth == 0) {
514            for (auto i = 0; i < carryDataSize; ++i) {
515                genCarryDataLoad(baseCarryDataIdx + i);
516            }
517        }
518
519        SmallVector<const Next*, 4> nextNodes;
520        for (const PabloAST * node : whileStatement->getBody()) {
521            if (isa<Next>(node)) {
522                nextNodes.push_back(cast<Next>(node));
523            }
524        }
525
526        // Compile the initial iteration statements; the calls to genCarryDataStore will update the
527        // mCarryOutVector with the appropriate values. Although we're not actually entering a new basic
528        // block yet, increment the nesting depth so that any calls to genCarryDataLoad or genCarryDataStore
529        // will refer to the previous value.
530
531        ++mWhileDepth;
532
533        compileBlock(whileStatement->getBody());
534
535        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
536        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
537        // but works for now.
538
539        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
540        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
541        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
542
543        // Note: compileBlock may update the mBasicBlock pointer if the body contains nested loops. It
544        // may not be same one that we entered the function with.
545        IRBuilder<> bEntry(mBasicBlock);
546        bEntry.CreateBr(whileCondBlock);
547
548        // CONDITION BLOCK
549        IRBuilder<> bCond(whileCondBlock);
550        // generate phi nodes for any carry propogating instruction
551        std::vector<PHINode*> phiNodes(carryDataSize + nextNodes.size());
552        unsigned index = 0;
553        for (index = 0; index < carryDataSize; ++index) {
554            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
555            phi->addIncoming(mCarryOutVector[baseCarryDataIdx + index], mBasicBlock);
556            mCarryInVector[baseCarryDataIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
557            phiNodes[index] = phi;
558        }
559        // and for any Next nodes in the loop body
560        for (const Next * n : nextNodes) {
561            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->value());
562            auto f = mMarkerMap.find(n->getInitial());
563            assert (f != mMarkerMap.end());
564            phi->addIncoming(f->second, mBasicBlock);
565            mMarkerMap[n->getInitial()] = phi;
566            phiNodes[index++] = phi;
567        }
568
569        mBasicBlock = whileCondBlock;
570        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
571
572        // BODY BLOCK
573        //std::cerr << "Compile loop body\n";
574        mBasicBlock = whileBodyBlock;
575        compileBlock(whileStatement->getBody());
576        // update phi nodes for any carry propogating instruction
577        IRBuilder<> bWhileBody(mBasicBlock);
578        for (index = 0; index < carryDataSize; ++index) {
579            PHINode * phi = phiNodes[index];
580            Value * carryOut = bWhileBody.CreateOr(phi, mCarryOutVector[baseCarryDataIdx + index]);
581            phi->addIncoming(carryOut, mBasicBlock);
582            mCarryOutVector[baseCarryDataIdx + index] = phi;
583        }
584       
585        // and for any Next nodes in the loop body
586        for (const Next * n : nextNodes) {
587            auto f = mMarkerMap.find(n->getInitial());
588            assert (f != mMarkerMap.end());
589            PHINode * phi = phiNodes[index++];
590            phi->addIncoming(f->second, mBasicBlock);
591            mMarkerMap[n->getInitial()] = phi;
592        }
593
594        bWhileBody.CreateBr(whileCondBlock);
595
596        // EXIT BLOCK
597        mBasicBlock = whileEndBlock;
598        if (--mWhileDepth == 0) {
599            for (index = 0; index < carryDataSize; ++index) {
600                genCarryDataStore(phiNodes[index], baseCarryDataIdx + index);
601            }
602        }
603 
604}
605
606void PabloCompiler::compileStatement(const Statement * stmt)
607{
608    IRBuilder<> b(mBasicBlock);
609    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
610        Value * expr = compileExpression(assign->getExpr());
611        if (DumpTrace) {
612            genPrintRegister(assign->getName()->to_string(), expr);
613        }
614        mMarkerMap[assign] = expr;
615        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
616            SetOutputValue(expr, assign->getOutputIndex());
617        }
618    }
619    else if (const Next * next = dyn_cast<const Next>(stmt)) {
620        Value * expr = compileExpression(next->getExpr());
621        if (TraceNext) {
622            genPrintRegister(next->getInitial()->getName()->to_string(), expr);
623        }
624        mMarkerMap[next->getInitial()] = expr;
625    }
626    else if (const If * ifStatement = dyn_cast<const If>(stmt))
627    {
628        compileIf(ifStatement);
629    }
630    else if (const While * whileStatement = dyn_cast<const While>(stmt))
631    {
632        compileWhile(whileStatement);
633    }
634    else if (const Call* call = dyn_cast<Call>(stmt)) {
635        //Call the callee once and store the result in the marker map.
636        auto mi = mMarkerMap.find(call);
637        if (mi == mMarkerMap.end()) {
638            auto ci = mCalleeMap.find(call->getCallee());
639            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
640                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->to_string() + "\"");
641            }
642            mi = mMarkerMap.insert(std::make_pair(call, b.CreateCall(ci->second, mBasisBitsAddr))).first;
643        }
644        // return mi->second;
645    }
646    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
647        Value * expr = b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
648        if (DumpTrace) {
649            genPrintRegister(stmt->getName()->to_string(), expr);
650        }
651        mMarkerMap[pablo_and] = expr;
652        // return expr;
653    }
654    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
655        Value * expr = b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
656        if (DumpTrace) {
657            genPrintRegister(stmt->getName()->to_string(), expr);
658        }
659        mMarkerMap[pablo_or] = expr;
660        // return expr;
661    }
662    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
663        Value * expr = b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
664        mMarkerMap[pablo_xor] = expr;
665        // return expr;
666    }
667    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
668        Value* ifMask = compileExpression(sel->getCondition());
669        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
670        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
671        Value * expr = b.CreateOr(ifTrue, ifFalse);
672        if (DumpTrace) {
673            genPrintRegister(stmt->getName()->to_string(), expr);
674        }
675        mMarkerMap[sel] = expr;
676        // return expr;
677    }
678    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
679        Value * expr = genNot(compileExpression(pablo_not->getExpr()));
680        if (DumpTrace) {
681            genPrintRegister(stmt->getName()->to_string(), expr);
682        }
683        mMarkerMap[pablo_not] = expr;
684        // return expr;
685    }
686    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
687        Value* strm_value = compileExpression(adv->getExpr());
688        int shift = adv->getAdvanceAmount();
689        unsigned advance_index = adv->getLocalAdvanceIndex();
690        Value * expr = genAdvanceWithCarry(strm_value, shift, advance_index, stmt->getParent());
691        if (DumpTrace) {
692            genPrintRegister(stmt->getName()->to_string(), expr);
693        }
694        mMarkerMap[adv] = expr;
695        // return expr;
696    }
697    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt))
698    {
699        Value * marker = compileExpression(mstar->getMarker());
700        Value * cc = compileExpression(mstar->getCharClass());
701        Value * marker_and_cc = b.CreateAnd(marker, cc);
702        unsigned carry_index = mstar->getLocalCarryIndex();
703        Value * expr = b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc, carry_index, stmt->getParent()), cc), marker, "matchstar");
704        if (DumpTrace) {
705            genPrintRegister(stmt->getName()->to_string(), expr);
706        }
707        mMarkerMap[mstar] = expr;
708        // return expr;
709    }
710    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt))
711    {
712        Value * marker_expr = compileExpression(sthru->getScanFrom());
713        Value * cc_expr = compileExpression(sthru->getScanThru());
714        unsigned carry_index = sthru->getLocalCarryIndex();
715        Value * expr = b.CreateAnd(genAddWithCarry(marker_expr, cc_expr, carry_index, stmt->getParent()), genNot(cc_expr), "scanthru");
716        if (DumpTrace) {
717            genPrintRegister(stmt->getName()->to_string(), expr);
718        }
719        mMarkerMap[sthru] = expr;
720        // return expr;
721    }
722    else {
723        llvm::raw_os_ostream cerr(std::cerr);
724        PabloPrinter::print(stmt, cerr);
725        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
726    }
727}
728
729Value * PabloCompiler::compileExpression(const PabloAST * expr) {
730    if (isa<Ones>(expr)) {
731        return mOneInitializer;
732    }
733    else if (isa<Zeroes>(expr)) {
734        return mZeroInitializer;
735    }
736    else if (const Next * next = dyn_cast<Next>(expr)) {
737        expr = next->getInitial();
738    }
739    auto f = mMarkerMap.find(expr);
740    if (f == mMarkerMap.end()) {
741        std::string o;
742        llvm::raw_string_ostream str(o);
743        str << "\"";
744        PabloPrinter::print(expr, str);
745        str << "\" was used before definition!";
746        throw std::runtime_error(str.str());
747    }
748    return f->second;
749}
750
751
752#ifdef USE_UADD_OVERFLOW
753#ifdef USE_TWO_UADD_OVERFLOW
754PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
755    std::vector<Value*> struct_res_params;
756    struct_res_params.push_back(int128_e1);
757    struct_res_params.push_back(int128_e2);
758    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflow, struct_res_params, "uadd_overflow_res", mBasicBlock);
759    struct_res->setCallingConv(CallingConv::C);
760    struct_res->setTailCall(false);
761    AttributeSet struct_res_PAL;
762    struct_res->setAttributes(struct_res_PAL);
763
764    SumWithOverflowPack ret;
765
766    std::vector<unsigned> int128_sum_indices;
767    int128_sum_indices.push_back(0);
768    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
769
770    std::vector<unsigned> int1_obit_indices;
771    int1_obit_indices.push_back(1);
772    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
773
774    return ret;
775}
776#else
777PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
778    std::vector<Value*> struct_res_params;
779    struct_res_params.push_back(int128_e1);
780    struct_res_params.push_back(int128_e2);
781    struct_res_params.push_back(int1_cin);
782    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflowCarryin, struct_res_params, "uadd_overflow_res", mBasicBlock);
783    struct_res->setCallingConv(CallingConv::C);
784    struct_res->setTailCall(false);
785    AttributeSet struct_res_PAL;
786    struct_res->setAttributes(struct_res_PAL);
787
788    SumWithOverflowPack ret;
789
790    std::vector<unsigned> int128_sum_indices;
791    int128_sum_indices.push_back(0);
792    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
793
794    std::vector<unsigned> int1_obit_indices;
795    int1_obit_indices.push_back(1);
796    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
797
798    return ret;
799}
800#endif
801#endif
802
803
804Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2, unsigned localIndex, const PabloBlock * blk) {
805    IRBuilder<> b(mBasicBlock);
806    const PabloBlockCarryData & cd = blk->carryData;
807    const unsigned carryIdx = cd.carryOpCarryDataOffset(localIndex);
808    Value* carryq_value = genCarryDataLoad(carryIdx);
809#ifdef USE_TWO_UADD_OVERFLOW
810    //This is the ideal implementation, which uses two uadd.with.overflow
811    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
812    CastInst* int128_e1 = new BitCastInst(e1, b.getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
813    CastInst* int128_e2 = new BitCastInst(e2, b.getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
814    CastInst* int128_carryq_value = new BitCastInst(carryq_value, b.getIntNTy(BLOCK_SIZE), "carryq_128", mBasicBlock);
815
816    SumWithOverflowPack sumpack0, sumpack1;
817
818    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
819    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
820
821    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
822    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
823
824    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
825    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
826    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
827    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
828    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
829
830#elif defined USE_UADD_OVERFLOW
831    //use llvm.uadd.with.overflow.i128 or i256
832    CastInst* int128_e1 = new BitCastInst(e1, b.getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
833    CastInst* int128_e2 = new BitCastInst(e2, b.getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
834
835    //get i1 carryin from iBLOCK_SIZE
836    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
837    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
838    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
839
840    SumWithOverflowPack sumpack0;
841    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
842    Value* obit = sumpack0.obit;
843    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
844
845    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
846    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
847    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
848    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
849#elif (BLOCK_SIZE == 128)
850    //calculate carry through logical ops
851    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
852    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
853    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
854    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
855    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
856    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
857
858    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
859    Value* carry_out = genShiftHighbitToLow(BLOCK_SIZE, b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))));
860#else
861    //BLOCK_SIZE == 256, there is no other implementation
862    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
863#endif //USE_TWO_UADD_OVERFLOW
864
865    genCarryDataStore(carry_out, carryIdx);
866    return sum;
867}
868//#define CARRY_DEBUG
869Value* PabloCompiler::genCarryDataLoad(const unsigned index) {
870    assert (index < mCarryInVector.size());
871    if (mWhileDepth == 0) {
872        IRBuilder<> b(mBasicBlock);
873        mCarryInVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
874    }
875#ifdef CARRY_DEBUG
876    std::cerr << "genCarryDataLoad " << index << std::endl;
877    genPrintRegister("carry_in_" + std::to_string(index), mCarryInVector[index]);
878#endif
879    return mCarryInVector[index];
880}
881
882void PabloCompiler::genCarryDataStore(Value* carryOut, const unsigned index ) {
883    assert (carryOut);
884    assert (index < mCarryOutVector.size());
885    if (mWhileDepth == 0) {
886        IRBuilder<> b(mBasicBlock);
887        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
888    }
889    mCarryDataSummaryIdx[index] = -1;
890    mCarryOutVector[index] = carryOut;
891#ifdef CARRY_DEBUG
892    std::cerr << "genCarryDataStore " << index << std::endl;
893    genPrintRegister("carry_out_" + std::to_string(index), mCarryOutVector[index]);
894#endif
895    //std::cerr << "mCarryOutVector[" << index << "]]\n";
896}
897
898inline Value* PabloCompiler::genBitBlockAny(Value* test) {
899    IRBuilder<> b(mBasicBlock);
900    Value* cast_marker_value_1 = b.CreateBitCast(test, b.getIntNTy(BLOCK_SIZE));
901    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(b.getIntNTy(BLOCK_SIZE), 0));
902}
903
904Value * PabloCompiler::genShiftHighbitToLow(unsigned FieldWidth, Value * op) {
905    unsigned FieldCount = BLOCK_SIZE/FieldWidth;
906    IRBuilder<> b(mBasicBlock);
907    VectorType * vType = VectorType::get(IntegerType::get(mMod->getContext(), FieldWidth), FieldCount);
908    Value * v = b.CreateBitCast(op, vType);
909    return b.CreateBitCast(b.CreateLShr(v, FieldWidth - 1), mBitBlockType);
910}
911
912Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
913    IRBuilder<> b(mBasicBlock);
914    Value* i128_val = b.CreateBitCast(e, b.getIntNTy(BLOCK_SIZE));
915    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
916}
917
918inline Value* PabloCompiler::genNot(Value* expr) {
919    IRBuilder<> b(mBasicBlock);
920    return b.CreateXor(expr, mOneInitializer, "not");
921}
922
923Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
924    if (shift_amount >= LongAdvanceBase) {
925        return genLongAdvanceWithCarry(strm_value, shift_amount, localIndex, blk);
926    }
927    else if (shift_amount == 1) {
928        return genUnitAdvanceWithCarry(strm_value, localIndex, blk);
929    }
930    IRBuilder<> b(mBasicBlock);
931    const PabloBlockCarryData & cd = blk->carryData;
932    const auto advanceIndex = cd.shortAdvanceCarryDataOffset(localIndex);
933    Value* result_value;
934   
935    if (shift_amount == 0) {
936        result_value = genCarryDataLoad(advanceIndex);
937    }
938    else {
939        Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(advanceIndex), b.getIntNTy(BLOCK_SIZE));
940        Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
941        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
942        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
943    }
944    genCarryDataStore(strm_value, advanceIndex);
945    return result_value;
946}
947                   
948Value* PabloCompiler::genUnitAdvanceWithCarry(Value* strm_value, unsigned localIndex, const PabloBlock * blk) {
949    IRBuilder<> b(mBasicBlock);
950    const PabloBlockCarryData & cd = blk->carryData;
951    const auto advanceIndex = cd.unitAdvanceCarryDataOffset(localIndex);
952    Value* result_value;
953   
954#if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
955    Value* advanceq_value = genShiftHighbitToLow(BLOCK_SIZE, genCarryDataLoad(advanceIndex));
956    Value* srli_1_value = b.CreateLShr(strm_value, 63);
957    Value* packed_shuffle;
958    Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
959    Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
960    packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
961   
962    Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
963    Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
964   
965    Value* shl_value = b.CreateShl(strm_value, const_packed_2);
966    result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
967#else
968    Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(advanceIndex), b.getIntNTy(BLOCK_SIZE));
969    Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
970    Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, 1), b.CreateLShr(advanceq_longint, BLOCK_SIZE - 1), "advance");
971    result_value = b.CreateBitCast(adv_longint, mBitBlockType);
972   
973#endif
974    genCarryDataStore(strm_value, advanceIndex);
975    return result_value;
976}
977                   
978                    //
979// Generate code for long advances >= LongAdvanceBase
980//
981Value* PabloCompiler::genLongAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
982    IRBuilder<> b(mBasicBlock);
983    const PabloBlockCarryData & cd = blk->carryData;
984    const unsigned block_shift = shift_amount % BLOCK_SIZE;
985    const unsigned advanceEntries = cd.longAdvanceEntries(shift_amount);
986    const unsigned bufsize = cd.longAdvanceBufferSize(shift_amount);
987    std::cerr << "shift_amount = " << shift_amount << " bufsize = " << bufsize << std::endl;
988    Value * indexMask = b.getInt64(bufsize - 1);  // A mask to implement circular buffer indexing
989    Value * advBaseIndex = b.getInt64(cd.longAdvanceCarryDataOffset(localIndex));
990    Value * storeIndex = b.CreateAdd(b.CreateAnd(mBlockNo, indexMask), advBaseIndex);
991    Value * loadIndex = b.CreateAdd(b.CreateAnd(b.CreateSub(mBlockNo, b.getInt64(advanceEntries)), indexMask), advBaseIndex);
992    Value * storePtr = b.CreateGEP(mCarryDataPtr, storeIndex);
993    Value * loadPtr = b.CreateGEP(mCarryDataPtr, loadIndex);
994    Value* result_value;
995
996    if (block_shift == 0) {
997        result_value = b.CreateAlignedLoad(loadPtr, BLOCK_SIZE/8);
998    }
999    else if (advanceEntries == 1) {
1000        Value* advanceq_longint = b.CreateBitCast(b.CreateAlignedLoad(loadPtr, BLOCK_SIZE/8), b.getIntNTy(BLOCK_SIZE));
1001        Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
1002        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
1003        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1004    }
1005    else {
1006        // The advance is based on the two oldest bit blocks in the advance buffer.
1007        // The buffer is maintained as a circular buffer of size bufsize.
1008        // Indexes within the buffer are computed by bitwise and with the indexMask.
1009        Value * loadIndex2 = b.CreateAdd(b.CreateAnd(b.CreateSub(mBlockNo, b.getInt64(advanceEntries-1)), indexMask), advBaseIndex);
1010        Value * loadPtr2 = b.CreateGEP(mCarryDataPtr, loadIndex2);
1011        Value* advanceq_longint = b.CreateBitCast(b.CreateAlignedLoad(loadPtr, BLOCK_SIZE/8), b.getIntNTy(BLOCK_SIZE));
1012        //genPrintRegister("advanceq_longint", b.CreateBitCast(advanceq_longint, mBitBlockType));
1013        Value* strm_longint = b.CreateBitCast(b.CreateAlignedLoad(loadPtr2, BLOCK_SIZE/8), b.getIntNTy(BLOCK_SIZE));
1014        //genPrintRegister("strm_longint", b.CreateBitCast(strm_longint, mBitBlockType));
1015        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
1016        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1017    }
1018    b.CreateAlignedStore(strm_value, storePtr, BLOCK_SIZE/8);
1019    return result_value;
1020}
1021   
1022void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
1023    IRBuilder<> b(mBasicBlock);
1024    if (marker->getType()->isPointerTy()) {
1025        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
1026    }
1027    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
1028    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
1029    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
1030}
1031
1032CompiledPabloFunction::CompiledPabloFunction(size_t carryDataSize, Function * function, ExecutionEngine * executionEngine)
1033: CarryDataSize(carryDataSize)
1034, FunctionPointer(executionEngine->getPointerToFunction(function))
1035, mFunction(function)
1036, mExecutionEngine(executionEngine)
1037{
1038
1039}
1040
1041// Clean up the memory for the compiled function once we're finished using it.
1042CompiledPabloFunction::~CompiledPabloFunction() {
1043    if (mExecutionEngine) {
1044        assert (mFunction);
1045        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
1046        delete mExecutionEngine;
1047    }
1048}
1049
1050}
Note: See TracBrowser for help on using the repository browser.